New local_state_space_iteration_k MEX, for nonlinear filters at k-order

It applies the approximated policy function to a set of particles, using
Dynare++ routines.

There is support for parallelization, using Dynare++ multithreading
model (itself based on C++11 threads; we don’t use OpenMP because it is
incompatible with MKL). For the time being, default to a single thread. This
should be later refined through empirical testing.
time-shift
Sébastien Villemot 2019-06-28 18:30:28 +02:00 committed by Stéphane Adjemian (Charybdis)
parent f8fb8c0450
commit 996bdd6c64
10 changed files with 265 additions and 9 deletions

View File

@ -180,6 +180,7 @@ public:
protected:
void fillTensors(const _Tg &g, double sigma);
void centralize(const DecisionRuleImpl &dr);
public:
void eval(emethod em, Vector &out, const ConstVector &v) const override;
};

View File

@ -71,8 +71,8 @@ class TLException
{
const std::string fname;
int lnum;
const std::string message;
public:
const std::string message;
TLException(std::string fname_arg, int lnum_arg, std::string message_arg)
: fname{std::move(fname_arg)},
lnum{lnum_arg},

View File

@ -71,6 +71,7 @@ options_.huge_number = 1e7;
% Default number of threads for parallelized mex files.
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = num_procs;
options_.threads.local_state_space_iteration_2 = 1;
options_.threads.local_state_space_iteration_k = 1;
options_.threads.perfect_foresight_problem = num_procs;
options_.threads.k_order_perturbation = max(1, num_procs/2);

View File

@ -40,6 +40,8 @@ switch mexname
options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n;
case 'local_state_space_iteration_2'
options_.threads.local_state_space_iteration_2 = n;
case 'local_state_space_iteration_k'
options_.threads.local_state_space_iteration_2 = n;
case 'perfect_foresight_problem'
options_.threads.perfect_foresight_problem = n;
case 'k_order_perturbation'

View File

@ -1,12 +1,19 @@
mex_PROGRAMS = local_state_space_iteration_2
mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_k
nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.cc
local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES)
CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES)
local_state_space_iteration_k_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../../dynare++/utils/cc $(CPPFLAGS_MATIO)
local_state_space_iteration_k_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
local_state_space_iteration_k_LDADD = ../libdynare++/libdynare++.a $(LIBADD_MATIO)
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \
$(nodist_local_state_space_iteration_k_SOURCES)
CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \
$(nodist_local_state_space_iteration_k_SOURCES)
%.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
$(LN_S) -f $< $@

View File

@ -1,10 +1,10 @@
ACLOCAL_AMFLAGS = -I ../../../m4
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol local_state_space_iterations perfect_foresight_problem num_procs
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs
# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_
if ENABLE_MEX_DYNAREPLUSPLUS
SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_
SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_ local_state_space_iterations
endif
if ENABLE_MEX_MS_SBVAR

View File

@ -1,10 +1,10 @@
ACLOCAL_AMFLAGS = -I ../../../m4
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol local_state_space_iterations perfect_foresight_problem num_procs
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs
# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_
if ENABLE_MEX_DYNAREPLUSPLUS
SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_
SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_ local_state_space_iterations
endif
if ENABLE_MEX_MS_SBVAR

View File

@ -0,0 +1,178 @@
/*
* Copyright © 2019 Dynare Team
*
* This file is part of Dynare.
*
* Dynare is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Dynare is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Dynare. If not, see <http://www.gnu.org/licenses/>.
*/
#include <memory>
#include <string>
#include <utility>
#include <dynmex.h>
#include "tl_static.hh"
#include "decision_rule.hh"
#include "sthread.hh"
/* The class that does the real job. It computes the next iteration for a given
range of particles. There will be as many instances as there are parallel
threads.
Note that we cant use OpenMP since it is incompatible with MKL, which is
used internally by Dynare++ routines under MATLAB. Hence we fall back to
the Dynare++ multithreading abstraction. */
struct ParticleWorker : public sthread::detach_thread
{
const int npred_both, exo_nbr;
const std::pair<size_t,size_t> particle_range;
const ConstGeneralMatrix &yhat, &epsilon;
const Vector &ys_reordered;
const UnfoldDecisionRule &dr;
GeneralMatrix &ynext;
ParticleWorker(int npred_both_arg, int exo_nbr_arg, std::pair<size_t,size_t> particle_range_arg,
const ConstGeneralMatrix &yhat_arg, const ConstGeneralMatrix &epsilon_arg,
const Vector &ys_reordered_arg, const UnfoldDecisionRule &dr_arg,
GeneralMatrix &ynext_arg)
: npred_both{npred_both_arg}, exo_nbr{exo_nbr_arg}, particle_range{std::move(particle_range_arg)},
yhat{yhat_arg}, epsilon{epsilon_arg}, ys_reordered{ys_reordered_arg}, dr{dr_arg},
ynext{ynext_arg}
{
}
void operator()(std::mutex &mut) override
{
Vector dyu(npred_both+exo_nbr);
Vector dy(dyu, 0, npred_both);
Vector u(dyu, npred_both, exo_nbr);
for (size_t i = particle_range.first; i < particle_range.second; i++)
{
dy = yhat.getCol(i);
u = epsilon.getCol(i);
Vector ynext_col{ynext.getCol(i)};
dr.eval(DecisionRule::emethod::horner, ynext_col, dyu);
ynext_col.add(1.0, ys_reordered);
}
}
};
void
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
if (nrhs != 5 || nlhs != 1)
mexErrMsgTxt("Must have 5 input arguments and 1 output argument");
// Give explicit names to input arguments
const mxArray *yhat_mx = prhs[0];
const mxArray *epsilon_mx = prhs[1];
const mxArray *dr_mx = prhs[2];
const mxArray *M_mx = prhs[3];
const mxArray *options_mx = prhs[4];
auto get_int_field = [](const mxArray *struct_mx, const std::string &fieldname)
{
mxArray *field_mx = mxGetField(struct_mx, 0, fieldname.c_str());
if (!(field_mx && mxIsScalar(field_mx) && mxIsNumeric(field_mx)))
mexErrMsgTxt(("Field `" + fieldname + "' should be a numeric scalar").c_str());
return static_cast<int>(mxGetScalar(field_mx));
};
int nstatic = get_int_field(M_mx, "nstatic");
int npred = get_int_field(M_mx, "npred");
int nboth = get_int_field(M_mx, "nboth");
int nfwrd = get_int_field(M_mx, "nfwrd");
int endo_nbr = nstatic + npred + nboth + nfwrd;
int exo_nbr = get_int_field(M_mx, "exo_nbr");
int order = get_int_field(options_mx, "order");
const mxArray *order_var_mx = mxGetField(dr_mx, 0, "order_var");
if (!(order_var_mx && mxIsDouble(order_var_mx) && mxGetNumberOfElements(order_var_mx) == static_cast<size_t>(endo_nbr)))
mexErrMsgTxt("Field dr.order_var should be a double precision vector with endo_nbr elements");
const mxArray *ys_mx = mxGetField(dr_mx, 0, "ys");
if (!ys_mx || !mxIsDouble(ys_mx) || mxGetNumberOfElements(ys_mx) != static_cast<size_t>(endo_nbr))
mexErrMsgTxt("Field dr.ys should be a double precision vector with endo_nbr elements");
size_t nparticles = mxGetN(yhat_mx);
if (mxGetN(epsilon_mx) != nparticles)
mexErrMsgTxt("epsilon and yhat don't have the same number of columns");
if (!(mxIsDouble(yhat_mx) && mxGetM(yhat_mx) == static_cast<size_t>(npred + nboth)))
mexErrMsgTxt("yhat should be a double precision matrix with npred+nboth rows");
if (!(mxIsDouble(epsilon_mx) && mxGetM(epsilon_mx) == static_cast<size_t>(exo_nbr)))
mexErrMsgTxt("epsilon should be a double precision matrix with exo_nbr rows");
const mxArray *threads_mx = mxGetField(options_mx, 0, "threads");
if (!threads_mx)
mexErrMsgTxt("Can't find field `threads' in options_");
int num_threads = get_int_field(threads_mx, "local_state_space_iteration_k");
ConstGeneralMatrix yhat{yhat_mx};
ConstGeneralMatrix epsilon{epsilon_mx};
ConstVector ys{ys_mx};
const double *order_var = mxGetPr(order_var_mx);
try
{
TLStatic::init(order, npred+nboth+exo_nbr);
}
catch (TLException &e)
{
mexErrMsgTxt(("Dynare++ error: " + e.message).c_str());
}
// Form the polynomial (copied from dynare_simul_.cc)
UTensorPolynomial pol(endo_nbr, npred+nboth+exo_nbr);
for (int dim = 0; dim <= order; dim++)
{
const mxArray *gk_m = mxGetField(dr_mx, 0, ("g_" + std::to_string(dim)).c_str());
if (!gk_m)
mexErrMsgTxt(("Can't find field `g_" + std::to_string(dim) + "' in dr structure").c_str());
ConstTwoDMatrix gk{gk_m};
FFSTensor ft{endo_nbr, npred+nboth+exo_nbr, dim};
if (ft.ncols() != gk.ncols())
mexErrMsgTxt(("Wrong number of columns for folded tensor: got " + std::to_string(gk.ncols()) + " but i want " + std::to_string(ft.ncols()) + '\n').c_str());
if (ft.nrows() != gk.nrows())
mexErrMsgTxt(("Wrong number of rows for folded tensor: got " + std::to_string(gk.nrows()) + " but i want " + std::to_string(ft.nrows()) + '\n').c_str());
ft.zeros();
ft.add(1.0, gk);
pol.insert(std::make_unique<UFSTensor>(ft));
}
// Construct the reordered steady state (dr.ys(dr.order_var))
Vector ys_reordered(endo_nbr);
for (int i = 0; i < endo_nbr; i++)
ys_reordered[i] = ys[static_cast<int>(order_var[i])-1];
// Form the decision rule
UnfoldDecisionRule dr(pol, PartitionY(nstatic, npred, nboth, nfwrd),
exo_nbr, ys_reordered);
// Create the result matrix
plhs[0] = mxCreateDoubleMatrix(endo_nbr, nparticles, mxREAL);
GeneralMatrix ynext{plhs[0]};
// Run the real job in parallel
sthread::detach_thread_group::max_parallel_threads = num_threads;
sthread::detach_thread_group group;
// The following is equivalent to ceil(nparticles/num_threads), but with integer arithmetic
int part_by_thread = nparticles / num_threads + (nparticles % num_threads > 0);
for (size_t i = 0; i < nparticles; i += part_by_thread)
group.insert(std::make_unique<ParticleWorker>(npred+nboth, exo_nbr,
std::make_pair(i, std::min(i+part_by_thread, nparticles)),
yhat, epsilon, ys_reordered, dr, ynext));
group.run();
}

View File

@ -373,7 +373,8 @@ MODFILES = \
bgp/solow-1/solow.mod \
bgp/nk-1/nk.mod \
bgp/ramsey-1/ramsey.mod \
dynare-command-options/ramst.mod
dynare-command-options/ramst.mod \
particle/local_state_space_iteration_k_test.mod
PARTICLEFILES = \
particle/dsge_base2.mod \

View File

@ -0,0 +1,66 @@
/*
Tests that local_state_space_iteration_2 and local_state_space_iteration_k
(for k=2) return the same results.
*/
var y, k, a, h, b, c;
varexo e, u;
parameters beta, rho, alpha, delta, theta, psi, tau;
alpha = 0.36;
rho = 0.95;
tau = 0.025;
beta = 0.99;
delta = 0.025;
psi = 0;
theta = 2.95;
phi = 0.1;
model;
c*theta*h^(1+psi)=(1-alpha)*y;
k = beta*(((exp(b)*c)/(exp(b(+1))*c(+1)))
*(exp(b(+1))*alpha*y(+1)+(1-delta)*k));
y = exp(a)*(k(-1)^alpha)*(h^(1-alpha));
k = exp(b)*(y-c)+(1-delta)*k(-1);
a = rho*a(-1)+tau*b(-1) + e;
b = tau*a(-1)+rho*b(-1) + u;
end;
initval;
y = 1.08068253095672;
c = 0.80359242014163;
h = 0.29175631001732;
k = 11.08360443260358;
a = 0;
b = 0;
e = 0;
u = 0;
end;
shocks;
var e; stderr 0.009;
var u; stderr 0.009;
var e, u = phi*0.009*0.009;
end;
stoch_simul(order=2, irf=0, k_order_solver);
nparticles = 100;
/* We generate particles using realistic distributions (though this is not
strictly needed) */
state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+M_.npred+M_.nboth));
yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
dr = oo_.dr;
ynext = local_state_space_iteration_2(yhat, epsilon, dr.ghx, dr.ghu, dr.ys(dr.order_var)+0.5*dr.ghs2, dr.ghxx, dr.ghuu, dr.ghxu, 1);
ynext2 = local_state_space_iteration_k(yhat, epsilon, oo_.dr, M_, options_);
if max(max(abs(ynext-ynext2))) > 1e-14
error('Inconsistency between local_state_space_iteration_2 and local_state_space_iteration_k')
end