New local_state_space_iteration_k MEX, for nonlinear filters at k-order

It applies the approximated policy function to a set of particles, using Dynare++ routines. There is support for parallelization, using the Dynare++ multithreading model (itself based on C++11 threads; we don’t use OpenMP because it is incompatible with MKL). For the time being, the number of threads defaults to one. This default should be refined later through empirical testing.
2019-06-28 18:30:28 +02:00 · 2019-06-28 18:30:28 +02:00 · 996bdd6c64
parent f8fb8c0450
commit 996bdd6c64
10 changed files with 265 additions and 9 deletions
--- a/dynare++/kord/decision_rule.hh
+++ b/dynare++/kord/decision_rule.hh
@ -180,6 +180,7 @@ public:
 protected:
  void fillTensors(const _Tg &g, double sigma);
  void centralize(const DecisionRuleImpl &dr);
+public:
  void eval(emethod em, Vector &out, const ConstVector &v) const override;
 };

--- a/dynare++/tl/cc/tl_exception.hh
+++ b/dynare++/tl/cc/tl_exception.hh
@ -71,8 +71,8 @@ class TLException
 {
  const std::string fname;
  int lnum;
-  const std::string message;
 public:
+  const std::string message;
  TLException(std::string fname_arg, int lnum_arg, std::string message_arg)
    : fname{std::move(fname_arg)},
      lnum{lnum_arg},
--- a/matlab/default_option_values.m
+++ b/matlab/default_option_values.m
@ -71,6 +71,7 @@ options_.huge_number = 1e7;
 % Default number of threads for parallelized mex files.
 options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = num_procs;
 options_.threads.local_state_space_iteration_2 = 1;
+options_.threads.local_state_space_iteration_k = 1;
 options_.threads.perfect_foresight_problem = num_procs;
 options_.threads.k_order_perturbation = max(1, num_procs/2);

--- a/matlab/set_dynare_threads.m
+++ b/matlab/set_dynare_threads.m
@ -40,6 +40,8 @@ switch mexname
    options_.threads.kronecker.sparse_hessian_times_B_kronecker_C = n;
  case 'local_state_space_iteration_2'
    options_.threads.local_state_space_iteration_2 = n;
+  case 'local_state_space_iteration_k'
+    options_.threads.local_state_space_iteration_k = n; % field read by the local_state_space_iteration_k MEX
  case 'perfect_foresight_problem'
    options_.threads.perfect_foresight_problem = n;
  case 'k_order_perturbation'
--- a/mex/build/local_state_space_iterations.am
+++ b/mex/build/local_state_space_iterations.am
@ -1,12 +1,19 @@
-mex_PROGRAMS = local_state_space_iteration_2
+mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_k

 nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
+nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.cc

 local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
 local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)

-BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES)
-CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES)
+local_state_space_iteration_k_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../../dynare++/utils/cc $(CPPFLAGS_MATIO)
+local_state_space_iteration_k_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
+local_state_space_iteration_k_LDADD = ../libdynare++/libdynare++.a  $(LIBADD_MATIO)
+
+BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \
+		$(nodist_local_state_space_iteration_k_SOURCES)
+CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \
+	     $(nodist_local_state_space_iteration_k_SOURCES)

 %.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
 	$(LN_S) -f $< $@
--- a/mex/build/matlab/Makefile.am
+++ b/mex/build/matlab/Makefile.am
@ -1,10 +1,10 @@
 ACLOCAL_AMFLAGS = -I ../../../m4

-SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol local_state_space_iterations perfect_foresight_problem num_procs
+SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs

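-# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_
+# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_, local_state_space_iterations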
 if ENABLE_MEX_DYNAREPLUSPLUS
-SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_
+SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_ local_state_space_iterations
 endif

 if ENABLE_MEX_MS_SBVAR
--- a/mex/build/octave/Makefile.am
+++ b/mex/build/octave/Makefile.am
@ -1,10 +1,10 @@
 ACLOCAL_AMFLAGS = -I ../../../m4

-SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol local_state_space_iterations perfect_foresight_problem num_procs
+SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs

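-# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_
+# libdynare++ must come before gensylv, k_order_perturbation, dynare_simul_, local_state_space_iterations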
 if ENABLE_MEX_DYNAREPLUSPLUS
-SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_
+SUBDIRS += libdynare++ gensylv k_order_perturbation dynare_simul_ local_state_space_iterations
 endif

 if ENABLE_MEX_MS_SBVAR
--- a/mex/sources/local_state_space_iterations/local_state_space_iteration_k.cc
+++ b/mex/sources/local_state_space_iterations/local_state_space_iteration_k.cc
@ -0,0 +1,178 @@
+/*
+ * Copyright © 2019 Dynare Team
+ *
+ * This file is part of Dynare.
+ *
+ * Dynare is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Dynare is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+
+#include <dynmex.h>
+
+#include "tl_static.hh"
+#include "decision_rule.hh"
+#include "sthread.hh"
+
+/* Worker that advances one contiguous slice of the particle set by one
+   period. The MEX gateway creates one instance per slice and hands them to a
+   sthread::detach_thread_group.
+
+   OpenMP is deliberately not used: it is incompatible with MKL, which the
+   Dynare++ routines rely on internally under MATLAB. The Dynare++
+   multithreading abstraction is used instead. */
+struct ParticleWorker : public sthread::detach_thread
+{
+  // Number of rows taken from yhat (npred+nboth) and from epsilon (exo_nbr)
+  const int npred_both;
+  const int exo_nbr;
+  // Half-open range [first, second) of particle (column) indices to process
+  const std::pair<size_t,size_t> particle_range;
+  // Inputs, one column per particle
+  const ConstGeneralMatrix &yhat;
+  const ConstGeneralMatrix &epsilon;
+  // Vector added to every evaluated column
+  const Vector &ys_reordered;
+  // Decision rule evaluated on each particle
+  const UnfoldDecisionRule &dr;
+  // Output matrix; only the columns in particle_range are written
+  GeneralMatrix &ynext;
+
+  ParticleWorker(int npred_both_arg, int exo_nbr_arg, std::pair<size_t,size_t> particle_range_arg,
+                 const ConstGeneralMatrix &yhat_arg, const ConstGeneralMatrix &epsilon_arg,
+                 const Vector &ys_reordered_arg, const UnfoldDecisionRule &dr_arg,
+                 GeneralMatrix &ynext_arg)
+    : npred_both{npred_both_arg},
+      exo_nbr{exo_nbr_arg},
+      particle_range{particle_range_arg},
+      yhat{yhat_arg},
+      epsilon{epsilon_arg},
+      ys_reordered{ys_reordered_arg},
+      dr{dr_arg},
+      ynext{ynext_arg}
+  {
+  }
+
+  /* Processes every particle of particle_range. The mutex supplied by the
+     thread group is not used in this body. */
+  void operator()(std::mutex &mut) override
+  {
+    /* Stacked argument passed to the decision rule: first npred_both entries
+       taken from yhat, then exo_nbr entries taken from epsilon.
+       NOTE(review): state_part and shock_part are presumably views sharing
+       storage with `stacked` (the loop relies on writes to them being visible
+       through `stacked`); confirm against the Dynare++ Vector class. */
+    Vector stacked(npred_both+exo_nbr);
+    Vector state_part(stacked, 0, npred_both);
+    Vector shock_part(stacked, npred_both, exo_nbr);
+
+    const size_t first = particle_range.first;
+    const size_t last = particle_range.second;
+    for (size_t col = first; col < last; col++)
+      {
+        state_part = yhat.getCol(col);
+        shock_part = epsilon.getCol(col);
+
+        // Evaluate the rule into column `col` of the output, then add ys_reordered
+        Vector out_col{ynext.getCol(col)};
+        dr.eval(DecisionRule::emethod::horner, out_col, stacked);
+        out_col.add(1.0, ys_reordered);
+      }
+  }
+};
+
+/* MEX gateway. Expected call (argument names follow the variables below):
+
+     ynext = local_state_space_iteration_k(yhat, epsilon, dr, M_, options_)
+
+   Inputs:
+   - yhat: double matrix with npred+nboth rows, one column per particle
+   - epsilon: double matrix with exo_nbr rows and as many columns as yhat
+   - dr: structure with fields order_var and ys (endo_nbr elements each) and
+     g_0, ..., g_order (folded tensors of the decision rule)
+   - M_: structure with numeric scalar fields nstatic, npred, nboth, nfwrd and
+     exo_nbr
+   - options_: structure with fields order and
+     threads.local_state_space_iteration_k
+
+   Output:
+   - ynext: endo_nbr x nparticles double matrix; column i is the decision rule
+     evaluated at (yhat(:,i), epsilon(:,i)) plus dr.ys(dr.order_var)
+
+   Invalid inputs are reported through mexErrMsgTxt. */
+void
+mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+  if (nrhs != 5 || nlhs != 1)
+    mexErrMsgTxt("Must have 5 input arguments and 1 output argument");
+
+  // Give explicit names to input arguments
+  const mxArray *yhat_mx = prhs[0];
+  const mxArray *epsilon_mx = prhs[1];
+  const mxArray *dr_mx = prhs[2];
+  const mxArray *M_mx = prhs[3];
+  const mxArray *options_mx = prhs[4];
+
+  // Reads a numeric scalar field of a structure and truncates it to int
+  auto get_int_field = [](const mxArray *struct_mx, const std::string &fieldname)
+                       {
+                         mxArray *field_mx = mxGetField(struct_mx, 0, fieldname.c_str());
+                         if (!(field_mx && mxIsScalar(field_mx) && mxIsNumeric(field_mx)))
+                           mexErrMsgTxt(("Field `" + fieldname + "' should be a numeric scalar").c_str());
+                         return static_cast<int>(mxGetScalar(field_mx));
+                       };
+
+  int nstatic = get_int_field(M_mx, "nstatic");
+  int npred = get_int_field(M_mx, "npred");
+  int nboth = get_int_field(M_mx, "nboth");
+  int nfwrd = get_int_field(M_mx, "nfwrd");
+  int endo_nbr = nstatic + npred + nboth + nfwrd;
+  int exo_nbr = get_int_field(M_mx, "exo_nbr");
+  int order = get_int_field(options_mx, "order");
+
+  const mxArray *order_var_mx = mxGetField(dr_mx, 0, "order_var");
+  if (!(order_var_mx && mxIsDouble(order_var_mx) && mxGetNumberOfElements(order_var_mx) == static_cast<size_t>(endo_nbr)))
+    mexErrMsgTxt("Field dr.order_var should be a double precision vector with endo_nbr elements");
+  const mxArray *ys_mx = mxGetField(dr_mx, 0, "ys");
+  if (!ys_mx || !mxIsDouble(ys_mx) || mxGetNumberOfElements(ys_mx) != static_cast<size_t>(endo_nbr))
+    mexErrMsgTxt("Field dr.ys should be a double precision vector with endo_nbr elements");
+
+  /* The entries of order_var are used below as 1-based indices into ys, so
+     check their range here. The test is written so that NaN is rejected too
+     (converting it to int would be undefined behaviour). */
+  const double *order_var = mxGetPr(order_var_mx);
+  for (int i = 0; i < endo_nbr; i++)
+    if (!(order_var[i] >= 1 && order_var[i] <= endo_nbr))
+      mexErrMsgTxt("Field dr.order_var should contain indices between 1 and endo_nbr");
+
+  size_t nparticles = mxGetN(yhat_mx);
+  if (mxGetN(epsilon_mx) != nparticles)
+    mexErrMsgTxt("epsilon and yhat don't have the same number of columns");
+  if (!(mxIsDouble(yhat_mx) && mxGetM(yhat_mx) == static_cast<size_t>(npred + nboth)))
+    mexErrMsgTxt("yhat should be a double precision matrix with npred+nboth rows");
+  if (!(mxIsDouble(epsilon_mx) && mxGetM(epsilon_mx) == static_cast<size_t>(exo_nbr)))
+    mexErrMsgTxt("epsilon should be a double precision matrix with exo_nbr rows");
+
+  const mxArray *threads_mx = mxGetField(options_mx, 0, "threads");
+  if (!threads_mx)
+    mexErrMsgTxt("Can't find field `threads' in options_");
+  int num_threads = get_int_field(threads_mx, "local_state_space_iteration_k");
+  /* A zero thread count would make the partitioning below divide by zero, and
+     a negative one would be converted to a huge unsigned divisor. */
+  if (num_threads < 1)
+    mexErrMsgTxt("Field options_.threads.local_state_space_iteration_k should be a positive integer");
+
+  ConstGeneralMatrix yhat{yhat_mx};
+  ConstGeneralMatrix epsilon{epsilon_mx};
+  ConstVector ys{ys_mx};
+
+  try
+    {
+      TLStatic::init(order, npred+nboth+exo_nbr);
+    }
+  catch (const TLException &e)
+    {
+      mexErrMsgTxt(("Dynare++ error: " + e.message).c_str());
+    }
+
+  // Form the polynomial (copied from dynare_simul_.cc)
+  UTensorPolynomial pol(endo_nbr, npred+nboth+exo_nbr);
+  for (int dim = 0; dim <= order; dim++)
+    {
+      const mxArray *gk_m = mxGetField(dr_mx, 0, ("g_" + std::to_string(dim)).c_str());
+      if (!gk_m)
+        mexErrMsgTxt(("Can't find field `g_" + std::to_string(dim) + "' in dr structure").c_str());
+      ConstTwoDMatrix gk{gk_m};
+      FFSTensor ft{endo_nbr, npred+nboth+exo_nbr, dim};
+      if (ft.ncols() != gk.ncols())
+        mexErrMsgTxt(("Wrong number of columns for folded tensor: got " + std::to_string(gk.ncols()) + " but i want " + std::to_string(ft.ncols()) + '\n').c_str());
+      if (ft.nrows() != gk.nrows())
+        mexErrMsgTxt(("Wrong number of rows for folded tensor: got " + std::to_string(gk.nrows()) + " but i want " + std::to_string(ft.nrows()) + '\n').c_str());
+      ft.zeros();
+      ft.add(1.0, gk);
+      pol.insert(std::make_unique<UFSTensor>(ft));
+    }
+
+  // Construct the reordered steady state (dr.ys(dr.order_var))
+  Vector ys_reordered(endo_nbr);
+  for (int i = 0; i < endo_nbr; i++)
+    ys_reordered[i] = ys[static_cast<int>(order_var[i])-1];
+
+  // Form the decision rule
+  UnfoldDecisionRule dr(pol, PartitionY(nstatic, npred, nboth, nfwrd),
+                        exo_nbr, ys_reordered);
+
+  // Create the result matrix
+  plhs[0] = mxCreateDoubleMatrix(endo_nbr, nparticles, mxREAL);
+  GeneralMatrix ynext{plhs[0]};
+
+  // Run the real job in parallel
+  sthread::detach_thread_group::max_parallel_threads = num_threads;
+  sthread::detach_thread_group group;
+  /* Number of particles per worker: ceil(nparticles/num_threads), computed
+     with unsigned integer arithmetic (num_threads >= 1 is guaranteed above) */
+  const size_t nthreads = static_cast<size_t>(num_threads);
+  const size_t part_by_thread = nparticles / nthreads + (nparticles % nthreads > 0 ? 1 : 0);
+  // Each worker gets the half-open column range [i, min(i+part_by_thread, nparticles))
+  for (size_t i = 0; i < nparticles; i += part_by_thread)
+    group.insert(std::make_unique<ParticleWorker>(npred+nboth, exo_nbr,
+                                                  std::make_pair(i, std::min(i+part_by_thread, nparticles)),
+                                                  yhat, epsilon, ys_reordered, dr, ynext));
+  group.run();
+}
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@ -373,7 +373,8 @@ MODFILES = \
 	bgp/solow-1/solow.mod \
 	bgp/nk-1/nk.mod \
 	bgp/ramsey-1/ramsey.mod \
-	dynare-command-options/ramst.mod
+	dynare-command-options/ramst.mod \
+	particle/local_state_space_iteration_k_test.mod

 PARTICLEFILES = \
 	particle/dsge_base2.mod \
--- a/tests/particle/local_state_space_iteration_k_test.mod
+++ b/tests/particle/local_state_space_iteration_k_test.mod
@ -0,0 +1,66 @@
+/*
+  Tests that local_state_space_iteration_2 and local_state_space_iteration_k
+  (for k=2) return the same results.
+*/
+
+var y, k, a, h, b, c;
+varexo e, u;
+
+parameters beta, rho, alpha, delta, theta, psi, tau;
+
+alpha = 0.36;
+rho   = 0.95;
+tau   = 0.025;
+beta  = 0.99;
+delta = 0.025;
+psi   = 0;
+theta = 2.95;
+
+/* NOTE(review): phi is not in the `parameters' list above; it is only used in
+   the shocks block below, to set the covariance between e and u. */
+phi   = 0.1;
+
+model;
+c*theta*h^(1+psi)=(1-alpha)*y;
+k = beta*(((exp(b)*c)/(exp(b(+1))*c(+1)))
+    *(exp(b(+1))*alpha*y(+1)+(1-delta)*k));
+y = exp(a)*(k(-1)^alpha)*(h^(1-alpha));
+k = exp(b)*(y-c)+(1-delta)*k(-1);
+a = rho*a(-1)+tau*b(-1) + e;
+b = tau*a(-1)+rho*b(-1) + u;
+end;
+
+initval;
+y = 1.08068253095672;
+c = 0.80359242014163;
+h = 0.29175631001732;
+k = 11.08360443260358;
+a = 0;
+b = 0;
+e = 0;
+u = 0;
+end;
+
+shocks;
+var e; stderr 0.009;
+var u; stderr 0.009;
+var e, u = phi*0.009*0.009;
+end;
+
+/* Second-order solution; the resulting oo_.dr is passed to both MEX files
+   below. NOTE(review): k_order_solver is presumably what provides the
+   g_0, g_1, ... fields that local_state_space_iteration_k reads from dr;
+   confirm. */
+stoch_simul(order=2, irf=0, k_order_solver);
+
+nparticles = 100;
+
+/* We generate particles using realistic distributions (though this is not
+   strictly needed) */
+/* state_idx holds entries nstatic+1 .. nstatic+npred+nboth of dr.order_var */
+state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+M_.npred+M_.nboth));
+yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
+epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
+
+dr = oo_.dr;
+
+/* Reference result from the order-2 routine, fed with the individual
+   second-order matrices. NOTE(review): the last argument (1) is presumably
+   the number of threads; confirm against local_state_space_iteration_2. */
+ynext = local_state_space_iteration_2(yhat, epsilon, dr.ghx, dr.ghu, dr.ys(dr.order_var)+0.5*dr.ghs2, dr.ghxx, dr.ghuu, dr.ghxu, 1);
+
+/* Same particles through the k-order routine, which reads the order and the
+   number of threads from options_ (options_.order and
+   options_.threads.local_state_space_iteration_k) */
+ynext2 = local_state_space_iteration_k(yhat, epsilon, oo_.dr, M_, options_);
+
+/* Fail if any entry of the two results differs by more than 1e-14 in absolute
+   value */
+if max(max(abs(ynext-ynext2))) > 1e-14
+    error('Inconsistency between local_state_space_iteration_2 and local_state_space_iteration_k')
+end