local_state_space_iteration_k MEX now uses the Fortran code (instead of the C++ code)

Closes: #1802
trustregion
Sébastien Villemot 2022-02-04 11:31:09 +01:00
parent 10a4ec53e9
commit 39f47dc957
No known key found for this signature in database
GPG Key ID: 2CECE9350ECEBE4A
17 changed files with 23 additions and 333 deletions

View File

@ -26,7 +26,7 @@ function [fval,info,exit_flag,DLIK,Hess,ys,trend_coeff,Model,DynareOptions,Bayes
% - BayesInfo [struct] See INPUTS section.
% - DynareResults [struct] Updated DynareResults structure described in INPUTS section.
% Copyright (C) 2010-2021 Dynare Team
% Copyright (C) 2010-2022 Dynare Team
%
% This file is part of Dynare.
%
@ -127,6 +127,7 @@ ReducedForm.mf1 = mf1;
if DynareOptions.k_order_solver && ~(DynareOptions.particle.pruning && DynareOptions.order==2)
ReducedForm.use_k_order_solver = true;
ReducedForm.dr = dr;
ReducedForm.udr = folded_to_unfolded_dr(dr, Model, DynareOptions);
else
ReducedForm.use_k_order_solver = false;
ReducedForm.ghx = dr.ghx(restrict_variables_idx,:);

@ -1 +1 @@
Subproject commit 2e18ca8ff584d1ab47658f329ab73ab1c27cdf6f
Subproject commit d20be2c271b442d91bd36aea300e4837a066ae2d

View File

@ -1,14 +0,0 @@
mex_PROGRAMS = local_state_space_iteration_fortran
local_state_space_iteration_fortran_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
nodist_local_state_space_iteration_fortran_SOURCES = \
mexFunction.f08
local_state_space_iteration_fortran_LDADD = ../libkordersim/libkordersim.a
BUILT_SOURCES = $(nodist_local_state_space_iteration_fortran_SOURCES)
CLEANFILES = $(nodist_local_state_space_iteration_fortran_SOURCES)
%.f08: $(top_srcdir)/../../sources/local_state_space_iteration_fortran/%.f08
$(LN_S) -f $< $@

View File

@ -1,14 +1,13 @@
mex_PROGRAMS = local_state_space_iteration_2 local_state_space_iteration_k
nodist_local_state_space_iteration_2_SOURCES = local_state_space_iteration_2.cc
nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.cc
nodist_local_state_space_iteration_k_SOURCES = local_state_space_iteration_k.f08
local_state_space_iteration_2_CXXFLAGS = $(AM_CXXFLAGS) -fopenmp
local_state_space_iteration_2_LDFLAGS = $(AM_LDFLAGS) $(OPENMP_LDFLAGS)
local_state_space_iteration_k_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/../../../dynare++/sylv/cc -I$(top_srcdir)/../../../dynare++/tl/cc -I$(top_srcdir)/../../../dynare++/kord -I$(top_srcdir)/../../../dynare++/utils/cc $(CPPFLAGS_MATIO)
local_state_space_iteration_k_LDFLAGS = $(AM_LDFLAGS) $(LDFLAGS_MATIO)
local_state_space_iteration_k_LDADD = ../libdynare++/libdynare++.a $(LIBADD_MATIO)
local_state_space_iteration_k_FCFLAGS = $(AM_FCFLAGS) -I../libkordersim -pthread
local_state_space_iteration_k_LDADD = ../libkordersim/libkordersim.a
BUILT_SOURCES = $(nodist_local_state_space_iteration_2_SOURCES) \
$(nodist_local_state_space_iteration_k_SOURCES)
@ -17,3 +16,6 @@ CLEANFILES = $(nodist_local_state_space_iteration_2_SOURCES) \
%.cc: $(top_srcdir)/../../sources/local_state_space_iterations/%.cc
$(LN_S) -f $< $@
%.f08: $(top_srcdir)/../../sources/local_state_space_iterations/%.f08
$(LN_S) -f $< $@

View File

@ -1,10 +1,10 @@
ACLOCAL_AMFLAGS = -I ../../../m4
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean
# libdynare++ must come before gensylv and k_order_perturbation
if ENABLE_MEX_DYNAREPLUSPLUS
SUBDIRS += libdynare++ gensylv libkorder k_order_perturbation k_order_welfare local_state_space_iterations libkordersim folded_to_unfolded_dr local_state_space_iteration_fortran k_order_simul k_order_mean
SUBDIRS += libdynare++ gensylv libkorder k_order_perturbation k_order_welfare
endif
if ENABLE_MEX_MS_SBVAR

View File

@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
dnl Copyright © 2009-2021 Dynare Team
dnl Copyright © 2009-2022 Dynare Team
dnl
dnl This file is part of Dynare.
dnl
@ -165,7 +165,6 @@ AC_CONFIG_FILES([Makefile
local_state_space_iterations/Makefile
libkordersim/Makefile
folded_to_unfolded_dr/Makefile
local_state_space_iteration_fortran/Makefile
k_order_simul/Makefile
k_order_mean/Makefile
perfect_foresight_problem/Makefile

View File

@ -1,2 +0,0 @@
include ../mex.am
include ../../local_state_space_iteration_fortran.am

View File

@ -1,10 +1,10 @@
ACLOCAL_AMFLAGS = -I ../../../m4
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast
SUBDIRS = mjdgges kronecker bytecode block_kalman_filter sobol perfect_foresight_problem num_procs block_trust_region disclyap_fast libkordersim local_state_space_iterations folded_to_unfolded_dr k_order_simul k_order_mean
# libdynare++ must come before gensylv and k_order_perturbation
if ENABLE_MEX_DYNAREPLUSPLUS
SUBDIRS += libdynare++ gensylv libkorder k_order_perturbation k_order_welfare local_state_space_iterations libkordersim folded_to_unfolded_dr local_state_space_iteration_fortran k_order_simul k_order_mean
SUBDIRS += libdynare++ gensylv libkorder k_order_perturbation k_order_welfare
endif
if ENABLE_MEX_MS_SBVAR

View File

@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
dnl Copyright © 2009-2021 Dynare Team
dnl Copyright © 2009-2022 Dynare Team
dnl
dnl This file is part of Dynare.
dnl
@ -166,7 +166,6 @@ AC_CONFIG_FILES([Makefile
local_state_space_iterations/Makefile
libkordersim/Makefile
folded_to_unfolded_dr/Makefile
local_state_space_iteration_fortran/Makefile
k_order_simul/Makefile
k_order_mean/Makefile
perfect_foresight_problem/Makefile

View File

@ -1,3 +0,0 @@
EXEEXT = .mex
include ../mex.am
include ../../local_state_space_iteration_fortran.am

View File

@ -18,7 +18,6 @@ EXTRA_DIST = \
local_state_space_iterations \
libkordersim \
folded_to_unfolded_dr \
local_state_space_iteration_fortran \
k_order_simul \
k_order_mean \
gensylv \

View File

@ -1,4 +1,4 @@
! Copyright © 2021 Dynare Team
! Copyright © 2021-2022 Dynare Team
!
! This file is part of Dynare.
!
@ -15,7 +15,7 @@
! You should have received a copy of the GNU General Public License
! along with Dynare. If not, see <https://www.gnu.org/licenses/>.
! Routines and data structures for multithreading over particles in local_state_space_iteration_fortran
! Routines and data structures for multithreading over particles in local_state_space_iteration_k
module pparticle
use iso_c_binding
use simulation
@ -75,4 +75,4 @@ contains
end subroutine thread_eval
end module pparticle
end module pparticle

View File

@ -1,191 +0,0 @@
/*
* Copyright © 2019-2021 Dynare Team
*
* This file is part of Dynare.
*
* Dynare is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Dynare is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Dynare. If not, see <https://www.gnu.org/licenses/>.
*/
#include <memory>
#include <string>
#include <utility>
#include <dynmex.h>
#include "tl_static.hh"
#include "decision_rule.hh"
#include "sthread.hh"
/* The class that does the real job. It computes the next iteration for a given
range of particles. There will be as many instances as there are parallel
threads.
Note that we cant use OpenMP since it is incompatible with MKL, which is
used internally by Dynare++ routines under MATLAB. Hence we fall back to
the Dynare++ multithreading abstraction. */
struct ParticleWorker : public sthread::detach_thread
{
const int npred_both, exo_nbr;
const std::pair<size_t, size_t> particle_range;
const ConstGeneralMatrix &yhat, &epsilon;
const Vector &ys_reordered;
const UnfoldDecisionRule &dr;
const ConstVector &restrict_var_list;
GeneralMatrix &ynext;
ParticleWorker(int npred_both_arg, int exo_nbr_arg, std::pair<size_t, size_t> particle_range_arg,
const ConstGeneralMatrix &yhat_arg, const ConstGeneralMatrix &epsilon_arg,
const Vector &ys_reordered_arg, const UnfoldDecisionRule &dr_arg,
const ConstVector &restrict_var_list_arg, GeneralMatrix &ynext_arg)
: npred_both{npred_both_arg}, exo_nbr{exo_nbr_arg}, particle_range{std::move(particle_range_arg)},
yhat{yhat_arg}, epsilon{epsilon_arg}, ys_reordered{ys_reordered_arg}, dr{dr_arg},
restrict_var_list{restrict_var_list_arg}, ynext{ynext_arg}
{
}
void
operator()(std::mutex &mut) override
{
Vector dyu(npred_both+exo_nbr);
Vector dy(dyu, 0, npred_both);
Vector u(dyu, npred_both, exo_nbr);
Vector ynext_col_allvars(ys_reordered.length());
for (size_t i = particle_range.first; i < particle_range.second; i++)
{
dy = yhat.getCol(i);
u = epsilon.getCol(i);
dr.eval(DecisionRule::emethod::horner, ynext_col_allvars, dyu);
ynext_col_allvars.add(1.0, ys_reordered);
/* Select only the variables in restrict_var_list, and copy back to the
result matrix */
Vector ynext_col{ynext.getCol(i)};
for (int j = 0; j < restrict_var_list.length(); j++)
ynext_col[j] = ynext_col_allvars[restrict_var_list[j]-1];
}
}
};
void
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
if (nrhs != 5 || nlhs != 1)
mexErrMsgTxt("Must have 5 input arguments and 1 output argument");
// Give explicit names to input arguments
const mxArray *yhat_mx = prhs[0];
const mxArray *epsilon_mx = prhs[1];
const mxArray *dr_mx = prhs[2];
const mxArray *M_mx = prhs[3];
const mxArray *options_mx = prhs[4];
auto get_int_field = [](const mxArray *struct_mx, const std::string &fieldname)
{
mxArray *field_mx = mxGetField(struct_mx, 0, fieldname.c_str());
if (!(field_mx && mxIsScalar(field_mx) && mxIsNumeric(field_mx)))
mexErrMsgTxt(("Field `" + fieldname + "' should be a numeric scalar").c_str());
return static_cast<int>(mxGetScalar(field_mx));
};
int nstatic = get_int_field(M_mx, "nstatic");
int npred = get_int_field(M_mx, "npred");
int nboth = get_int_field(M_mx, "nboth");
int nfwrd = get_int_field(M_mx, "nfwrd");
int endo_nbr = nstatic + npred + nboth + nfwrd;
int exo_nbr = get_int_field(M_mx, "exo_nbr");
int order = get_int_field(options_mx, "order");
const mxArray *order_var_mx = mxGetField(dr_mx, 0, "order_var");
if (!(order_var_mx && mxIsDouble(order_var_mx) && mxGetNumberOfElements(order_var_mx) == static_cast<size_t>(endo_nbr)))
mexErrMsgTxt("Field dr.order_var should be a double precision vector with endo_nbr elements");
const mxArray *ys_mx = mxGetField(dr_mx, 0, "ys");
if (!ys_mx || !mxIsDouble(ys_mx) || mxGetNumberOfElements(ys_mx) != static_cast<size_t>(endo_nbr))
mexErrMsgTxt("Field dr.ys should be a double precision vector with endo_nbr elements");
const mxArray *restrict_var_list_mx = mxGetField(dr_mx, 0, "restrict_var_list");
if (!(restrict_var_list_mx && mxIsDouble(restrict_var_list_mx)))
mexErrMsgTxt("Field dr.restrict_var_list should be a double precision vector");
size_t nparticles = mxGetN(yhat_mx);
if (mxGetN(epsilon_mx) != nparticles)
mexErrMsgTxt("epsilon and yhat don't have the same number of columns");
if (!(mxIsDouble(yhat_mx) && mxGetM(yhat_mx) == static_cast<size_t>(npred + nboth)))
mexErrMsgTxt("yhat should be a double precision matrix with npred+nboth rows");
if (!(mxIsDouble(epsilon_mx) && mxGetM(epsilon_mx) == static_cast<size_t>(exo_nbr)))
mexErrMsgTxt("epsilon should be a double precision matrix with exo_nbr rows");
const mxArray *threads_mx = mxGetField(options_mx, 0, "threads");
if (!threads_mx)
mexErrMsgTxt("Can't find field `threads' in options_");
int num_threads = get_int_field(threads_mx, "local_state_space_iteration_k");
ConstGeneralMatrix yhat{yhat_mx};
ConstGeneralMatrix epsilon{epsilon_mx};
ConstVector ys{ys_mx};
const double *order_var = mxGetPr(order_var_mx);
ConstVector restrict_var_list{restrict_var_list_mx};
try
{
TLStatic::init(order, npred+nboth+exo_nbr);
}
catch (TLException &e)
{
mexErrMsgTxt(("Dynare++ error: " + e.message).c_str());
}
// Form the polynomial (copied from dynare_simul_.cc)
UTensorPolynomial pol(endo_nbr, npred+nboth+exo_nbr);
for (int dim = 0; dim <= order; dim++)
{
const mxArray *gk_m = mxGetField(dr_mx, 0, ("g_" + std::to_string(dim)).c_str());
if (!gk_m)
mexErrMsgTxt(("Can't find field `g_" + std::to_string(dim) + "' in dr structure").c_str());
ConstTwoDMatrix gk{gk_m};
FFSTensor ft{endo_nbr, npred+nboth+exo_nbr, dim};
if (ft.ncols() != gk.ncols())
mexErrMsgTxt(("Wrong number of columns for folded tensor: got " + std::to_string(gk.ncols()) + " but i want " + std::to_string(ft.ncols()) + '\n').c_str());
if (ft.nrows() != gk.nrows())
mexErrMsgTxt(("Wrong number of rows for folded tensor: got " + std::to_string(gk.nrows()) + " but i want " + std::to_string(ft.nrows()) + '\n').c_str());
ft.zeros();
ft.add(1.0, gk);
pol.insert(std::make_unique<UFSTensor>(ft));
}
// Construct the reordered steady state (dr.ys(dr.order_var))
Vector ys_reordered(endo_nbr);
for (int i = 0; i < endo_nbr; i++)
ys_reordered[i] = ys[static_cast<int>(order_var[i])-1];
// Form the decision rule
UnfoldDecisionRule dr(pol, PartitionY(nstatic, npred, nboth, nfwrd),
exo_nbr, ys_reordered);
// Create the result matrix
plhs[0] = mxCreateDoubleMatrix(restrict_var_list.length(), nparticles, mxREAL);
GeneralMatrix ynext{plhs[0]};
// Run the real job in parallel
sthread::detach_thread_group::max_parallel_threads = num_threads;
sthread::detach_thread_group group;
// The following is equivalent to ceil(nparticles/num_threads), but with integer arithmetic
int part_by_thread = nparticles / num_threads + (nparticles % num_threads > 0);
for (size_t i = 0; i < nparticles; i += part_by_thread)
group.insert(std::make_unique<ParticleWorker>(npred+nboth, exo_nbr,
std::make_pair(i, std::min(i+part_by_thread, nparticles)),
yhat, epsilon, ys_reordered, dr, restrict_var_list,
ynext));
group.run();
}

View File

@ -1,4 +1,4 @@
! Copyright © 2021 Dynare Team
! Copyright © 2021-2022 Dynare Team
!
! This file is part of Dynare.
!
@ -118,7 +118,7 @@ subroutine mexFunction(nlhs, plhs, nrhs, prhs) bind(c, name='mexFunction')
if (.not. c_associated(thread_mx)) then
call mexErrMsgTxt("Cannot find `threads' in options_")
end if
nm = get_int_field(thread_mx, "local_state_space_iteration_fortran")
nm = get_int_field(thread_mx, "local_state_space_iteration_k")
end associate
nparticles = int(mxGetN(yhat_mx));

View File

@ -618,8 +618,7 @@ PARTICLEFILES = \
particle/dsge_base2.mod \
particle/first_spec.mod \
particle/first_spec_MCMC.mod \
particle/local_state_space_iteration_k_test.mod \
particle/local_it_k_test_parallel.mod
particle/local_state_space_iteration_k_test.mod
XFAIL_MODFILES = ramst_xfail.mod \

View File

@ -1,58 +0,0 @@
/*
Tests that the parallelized version of local_state_space_iteration_k and local_state_space_iteration_fortran
(for k=3) return the same results.
This file must be run after first_spec.mod (both are based on the same model).
*/
@#include "first_spec_common.inc"
varobs q ca;
shocks;
var eeps = 0.04^2;
var nnu = 0.03^2;
var q = 0.01^2;
var ca = 0.01^2;
end;
// Initialize various structures
estimation(datafile='my_data.mat',order=2,mode_compute=0,mh_replic=0,filter_algorithm=sis,nonlinear_filter_initialization=2
,cova_compute=0 %tell program that no covariance matrix was computed
);
stoch_simul(order=2, periods=200, irf=0, k_order_solver);
// Really perform the test
nparticles = options_.particle.number_of_particles;
nsims = 1e6/nparticles;
/* We generate particles using realistic distributions (though this is not
strictly needed) */
state_idx = oo_.dr.order_var((M_.nstatic+1):(M_.nstatic+M_.npred+M_.nboth));
yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
stoch_simul(order=3, periods=200, irf=0, k_order_solver);
options_.threads.local_state_space_iteration_k = 2;
options_.threads.local_state_space_iteration_fortran = 2;
tStart1 = tic; for i=1:nsims, ynext1 = local_state_space_iteration_k(yhat, epsilon, oo_.dr, M_, options_); end, tElapsed1 = toc(tStart1);
tStart2 = tic; [udr] = folded_to_unfolded_dr(oo_.dr, M_, options_); for i=1:nsims, ynext2 = local_state_space_iteration_fortran(yhat, epsilon, oo_.dr, M_, options_, udr); end, tElapsed2 = toc(tStart2);
if max(max(abs(ynext1-ynext2))) > 1e-14
error('Inconsistency between local_state_space_iteration_k and local_state_space_iteration_fortran')
end
if tElapsed2<tElapsed1
skipline()
dprintf('local_state_space_iteration_fortran is %5.2f times faster than local_state_space_iteration_k', tElapsed1/tElapsed2)
skipline()
else
skipline()
dprintf('local_state_space_iteration_fortran is %5.2f times slower than local_state_space_iteration_k', tElapsed2/tElapsed1)
skipline()
end

View File

@ -1,8 +1,5 @@
/*
Tests that :
(i) local_state_space_iteration_2, local_state_space_iteration_k and local_state_space_iteration_fortran
(for k=2) return the same results
(ii) local_state_space_iteration_k and local_state_space_iteration_fortran return the same results for k=3 without parallelization.
Tests that local_state_space_iteration_2 and local_state_space_iteration_k (for k=2) return the same results
This file must be run after first_spec.mod (both are based on the same model).
*/
@ -37,8 +34,6 @@ yhat = chol(oo_.var(state_idx,state_idx))*randn(M_.npred+M_.nboth, nparticles);
epsilon = chol(M_.Sigma_e)*randn(M_.exo_nbr, nparticles);
dr = oo_.dr;
options_.threads.local_state_space_iteration_fortran = 1;
options_.threads.local_state_space_iteration_k = 1;
// rf stands for Reduced Form
@ -52,17 +47,11 @@ rf_ghxu = dr.ghxu(dr.restrict_var_list, :);
tStart1 = tic; for i=1:nsims, ynext1 = local_state_space_iteration_2(yhat, epsilon, rf_ghx, rf_ghu, rf_constant, rf_ghxx, rf_ghuu, rf_ghxu, options_.threads.local_state_space_iteration_2); end, tElapsed1 = toc(tStart1);
tStart2 = tic; for i=1:nsims, ynext2 = local_state_space_iteration_k(yhat, epsilon, dr, M_, options_); end, tElapsed2 = toc(tStart2);
tStart3 = tic; [udr] = folded_to_unfolded_dr(dr, M_, options_); for i=1:nsims, ynext3 = local_state_space_iteration_fortran(yhat, epsilon, dr, M_, options_, udr); end, tElapsed3 = toc(tStart3);
tStart2 = tic; [udr] = folded_to_unfolded_dr(dr, M_, options_); for i=1:nsims, ynext2 = local_state_space_iteration_k(yhat, epsilon, dr, M_, options_, udr); end, tElapsed2 = toc(tStart2);
if max(max(abs(ynext1-ynext2))) > 1e-14
error('Inconsistency between local_state_space_iteration_2 and local_state_space_iteration_k')
end
if max(max(abs(ynext1-ynext3))) > 1e-14
error('Inconsistency between local_state_space_iteration_2 and local_state_space_iteration_fortran')
end
if tElapsed1<tElapsed2
skipline()
@ -73,33 +62,3 @@ else
dprintf('local_state_space_iteration_2 is %5.2f times slower than local_state_space_iteration_k', tElapsed1/tElapsed2)
skipline()
end
if tElapsed1<tElapsed3
skipline()
dprintf('local_state_space_iteration_2 is %5.2f times faster than local_state_space_iteration_fortran', tElapsed3/tElapsed1)
skipline()
else
skipline()
dprintf('local_state_space_iteration_2 is %5.2f times slower than local_state_space_iteration_fortran', tElapsed1/tElapsed3)
skipline()
end
stoch_simul(order=3, periods=200, irf=0, k_order_solver);
tStart31 = tic; for i=1:nsims, ynext31 = local_state_space_iteration_k(yhat, epsilon, oo_.dr, M_, options_); end, tElapsed31 = toc(tStart31);
tStart32 = tic; [udr] = folded_to_unfolded_dr(oo_.dr, M_, options_); for i=1:nsims, ynext32 = local_state_space_iteration_fortran(yhat, epsilon, oo_.dr, M_, options_, udr); end, tElapsed32 = toc(tStart32);
if max(max(abs(ynext31-ynext32))) > 1e-14
error('Inconsistency between local_state_space_iteration_k and local_state_space_iteration_fortran')
end
if tElapsed32<tElapsed31
skipline()
dprintf('local_state_space_iteration_fortran is %5.2f times faster than local_state_space_iteration_k', tElapsed31/tElapsed32)
skipline()
else
skipline()
dprintf('local_state_space_iteration_fortran is %5.2f times slower than local_state_space_iteration_k', tElapsed32/tElapsed31)
skipline()
end