298 lines
13 KiB
C++
298 lines
13 KiB
C++
/*
|
|
* Copyright © 2019-2023 Dynare Team
|
|
*
|
|
* This file is part of Dynare.
|
|
*
|
|
* Dynare is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Dynare is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <string>
|
|
#include <memory>
|
|
#include <algorithm>
|
|
|
|
#include <dynmex.h>
|
|
|
|
#include "DynamicModelCaller.hh"
|
|
|
|
void
|
|
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
|
|
{
|
|
if (nlhs < 1 || nlhs > 2 || nrhs != 9)
|
|
mexErrMsgTxt("Must have 9 input arguments and 1 or 2 output arguments");
|
|
bool compute_jacobian = nlhs == 2;
|
|
|
|
// Give explicit names to input arguments
|
|
const mxArray *y_mx = prhs[0];
|
|
const mxArray *y0_mx = prhs[1];
|
|
const mxArray *yT_mx = prhs[2];
|
|
const mxArray *exo_path_mx = prhs[3];
|
|
const mxArray *params_mx = prhs[4];
|
|
const mxArray *steady_state_mx = prhs[5];
|
|
const mxArray *periods_mx = prhs[6];
|
|
const mxArray *M_mx = prhs[7];
|
|
const mxArray *options_mx = prhs[8];
|
|
|
|
// Extract various fields from M_
|
|
const mxArray *basename_mx = mxGetField(M_mx, 0, "fname");
|
|
if (!(basename_mx && mxIsChar(basename_mx) && mxGetM(basename_mx) == 1))
|
|
mexErrMsgTxt("M_.fname should be a character string");
|
|
std::string basename{mxArrayToString(basename_mx)};
|
|
|
|
const mxArray *endo_nbr_mx = mxGetField(M_mx, 0, "endo_nbr");
|
|
if (!(endo_nbr_mx && mxIsScalar(endo_nbr_mx) && mxIsNumeric(endo_nbr_mx)))
|
|
mexErrMsgTxt("M_.endo_nbr should be a numeric scalar");
|
|
mwIndex ny = static_cast<mwIndex>(mxGetScalar(endo_nbr_mx));
|
|
|
|
const mxArray *maximum_lag_mx = mxGetField(M_mx, 0, "maximum_lag");
|
|
if (!(maximum_lag_mx && mxIsScalar(maximum_lag_mx) && mxIsNumeric(maximum_lag_mx)))
|
|
mexErrMsgTxt("M_.maximum_lag should be a numeric scalar");
|
|
mwIndex maximum_lag = static_cast<mwIndex>(mxGetScalar(maximum_lag_mx));
|
|
|
|
const mxArray *dynamic_tmp_nbr_mx = mxGetField(M_mx, 0, "dynamic_tmp_nbr");
|
|
if (!(dynamic_tmp_nbr_mx && mxIsDouble(dynamic_tmp_nbr_mx) && mxGetNumberOfElements(dynamic_tmp_nbr_mx) >= 2)
|
|
|| mxIsComplex(dynamic_tmp_nbr_mx) || mxIsSparse(dynamic_tmp_nbr_mx))
|
|
mexErrMsgTxt("M_.dynamic_tmp_nbr should be a real dense array of at least 2 elements");
|
|
size_t ntt {static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[0]) +
|
|
(compute_jacobian ? static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[1]) : 0)};
|
|
|
|
const mxArray *has_external_function_mx = mxGetField(M_mx, 0, "has_external_function");
|
|
if (!(has_external_function_mx && mxIsLogicalScalar(has_external_function_mx)))
|
|
mexErrMsgTxt("M_.has_external_function should be a logical scalar");
|
|
bool has_external_function = static_cast<bool>(mxGetScalar(has_external_function_mx));
|
|
|
|
// Extract various fields from options_
|
|
const mxArray *use_dll_mx = mxGetField(options_mx, 0, "use_dll");
|
|
if (!(use_dll_mx && mxIsLogicalScalar(use_dll_mx)))
|
|
mexErrMsgTxt("options_.use_dll should be a logical scalar");
|
|
bool use_dll = static_cast<bool>(mxGetScalar(use_dll_mx));
|
|
|
|
const mxArray *linear_mx = mxGetField(options_mx, 0, "linear");
|
|
if (!(linear_mx && mxIsLogicalScalar(linear_mx)))
|
|
mexErrMsgTxt("options_.linear should be a logical scalar");
|
|
bool linear = static_cast<bool>(mxGetScalar(linear_mx));
|
|
|
|
const mxArray *threads_mx = mxGetField(options_mx, 0, "threads");
|
|
if (!threads_mx)
|
|
mexErrMsgTxt("Can't find field options_.threads");
|
|
const mxArray *num_threads_mx = mxGetField(threads_mx, 0, "perfect_foresight_problem");
|
|
if (!(num_threads_mx && mxIsScalar(num_threads_mx) && mxIsNumeric(num_threads_mx)))
|
|
mexErrMsgTxt("options_.threads.perfect_foresight_problem should be a numeric scalar");
|
|
int num_threads = static_cast<int>(mxGetScalar(num_threads_mx));
|
|
|
|
// Check other input and map it to local variables
|
|
if (!(mxIsScalar(periods_mx) && mxIsNumeric(periods_mx)))
|
|
mexErrMsgTxt("periods should be a numeric scalar");
|
|
mwIndex periods = static_cast<mwIndex>(mxGetScalar(periods_mx));
|
|
|
|
if (!(mxIsDouble(y_mx) && mxGetM(y_mx) == static_cast<size_t>(ny*periods) && mxGetN(y_mx) == 1))
|
|
mexErrMsgTxt("y should be a double precision column-vector of M_.endo_nbr*periods elements");
|
|
const double *y = mxGetPr(y_mx);
|
|
|
|
if (!(mxIsDouble(y0_mx) && mxGetM(y0_mx) == static_cast<size_t>(ny) && mxGetN(y0_mx) == 1))
|
|
mexErrMsgTxt("y0 should be a double precision column-vector of M_.endo_nbr elements");
|
|
const double *y0 = mxGetPr(y0_mx);
|
|
|
|
if (!(mxIsDouble(yT_mx) && mxGetM(yT_mx) == static_cast<size_t>(ny) && mxGetN(yT_mx) == 1))
|
|
mexErrMsgTxt("yT should be a double precision column-vector of M_.endo_nbr elements");
|
|
const double *yT = mxGetPr(yT_mx);
|
|
|
|
if (!(mxIsDouble(exo_path_mx) && mxGetM(exo_path_mx) >= static_cast<size_t>(periods+maximum_lag)))
|
|
mexErrMsgTxt("exo_path should be a double precision matrix with at least periods+M_.maximum_lag rows");
|
|
mwIndex nx = static_cast<mwIndex>(mxGetN(exo_path_mx));
|
|
size_t nb_row_x = mxGetM(exo_path_mx);
|
|
const double *exo_path = mxGetPr(exo_path_mx);
|
|
|
|
const mxArray *g1_sparse_rowval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_rowval")};
|
|
if (!(mxIsInt32(g1_sparse_rowval_mx)))
|
|
mexErrMsgTxt("M_.dynamic_g1_sparse_rowval should be an int32 vector");
|
|
#if MX_HAS_INTERLEAVED_COMPLEX
|
|
const int32_T *g1_sparse_rowval {mxGetInt32s(g1_sparse_rowval_mx)};
|
|
#else
|
|
const int32_T *g1_sparse_rowval {static_cast<const int32_T *>(mxGetData(g1_sparse_rowval_mx))};
|
|
#endif
|
|
|
|
const mxArray *g1_sparse_colval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colval")};
|
|
if (!(mxIsInt32(g1_sparse_colval_mx)))
|
|
mexErrMsgTxt("M_.dynamic_g1_sparse_colval should be an int32 vector");
|
|
if (mxGetNumberOfElements(g1_sparse_colval_mx) != mxGetNumberOfElements(g1_sparse_rowval_mx))
|
|
mexErrMsgTxt("M_.dynamic_g1_sparse_colval should have the same length as M_.dynamic_g1_sparse_rowval");
|
|
|
|
const mxArray *g1_sparse_colptr_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colptr")};
|
|
if (!(mxIsInt32(g1_sparse_colptr_mx) && mxGetNumberOfElements(g1_sparse_colptr_mx) == static_cast<size_t>(3*ny+nx+1)))
|
|
mexErrMsgTxt(("M_.dynamic_g1_sparse_colptr should be an int32 vector with " + std::to_string(3*ny+nx+1) + " elements").c_str());
|
|
#if MX_HAS_INTERLEAVED_COMPLEX
|
|
const int32_T *g1_sparse_colptr {mxGetInt32s(g1_sparse_colptr_mx)};
|
|
#else
|
|
const int32_T *g1_sparse_colptr {static_cast<const int32_T *>(mxGetData(g1_sparse_colptr_mx))};
|
|
#endif
|
|
if (static_cast<size_t>(g1_sparse_colptr[3*ny+nx])-1 != mxGetNumberOfElements(g1_sparse_rowval_mx))
|
|
mexErrMsgTxt("The size of M_.dynamic_g1_sparse_rowval is not consistent with the last element of M_.dynamic_g1_sparse_colptr");
|
|
|
|
if (!(mxIsDouble(params_mx) && mxGetN(params_mx) == 1))
|
|
mexErrMsgTxt("params should be a double precision column-vector");
|
|
const double *params = mxGetPr(params_mx);
|
|
|
|
if (!(mxIsDouble(steady_state_mx) && mxGetN(steady_state_mx) == 1))
|
|
mexErrMsgTxt("steady_state should be a double precision column-vector");
|
|
const double *steady_state = mxGetPr(steady_state_mx);
|
|
|
|
// Allocate output matrices
|
|
plhs[0] = mxCreateDoubleMatrix(periods*ny, 1, mxREAL);
|
|
double *stacked_residual = mxGetPr(plhs[0]);
|
|
|
|
/* Number of non-zero values in the stacked Jacobian.
|
|
Contemporaneous derivatives appear at all periods, while lag or lead
|
|
derivatives appear at all periods except one. */
|
|
mwSize nzmax {static_cast<mwSize>((g1_sparse_colptr[3*ny]-1)*(periods-1) + (g1_sparse_colptr[2*ny]-g1_sparse_colptr[ny]))};
|
|
|
|
double *stacked_jacobian = nullptr;
|
|
mwIndex *ir = nullptr, *jc = nullptr;
|
|
if (compute_jacobian)
|
|
{
|
|
plhs[1] = mxCreateSparse(periods*ny, periods*ny, nzmax, mxREAL);
|
|
stacked_jacobian = mxGetPr(plhs[1]);
|
|
ir = mxGetIr(plhs[1]);
|
|
jc = mxGetJc(plhs[1]);
|
|
|
|
/* Create the index vectors (IR, JC) of the sparse stacked jacobian. This
|
|
makes parallelization across periods possible when evaluating the model,
|
|
since all indices are known ex ante. */
|
|
mwIndex k = 0;
|
|
jc[0] = 0;
|
|
for (mwIndex T = 0; T < periods; T++)
|
|
for (mwIndex j {0}; j < ny; j++) // Column within the period (i.e. variable)
|
|
{
|
|
if (T > 0)
|
|
for (int32_T idx {g1_sparse_colptr[2*ny+j]-1}; idx < g1_sparse_colptr[2*ny+j+1]-1; idx++)
|
|
ir[k++] = (T-1)*ny + g1_sparse_rowval[idx]-1; // Derivatives w.r.t. y_{t+1} in T-1
|
|
for (int32_T idx {g1_sparse_colptr[ny+j]-1}; idx < g1_sparse_colptr[ny+j+1]-1; idx++)
|
|
ir[k++] = T*ny + g1_sparse_rowval[idx]-1; // Derivatives w.r.t. y_t in T
|
|
if (T < periods-1)
|
|
for (int32_T idx {g1_sparse_colptr[j]-1}; idx < g1_sparse_colptr[j+1]-1; idx++)
|
|
ir[k++] = (T+1)*ny + g1_sparse_rowval[idx]-1; // Derivatives w.r.t. y_{t-1} in T+1
|
|
jc[T*ny+j+1] = k;
|
|
}
|
|
}
|
|
|
|
if (use_dll)
|
|
DynamicModelDllCaller::load_dll(basename);
|
|
|
|
DynamicModelCaller::error_msg.clear();
|
|
|
|
/* Parallelize the main loop, if use_dll and no external function (to avoid
|
|
parallel calls to MATLAB) */
|
|
#pragma omp parallel num_threads(num_threads) if (use_dll && !has_external_function)
|
|
{
|
|
// Allocate (thread-private) model evaluator (which allocates space for temporaries)
|
|
std::unique_ptr<DynamicModelCaller> m;
|
|
if (use_dll)
|
|
m = std::make_unique<DynamicModelDllCaller>(ntt, ny, nx, params, steady_state, g1_sparse_colptr, linear, compute_jacobian);
|
|
else
|
|
m = std::make_unique<DynamicModelMatlabCaller>(basename, ny, nx, params_mx, steady_state_mx, g1_sparse_rowval_mx, g1_sparse_colval_mx, g1_sparse_colptr_mx, linear, compute_jacobian);
|
|
|
|
// Main computing loop
|
|
#pragma omp for
|
|
for (mwIndex T = 0; T < periods; T++)
|
|
{
|
|
// Fill vector of dynamic variables
|
|
if (T > 0 && T < periods-1)
|
|
std::copy_n(y+(T-1)*ny, 3*ny, m->y());
|
|
else if (T > 0) // Last simulation period
|
|
{
|
|
std::copy_n(y+(T-1)*ny, 2*ny, m->y());
|
|
std::copy_n(yT, ny, m->y() + 2*ny);
|
|
}
|
|
else if (T < periods-1) // First simulation period
|
|
{
|
|
std::copy_n(y0, ny, m->y());
|
|
std::copy_n(y+T*ny, 2*ny, m->y() + ny);
|
|
}
|
|
else // Special case: periods=1 (and so T=0)
|
|
{
|
|
std::copy_n(y0, ny, m->y());
|
|
std::copy_n(y, ny, m->y() + ny);
|
|
std::copy_n(yT, ny, m->y() + 2*ny);
|
|
}
|
|
|
|
// Fill exogenous
|
|
for (mwIndex j {0}; j < nx; j++)
|
|
m->x()[j] = exo_path[T+maximum_lag + nb_row_x*j];
|
|
|
|
// Compute the residual and Jacobian, and fill the stacked residual
|
|
m->eval(stacked_residual+T*ny);
|
|
|
|
if (compute_jacobian)
|
|
{
|
|
// Fill the stacked jacobian
|
|
for (mwIndex col { T > 1 ? (T-1)*ny : 0 }; // We can't use std::max() here, because mwIndex is unsigned under MATLAB
|
|
col < std::min(periods*ny, (T+2)*ny); col++)
|
|
{
|
|
mwIndex k = jc[col];
|
|
while (k < jc[col+1])
|
|
{
|
|
if (ir[k] >= (T+1)*ny)
|
|
break; // Nothing to copy for this column
|
|
if (ir[k] >= T*ny)
|
|
{
|
|
/* Within the current column, this is the first line of
|
|
the stacked Jacobian that contains elements from the
|
|
(small) Jacobian just computed, so copy the whole
|
|
column of the latter to the former. */
|
|
m->copy_jacobian_column(col - (T-1)*ny, stacked_jacobian + k);
|
|
break;
|
|
}
|
|
k++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Mimic a try/catch using a global string, since exceptions are not allowed
|
|
to cross OpenMP boundary */
|
|
if (!DynamicModelCaller::error_msg.empty())
|
|
mexErrMsgTxt(DynamicModelCaller::error_msg.c_str());
|
|
|
|
if (compute_jacobian)
|
|
{
|
|
/* The stacked jacobian so far constructed can still reference some zero
|
|
elements, because some expressions that are symbolically different from
|
|
zero may still evaluate to zero for some endogenous/parameter values. The
|
|
following code further compresses the Jacobian by removing the references
|
|
to those extra zeros. This makes a significant speed difference when
|
|
inversing the Jacobian for some large models. */
|
|
mwIndex k_orig = 0, k_new = 0;
|
|
for (mwIndex col = 0; col < periods*ny; col++)
|
|
{
|
|
while (k_orig < jc[col+1])
|
|
{
|
|
if (stacked_jacobian[k_orig] != 0.0)
|
|
{
|
|
if (k_new != k_orig)
|
|
{
|
|
stacked_jacobian[k_new] = stacked_jacobian[k_orig];
|
|
ir[k_new] = ir[k_orig];
|
|
}
|
|
k_new++;
|
|
}
|
|
k_orig++;
|
|
}
|
|
jc[col+1] = k_new;
|
|
}
|
|
}
|
|
|
|
if (use_dll)
|
|
DynamicModelDllCaller::unload_dll();
|
|
}
|