316 lines
14 KiB
C++
316 lines
14 KiB
C++
/*
|
|
* Copyright © 2019-2023 Dynare Team
|
|
*
|
|
* This file is part of Dynare.
|
|
*
|
|
* Dynare is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Dynare is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#include <dynmex.h>
|
|
|
|
#include "DynamicModelCaller.hh"
|
|
|
|
void
|
|
mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
|
|
{
|
|
if (nlhs < 1 || nlhs > 2 || nrhs != 9)
|
|
mexErrMsgTxt("Must have 9 input arguments and 1 or 2 output arguments");
|
|
bool compute_jacobian = nlhs == 2;
|
|
|
|
// Give explicit names to input arguments
|
|
const mxArray* y_mx = prhs[0];
|
|
const mxArray* y0_mx = prhs[1];
|
|
const mxArray* yT_mx = prhs[2];
|
|
const mxArray* exo_path_mx = prhs[3];
|
|
const mxArray* params_mx = prhs[4];
|
|
const mxArray* steady_state_mx = prhs[5];
|
|
const mxArray* periods_mx = prhs[6];
|
|
const mxArray* M_mx = prhs[7];
|
|
const mxArray* options_mx = prhs[8];
|
|
|
|
// Extract various fields from M_
|
|
const mxArray* basename_mx = mxGetField(M_mx, 0, "fname");
|
|
if (!(basename_mx && mxIsChar(basename_mx) && mxGetM(basename_mx) == 1))
|
|
mexErrMsgTxt("M_.fname should be a character string");
|
|
std::string basename {mxArrayToString(basename_mx)};
|
|
|
|
const mxArray* endo_nbr_mx = mxGetField(M_mx, 0, "endo_nbr");
|
|
if (!(endo_nbr_mx && mxIsScalar(endo_nbr_mx) && mxIsNumeric(endo_nbr_mx)))
|
|
mexErrMsgTxt("M_.endo_nbr should be a numeric scalar");
|
|
auto ny = static_cast<mwIndex>(mxGetScalar(endo_nbr_mx));
|
|
|
|
const mxArray* maximum_lag_mx = mxGetField(M_mx, 0, "maximum_lag");
|
|
if (!(maximum_lag_mx && mxIsScalar(maximum_lag_mx) && mxIsNumeric(maximum_lag_mx)))
|
|
mexErrMsgTxt("M_.maximum_lag should be a numeric scalar");
|
|
auto maximum_lag = static_cast<mwIndex>(mxGetScalar(maximum_lag_mx));
|
|
|
|
const mxArray* dynamic_tmp_nbr_mx = mxGetField(M_mx, 0, "dynamic_tmp_nbr");
|
|
if (!(dynamic_tmp_nbr_mx && mxIsDouble(dynamic_tmp_nbr_mx)
|
|
&& mxGetNumberOfElements(dynamic_tmp_nbr_mx) >= 2)
|
|
|| mxIsComplex(dynamic_tmp_nbr_mx) || mxIsSparse(dynamic_tmp_nbr_mx))
|
|
mexErrMsgTxt("M_.dynamic_tmp_nbr should be a real dense array of at least 2 elements");
|
|
size_t ntt {static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[0])
|
|
+ (compute_jacobian ? static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[1]) : 0)};
|
|
|
|
const mxArray* has_external_function_mx = mxGetField(M_mx, 0, "has_external_function");
|
|
if (!(has_external_function_mx && mxIsLogicalScalar(has_external_function_mx)))
|
|
mexErrMsgTxt("M_.has_external_function should be a logical scalar");
|
|
bool has_external_function = static_cast<bool>(mxGetScalar(has_external_function_mx));
|
|
|
|
// Extract various fields from options_
|
|
const mxArray* use_dll_mx = mxGetField(options_mx, 0, "use_dll");
|
|
if (!(use_dll_mx && mxIsLogicalScalar(use_dll_mx)))
|
|
mexErrMsgTxt("options_.use_dll should be a logical scalar");
|
|
bool use_dll = static_cast<bool>(mxGetScalar(use_dll_mx));
|
|
|
|
const mxArray* linear_mx = mxGetField(options_mx, 0, "linear");
|
|
if (!(linear_mx && mxIsLogicalScalar(linear_mx)))
|
|
mexErrMsgTxt("options_.linear should be a logical scalar");
|
|
bool linear = static_cast<bool>(mxGetScalar(linear_mx));
|
|
|
|
const mxArray* threads_mx = mxGetField(options_mx, 0, "threads");
|
|
if (!threads_mx)
|
|
mexErrMsgTxt("Can't find field options_.threads");
|
|
const mxArray* num_threads_mx = mxGetField(threads_mx, 0, "perfect_foresight_problem");
|
|
if (!(num_threads_mx && mxIsScalar(num_threads_mx) && mxIsNumeric(num_threads_mx)))
|
|
mexErrMsgTxt("options_.threads.perfect_foresight_problem should be a numeric scalar");
|
|
int num_threads = static_cast<int>(mxGetScalar(num_threads_mx));
|
|
|
|
// Check other input and map it to local variables
|
|
if (!(mxIsScalar(periods_mx) && mxIsNumeric(periods_mx)))
|
|
mexErrMsgTxt("periods should be a numeric scalar");
|
|
auto periods = static_cast<mwIndex>(mxGetScalar(periods_mx));
|
|
|
|
if (!(mxIsDouble(y_mx) && mxGetM(y_mx) == static_cast<size_t>(ny * periods) && mxGetN(y_mx) == 1))
|
|
mexErrMsgTxt("y should be a double precision column-vector of M_.endo_nbr*periods elements");
|
|
const double* y = mxGetPr(y_mx);
|
|
|
|
if (!(mxIsDouble(y0_mx) && mxGetM(y0_mx) == static_cast<size_t>(ny) && mxGetN(y0_mx) == 1))
|
|
mexErrMsgTxt("y0 should be a double precision column-vector of M_.endo_nbr elements");
|
|
const double* y0 = mxGetPr(y0_mx);
|
|
|
|
if (!(mxIsDouble(yT_mx) && mxGetM(yT_mx) == static_cast<size_t>(ny) && mxGetN(yT_mx) == 1))
|
|
mexErrMsgTxt("yT should be a double precision column-vector of M_.endo_nbr elements");
|
|
const double* yT = mxGetPr(yT_mx);
|
|
|
|
if (!(mxIsDouble(exo_path_mx)
|
|
&& mxGetM(exo_path_mx) >= static_cast<size_t>(periods + maximum_lag)))
|
|
mexErrMsgTxt(
|
|
"exo_path should be a double precision matrix with at least periods+M_.maximum_lag rows");
|
|
auto nx = static_cast<mwIndex>(mxGetN(exo_path_mx));
|
|
size_t nb_row_x = mxGetM(exo_path_mx);
|
|
const double* exo_path = mxGetPr(exo_path_mx);
|
|
|
|
const mxArray* g1_sparse_rowval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_rowval")};
|
|
if (!(mxIsInt32(g1_sparse_rowval_mx)))
|
|
mexErrMsgTxt("M_.dynamic_g1_sparse_rowval should be an int32 vector");
|
|
#if MX_HAS_INTERLEAVED_COMPLEX
|
|
const int32_T* g1_sparse_rowval {mxGetInt32s(g1_sparse_rowval_mx)};
|
|
#else
|
|
const int32_T* g1_sparse_rowval {static_cast<const int32_T*>(mxGetData(g1_sparse_rowval_mx))};
|
|
#endif
|
|
|
|
const mxArray* g1_sparse_colval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colval")};
|
|
if (!(mxIsInt32(g1_sparse_colval_mx)))
|
|
mexErrMsgTxt("M_.dynamic_g1_sparse_colval should be an int32 vector");
|
|
if (mxGetNumberOfElements(g1_sparse_colval_mx) != mxGetNumberOfElements(g1_sparse_rowval_mx))
|
|
mexErrMsgTxt(
|
|
"M_.dynamic_g1_sparse_colval should have the same length as M_.dynamic_g1_sparse_rowval");
|
|
|
|
const mxArray* g1_sparse_colptr_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colptr")};
|
|
if (!(mxIsInt32(g1_sparse_colptr_mx)
|
|
&& mxGetNumberOfElements(g1_sparse_colptr_mx) == static_cast<size_t>(3 * ny + nx + 1)))
|
|
mexErrMsgTxt(("M_.dynamic_g1_sparse_colptr should be an int32 vector with "
|
|
+ std::to_string(3 * ny + nx + 1) + " elements")
|
|
.c_str());
|
|
#if MX_HAS_INTERLEAVED_COMPLEX
|
|
const int32_T* g1_sparse_colptr {mxGetInt32s(g1_sparse_colptr_mx)};
|
|
#else
|
|
const int32_T* g1_sparse_colptr {static_cast<const int32_T*>(mxGetData(g1_sparse_colptr_mx))};
|
|
#endif
|
|
if (static_cast<size_t>(g1_sparse_colptr[3 * ny + nx]) - 1
|
|
!= mxGetNumberOfElements(g1_sparse_rowval_mx))
|
|
mexErrMsgTxt("The size of M_.dynamic_g1_sparse_rowval is not consistent with the last element "
|
|
"of M_.dynamic_g1_sparse_colptr");
|
|
|
|
if (!(mxIsDouble(params_mx) && mxGetN(params_mx) == 1))
|
|
mexErrMsgTxt("params should be a double precision column-vector");
|
|
const double* params = mxGetPr(params_mx);
|
|
|
|
if (!(mxIsDouble(steady_state_mx) && mxGetN(steady_state_mx) == 1))
|
|
mexErrMsgTxt("steady_state should be a double precision column-vector");
|
|
const double* steady_state = mxGetPr(steady_state_mx);
|
|
|
|
// Allocate output matrices
|
|
plhs[0] = mxCreateDoubleMatrix(periods * ny, 1, mxREAL);
|
|
double* stacked_residual = mxGetPr(plhs[0]);
|
|
|
|
/* Number of non-zero values in the stacked Jacobian.
|
|
Contemporaneous derivatives appear at all periods, while lag or lead
|
|
derivatives appear at all periods except one. */
|
|
mwSize nzmax {static_cast<mwSize>((g1_sparse_colptr[3 * ny] - 1) * (periods - 1)
|
|
+ (g1_sparse_colptr[2 * ny] - g1_sparse_colptr[ny]))};
|
|
|
|
double* stacked_jacobian = nullptr;
|
|
mwIndex *ir = nullptr, *jc = nullptr;
|
|
if (compute_jacobian)
|
|
{
|
|
plhs[1] = mxCreateSparse(periods * ny, periods * ny, nzmax, mxREAL);
|
|
stacked_jacobian = mxGetPr(plhs[1]);
|
|
ir = mxGetIr(plhs[1]);
|
|
jc = mxGetJc(plhs[1]);
|
|
|
|
/* Create the index vectors (IR, JC) of the sparse stacked jacobian. This
|
|
makes parallelization across periods possible when evaluating the model,
|
|
since all indices are known ex ante. */
|
|
mwIndex k = 0;
|
|
jc[0] = 0;
|
|
for (mwIndex T = 0; T < periods; T++)
|
|
for (mwIndex j {0}; j < ny; j++) // Column within the period (i.e. variable)
|
|
{
|
|
if (T > 0)
|
|
for (int32_T idx {g1_sparse_colptr[2 * ny + j] - 1};
|
|
idx < g1_sparse_colptr[2 * ny + j + 1] - 1; idx++)
|
|
ir[k++]
|
|
= (T - 1) * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_{t+1} in T-1
|
|
for (int32_T idx {g1_sparse_colptr[ny + j] - 1}; idx < g1_sparse_colptr[ny + j + 1] - 1;
|
|
idx++)
|
|
ir[k++] = T * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_t in T
|
|
if (T < periods - 1)
|
|
for (int32_T idx {g1_sparse_colptr[j] - 1}; idx < g1_sparse_colptr[j + 1] - 1; idx++)
|
|
ir[k++]
|
|
= (T + 1) * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_{t-1} in T+1
|
|
jc[T * ny + j + 1] = k;
|
|
}
|
|
}
|
|
|
|
if (use_dll)
|
|
DynamicModelDllCaller::load_dll(basename);
|
|
|
|
DynamicModelCaller::error_msg.clear();
|
|
|
|
/* Parallelize the main loop, if use_dll and no external function (to avoid
|
|
parallel calls to MATLAB) */
|
|
#pragma omp parallel num_threads(num_threads) if (use_dll && !has_external_function)
|
|
{
|
|
// Allocate (thread-private) model evaluator (which allocates space for temporaries)
|
|
std::unique_ptr<DynamicModelCaller> m;
|
|
if (use_dll)
|
|
m = std::make_unique<DynamicModelDllCaller>(ntt, ny, nx, params, steady_state,
|
|
g1_sparse_colptr, linear, compute_jacobian);
|
|
else
|
|
m = std::make_unique<DynamicModelMatlabCaller>(basename, ny, nx, params_mx, steady_state_mx,
|
|
g1_sparse_rowval_mx, g1_sparse_colval_mx,
|
|
g1_sparse_colptr_mx, linear, compute_jacobian);
|
|
|
|
// Main computing loop
|
|
#pragma omp for
|
|
for (mwIndex T = 0; T < periods; T++)
|
|
{
|
|
// Fill vector of dynamic variables
|
|
if (T > 0 && T < periods - 1)
|
|
std::copy_n(y + (T - 1) * ny, 3 * ny, m->y());
|
|
else if (T > 0) // Last simulation period
|
|
{
|
|
std::copy_n(y + (T - 1) * ny, 2 * ny, m->y());
|
|
std::copy_n(yT, ny, m->y() + 2 * ny);
|
|
}
|
|
else if (T < periods - 1) // First simulation period
|
|
{
|
|
std::copy_n(y0, ny, m->y());
|
|
std::copy_n(y + T * ny, 2 * ny, m->y() + ny);
|
|
}
|
|
else // Special case: periods=1 (and so T=0)
|
|
{
|
|
std::copy_n(y0, ny, m->y());
|
|
std::copy_n(y, ny, m->y() + ny);
|
|
std::copy_n(yT, ny, m->y() + 2 * ny);
|
|
}
|
|
|
|
// Fill exogenous
|
|
for (mwIndex j {0}; j < nx; j++)
|
|
m->x()[j] = exo_path[T + maximum_lag + nb_row_x * j];
|
|
|
|
// Compute the residual and Jacobian, and fill the stacked residual
|
|
m->eval(stacked_residual + T * ny);
|
|
|
|
if (compute_jacobian)
|
|
{
|
|
// Fill the stacked jacobian
|
|
for (mwIndex col {T > 1 ? (T - 1) * ny : 0}; // We can't use std::max() here, because
|
|
// mwIndex is unsigned under MATLAB
|
|
col < std::min(periods * ny, (T + 2) * ny); col++)
|
|
{
|
|
mwIndex k = jc[col];
|
|
while (k < jc[col + 1])
|
|
{
|
|
if (ir[k] >= (T + 1) * ny)
|
|
break; // Nothing to copy for this column
|
|
if (ir[k] >= T * ny)
|
|
{
|
|
/* Within the current column, this is the first line of
|
|
the stacked Jacobian that contains elements from the
|
|
(small) Jacobian just computed, so copy the whole
|
|
column of the latter to the former. */
|
|
m->copy_jacobian_column(col - (T - 1) * ny, stacked_jacobian + k);
|
|
break;
|
|
}
|
|
k++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Mimic a try/catch using a global string, since exceptions are not allowed
|
|
to cross OpenMP boundary */
|
|
if (!DynamicModelCaller::error_msg.empty())
|
|
mexErrMsgTxt(DynamicModelCaller::error_msg.c_str());
|
|
|
|
if (compute_jacobian)
|
|
{
|
|
/* The stacked jacobian so far constructed can still reference some zero
|
|
elements, because some expressions that are symbolically different from
|
|
zero may still evaluate to zero for some endogenous/parameter values. The
|
|
following code further compresses the Jacobian by removing the references
|
|
to those extra zeros. This makes a significant speed difference when
|
|
inversing the Jacobian for some large models. */
|
|
mwIndex k_orig = 0, k_new = 0;
|
|
for (mwIndex col = 0; col < periods * ny; col++)
|
|
{
|
|
while (k_orig < jc[col + 1])
|
|
{
|
|
if (stacked_jacobian[k_orig] != 0.0)
|
|
{
|
|
if (k_new != k_orig)
|
|
{
|
|
stacked_jacobian[k_new] = stacked_jacobian[k_orig];
|
|
ir[k_new] = ir[k_orig];
|
|
}
|
|
k_new++;
|
|
}
|
|
k_orig++;
|
|
}
|
|
jc[col + 1] = k_new;
|
|
}
|
|
}
|
|
|
|
if (use_dll)
|
|
DynamicModelDllCaller::unload_dll();
|
|
}
|