/*
 * Copyright © 2019-2023 Dynare Team
 *
 * This file is part of Dynare.
 *
 * Dynare is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Dynare is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Dynare. If not, see <https://www.gnu.org/licenses/>.
 */

#include <algorithm>
#include <memory>
#include <string>

#include <dynmex.h>

#include "DynamicModelCaller.hh"

void
mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
{
  if (nlhs < 1 || nlhs > 2 || nrhs != 9)
    mexErrMsgTxt("Must have 9 input arguments and 1 or 2 output arguments");
  bool compute_jacobian = nlhs == 2;

  // Give explicit names to input arguments
  const mxArray* y_mx = prhs[0];
  const mxArray* y0_mx = prhs[1];
  const mxArray* yT_mx = prhs[2];
  const mxArray* exo_path_mx = prhs[3];
  const mxArray* params_mx = prhs[4];
  const mxArray* steady_state_mx = prhs[5];
  const mxArray* periods_mx = prhs[6];
  const mxArray* M_mx = prhs[7];
  const mxArray* options_mx = prhs[8];

  // Extract various fields from M_
  const mxArray* basename_mx = mxGetField(M_mx, 0, "fname");
  if (!(basename_mx && mxIsChar(basename_mx) && mxGetM(basename_mx) == 1))
    mexErrMsgTxt("M_.fname should be a character string");
  std::string basename {mxArrayToString(basename_mx)};

  const mxArray* endo_nbr_mx = mxGetField(M_mx, 0, "endo_nbr");
  if (!(endo_nbr_mx && mxIsScalar(endo_nbr_mx) && mxIsNumeric(endo_nbr_mx)))
    mexErrMsgTxt("M_.endo_nbr should be a numeric scalar");
  auto ny = static_cast<mwIndex>(mxGetScalar(endo_nbr_mx));

  const mxArray* maximum_lag_mx = mxGetField(M_mx, 0, "maximum_lag");
  if (!(maximum_lag_mx && mxIsScalar(maximum_lag_mx) && mxIsNumeric(maximum_lag_mx)))
    mexErrMsgTxt("M_.maximum_lag should be a numeric scalar");
  auto maximum_lag = static_cast<mwIndex>(mxGetScalar(maximum_lag_mx));
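
  /* Note: M_.dynamic_tmp_nbr is assumed to give the number of temporary terms per derivation
     order (first element for the residuals, second element for the additional terms needed by
     the Jacobian); only its first two elements are used here, and the Jacobian temporaries are
     counted only when the Jacobian is actually requested. */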
  const mxArray* dynamic_tmp_nbr_mx = mxGetField(M_mx, 0, "dynamic_tmp_nbr");
  if (!(dynamic_tmp_nbr_mx && mxIsDouble(dynamic_tmp_nbr_mx)
        && mxGetNumberOfElements(dynamic_tmp_nbr_mx) >= 2)
      || mxIsComplex(dynamic_tmp_nbr_mx) || mxIsSparse(dynamic_tmp_nbr_mx))
    mexErrMsgTxt("M_.dynamic_tmp_nbr should be a real dense array of at least 2 elements");
  size_t ntt {static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[0])
              + (compute_jacobian ? static_cast<size_t>(mxGetPr(dynamic_tmp_nbr_mx)[1]) : 0)};

  const mxArray* has_external_function_mx = mxGetField(M_mx, 0, "has_external_function");
  if (!(has_external_function_mx && mxIsLogicalScalar(has_external_function_mx)))
    mexErrMsgTxt("M_.has_external_function should be a logical scalar");
  bool has_external_function = static_cast<bool>(mxGetScalar(has_external_function_mx));

  // Extract various fields from options_
  const mxArray* use_dll_mx = mxGetField(options_mx, 0, "use_dll");
  if (!(use_dll_mx && mxIsLogicalScalar(use_dll_mx)))
    mexErrMsgTxt("options_.use_dll should be a logical scalar");
  bool use_dll = static_cast<bool>(mxGetScalar(use_dll_mx));

  const mxArray* linear_mx = mxGetField(options_mx, 0, "linear");
  if (!(linear_mx && mxIsLogicalScalar(linear_mx)))
    mexErrMsgTxt("options_.linear should be a logical scalar");
  bool linear = static_cast<bool>(mxGetScalar(linear_mx));

  const mxArray* threads_mx = mxGetField(options_mx, 0, "threads");
  if (!threads_mx)
    mexErrMsgTxt("Can't find field options_.threads");
  const mxArray* num_threads_mx = mxGetField(threads_mx, 0, "perfect_foresight_problem");
  if (!(num_threads_mx && mxIsScalar(num_threads_mx) && mxIsNumeric(num_threads_mx)))
    mexErrMsgTxt("options_.threads.perfect_foresight_problem should be a numeric scalar");
  // False positive: num_threads is used in OpenMP pragma
  // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
  int num_threads = static_cast<int>(mxGetScalar(num_threads_mx));

  // Check other input and map it to local variables
  if (!(mxIsScalar(periods_mx) && mxIsNumeric(periods_mx)))
    mexErrMsgTxt("periods should be a numeric scalar");
  auto periods = static_cast<mwIndex>(mxGetScalar(periods_mx));

  if (!(mxIsDouble(y_mx) && mxGetM(y_mx) == static_cast<size_t>(ny * periods)
        && mxGetN(y_mx) == 1))
    mexErrMsgTxt("y should be a double precision column-vector of M_.endo_nbr*periods elements");
  const double* y = mxGetPr(y_mx);

  if (!(mxIsDouble(y0_mx) && mxGetM(y0_mx) == static_cast<size_t>(ny) && mxGetN(y0_mx) == 1))
    mexErrMsgTxt("y0 should be a double precision column-vector of M_.endo_nbr elements");
  const double* y0 = mxGetPr(y0_mx);

  if (!(mxIsDouble(yT_mx) && mxGetM(yT_mx) == static_cast<size_t>(ny) && mxGetN(yT_mx) == 1))
    mexErrMsgTxt("yT should be a double precision column-vector of M_.endo_nbr elements");
  const double* yT = mxGetPr(yT_mx);

  if (!(mxIsDouble(exo_path_mx)
        && mxGetM(exo_path_mx) >= static_cast<size_t>(periods + maximum_lag)))
    mexErrMsgTxt(
        "exo_path should be a double precision matrix with at least periods+M_.maximum_lag rows");
  auto nx = static_cast<mwIndex>(mxGetN(exo_path_mx));
  size_t nb_row_x = mxGetM(exo_path_mx);
  const double* exo_path = mxGetPr(exo_path_mx);
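
  /* The M_.dynamic_g1_sparse_{rowval,colval,colptr} fields store the sparsity pattern of the
     dynamic Jacobian in MATLAB's 1-based compressed sparse column format; as they are used below,
     the columns are ordered as y_{t-1}, y_t, y_{t+1} and then the exogenous variables (hence the
     3*ny+nx+1 column pointers), and the recurring "- 1" adjustments convert to 0-based indices. */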
  const mxArray* g1_sparse_rowval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_rowval")};
  if (!(mxIsInt32(g1_sparse_rowval_mx)))
    mexErrMsgTxt("M_.dynamic_g1_sparse_rowval should be an int32 vector");
#if MX_HAS_INTERLEAVED_COMPLEX
  const int32_T* g1_sparse_rowval {mxGetInt32s(g1_sparse_rowval_mx)};
#else
  const int32_T* g1_sparse_rowval {static_cast<const int32_T*>(mxGetData(g1_sparse_rowval_mx))};
#endif

  const mxArray* g1_sparse_colval_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colval")};
  if (!(mxIsInt32(g1_sparse_colval_mx)))
    mexErrMsgTxt("M_.dynamic_g1_sparse_colval should be an int32 vector");
  if (mxGetNumberOfElements(g1_sparse_colval_mx) != mxGetNumberOfElements(g1_sparse_rowval_mx))
    mexErrMsgTxt(
        "M_.dynamic_g1_sparse_colval should have the same length as M_.dynamic_g1_sparse_rowval");

  const mxArray* g1_sparse_colptr_mx {mxGetField(M_mx, 0, "dynamic_g1_sparse_colptr")};
  if (!(mxIsInt32(g1_sparse_colptr_mx)
        && mxGetNumberOfElements(g1_sparse_colptr_mx) == static_cast<size_t>(3 * ny + nx + 1)))
    mexErrMsgTxt(("M_.dynamic_g1_sparse_colptr should be an int32 vector with "
                  + std::to_string(3 * ny + nx + 1) + " elements")
                     .c_str());
#if MX_HAS_INTERLEAVED_COMPLEX
  const int32_T* g1_sparse_colptr {mxGetInt32s(g1_sparse_colptr_mx)};
#else
  const int32_T* g1_sparse_colptr {static_cast<const int32_T*>(mxGetData(g1_sparse_colptr_mx))};
#endif
  if (static_cast<size_t>(g1_sparse_colptr[3 * ny + nx]) - 1
      != mxGetNumberOfElements(g1_sparse_rowval_mx))
    mexErrMsgTxt("The size of M_.dynamic_g1_sparse_rowval is not consistent with the last element "
                 "of M_.dynamic_g1_sparse_colptr");

  if (!(mxIsDouble(params_mx) && mxGetN(params_mx) == 1))
    mexErrMsgTxt("params should be a double precision column-vector");
  const double* params = mxGetPr(params_mx);

  if (!(mxIsDouble(steady_state_mx) && mxGetN(steady_state_mx) == 1))
    mexErrMsgTxt("steady_state should be a double precision column-vector");
  const double* steady_state = mxGetPr(steady_state_mx);

  // Allocate output matrices
  plhs[0] = mxCreateDoubleMatrix(periods * ny, 1, mxREAL);
  double* stacked_residual = mxGetPr(plhs[0]);

  /* Number of non-zero values in the stacked Jacobian. Contemporaneous derivatives appear at all
     periods, while lag or lead derivatives appear at all periods except one. */
  mwSize nzmax {static_cast<mwSize>((g1_sparse_colptr[3 * ny] - 1) * (periods - 1)
                                    + (g1_sparse_colptr[2 * ny] - g1_sparse_colptr[ny]))};

  double* stacked_jacobian = nullptr;
  mwIndex *ir = nullptr, *jc = nullptr;
  if (compute_jacobian)
    {
      plhs[1] = mxCreateSparse(periods * ny, periods * ny, nzmax, mxREAL);
      stacked_jacobian = mxGetPr(plhs[1]);
      ir = mxGetIr(plhs[1]);
      jc = mxGetJc(plhs[1]);

      /* Create the index vectors (IR, JC) of the sparse stacked jacobian. This makes
         parallelization across periods possible when evaluating the model, since all indices are
         known ex ante. */
      mwIndex k = 0;
      jc[0] = 0;
      for (mwIndex T = 0; T < periods; T++)
        for (mwIndex j {0}; j < ny; j++) // Column within the period (i.e. variable)
          {
            if (T > 0)
              for (int32_T idx {g1_sparse_colptr[2 * ny + j] - 1};
                   idx < g1_sparse_colptr[2 * ny + j + 1] - 1; idx++)
                ir[k++] = (T - 1) * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_{t+1} in T-1
            for (int32_T idx {g1_sparse_colptr[ny + j] - 1};
                 idx < g1_sparse_colptr[ny + j + 1] - 1; idx++)
              ir[k++] = T * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_t in T
            if (T < periods - 1)
              for (int32_T idx {g1_sparse_colptr[j] - 1}; idx < g1_sparse_colptr[j + 1] - 1; idx++)
                ir[k++] = (T + 1) * ny + g1_sparse_rowval[idx] - 1; // Derivatives w.r.t. y_{t-1} in T+1
            jc[T * ny + j + 1] = k;
          }
    }
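
  /* To summarize the structure built above: the stacked Jacobian is block tridiagonal. Block
     column T (the derivatives w.r.t. the endogenous of period T) collects the lead derivatives
     from the rows of period T-1, the contemporaneous derivatives from the rows of period T, and
     the lag derivatives from the rows of period T+1; the first and last block columns simply
     omit the missing neighbour. */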
  if (use_dll)
    DynamicModelDllCaller::load_dll(basename);

  DynamicModelCaller::error_msg.clear();

  /* Parallelize the main loop, if use_dll and no external function (to avoid parallel calls to
     MATLAB) */
#pragma omp parallel num_threads(num_threads) if (use_dll && !has_external_function)
  {
    // Allocate (thread-private) model evaluator (which allocates space for temporaries)
    std::unique_ptr<DynamicModelCaller> m;
    if (use_dll)
      m = std::make_unique<DynamicModelDllCaller>(ntt, ny, nx, params, steady_state,
                                                  g1_sparse_colptr, linear, compute_jacobian);
    else
      m = std::make_unique<DynamicModelMatlabCaller>(basename, ny, nx, params_mx, steady_state_mx,
                                                     g1_sparse_rowval_mx, g1_sparse_colval_mx,
                                                     g1_sparse_colptr_mx, linear, compute_jacobian);

    // Main computing loop
#pragma omp for
    for (mwIndex T = 0; T < periods; T++)
      {
        // Fill vector of dynamic variables
        if (T > 0 && T < periods - 1)
          std::copy_n(y + (T - 1) * ny, 3 * ny, m->y());
        else if (T > 0) // Last simulation period
          {
            std::copy_n(y + (T - 1) * ny, 2 * ny, m->y());
            std::copy_n(yT, ny, m->y() + 2 * ny);
          }
        else if (T < periods - 1) // First simulation period
          {
            std::copy_n(y0, ny, m->y());
            std::copy_n(y + T * ny, 2 * ny, m->y() + ny);
          }
        else // Special case: periods=1 (and so T=0)
          {
            std::copy_n(y0, ny, m->y());
            std::copy_n(y, ny, m->y() + ny);
            std::copy_n(yT, ny, m->y() + 2 * ny);
          }

        // Fill exogenous
        for (mwIndex j {0}; j < nx; j++)
          m->x()[j] = exo_path[T + maximum_lag + nb_row_x * j];

        // Compute the residual and Jacobian, and fill the stacked residual
        m->eval(stacked_residual + T * ny);

        if (compute_jacobian)
          {
            // Fill the stacked jacobian
            for (mwIndex col {T > 1 ? (T - 1) * ny : 0}; // We can't use std::max() here, because
                                                         // mwIndex is unsigned under MATLAB
                 col < std::min(periods * ny, (T + 2) * ny); col++)
              {
                mwIndex k = jc[col];
                while (k < jc[col + 1])
                  {
                    if (ir[k] >= (T + 1) * ny)
                      break; // Nothing to copy for this column

                    if (ir[k] >= T * ny)
                      {
                        /* Within the current column, this is the first line of the stacked
                           Jacobian that contains elements from the (small) Jacobian just
                           computed, so copy the whole column of the latter to the former. */
                        m->copy_jacobian_column(col - (T - 1) * ny, stacked_jacobian + k);
                        break;
                      }

                    k++;
                  }
              }
          }
      }
  }

  /* Mimic a try/catch using a global string, since exceptions are not allowed to cross the
     OpenMP boundary */
  if (!DynamicModelCaller::error_msg.empty())
    mexErrMsgTxt(DynamicModelCaller::error_msg.c_str());

  if (compute_jacobian)
    {
      /* The stacked jacobian so far constructed can still reference some zero elements, because
         some expressions that are symbolically different from zero may still evaluate to zero for
         some endogenous/parameter values. The following code further compresses the Jacobian by
         removing the references to those extra zeros. This makes a significant speed difference
         when inverting the Jacobian for some large models. */
      mwIndex k_orig = 0, k_new = 0;
      for (mwIndex col = 0; col < periods * ny; col++)
        {
          while (k_orig < jc[col + 1])
            {
              if (stacked_jacobian[k_orig] != 0.0)
                {
                  if (k_new != k_orig)
                    {
                      stacked_jacobian[k_new] = stacked_jacobian[k_orig];
                      ir[k_new] = ir[k_orig];
                    }
                  k_new++;
                }
              k_orig++;
            }
          jc[col + 1] = k_new;
        }
    }

  if (use_dll)
    DynamicModelDllCaller::unload_dll();
}