/* * Copyright © 2010-2022 Dynare Team * * This file is part of Dynare. * * Dynare is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Dynare is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Dynare. If not, see . */ /* * This mex file computes particles at time t+1 given particles and innovations at time t, * using a second order approximation of the nonlinear state space model. */ #include #include #include #include #include #include #include /* Uncomment the following line to use BLAS instead of loops when computing ghx·ŷ and ghu·ε. N.B.: Under MATLAB, this only works in single-threaded mode, otherwise one gets a crash (because of the incompatibility between Intel and GNU OpenMPs). */ //#define USE_BLAS_AT_FIRST_ORDER std::tuple, std::vector, std::vector> set_vector_of_indices(int n, int r) { int m = n*(n+1)/2; std::vector v1(m, 0), v2(m, 0), v3(m, 0); for (int i = 0, index = 0, jndex = 0; i < n; i++) { jndex += i; for (int j = i; j < n; j++, index++, jndex++) { v1[index] = i; v2[index] = j; v3[index] = jndex*r; } } return { v1, v2, v3 }; } void ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *yhat1, const double *epsilon, const double *ghx, const double *ghu, const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, const double *ss, blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads) { #ifdef USE_BLAS_AT_FIRST_ORDER const double one = 1.0; const blas_int ONE = 1; #endif auto [ii1, ii2, ii3] = set_vector_of_indices(n, m); // vector indices for ghxx auto [jj1, jj2, jj3] = set_vector_of_indices(q, m); // vector indices for ghuu #pragma omp parallel for num_threads(number_of_threads) for (int particle = 0; particle < s; particle++) { int particle_ = particle*m; int particle__ = particle*n; int particle___ = particle*q; std::copy_n(constant, m, &y2[particle_]); std::copy_n(ss, m, &y1[particle_]); #ifdef USE_BLAS_AT_FIRST_ORDER dgemv("N", &m, &n, &one, ghx, &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE); dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE); #endif for (int variable = 0; variable < m; variable++) { int variable_ = variable + particle_; // +ghx·ŷ₂+ghu·ε #ifndef USE_BLAS_AT_FIRST_ORDER for (int column = 0, column_ = 0; column < n; column++, column_ += m) y2[variable_] += ghx[variable+column_]*yhat2[column+particle__]; for (int column = 0, column_ = 0; column < q; column++, column_ += m) y2[variable_] += ghu[variable+column_]*epsilon[column+particle___]; #endif // +½ghxx·ŷ₁⊗ŷ₁ for (int i = 0; i < n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if (i1 == i2) y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1]; else y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2]; } // +½ghuu·ε⊗ε for (int j = 0; j < q*(q+1)/2; j++) { int j1 = particle___+jj1[j]; int j2 = particle___+jj2[j]; if (j1 == j2) y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; else y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; } // +ghxu·ŷ₁⊗ε for (int v = particle__, i = 0; v < particle__+n; v++) for (int s = particle___; s < particle___+q; s++, i += m) y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat1[v]; #ifndef USE_BLAS_AT_FIRST_ORDER for (int column = 0, column_ = 0; column < q; column++, column_ += m) { int i1 = variable+column_; int i2 = column+particle__; int i3 = column+particle___; y1[variable_] += ghx[i1]*yhat1[i2]; y1[variable_] += ghu[i1]*epsilon[i3]; } for (int column = q, column_ = q*m; column < n; column++, column_ += m) y1[variable_] += ghx[variable+column_]*yhat1[column+particle__]; #endif } #ifdef USE_BLAS_AT_FIRST_ORDER dgemv("N", &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE); dgemv("N", &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE); #endif } } void ss2Iteration(double *y, const double *yhat, const double *epsilon, const double *ghx, const double *ghu, const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads) { #ifdef USE_BLAS_AT_FIRST_ORDER const double one = 1.0; const blas_int ONE = 1; #endif auto [ii1, ii2, ii3] = set_vector_of_indices(n, m); // vector indices for ghxx auto [jj1, jj2, jj3] = set_vector_of_indices(q, m); // vector indices for ghuu #pragma omp parallel for num_threads(number_of_threads) for (int particle = 0; particle < s; particle++) { int particle_ = particle*m; int particle__ = particle*n; int particle___ = particle*q; std::copy_n(constant, m, &y[particle_]); #ifdef USE_BLAS_AT_FIRST_ORDER dgemv("N", &m, &n, &one, ghx, &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE); dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE); #endif for (int variable = 0; variable < m; variable++) { int variable_ = variable + particle_; // +ghx·ŷ+ghu·ε #ifndef USE_BLAS_AT_FIRST_ORDER for (int column = 0, column_ = 0; column < n; column++, column_ += m) y[variable_] += ghx[variable+column_]*yhat[column+particle__]; for (int column = 0, column_ = 0; column < q; column++, column_ += m) y[variable_] += ghu[variable+column_]*epsilon[column+particle___]; #endif // +½ghxx·ŷ⊗ŷ for (int i = 0; i < n*(n+1)/2; i++) { int i1 = particle__+ii1[i]; int i2 = particle__+ii2[i]; if (i1 == i2) y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1]; else y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2]; } // +½ghuu·ε⊗ε for (int j = 0; j < q*(q+1)/2; j++) { int j1 = particle___+jj1[j]; int j2 = particle___+jj2[j]; if (j1 == j2) y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1]; else y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2]; } // +ghxu·ŷ⊗ε for (int v = particle__, i = 0; v < particle__+n; v++) for (int s = particle___; s < particle___+q; s++, i += m) y[variable_] += ghxu[variable+i]*epsilon[s]*yhat[v]; } } } void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { /* prhs[0] yhat [double] n×s array, time t particles. prhs[1] epsilon [double] q×s array, time t innovations. prhs[2] ghx [double] m×n array, first order reduced form. prhs[3] ghu [double] m×q array, first order reduced form. prhs[4] constant [double] m×1 array, deterministic steady state + second order correction for the union of the states and observed variables. prhs[5] ghxx [double] m×n² array, second order reduced form. prhs[6] ghuu [double] m×q² array, second order reduced form. prhs[7] ghxu [double] m×nq array, second order reduced form. prhs[8] yhat_ [double] [OPTIONAL] n×s array, time t particles (pruning additional latent variables). prhs[9] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode). prhs[8 or 10] [double] num of threads plhs[0] y [double] m×s array, time t+1 particles. plhs[1] y_ [double] m×s array, time t+1 particles for the pruning latent variables. */ // Check the number of input and output. if (nrhs != 9 && nrhs != 11) mexErrMsgTxt("Nine or eleven input arguments are required."); if (nlhs > 2) mexErrMsgTxt("Too many output arguments."); auto check_input_real_dense_array = [=](int i) { if (!mxIsDouble(prhs[i]) || mxIsComplex(prhs[i]) || mxIsSparse(prhs[i])) mexErrMsgTxt(("Input argument " + std::to_string(i+1) + " should be a real dense array").c_str()); }; for (int i = 0; i < 8; i++) check_input_real_dense_array(i); // Get dimensions. size_t n = mxGetM(prhs[0]); // Number of states. size_t s = mxGetN(prhs[0]); // Number of particles. size_t q = mxGetM(prhs[1]); // Number of innovations. size_t m = mxGetM(prhs[2]); // Number of elements in the union of states and observed variables. // Check the dimensions. if (s != mxGetN(prhs[1]) // Number of columns for epsilon || n != mxGetN(prhs[2]) // Number of columns for ghx || m != mxGetM(prhs[3]) // Number of rows for ghu || q != mxGetN(prhs[3]) // Number of columns for ghu || m != mxGetM(prhs[4]) // Number of rows for 2nd order constant correction + deterministic steady state || m != mxGetM(prhs[5]) // Number of rows for ghxx || n*n != mxGetN(prhs[5]) // Number of columns for ghxx || m != mxGetM(prhs[6]) // Number of rows for ghuu || q*q != mxGetN(prhs[6]) // Number of columns for ghuu || m != mxGetM(prhs[7]) // Number of rows for ghxu || n*q != mxGetN(prhs[7])) // Number of rows for ghxu mexErrMsgTxt("Input dimension mismatch!."); if (nrhs > 9) { for (int i = 8; i < 10; i++) check_input_real_dense_array(i); if (n != mxGetM(prhs[8]) // Number of rows for yhat_ || s != mxGetN(prhs[8]) // Number of columns for yhat_ || m != mxGetM(prhs[9])) // Number of rows for ss mexErrMsgTxt("Input dimension mismatch!."); } // Get Input arrays. const double *yhat = mxGetPr(prhs[0]); const double *epsilon = mxGetPr(prhs[1]); const double *ghx = mxGetPr(prhs[2]); const double *ghu = mxGetPr(prhs[3]); const double *constant = mxGetPr(prhs[4]); const double *ghxx = mxGetPr(prhs[5]); const double *ghuu = mxGetPr(prhs[6]); const double *ghxu = mxGetPr(prhs[7]); const double *yhat_ = nullptr, *ss = nullptr; if (nrhs > 9) { yhat_ = mxGetPr(prhs[8]); ss = mxGetPr(prhs[9]); } const mxArray *numthreads_mx = prhs[nrhs == 9 ? 8 : 10]; if (!(mxIsScalar(numthreads_mx) && mxIsNumeric(numthreads_mx))) mexErrMsgTxt("Last argument should be a numeric scalar"); int numthreads = static_cast(mxGetScalar(numthreads_mx)); if (numthreads <= 0) mexErrMsgTxt("Last argument should be a positive integer"); #if defined(USE_BLAS_AT_FIRST_ORDER) && defined(MATLAB_MEX_FILE) if (numthreads != 1) mexErrMsgTxt("Parallelization is not possible when compiled with USE_BLAS_AT_FIRST_ORDER."); #endif if (nrhs == 9) { plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); double *y = mxGetPr(plhs[0]); ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, static_cast(m), static_cast(n), static_cast(q), static_cast(s), numthreads); } else { plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL); plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL); double *y = mxGetPr(plhs[0]); double *y_ = mxGetPr(plhs[1]); ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, static_cast(m), static_cast(n), static_cast(q), static_cast(s), numthreads); } }