308 lines
12 KiB
C++
308 lines
12 KiB
C++
/*
|
||
* Copyright © 2010-2022 Dynare Team
|
||
*
|
||
* This file is part of Dynare.
|
||
*
|
||
* Dynare is free software: you can redistribute it and/or modify
|
||
* it under the terms of the GNU General Public License as published by
|
||
* the Free Software Foundation, either version 3 of the License, or
|
||
* (at your option) any later version.
|
||
*
|
||
* Dynare is distributed in the hope that it will be useful,
|
||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
* GNU General Public License for more details.
|
||
*
|
||
* You should have received a copy of the GNU General Public License
|
||
* along with Dynare. If not, see <https://www.gnu.org/licenses/>.
|
||
*/
|
||
|
||
/*
|
||
* This mex file computes particles at time t+1 given particles and innovations at time t,
|
||
* using a second order approximation of the nonlinear state space model.
|
||
*/
|
||
|
||
#include <vector>
|
||
#include <algorithm>
|
||
#include <tuple>
|
||
#include <string>
|
||
|
||
#include <dynmex.h>
|
||
#include <dynblas.h>
|
||
|
||
#include <omp.h>
|
||
|
||
/*
|
||
Uncomment the following line to use BLAS instead of loops when computing
|
||
ghx·ŷ and ghu·ε.
|
||
N.B.: Under MATLAB, this only works in single-threaded mode, otherwise one
|
||
gets a crash (because of the incompatibility between Intel and GNU OpenMPs).
|
||
*/
|
||
//#define USE_BLAS_AT_FIRST_ORDER
|
||
|
||
std::tuple<std::vector<int>, std::vector<int>, std::vector<int>>
|
||
set_vector_of_indices(int n, int r)
|
||
{
|
||
int m = n*(n+1)/2;
|
||
std::vector<int> v1(m, 0), v2(m, 0), v3(m, 0);
|
||
for (int i = 0, index = 0, jndex = 0; i < n; i++)
|
||
{
|
||
jndex += i;
|
||
for (int j = i; j < n; j++, index++, jndex++)
|
||
{
|
||
v1[index] = i;
|
||
v2[index] = j;
|
||
v3[index] = jndex*r;
|
||
}
|
||
}
|
||
return { v1, v2, v3 };
|
||
}
|
||
|
||
void
|
||
ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *yhat1, const double *epsilon,
|
||
const double *ghx, const double *ghu,
|
||
const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, const double *ss,
|
||
blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads)
|
||
{
|
||
#ifdef USE_BLAS_AT_FIRST_ORDER
|
||
const double one = 1.0;
|
||
const blas_int ONE = 1;
|
||
#endif
|
||
auto [ii1, ii2, ii3] = set_vector_of_indices(n, m); // vector indices for ghxx
|
||
auto [jj1, jj2, jj3] = set_vector_of_indices(q, m); // vector indices for ghuu
|
||
#pragma omp parallel for num_threads(number_of_threads)
|
||
for (int particle = 0; particle < s; particle++)
|
||
{
|
||
int particle_ = particle*m;
|
||
int particle__ = particle*n;
|
||
int particle___ = particle*q;
|
||
std::copy_n(constant, m, &y2[particle_]);
|
||
std::copy_n(ss, m, &y1[particle_]);
|
||
#ifdef USE_BLAS_AT_FIRST_ORDER
|
||
dgemv("N", &m, &n, &one, ghx, &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE);
|
||
dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE);
|
||
#endif
|
||
for (int variable = 0; variable < m; variable++)
|
||
{
|
||
int variable_ = variable + particle_;
|
||
// +ghx·ŷ₂+ghu·ε
|
||
#ifndef USE_BLAS_AT_FIRST_ORDER
|
||
for (int column = 0, column_ = 0; column < n; column++, column_ += m)
|
||
y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
|
||
for (int column = 0, column_ = 0; column < q; column++, column_ += m)
|
||
y2[variable_] += ghu[variable+column_]*epsilon[column+particle___];
|
||
#endif
|
||
// +½ghxx·ŷ₁⊗ŷ₁
|
||
for (int i = 0; i < n*(n+1)/2; i++)
|
||
{
|
||
int i1 = particle__+ii1[i];
|
||
int i2 = particle__+ii2[i];
|
||
if (i1 == i2)
|
||
y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1];
|
||
else
|
||
y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2];
|
||
}
|
||
// +½ghuu·ε⊗ε
|
||
for (int j = 0; j < q*(q+1)/2; j++)
|
||
{
|
||
int j1 = particle___+jj1[j];
|
||
int j2 = particle___+jj2[j];
|
||
if (j1 == j2)
|
||
y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
|
||
else
|
||
y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
|
||
}
|
||
// +ghxu·ŷ₁⊗ε
|
||
for (int v = particle__, i = 0; v < particle__+n; v++)
|
||
for (int s = particle___; s < particle___+q; s++, i += m)
|
||
y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat1[v];
|
||
#ifndef USE_BLAS_AT_FIRST_ORDER
|
||
for (int column = 0, column_ = 0; column < q; column++, column_ += m)
|
||
{
|
||
int i1 = variable+column_;
|
||
int i2 = column+particle__;
|
||
int i3 = column+particle___;
|
||
y1[variable_] += ghx[i1]*yhat1[i2];
|
||
y1[variable_] += ghu[i1]*epsilon[i3];
|
||
}
|
||
for (int column = q, column_ = q*m; column < n; column++, column_ += m)
|
||
y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
|
||
#endif
|
||
}
|
||
#ifdef USE_BLAS_AT_FIRST_ORDER
|
||
dgemv("N", &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE);
|
||
dgemv("N", &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE);
|
||
#endif
|
||
}
|
||
}
|
||
|
||
void
|
||
ss2Iteration(double *y, const double *yhat, const double *epsilon,
|
||
const double *ghx, const double *ghu,
|
||
const double *constant, const double *ghxx, const double *ghuu, const double *ghxu,
|
||
blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads)
|
||
{
|
||
#ifdef USE_BLAS_AT_FIRST_ORDER
|
||
const double one = 1.0;
|
||
const blas_int ONE = 1;
|
||
#endif
|
||
auto [ii1, ii2, ii3] = set_vector_of_indices(n, m); // vector indices for ghxx
|
||
auto [jj1, jj2, jj3] = set_vector_of_indices(q, m); // vector indices for ghuu
|
||
#pragma omp parallel for num_threads(number_of_threads)
|
||
for (int particle = 0; particle < s; particle++)
|
||
{
|
||
int particle_ = particle*m;
|
||
int particle__ = particle*n;
|
||
int particle___ = particle*q;
|
||
std::copy_n(constant, m, &y[particle_]);
|
||
#ifdef USE_BLAS_AT_FIRST_ORDER
|
||
dgemv("N", &m, &n, &one, ghx, &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE);
|
||
dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE);
|
||
#endif
|
||
for (int variable = 0; variable < m; variable++)
|
||
{
|
||
int variable_ = variable + particle_;
|
||
// +ghx·ŷ+ghu·ε
|
||
#ifndef USE_BLAS_AT_FIRST_ORDER
|
||
for (int column = 0, column_ = 0; column < n; column++, column_ += m)
|
||
y[variable_] += ghx[variable+column_]*yhat[column+particle__];
|
||
for (int column = 0, column_ = 0; column < q; column++, column_ += m)
|
||
y[variable_] += ghu[variable+column_]*epsilon[column+particle___];
|
||
#endif
|
||
// +½ghxx·ŷ⊗ŷ
|
||
for (int i = 0; i < n*(n+1)/2; i++)
|
||
{
|
||
int i1 = particle__+ii1[i];
|
||
int i2 = particle__+ii2[i];
|
||
if (i1 == i2)
|
||
y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1];
|
||
else
|
||
y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2];
|
||
}
|
||
// +½ghuu·ε⊗ε
|
||
for (int j = 0; j < q*(q+1)/2; j++)
|
||
{
|
||
int j1 = particle___+jj1[j];
|
||
int j2 = particle___+jj2[j];
|
||
if (j1 == j2)
|
||
y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
|
||
else
|
||
y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
|
||
}
|
||
// +ghxu·ŷ⊗ε
|
||
for (int v = particle__, i = 0; v < particle__+n; v++)
|
||
for (int s = particle___; s < particle___+q; s++, i += m)
|
||
y[variable_] += ghxu[variable+i]*epsilon[s]*yhat[v];
|
||
}
|
||
}
|
||
}
|
||
|
||
void
|
||
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
|
||
{
|
||
/*
|
||
prhs[0] yhat [double] n×s array, time t particles.
|
||
prhs[1] epsilon [double] q×s array, time t innovations.
|
||
prhs[2] ghx [double] m×n array, first order reduced form.
|
||
prhs[3] ghu [double] m×q array, first order reduced form.
|
||
prhs[4] constant [double] m×1 array, deterministic steady state + second order correction for the union of the states and observed variables.
|
||
prhs[5] ghxx [double] m×n² array, second order reduced form.
|
||
prhs[6] ghuu [double] m×q² array, second order reduced form.
|
||
prhs[7] ghxu [double] m×nq array, second order reduced form.
|
||
prhs[8] yhat_ [double] [OPTIONAL] n×s array, time t particles (pruning additional latent variables).
|
||
prhs[9] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
|
||
|
||
prhs[8 or 10] [double] num of threads
|
||
|
||
plhs[0] y [double] m×s array, time t+1 particles.
|
||
plhs[1] y_ [double] m×s array, time t+1 particles for the pruning latent variables.
|
||
*/
|
||
|
||
// Check the number of input and output.
|
||
if (nrhs != 9 && nrhs != 11)
|
||
mexErrMsgTxt("Nine or eleven input arguments are required.");
|
||
|
||
if (nlhs > 2)
|
||
mexErrMsgTxt("Too many output arguments.");
|
||
|
||
auto check_input_real_dense_array = [=](int i)
|
||
{
|
||
if (!mxIsDouble(prhs[i]) || mxIsComplex(prhs[i]) || mxIsSparse(prhs[i]))
|
||
mexErrMsgTxt(("Input argument " + std::to_string(i+1) + " should be a real dense array").c_str());
|
||
};
|
||
|
||
for (int i = 0; i < 8; i++)
|
||
check_input_real_dense_array(i);
|
||
|
||
// Get dimensions.
|
||
size_t n = mxGetM(prhs[0]); // Number of states.
|
||
size_t s = mxGetN(prhs[0]); // Number of particles.
|
||
size_t q = mxGetM(prhs[1]); // Number of innovations.
|
||
size_t m = mxGetM(prhs[2]); // Number of elements in the union of states and observed variables.
|
||
// Check the dimensions.
|
||
if (s != mxGetN(prhs[1]) // Number of columns for epsilon
|
||
|| n != mxGetN(prhs[2]) // Number of columns for ghx
|
||
|| m != mxGetM(prhs[3]) // Number of rows for ghu
|
||
|| q != mxGetN(prhs[3]) // Number of columns for ghu
|
||
|| m != mxGetM(prhs[4]) // Number of rows for 2nd order constant correction + deterministic steady state
|
||
|| m != mxGetM(prhs[5]) // Number of rows for ghxx
|
||
|| n*n != mxGetN(prhs[5]) // Number of columns for ghxx
|
||
|| m != mxGetM(prhs[6]) // Number of rows for ghuu
|
||
|| q*q != mxGetN(prhs[6]) // Number of columns for ghuu
|
||
|| m != mxGetM(prhs[7]) // Number of rows for ghxu
|
||
|| n*q != mxGetN(prhs[7])) // Number of rows for ghxu
|
||
mexErrMsgTxt("Input dimension mismatch!.");
|
||
if (nrhs > 9)
|
||
{
|
||
for (int i = 8; i < 10; i++)
|
||
check_input_real_dense_array(i);
|
||
|
||
if (n != mxGetM(prhs[8]) // Number of rows for yhat_
|
||
|| s != mxGetN(prhs[8]) // Number of columns for yhat_
|
||
|| m != mxGetM(prhs[9])) // Number of rows for ss
|
||
mexErrMsgTxt("Input dimension mismatch!.");
|
||
}
|
||
|
||
// Get Input arrays.
|
||
const double *yhat = mxGetPr(prhs[0]);
|
||
const double *epsilon = mxGetPr(prhs[1]);
|
||
const double *ghx = mxGetPr(prhs[2]);
|
||
const double *ghu = mxGetPr(prhs[3]);
|
||
const double *constant = mxGetPr(prhs[4]);
|
||
const double *ghxx = mxGetPr(prhs[5]);
|
||
const double *ghuu = mxGetPr(prhs[6]);
|
||
const double *ghxu = mxGetPr(prhs[7]);
|
||
const double *yhat_ = nullptr, *ss = nullptr;
|
||
if (nrhs > 9)
|
||
{
|
||
yhat_ = mxGetPr(prhs[8]);
|
||
ss = mxGetPr(prhs[9]);
|
||
}
|
||
|
||
const mxArray *numthreads_mx = prhs[nrhs == 9 ? 8 : 10];
|
||
if (!(mxIsScalar(numthreads_mx) && mxIsNumeric(numthreads_mx)))
|
||
mexErrMsgTxt("Last argument should be a numeric scalar");
|
||
int numthreads = static_cast<int>(mxGetScalar(numthreads_mx));
|
||
if (numthreads <= 0)
|
||
mexErrMsgTxt("Last argument should be a positive integer");
|
||
|
||
#if defined(USE_BLAS_AT_FIRST_ORDER) && defined(MATLAB_MEX_FILE)
|
||
if (numthreads != 1)
|
||
mexErrMsgTxt("Parallelization is not possible when compiled with USE_BLAS_AT_FIRST_ORDER.");
|
||
#endif
|
||
if (nrhs == 9)
|
||
{
|
||
plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
|
||
double *y = mxGetPr(plhs[0]);
|
||
ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads);
|
||
}
|
||
else
|
||
{
|
||
plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
|
||
plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL);
|
||
double *y = mxGetPr(plhs[0]);
|
||
double *y_ = mxGetPr(plhs[1]);
|
||
ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads);
|
||
}
|
||
}
|