local state space iterations 2 DLL: various modernizations and simplifications

time-shift
Sébastien Villemot 2019-04-30 15:22:19 +02:00
parent 1199d4abae
commit faa3185bdf
No known key found for this signature in database
GPG Key ID: 2CECE9350ECEBE4A
1 changed files with 88 additions and 128 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright © 2010-2017 Dynare Team
* Copyright © 2010-2019 Dynare Team
*
* This file is part of Dynare.
*
@ -22,9 +22,9 @@
* using a second order approximation of the nonlinear state space model.
*/
#include <iostream>
#include <cstring>
#include <vector>
#include <algorithm>
#include <dynmex.h>
#include <dynblas.h>
@ -32,14 +32,12 @@
# include <omp.h>
#endif
using namespace std;
#define FIRST_ORDER_LOOP 1// Comment out this line to use mkl-blas instead of loops when computing ghx*yhat and ghu*epsilon
void
set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2, vector<int> &v3)
set_vector_of_indices(int n, int r, std::vector<int> &v1, std::vector<int> &v2, std::vector<int> &v3)
{
const int m = n*(n+1)/2;
int m = n*(n+1)/2;
v1.resize(m, 0);
v2.resize(m, 0);
v3.resize(m, 0);
@ -57,17 +55,16 @@ set_vector_of_indices(const int n, const int r, vector<int> &v1, vector<int> &v2
void
ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *yhat1, const double *epsilon,
double *ghx, double *ghu,
const double *ghx, const double *ghu,
const double *constant, const double *ghxx, const double *ghuu, const double *ghxu, const double *ss,
const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads)
{
#ifndef FIRST_ORDER_LOOP
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#endif
vector<int> ii1, ii2, ii3;// vector indices for ghxx
vector<int> jj1, jj2, jj3;// vector indices for ghuu
std::vector<int> ii1, ii2, ii3;// vector indices for ghxx
std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
#ifdef USE_OMP
@ -78,16 +75,16 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *
int particle_ = particle*m;
int particle__ = particle*n;
int particle___ = particle*q;
memcpy(&y2[particle_], &constant[0], m*sizeof(double));
memcpy(&y1[particle_], &ss[0], m*sizeof(double));
std::copy_n(constant, m, &y2[particle_]);
std::copy_n(ss, m, &y1[particle_]);
#ifndef FIRST_ORDER_LOOP
dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE);
dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE);
dgemv("N", &m, &n, &one, ghx, &m, &yhat2[particle__], &ONE, &one, &y2[particle_], &ONE);
dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y2[particle_], &ONE);
#endif
for (int variable = 0; variable < m; variable++)
{
int variable_ = variable + particle_;
// +ghx*yhat2+ghu*u
// +ghx·yhat2+ghu·u
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_ = 0; column < q; column++, column_ += m)
{
@ -98,39 +95,29 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *
y2[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_ = q*m; column < n; column++, column_ += m)
{
y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
}
y2[variable_] += ghx[variable+column_]*yhat2[column+particle__];
#endif
// +ghxx*kron(yhat1,yhat1)
// +ghxx·yhat1⊗yhat1
for (int i = 0; i < n*(n+1)/2; i++)
{
int i1 = particle__+ii1[i];
int i2 = particle__+ii2[i];
if (i1 == i2)
{
y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1];
}
y2[variable_] += .5*ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i1];
else
{
y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2];
}
y2[variable_] += ghxx[variable+ii3[i]]*yhat1[i1]*yhat1[i2];
}
// +ghuu*kron(u,u)
// +ghuu·u⊗u
for (int j = 0; j < q*(q+1)/2; j++)
{
int j1 = particle___+jj1[j];
int j2 = particle___+jj2[j];
if (j1 == j2)
{
y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
}
y2[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
else
{
y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
}
y2[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
}
// +ghxu*kron(yhat1,u)
// +ghxu·yhat1⊗u
for (int v = particle__, i = 0; v < particle__+n; v++)
for (int s = particle___; s < particle___+q; s++, i += m)
y2[variable_] += ghxu[variable+i]*epsilon[s]*yhat2[v];
@ -144,31 +131,28 @@ ss2Iteration_pruning(double *y2, double *y1, const double *yhat2, const double *
y1[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_ = q*m; column < n; column++, column_ += m)
{
y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
}
y1[variable_] += ghx[variable+column_]*yhat1[column+particle__];
#endif
}
#ifndef FIRST_ORDER_LOOP
dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE);
dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE);
dgemv("N", &m, &n, &one, &ghx[0], &m, &yhat1[particle__], &ONE, &one, &y1[particle_], &ONE);
dgemv("N", &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y1[particle_], &ONE);
#endif
}
}
void
ss2Iteration(double *y, const double *yhat, const double *epsilon,
double *ghx, double *ghu,
const double *ghx, const double *ghu,
const double *constant, const double *ghxx, const double *ghuu, const double *ghxu,
const blas_int m, const blas_int n, const blas_int q, const blas_int s, const int number_of_threads)
blas_int m, blas_int n, blas_int q, blas_int s, int number_of_threads)
{
#ifndef FIRST_ORDER_LOOP
const char transpose[2] = "N";
const double one = 1.0;
const blas_int ONE = 1;
#endif
vector<int> ii1, ii2, ii3;// vector indices for ghxx
vector<int> jj1, jj2, jj3;// vector indices for ghuu
std::vector<int> ii1, ii2, ii3;// vector indices for ghxx
std::vector<int> jj1, jj2, jj3;// vector indices for ghuu
set_vector_of_indices(n, m, ii1, ii2, ii3);
set_vector_of_indices(q, m, jj1, jj2, jj3);
#ifdef USE_OMP
@ -179,15 +163,15 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon,
int particle_ = particle*m;
int particle__ = particle*n;
int particle___ = particle*q;
memcpy(&y[particle_], &constant[0], m*sizeof(double));
std::copy_n(constant, m, &y[particle_]);
#ifndef FIRST_ORDER_LOOP
dgemv(transpose, &m, &n, &one, &ghx[0], &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE);
dgemv(transpose, &m, &q, &one, &ghu[0], &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE);
dgemv("N", &m, &n, &one, ghx, &m, &yhat[particle__], &ONE, &one, &y[particle_], &ONE);
dgemv("N", &m, &q, &one, ghu, &m, &epsilon[particle___], &ONE, &one, &y[particle_], &ONE);
#endif
for (int variable = 0; variable < m; variable++)
{
int variable_ = variable + particle_;
// +ghx*yhat+ghu*u
// +ghx·yhat+ghu·u
#ifdef FIRST_ORDER_LOOP
for (int column = 0, column_ = 0; column < q; column++, column_ += m)
{
@ -198,39 +182,29 @@ ss2Iteration(double *y, const double *yhat, const double *epsilon,
y[variable_] += ghu[i1]*epsilon[i3];
}
for (int column = q, column_ = q*m; column < n; column++, column_ += m)
{
y[variable_] += ghx[variable+column_]*yhat[column+particle__];
}
y[variable_] += ghx[variable+column_]*yhat[column+particle__];
#endif
// +ghxx*kron(yhat,yhat)
// +ghxx·yhat⊗yhat
for (int i = 0; i < n*(n+1)/2; i++)
{
int i1 = particle__+ii1[i];
int i2 = particle__+ii2[i];
if (i1 == i2)
{
y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1];
}
y[variable_] += .5*ghxx[variable+ii3[i]]*yhat[i1]*yhat[i1];
else
{
y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2];
}
y[variable_] += ghxx[variable+ii3[i]]*yhat[i1]*yhat[i2];
}
// +ghuu*kron(u,u)
// +ghuu·u⊗u
for (int j = 0; j < q*(q+1)/2; j++)
{
int j1 = particle___+jj1[j];
int j2 = particle___+jj2[j];
if (j1 == j2)
{
y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
}
y[variable_] += .5*ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j1];
else
{
y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
}
y[variable_] += ghuu[variable+jj3[j]]*epsilon[j1]*epsilon[j2];
}
// +ghxu*kron(yhat,u)
// +ghxu·yhat⊗u
for (int v = particle__, i = 0; v < particle__+n; v++)
for (int s = particle___; s < particle___+q; s++, i += m)
y[variable_] += ghxu[variable+i]*epsilon[s]*yhat[v];
@ -242,31 +216,28 @@ void
mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
/*
** prhs[0] yhat [double] n*s array, time t particles.
** prhs[1] epsilon [double] q*s array, time t innovations.
** prhs[2] ghx [double] m*n array, first order reduced form.
** prhs[3] ghu [double] m*q array, first order reduced form.
** prhs[4] constant [double] m*1 array, deterministic steady state + second order correction for the union of the states and observed variables.
** prhs[5] ghxx [double] m*n^2 array, second order reduced form.
** prhs[6] ghuu [double] m*q^2 array, second order reduced form.
** prhs[7] ghxu [double] m*nq array, second order reduced form.
** prhs[8] yhat_ [double] [OPTIONAL] n*s array, time t particles (pruning additional latent variables).
** prhs[9] ss [double] [OPTIONAL] m*1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
**
** plhs[0] y [double] n*s array, time t+1 particles.
** plhs[1] y_ [double] n*s array, time t+1 particles for the pruning latent variables.
**
prhs[0] yhat [double] n×s array, time t particles.
prhs[1] epsilon [double] q×s array, time t innovations.
prhs[2] ghx [double] m×n array, first order reduced form.
prhs[3] ghu [double] m×q array, first order reduced form.
prhs[4] constant [double] m×1 array, deterministic steady state + second order correction for the union of the states and observed variables.
prhs[5] ghxx [double] m×n² array, second order reduced form.
prhs[6] ghuu [double] m×q² array, second order reduced form.
prhs[7] ghxu [double] m×nq array, second order reduced form.
prhs[8] yhat_ [double] [OPTIONAL] n×s array, time t particles (pruning additional latent variables).
prhs[9] ss [double] [OPTIONAL] m×1 array, steady state for the union of the states and the observed variables (needed for the pruning mode).
plhs[0] y [double] n×s array, time t+1 particles.
plhs[1] y_ [double] n×s array, time t+1 particles for the pruning latent variables.
*/
// Check the number of input and output.
if ((nrhs != 9) && (nrhs != 11))
{
mexErrMsgTxt("Eight or ten input arguments are required.");
}
if (nrhs != 9 && nrhs != 11)
mexErrMsgTxt("Eight or ten input arguments are required.");
if (nlhs > 2)
{
mexErrMsgTxt("Too many output arguments.");
}
mexErrMsgTxt("Too many output arguments.");
// Get dimensions.
size_t n = mxGetM(prhs[0]);// Number of states.
size_t s = mxGetN(prhs[0]);// Number of particles.
@ -275,43 +246,34 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
//mexPrintf("\n s (the number of column of yhat) is equal to %d.", s);
//mexPrintf("\n The number of column of epsilon is %d.", mxGetN(prhs[1]));
// Check the dimensions.
if (
(s != mxGetN(prhs[1])) // Number of columns for epsilon
|| (n != mxGetN(prhs[2])) // Number of columns for ghx
|| (m != mxGetM(prhs[3])) // Number of rows for ghu
|| (q != mxGetN(prhs[3])) // Number of columns for ghu
|| (m != mxGetM(prhs[4])) // Number of rows for 2nd order constant correction + deterministic steady state
|| (m != mxGetM(prhs[5])) // Number of rows for ghxx
|| (n*n != mxGetN(prhs[5])) // Number of columns for ghxx
|| (m != mxGetM(prhs[6])) // Number of rows for ghuu
|| (q*q != mxGetN(prhs[6])) // Number of columns for ghuu
|| (m != mxGetM(prhs[7])) // Number of rows for ghxu
|| (n*q != mxGetN(prhs[7])) // Number of rows for ghxu
)
{
mexErrMsgTxt("Input dimension mismatch!.");
}
if (s != mxGetN(prhs[1]) // Number of columns for epsilon
|| n != mxGetN(prhs[2]) // Number of columns for ghx
|| m != mxGetM(prhs[3]) // Number of rows for ghu
|| q != mxGetN(prhs[3]) // Number of columns for ghu
|| m != mxGetM(prhs[4]) // Number of rows for 2nd order constant correction + deterministic steady state
|| m != mxGetM(prhs[5]) // Number of rows for ghxx
|| n*n != mxGetN(prhs[5]) // Number of columns for ghxx
|| m != mxGetM(prhs[6]) // Number of rows for ghuu
|| q*q != mxGetN(prhs[6]) // Number of columns for ghuu
|| m != mxGetM(prhs[7]) // Number of rows for ghxu
|| n*q != mxGetN(prhs[7])) // Number of rows for ghxu
mexErrMsgTxt("Input dimension mismatch!.");
if (nrhs > 9)
{
if (
(n != mxGetM(prhs[8])) // Number of rows for yhat_
|| (s != mxGetN(prhs[8])) // Number of columns for yhat_
|| (m != mxGetM(prhs[9])) // Number of rows for ss
)
{
mexErrMsgTxt("Input dimension mismatch!.");
}
}
if (n != mxGetM(prhs[8]) // Number of rows for yhat_
|| s != mxGetN(prhs[8]) // Number of columns for yhat_
|| m != mxGetM(prhs[9])) // Number of rows for ss
mexErrMsgTxt("Input dimension mismatch!.");
// Get Input arrays.
double *yhat = mxGetPr(prhs[0]);
double *epsilon = mxGetPr(prhs[1]);
double *ghx = mxGetPr(prhs[2]);
double *ghu = mxGetPr(prhs[3]);
double *constant = mxGetPr(prhs[4]);
double *ghxx = mxGetPr(prhs[5]);
double *ghuu = mxGetPr(prhs[6]);
double *ghxu = mxGetPr(prhs[7]);
double *yhat_ = NULL, *ss = NULL;
const double *yhat = mxGetPr(prhs[0]);
const double *epsilon = mxGetPr(prhs[1]);
const double *ghx = mxGetPr(prhs[2]);
const double *ghu = mxGetPr(prhs[3]);
const double *constant = mxGetPr(prhs[4]);
const double *ghxx = mxGetPr(prhs[5]);
const double *ghuu = mxGetPr(prhs[6]);
const double *ghxu = mxGetPr(prhs[7]);
const double *yhat_ = nullptr, *ss = nullptr;
if (nrhs > 9)
{
yhat_ = mxGetPr(prhs[8]);
@ -320,19 +282,17 @@ mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
if (nrhs == 9)
{
int numthreads = static_cast<int>(mxGetScalar(prhs[8]));
double *y;
plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
y = mxGetPr(plhs[0]);
double *y = mxGetPr(plhs[0]);
ss2Iteration(y, yhat, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads);
}
else
{
int numthreads = static_cast<int>(mxGetScalar(prhs[10]));
double *y, *y_;
plhs[0] = mxCreateDoubleMatrix(m, s, mxREAL);
plhs[1] = mxCreateDoubleMatrix(m, s, mxREAL);
y = mxGetPr(plhs[0]);
y_ = mxGetPr(plhs[1]);
double *y = mxGetPr(plhs[0]);
double *y_ = mxGetPr(plhs[1]);
ss2Iteration_pruning(y, y_, yhat, yhat_, epsilon, ghx, ghu, constant, ghxx, ghuu, ghxu, ss, static_cast<int>(m), static_cast<int>(n), static_cast<int>(q), static_cast<int>(s), numthreads);
}
}