6871 lines
250 KiB
C++
6871 lines
250 KiB
C++
/*
|
|
* Copyright (C) 2007-2017 Dynare Team
|
|
*
|
|
* This file is part of Dynare.
|
|
*
|
|
* Dynare is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Dynare is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Dynare. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
//define _GLIBCXX_USE_C99_FENV_TR1 1
|
|
//include <cfenv>
|
|
|
|
#include <cstring>
|
|
#include <ctime>
|
|
#include <sstream>
|
|
//#include <gsl/gsl_min.h>
|
|
//#include <minimize.h>
|
|
#include "SparseMatrix.hh"
|
|
|
|
#ifdef CUDA
|
|
# include "SparseMatrix_kernel.cu"
|
|
#endif
|
|
|
|
using namespace std;
|
|
#ifdef _MSC_VER
/* With MSVC the UMFPACK routines are resolved at run time from
   libmwumfpack.dll by the dynSparseMatrix constructors.  This section
   declares the library handle, the UMFPACK constants this file needs, and one
   global function pointer per routine. */
# include <windows.h>
HINSTANCE hinstLib;

/* sizes of the Info and Control arrays taken by the routines below */
# define UMFPACK_INFO 90
# define UMFPACK_CONTROL 20
/* used in all UMFPACK_report_* routines: */
# define UMFPACK_PRL 0 /* print level */
/* returned by all routines that use Info: */
# define UMFPACK_OK (0)
# define UMFPACK_STATUS 0 /* UMFPACK_OK, or other result */

/* Signatures of the UMFPACK entry points */
typedef void (*t_umfpack_dl_free_numeric)(void **Numeric);
typedef void (*t_umfpack_dl_free_symbolic)(void **Symbolic);
typedef int64_t (*t_umfpack_dl_solve)(int64_t sys, const int64_t Ap [], const int64_t Ai [], const double Ax [],
                                      double X [], const double B [], void *Numeric,
                                      const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO]);
typedef int64_t (*t_umfpack_dl_numeric)(const int64_t Ap [], const int64_t Ai [], const double Ax [],
                                        void *Symbolic, void **Numeric,
                                        const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO]);
typedef int64_t (*t_umfpack_dl_symbolic)(int64_t n_row, int64_t n_col,
                                         const int64_t Ap [], const int64_t Ai [], const double Ax [],
                                         void **Symbolic,
                                         const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO]);
typedef void (*t_umfpack_dl_report_info)(const double Control [UMFPACK_CONTROL], const double Info [UMFPACK_INFO]);
typedef void (*t_umfpack_dl_report_status)(const double Control [UMFPACK_CONTROL], int64_t status);
typedef void (*t_umfpack_dl_defaults)(double Control [UMFPACK_CONTROL]);

/* Entry points, assigned by the dynSparseMatrix constructors */
t_umfpack_dl_free_numeric umfpack_dl_free_numeric;
t_umfpack_dl_free_symbolic umfpack_dl_free_symbolic;
t_umfpack_dl_solve umfpack_dl_solve;
t_umfpack_dl_numeric umfpack_dl_numeric;
t_umfpack_dl_symbolic umfpack_dl_symbolic;
t_umfpack_dl_report_info umfpack_dl_report_info;
t_umfpack_dl_report_status umfpack_dl_report_status;
t_umfpack_dl_defaults umfpack_dl_defaults;

#endif
|
|
|
|
/**
 * Default constructor.
 *
 * Resets the solver bookkeeping members to their initial values.  In MSVC
 * builds it also loads libmwumfpack.dll and binds the UMFPACK entry points
 * used by this file.
 *
 * Throws FatalExceptionHandling (MSVC builds only) when the DLL cannot be
 * loaded or when one of the required functions is missing from it.
 */
dynSparseMatrix::dynSparseMatrix()
{
  pivotva = NULL;
  g_save_op = NULL;
  g_nop_all = 0;
  mem_mngr.init_Mem();
  symbolic = true;
  alt_symbolic = false;
  alt_symbolic_count = 0;
  max_u = 0;
  min_u = 0x7FFFFFFF;
  res1a = 9.0e60;
  tbreak_g = 0;
  start_compare = 0;
  restart = 0;
  IM_i.clear();
  lu_inc_tol = 1e-10;
  Symbolic = NULL;
  Numeric = NULL;
#ifdef _MSC_VER
  // Get a handle to the DLL module.
  hinstLib = LoadLibrary(TEXT("libmwumfpack.dll"));
  if (!hinstLib)
    {
      mexPrintf("library loading error\n");
      ostringstream tmp;
      tmp << " in main, libmwumfpack.dll not found. \n Check that \\Program files\\MATLAB\\RXXXXX\\bin\\win64 is in the current path.";
      throw FatalExceptionHandling(tmp.str());
    }

  // Looks one exported function up in the DLL; a missing export is fatal.
  auto bind_symbol = [](const char *symbol) -> FARPROC
    {
      FARPROC address = GetProcAddress(hinstLib, symbol);
      if (!address)
        {
          ostringstream tmp;
          tmp << " in libmwumfpack.dll, the function " << symbol << " is not found.";
          throw FatalExceptionHandling(tmp.str());
        }
      return address;
    };

  umfpack_dl_free_numeric = (t_umfpack_dl_free_numeric) bind_symbol("umfpack_dl_free_numeric");
  umfpack_dl_free_symbolic = (t_umfpack_dl_free_symbolic) bind_symbol("umfpack_dl_free_symbolic");
  // The exported name is "umfpack_dl_solve"; the previous lookup asked for
  // "umfpack_dl_free_solve", which the other constructor never uses.
  umfpack_dl_solve = (t_umfpack_dl_solve) bind_symbol("umfpack_dl_solve");
  umfpack_dl_numeric = (t_umfpack_dl_numeric) bind_symbol("umfpack_dl_numeric");
  umfpack_dl_symbolic = (t_umfpack_dl_symbolic) bind_symbol("umfpack_dl_symbolic");
  umfpack_dl_report_info = (t_umfpack_dl_report_info) bind_symbol("umfpack_dl_report_info");
  umfpack_dl_report_status = (t_umfpack_dl_report_status) bind_symbol("umfpack_dl_report_status");
  umfpack_dl_defaults = (t_umfpack_dl_defaults) bind_symbol("umfpack_dl_defaults");
#endif
}
|
|
|
|
/**
 * Constructor forwarding the model dimensions and solver settings to the
 * Evaluate base class.
 *
 * Resets the solver bookkeeping members to their initial values, stores the
 * CUDA handles when built with CUDA, and in MSVC builds loads
 * libmwumfpack.dll and binds the UMFPACK entry points used by this file.
 *
 * Throws FatalExceptionHandling (MSVC builds only) when the DLL cannot be
 * loaded or when one of the required functions is missing from it.
 */
dynSparseMatrix::dynSparseMatrix(const int y_size_arg, const int y_kmin_arg, const int y_kmax_arg, const bool print_it_arg, const bool steady_state_arg, const int periods_arg,
                                 const int minimal_solving_periods_arg, const double slowc_arg
#ifdef CUDA
                                 , const int CUDA_device_arg, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
#endif
                                 ) :
  Evaluate(y_size_arg, y_kmin_arg, y_kmax_arg, print_it_arg, steady_state_arg, periods_arg, minimal_solving_periods_arg, slowc_arg)
{
  pivotva = NULL;
  g_save_op = NULL;
  g_nop_all = 0;
  mem_mngr.init_Mem();
  symbolic = true;
  alt_symbolic = false;
  alt_symbolic_count = 0;
  max_u = 0;
  min_u = 0x7FFFFFFF;
  res1a = 9.0e60;
  tbreak_g = 0;
  start_compare = 0;
  restart = 0;
  IM_i.clear();
  lu_inc_tol = 1e-10;
  Symbolic = NULL;
  Numeric = NULL;
#ifdef CUDA
  CUDA_device = CUDA_device_arg;
  cublas_handle = cublas_handle_arg;
  cusparse_handle = cusparse_handle_arg;
  CUDA_descr = descr_arg;
#endif
#ifdef _MSC_VER
  // Get a handle to the DLL module.
  hinstLib = LoadLibrary(TEXT("libmwumfpack.dll"));
  if (hinstLib == NULL)
    {
      mexPrintf("library loading error\n");
      ostringstream tmp;
      // wording aligned with the default constructor (the verb "is" was missing)
      tmp << " in main, libmwumfpack.dll not found. \n Check that \\Program files\\MATLAB\\RXXXXX\\bin\\win64 is in the current path.";
      throw FatalExceptionHandling(tmp.str());
    }

  // Looks one exported function up in the DLL; a missing export is fatal.
  auto bind_symbol = [](const char *symbol) -> FARPROC
    {
      FARPROC address = GetProcAddress(hinstLib, symbol);
      if (!address)
        {
          ostringstream tmp;
          tmp << " in libmwumfpack.dll, the function " << symbol << " is not found.";
          throw FatalExceptionHandling(tmp.str());
        }
      return address;
    };

  umfpack_dl_free_numeric = (t_umfpack_dl_free_numeric) bind_symbol("umfpack_dl_free_numeric");
  umfpack_dl_free_symbolic = (t_umfpack_dl_free_symbolic) bind_symbol("umfpack_dl_free_symbolic");
  umfpack_dl_report_info = (t_umfpack_dl_report_info) bind_symbol("umfpack_dl_report_info");
  umfpack_dl_solve = (t_umfpack_dl_solve) bind_symbol("umfpack_dl_solve");
  umfpack_dl_numeric = (t_umfpack_dl_numeric) bind_symbol("umfpack_dl_numeric");
  umfpack_dl_symbolic = (t_umfpack_dl_symbolic) bind_symbol("umfpack_dl_symbolic");
  umfpack_dl_report_status = (t_umfpack_dl_report_status) bind_symbol("umfpack_dl_report_status");
  umfpack_dl_defaults = (t_umfpack_dl_defaults) bind_symbol("umfpack_dl_defaults");
#endif
}
|
|
|
|
int
|
|
dynSparseMatrix::NRow(int r)
|
|
{
|
|
return NbNZRow[r];
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::NCol(int c)
|
|
{
|
|
return NbNZCol[c];
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::At_Row(int r, NonZeroElem **first)
|
|
{
|
|
(*first) = FNZE_R[r];
|
|
return NbNZRow[r];
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::Union_Row(int row1, int row2)
|
|
{
|
|
NonZeroElem *first1, *first2;
|
|
int n1 = At_Row(row1, &first1);
|
|
int n2 = At_Row(row2, &first2);
|
|
int i1 = 0, i2 = 0, nb_elem = 0;
|
|
while (i1 < n1 && i2 < n2)
|
|
{
|
|
if (first1->c_index == first2->c_index)
|
|
{
|
|
nb_elem++;
|
|
i1++;
|
|
i2++;
|
|
first1 = first1->NZE_R_N;
|
|
first2 = first2->NZE_R_N;
|
|
}
|
|
else if (first1->c_index < first2->c_index)
|
|
{
|
|
nb_elem++;
|
|
i1++;
|
|
first1 = first1->NZE_R_N;
|
|
}
|
|
else
|
|
{
|
|
nb_elem++;
|
|
i2++;
|
|
first2 = first2->NZE_R_N;
|
|
}
|
|
}
|
|
return nb_elem;
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::At_Pos(int r, int c, NonZeroElem **first)
|
|
{
|
|
(*first) = FNZE_R[r];
|
|
while ((*first)->c_index != c)
|
|
(*first) = (*first)->NZE_R_N;
|
|
return NbNZRow[r];
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::At_Col(int c, NonZeroElem **first)
|
|
{
|
|
(*first) = FNZE_C[c];
|
|
return NbNZCol[c];
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::At_Col(int c, int lag, NonZeroElem **first)
|
|
{
|
|
(*first) = FNZE_C[c];
|
|
int i = 0;
|
|
while ((*first)->lag_index != lag && (*first))
|
|
(*first) = (*first)->NZE_C_N;
|
|
if ((*first))
|
|
{
|
|
NonZeroElem *firsta = (*first);
|
|
if (!firsta->NZE_C_N)
|
|
i++;
|
|
else
|
|
{
|
|
while (firsta->lag_index == lag && firsta->NZE_C_N)
|
|
{
|
|
firsta = firsta->NZE_C_N;
|
|
i++;
|
|
}
|
|
if (firsta->lag_index == lag)
|
|
i++;
|
|
}
|
|
}
|
|
return i;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Delete(const int r, const int c)
|
|
{
|
|
NonZeroElem *first = FNZE_R[r], *firsta = NULL;
|
|
|
|
while (first->c_index != c)
|
|
{
|
|
firsta = first;
|
|
first = first->NZE_R_N;
|
|
}
|
|
if (firsta != NULL)
|
|
firsta->NZE_R_N = first->NZE_R_N;
|
|
if (first == FNZE_R[r])
|
|
FNZE_R[r] = first->NZE_R_N;
|
|
NbNZRow[r]--;
|
|
|
|
first = FNZE_C[c];
|
|
firsta = NULL;
|
|
while (first->r_index != r)
|
|
{
|
|
firsta = first;
|
|
first = first->NZE_C_N;
|
|
}
|
|
|
|
if (firsta != NULL)
|
|
firsta->NZE_C_N = first->NZE_C_N;
|
|
if (first == FNZE_C[c])
|
|
FNZE_C[c] = first->NZE_C_N;
|
|
|
|
u_liste.push_back(first->u_index);
|
|
mem_mngr.mxFree_NZE(first);
|
|
NbNZCol[c]--;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Print(int Size, int *b)
|
|
{
|
|
int a, i, j, k, l;
|
|
mexPrintf(" ");
|
|
for (k = 0; k < Size*periods; k++)
|
|
mexPrintf("%-2d ", k);
|
|
mexPrintf(" | ");
|
|
for (k = 0; k < Size*periods; k++)
|
|
mexPrintf("%8d", k);
|
|
mexPrintf("\n");
|
|
for (i = 0; i < Size*periods; i++)
|
|
{
|
|
NonZeroElem *first = FNZE_R[i];
|
|
j = NbNZRow[i];
|
|
mexPrintf("%-2d ", i);
|
|
a = 0;
|
|
for (k = 0; k < j; k++)
|
|
{
|
|
for (l = 0; l < (first->c_index-a); l++)
|
|
mexPrintf(" ");
|
|
mexPrintf("%-2d ", first->u_index);
|
|
a = first->c_index+1;
|
|
first = first->NZE_R_N;
|
|
}
|
|
for (k = a; k < Size*periods; k++)
|
|
mexPrintf(" ");
|
|
mexPrintf("%-2d ", b[i]);
|
|
|
|
first = FNZE_R[i];
|
|
j = NbNZRow[i];
|
|
mexPrintf(" | %-2d ", i);
|
|
a = 0;
|
|
for (k = 0; k < j; k++)
|
|
{
|
|
for (l = 0; l < (first->c_index-a); l++)
|
|
mexPrintf(" ");
|
|
mexPrintf("%8.4f", double (u[first->u_index]));
|
|
a = first->c_index+1;
|
|
first = first->NZE_R_N;
|
|
}
|
|
for (k = a; k < Size*periods; k++)
|
|
mexPrintf(" ");
|
|
mexPrintf("%8.4f", double (u[b[i]]));
|
|
mexPrintf("\n");
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Insert(const int r, const int c, const int u_index, const int lag_index)
|
|
{
|
|
NonZeroElem *firstn, *first, *firsta, *a;
|
|
firstn = mem_mngr.mxMalloc_NZE();
|
|
first = FNZE_R[r];
|
|
firsta = NULL;
|
|
while (first->c_index < c && (a = first->NZE_R_N))
|
|
{
|
|
firsta = first;
|
|
first = a;
|
|
}
|
|
firstn->u_index = u_index;
|
|
firstn->r_index = r;
|
|
firstn->c_index = c;
|
|
firstn->lag_index = lag_index;
|
|
if (first->c_index > c)
|
|
{
|
|
if (first == FNZE_R[r])
|
|
FNZE_R[r] = firstn;
|
|
if (firsta != NULL)
|
|
firsta->NZE_R_N = firstn;
|
|
firstn->NZE_R_N = first;
|
|
}
|
|
else
|
|
{
|
|
first->NZE_R_N = firstn;
|
|
firstn->NZE_R_N = NULL;
|
|
}
|
|
NbNZRow[r]++;
|
|
first = FNZE_C[c];
|
|
firsta = NULL;
|
|
while (first->r_index < r && (a = first->NZE_C_N))
|
|
{
|
|
firsta = first;
|
|
first = a;
|
|
}
|
|
if (first->r_index > r)
|
|
{
|
|
if (first == FNZE_C[c])
|
|
FNZE_C[c] = firstn;
|
|
if (firsta != NULL)
|
|
firsta->NZE_C_N = firstn;
|
|
firstn->NZE_C_N = first;
|
|
}
|
|
else
|
|
{
|
|
first->NZE_C_N = firstn;
|
|
firstn->NZE_C_N = NULL;
|
|
}
|
|
|
|
NbNZCol[c]++;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Close_SaveCode()
|
|
{
|
|
SaveCode.close();
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Read_SparseMatrix(string file_name, const int Size, int periods, int y_kmin, int y_kmax, bool two_boundaries, int stack_solve_algo, int solve_algo)
|
|
{
|
|
unsigned int eq, var;
|
|
int lag;
|
|
mem_mngr.fixe_file_name(file_name);
|
|
/*mexPrintf("steady_state=%d, size=%d, solve_algo=%d, stack_solve_algo=%d, two_boundaries=%d\n",steady_state, Size, solve_algo, stack_solve_algo, two_boundaries);
|
|
mexEvalString("drawnow;");*/
|
|
if (!SaveCode.is_open())
|
|
{
|
|
if (steady_state)
|
|
SaveCode.open(file_name + "/model/bytecode/static.bin", ios::in | ios::binary);
|
|
else
|
|
SaveCode.open(file_name + "/model/bytecode/dynamic.bin", ios::in | ios::binary);
|
|
if (!SaveCode.is_open())
|
|
{
|
|
ostringstream tmp;
|
|
if (steady_state)
|
|
tmp << " in Read_SparseMatrix, " << file_name << "/model/bytecode/static.bin cannot be opened\n";
|
|
else
|
|
tmp << " in Read_SparseMatrix, " << file_name << "/model/bytecode/dynamic.bin cannot be opened\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
IM_i.clear();
|
|
if (two_boundaries)
|
|
{
|
|
if (stack_solve_algo == 5)
|
|
{
|
|
for (int i = 0; i < u_count_init-Size; i++)
|
|
{
|
|
int val;
|
|
SaveCode.read(reinterpret_cast<char *>(&eq), sizeof(eq));
|
|
SaveCode.read(reinterpret_cast<char *>(&var), sizeof(var));
|
|
SaveCode.read(reinterpret_cast<char *>(&lag), sizeof(lag));
|
|
SaveCode.read(reinterpret_cast<char *>(&val), sizeof(val));
|
|
IM_i[make_pair(make_pair(eq, var), lag)] = val;
|
|
}
|
|
for (int j = 0; j < Size; j++)
|
|
IM_i[make_pair(make_pair(j, Size*(periods+y_kmax)), 0)] = j;
|
|
}
|
|
else if (stack_solve_algo >= 0 && stack_solve_algo <= 4)
|
|
{
|
|
for (int i = 0; i < u_count_init-Size; i++)
|
|
{
|
|
int val;
|
|
SaveCode.read(reinterpret_cast<char *>(&eq), sizeof(eq));
|
|
SaveCode.read(reinterpret_cast<char *>(&var), sizeof(var));
|
|
SaveCode.read(reinterpret_cast<char *>(&lag), sizeof(lag));
|
|
SaveCode.read(reinterpret_cast<char *>(&val), sizeof(val));
|
|
IM_i[make_pair(make_pair(var - lag*Size, -lag), eq)] = val;
|
|
}
|
|
for (int j = 0; j < Size; j++)
|
|
IM_i[make_pair(make_pair(Size*(periods+y_kmax), 0), j)] = j;
|
|
}
|
|
else if (stack_solve_algo == 7)
|
|
{
|
|
for (int i = 0; i < u_count_init-Size; i++)
|
|
{
|
|
int val;
|
|
SaveCode.read(reinterpret_cast<char *>(&eq), sizeof(eq));
|
|
SaveCode.read(reinterpret_cast<char *>(&var), sizeof(var));
|
|
SaveCode.read(reinterpret_cast<char *>(&lag), sizeof(lag));
|
|
SaveCode.read(reinterpret_cast<char *>(&val), sizeof(val));
|
|
IM_i[make_pair(make_pair(eq, lag), var - lag * Size)] = val;
|
|
}
|
|
for (int j = 0; j < Size; j++)
|
|
IM_i[make_pair(make_pair(Size*(periods+y_kmax), 0), j)] = j;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((stack_solve_algo == 5 && !steady_state) || (solve_algo == 5 && steady_state))
|
|
{
|
|
for (int i = 0; i < u_count_init; i++)
|
|
{
|
|
int val;
|
|
SaveCode.read(reinterpret_cast<char *>(&eq), sizeof(eq));
|
|
SaveCode.read(reinterpret_cast<char *>(&var), sizeof(var));
|
|
SaveCode.read(reinterpret_cast<char *>(&lag), sizeof(lag));
|
|
SaveCode.read(reinterpret_cast<char *>(&val), sizeof(val));
|
|
IM_i[make_pair(make_pair(eq, var), lag)] = val;
|
|
}
|
|
}
|
|
else if (((stack_solve_algo >= 0 || stack_solve_algo <= 4) && !steady_state) || ((solve_algo >= 6 || solve_algo <= 8) && steady_state))
|
|
{
|
|
for (int i = 0; i < u_count_init; i++)
|
|
{
|
|
int val;
|
|
SaveCode.read(reinterpret_cast<char *>(&eq), sizeof(eq));
|
|
SaveCode.read(reinterpret_cast<char *>(&var), sizeof(var));
|
|
SaveCode.read(reinterpret_cast<char *>(&lag), sizeof(lag));
|
|
SaveCode.read(reinterpret_cast<char *>(&val), sizeof(val));
|
|
IM_i[make_pair(make_pair(var - lag*Size, -lag), eq)] = val;
|
|
}
|
|
}
|
|
}
|
|
index_vara = (int *) mxMalloc(Size*(periods+y_kmin+y_kmax)*sizeof(int));
|
|
test_mxMalloc(index_vara, __LINE__, __FILE__, __func__, Size*(periods+y_kmin+y_kmax)*sizeof(int));
|
|
for (int j = 0; j < Size; j++)
|
|
SaveCode.read(reinterpret_cast<char *>(&index_vara[j]), sizeof(*index_vara));
|
|
if (periods+y_kmin+y_kmax > 1)
|
|
for (int i = 1; i < periods+y_kmin+y_kmax; i++)
|
|
{
|
|
for (int j = 0; j < Size; j++)
|
|
index_vara[j+Size*i] = index_vara[j+Size*(i-1)] + y_size;
|
|
}
|
|
index_equa = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(index_equa, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
for (int j = 0; j < Size; j++)
|
|
SaveCode.read(reinterpret_cast<char *>(&index_equa[j]), sizeof(*index_equa));
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Simple_Init(int Size, map<pair<pair<int, int>, int>, int> &IM, bool &zero_solution)
|
|
{
|
|
int i, eq, var, lag;
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
NonZeroElem *first;
|
|
pivot = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivot, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
pivot_save = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivot_save, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
pivotk = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivotk, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
pivotv = (double *) mxMalloc(Size*sizeof(double));
|
|
test_mxMalloc(pivotv, __LINE__, __FILE__, __func__, Size*sizeof(double));
|
|
pivotva = (double *) mxMalloc(Size*sizeof(double));
|
|
test_mxMalloc(pivotva, __LINE__, __FILE__, __func__, Size*sizeof(double));
|
|
b = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(b, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
line_done = (bool *) mxMalloc(Size*sizeof(bool));
|
|
test_mxMalloc(line_done, __LINE__, __FILE__, __func__, Size*sizeof(bool));
|
|
|
|
mem_mngr.init_CHUNK_BLCK_SIZE(u_count);
|
|
g_save_op = NULL;
|
|
g_nop_all = 0;
|
|
i = Size*sizeof(NonZeroElem *);
|
|
FNZE_R = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(FNZE_R, __LINE__, __FILE__, __func__, i);
|
|
FNZE_C = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(FNZE_C, __LINE__, __FILE__, __func__, i);
|
|
NonZeroElem **temp_NZE_R = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(temp_NZE_R, __LINE__, __FILE__, __func__, i);
|
|
NonZeroElem **temp_NZE_C = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(temp_NZE_C, __LINE__, __FILE__, __func__, i);
|
|
i = Size*sizeof(int);
|
|
NbNZRow = (int *) mxMalloc(i);
|
|
test_mxMalloc(NbNZRow, __LINE__, __FILE__, __func__, i);
|
|
NbNZCol = (int *) mxMalloc(i);
|
|
test_mxMalloc(NbNZCol, __LINE__, __FILE__, __func__, i);
|
|
it4 = IM.begin();
|
|
eq = -1;
|
|
for (i = 0; i < Size; i++)
|
|
{
|
|
line_done[i] = 0;
|
|
FNZE_C[i] = NULL;
|
|
FNZE_R[i] = NULL;
|
|
temp_NZE_C[i] = 0;
|
|
temp_NZE_R[i] = 0;
|
|
NbNZRow[i] = 0;
|
|
NbNZCol[i] = 0;
|
|
}
|
|
int u_count1 = Size;
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.second;
|
|
eq = it4->first.first.first;
|
|
lag = it4->first.second;
|
|
if (lag == 0) /*Build the index for sparse matrix containing the jacobian : u*/
|
|
{
|
|
NbNZRow[eq]++;
|
|
NbNZCol[var]++;
|
|
first = mem_mngr.mxMalloc_NZE();
|
|
first->NZE_C_N = NULL;
|
|
first->NZE_R_N = NULL;
|
|
first->u_index = u_count1;
|
|
first->r_index = eq;
|
|
first->c_index = var;
|
|
first->lag_index = lag;
|
|
if (FNZE_R[eq] == NULL)
|
|
FNZE_R[eq] = first;
|
|
if (FNZE_C[var] == NULL)
|
|
FNZE_C[var] = first;
|
|
if (temp_NZE_R[eq] != NULL)
|
|
temp_NZE_R[eq]->NZE_R_N = first;
|
|
if (temp_NZE_C[var] != NULL)
|
|
temp_NZE_C[var]->NZE_C_N = first;
|
|
temp_NZE_R[eq] = first;
|
|
temp_NZE_C[var] = first;
|
|
u_count1++;
|
|
}
|
|
it4++;
|
|
}
|
|
double cum_abs_sum = 0;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
b[i] = i;
|
|
cum_abs_sum += fabs(u[i]);
|
|
}
|
|
if (cum_abs_sum < 1e-20)
|
|
zero_solution = true;
|
|
else
|
|
zero_solution = false;
|
|
|
|
mxFree(temp_NZE_R);
|
|
mxFree(temp_NZE_C);
|
|
u_count = u_count1;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_Matlab_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, bool &zero_solution, mxArray *x0_m)
|
|
{
|
|
int eq, var;
|
|
double *b = mxGetPr(b_m);
|
|
if (!b)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *x0 = mxGetPr(x0_m);
|
|
if (!x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *Ai = mxGetIr(A_m);
|
|
if (!Ai)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *Aj = mxGetJc(A_m);
|
|
if (!Aj)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't allocate Aj index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *A = mxGetPr(A_m);
|
|
if (!A)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve A matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
ya[i] = y[i];
|
|
#ifdef DEBUG
|
|
unsigned int max_nze = mxGetNzmax(A_m);
|
|
#endif
|
|
unsigned int NZE = 0;
|
|
int last_var = 0;
|
|
double cum_abs_sum = 0;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
b[i] = u[i];
|
|
cum_abs_sum += fabs(b[i]);
|
|
x0[i] = y[i];
|
|
}
|
|
if (cum_abs_sum < 1e-20)
|
|
zero_solution = true;
|
|
else
|
|
zero_solution = false;
|
|
|
|
Aj[0] = 0;
|
|
last_var = 0;
|
|
it4 = IM.begin();
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.first;
|
|
if (var != last_var)
|
|
{
|
|
Aj[1+last_var ] = NZE;
|
|
last_var = var;
|
|
}
|
|
eq = it4->first.second;
|
|
int index = it4->second;
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > Size + Size*Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << index << ") out of range for u vector max = " << Size+Size*Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= max_nze)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, exceeds the capacity of A_m sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
A[NZE] = u[index];
|
|
Ai[NZE] = eq;
|
|
NZE++;
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var < 0 || var >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << var << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var] < 0 || index_vara[var] >= y_size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << index_vara[var] << ") out of range for y vector max=" << y_size << " (0)\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
it4++;
|
|
}
|
|
Aj[Size] = NZE;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_UMFPACK_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, bool &zero_solution, mxArray *x0_m)
|
|
{
|
|
int eq, var;
|
|
*b = (double *) mxMalloc(Size * sizeof(double));
|
|
test_mxMalloc(*b, __LINE__, __FILE__, __func__, Size * sizeof(double));
|
|
if (!(*b))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't retrieve b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *x0 = mxGetPr(x0_m);
|
|
if (!x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse_Simple, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ap = (SuiteSparse_long *) mxMalloc((Size+1) * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ap, __LINE__, __FILE__, __func__, (Size+1) * sizeof(SuiteSparse_long));
|
|
if (!(*Ap))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't allocate Ap index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
size_t prior_nz = IM.size();
|
|
*Ai = (SuiteSparse_long *) mxMalloc(prior_nz * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ai, __LINE__, __FILE__, __func__, prior_nz * sizeof(SuiteSparse_long));
|
|
if (!(*Ai))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ax = (double *) mxMalloc(prior_nz * sizeof(double));
|
|
test_mxMalloc(*Ax, __LINE__, __FILE__, __func__, prior_nz * sizeof(double));
|
|
if (!(*Ax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't retrieve Ax matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
ya[eq+it_*y_size] = y[eq+it_*y_size];
|
|
}
|
|
#ifdef DEBUG
|
|
unsigned int max_nze = prior_nz;//mxGetNzmax(A_m);
|
|
#endif
|
|
unsigned int NZE = 0;
|
|
int last_var = 0;
|
|
double cum_abs_sum = 0;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
(*b)[i] = u[i];
|
|
cum_abs_sum += fabs((*b)[i]);
|
|
x0[i] = y[i];
|
|
}
|
|
if (cum_abs_sum < 1e-20)
|
|
zero_solution = true;
|
|
else
|
|
zero_solution = false;
|
|
|
|
(*Ap)[0] = 0;
|
|
last_var = 0;
|
|
it4 = IM.begin();
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.first;
|
|
if (var != last_var)
|
|
{
|
|
(*Ap)[1+last_var ] = NZE;
|
|
last_var = var;
|
|
}
|
|
eq = it4->first.second;
|
|
int index = it4->second;
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > Size + Size*Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << index << ") out of range for u vector max = " << Size+Size*Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= max_nze)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, exceeds the capacity of A_m sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
(*Ax)[NZE] = u[index];
|
|
(*Ai)[NZE] = eq;
|
|
NZE++;
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var < 0 || var >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << var << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var] < 0 || index_vara[var] >= y_size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, index (" << index_vara[var] << ") out of range for y vector max=" << y_size << " (0)\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
it4++;
|
|
}
|
|
(*Ap)[Size] = NZE;
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::find_exo_num(vector<s_plan> sconstrained_extended_path, int value)
|
|
{
|
|
int res = -1;
|
|
int i = 0;
|
|
for (vector<s_plan>::iterator it = sconstrained_extended_path.begin(); it != sconstrained_extended_path.end(); it++, i++)
|
|
if (it->exo_num == value)
|
|
{
|
|
res = i;
|
|
break;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::find_int_date(vector<pair<int, double> > per_value, int value)
|
|
{
|
|
int res = -1;
|
|
int i = 0;
|
|
for (vector<pair<int, double> >::iterator it = per_value.begin(); it != per_value.end(); it++, i++)
|
|
if (it->first == value)
|
|
{
|
|
res = i;
|
|
break;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_UMFPACK_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, mxArray *x0_m, vector_table_conditional_local_type vector_table_conditional_local, int block_num)
|
|
{
|
|
int t, eq, var, lag, ti_y_kmin, ti_y_kmax;
|
|
double *jacob_exo;
|
|
int row_x = 0;
|
|
#ifdef DEBUG
|
|
int col_x;
|
|
#endif
|
|
int n = periods * Size;
|
|
*b = (double *) mxMalloc(n * sizeof(double));
|
|
if (!(*b))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't retrieve b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *x0 = mxGetPr(x0_m);
|
|
if (!x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse_Simple, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ap = (SuiteSparse_long *) mxMalloc((n+1) * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ap, __LINE__, __FILE__, __func__, (n+1) * sizeof(SuiteSparse_long));
|
|
if (!(*Ap))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't allocate Ap index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
size_t prior_nz = IM.size() * periods;
|
|
*Ai = (SuiteSparse_long *) mxMalloc(prior_nz * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ai, __LINE__, __FILE__, __func__, prior_nz * sizeof(SuiteSparse_long));
|
|
if (!(*Ai))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ax = (double *) mxMalloc(prior_nz * sizeof(double));
|
|
test_mxMalloc(*Ax, __LINE__, __FILE__, __func__, prior_nz * sizeof(double));
|
|
if (!(*Ax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, can't retrieve Ax matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
map<pair<pair<int, int>, int>, int>::iterator it4, it5;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
ya[i] = y[i];
|
|
#ifdef DEBUG
|
|
unsigned int max_nze = prior_nz; //mxGetNzmax(A_m);
|
|
#endif
|
|
unsigned int NZE = 0;
|
|
int last_var = 0;
|
|
for (int i = 0; i < periods*Size; i++)
|
|
{
|
|
(*b)[i] = 0;
|
|
x0[i] = y[index_vara[Size*y_kmin+i]];
|
|
}
|
|
if (vector_table_conditional_local.size())
|
|
{
|
|
jacob_exo = mxGetPr(jacobian_exo_block[block_num]);
|
|
row_x = mxGetM(jacobian_exo_block[block_num]);
|
|
#ifdef DEBUG
|
|
col_x = mxGetN(jacobian_exo_block[block_num]);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
jacob_exo = NULL;
|
|
}
|
|
#ifdef DEBUG
|
|
int local_index;
|
|
#endif
|
|
|
|
bool fliped = false;
|
|
bool fliped_exogenous_derivatives_updated = false;
|
|
int flip_exo;
|
|
(*Ap)[0] = 0;
|
|
for (t = 0; t < periods; t++)
|
|
{
|
|
last_var = -1;
|
|
it4 = IM.begin();
|
|
var = 0;
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.first;
|
|
#ifdef DEBUG
|
|
if (var < 0 || var >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, var (" << var << ") out of range\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
eq = it4->first.second+Size*t;
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, eq (" << eq << ") out of range\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
lag = -it4->first.first.second;
|
|
int index = it4->second+ (t-lag) * u_count_init;
|
|
if (var != last_var)
|
|
{
|
|
(*Ap)[1+last_var + t * Size] = NZE;
|
|
last_var = var;
|
|
if (var < Size*(periods+y_kmax))
|
|
{
|
|
if (t == 0 && vector_table_conditional_local.size())
|
|
{
|
|
fliped = vector_table_conditional_local[var].is_cond;
|
|
fliped_exogenous_derivatives_updated = false;
|
|
}
|
|
else
|
|
fliped = false;
|
|
}
|
|
else
|
|
fliped = false;
|
|
}
|
|
if (fliped)
|
|
{
|
|
if ((t == 0) && (var < (periods+y_kmax)*Size) && (lag == 0) && (vector_table_conditional_local.size()))
|
|
{
|
|
flip_exo = vector_table_conditional_local[var].var_exo;
|
|
#ifdef DEBUG
|
|
local_index = eq;
|
|
#endif
|
|
if (!fliped_exogenous_derivatives_updated)
|
|
{
|
|
fliped_exogenous_derivatives_updated = true;
|
|
for (int k = 0; k < row_x; k++)
|
|
{
|
|
if (jacob_exo[k + row_x*flip_exo] != 0)
|
|
{
|
|
(*Ax)[NZE] = jacob_exo[k + row_x*flip_exo];
|
|
(*Ai)[NZE] = k;
|
|
NZE++;
|
|
|
|
#ifdef DEBUG
|
|
if (local_index < 0 || local_index >= Size * periods)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << local_index << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (k + row_x*flip_exo < 0 || k + row_x*flip_exo >= row_x * col_x)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << var+Size*(y_kmin+t+lag) << ") out of range for jacob_exo vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (t+y_kmin+flip_exo*nb_row_x < 0 || t+y_kmin+flip_exo*nb_row_x >= nb_row_x * this->col_x)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << index_vara[var+Size*(y_kmin+t+lag)] << ") out of range for x vector max=" << nb_row_x * this->col_x << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
u[k] -= jacob_exo[k + row_x*flip_exo] * x[t+y_kmin+flip_exo*nb_row_x];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/*if (t==0)
|
|
{
|
|
if (min_lag > lag)
|
|
min_lag = lag;
|
|
if (max_lag < lag)
|
|
max_lag = lag;
|
|
}*/
|
|
|
|
if (var < (periods+y_kmax)*Size)
|
|
{
|
|
ti_y_kmin = -min(t, y_kmin);
|
|
ti_y_kmax = min(periods-(t +1), y_kmax);
|
|
int ti_new_y_kmax = min(t, y_kmax);
|
|
int ti_new_y_kmin = -min(periods-(t+1), y_kmin);
|
|
if (lag <= ti_new_y_kmax && lag >= ti_new_y_kmin) /*Build the index for sparse matrix containing the jacobian : u*/
|
|
{
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > Size + Size*Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << index << ") out of range for u vector max = " << Size+Size*Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= max_nze)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, exceeds the capacity of A_m sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
if ((!fliped /*|| lag != 0*/) /*&& (!(vector_table_conditional_local[eq-lag*Size].is_cond && (t-lag == 0)))*/)
|
|
{
|
|
(*Ax)[NZE] = u[index];
|
|
(*Ai)[NZE] = eq - lag * Size;
|
|
NZE++;
|
|
}
|
|
else /*if (fliped)*/
|
|
{
|
|
#ifdef DEBUG
|
|
if (eq - lag * Size < 0 || eq - lag * Size >= Size * periods)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << eq - lag * Size << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var+Size*(y_kmin+t) < 0 || var+Size*(y_kmin+t) >= Size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << var+Size*(y_kmin+t) << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var+Size*(y_kmin+t /*+lag*/)] < 0 || index_vara[var+Size*(y_kmin+t /*+lag*/)] >= y_size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << index_vara[var+Size*(y_kmin+t /*+lag*/)] << ") out of range for y vector max=" << y_size*(periods+y_kmin+y_kmax) << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
(*b)[eq - lag * Size] += u[index] * y[index_vara[var+Size*(y_kmin+t /*+lag*/)]];
|
|
}
|
|
|
|
}
|
|
if (lag > ti_y_kmax || lag < ti_y_kmin)
|
|
{
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size * periods)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var+Size*(y_kmin+t+lag) < 0 || var+Size*(y_kmin+t+lag) >= Size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << var+Size*(y_kmin+t+lag) << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var+Size*(y_kmin+t+lag)] < 0 || index_vara[var+Size*(y_kmin+t+lag)] >= y_size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << index_vara[var+Size*(y_kmin+t+lag)] << ") out of range for y vector max=" << y_size*(periods+y_kmin+y_kmax) << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
(*b)[eq] += u[index+lag*u_count_init]*y[index_vara[var+Size*(y_kmin+t+lag)]];
|
|
}
|
|
}
|
|
else /* ...and store it in the u vector*/
|
|
{
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << index << ") out of range for u vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (eq < 0 || eq >= (Size*periods))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_UMFPACK_Sparse, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
(*b)[eq] += u[index];
|
|
}
|
|
it4++;
|
|
}
|
|
}
|
|
(*Ap)[Size*periods] = NZE;
|
|
#ifdef DEBUG
|
|
mexPrintf("*Ax = [");
|
|
for (int i = 0; i < NZE; i++)
|
|
mexPrintf("%f ", (*Ax)[i]);
|
|
mexPrintf("]\n");
|
|
|
|
mexPrintf("*Ap = [");
|
|
for (int i = 0; i < n+1; i++)
|
|
mexPrintf("%d ", (*Ap)[i]);
|
|
mexPrintf("]\n");
|
|
|
|
mexPrintf("*Ai = [");
|
|
for (int i = 0; i < NZE; i++)
|
|
mexPrintf("%d ", (*Ai)[i]);
|
|
mexPrintf("]\n");
|
|
#endif
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_CUDA_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, double **x0, bool &zero_solution, mxArray *x0_m)
|
|
{
|
|
int eq, var;
|
|
|
|
*b = (double *) mxMalloc(Size * sizeof(double));
|
|
test_mxMalloc(*b, __LINE__, __FILE__, __func__, Size * sizeof(double));
|
|
if (!(*b))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, can't retrieve b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *Host_x0 = mxGetPr(x0_m);
|
|
if (!Host_x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ap = (SuiteSparse_long *) mxMalloc((Size+1) * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ap, __LINE__, __FILE__, __func__, (Size+1) * sizeof(SuiteSparse_long));
|
|
if (!(*Ap))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, can't allocate Ap index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
size_t prior_nz = IM.size();
|
|
*Ai = (SuiteSparse_long *) mxMalloc(prior_nz * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(*Ai, __LINE__, __FILE__, __func__, prior_nz * sizeof(SuiteSparse_long));
|
|
if (!(*Ai))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
*Ax = (double *) mxMalloc(prior_nz * sizeof(double));
|
|
test_mxMalloc(*Ax, __LINE__, __FILE__, __func__, prior_nz * sizeof(double));
|
|
if (!(*Ax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, can't retrieve Ax matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
ya[eq+it_*y_size] = y[eq+it_*y_size];
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
unsigned int max_nze = prior_nz; //mxGetNzmax(A_m);
|
|
#endif
|
|
unsigned int NZE = 0;
|
|
int last_var = 0;
|
|
double cum_abs_sum = 0;
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
(*b)[i] = u[i];
|
|
cum_abs_sum += fabs((*b)[i]);
|
|
(*x0)[i] = y[i];
|
|
}
|
|
if (cum_abs_sum < 1e-20)
|
|
zero_solution = true;
|
|
else
|
|
zero_solution = false;
|
|
|
|
(*Ap)[0] = 0;
|
|
last_var = -1;
|
|
it4 = IM.begin();
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.first;
|
|
if (var != last_var)
|
|
{
|
|
(*Ap)[1+last_var ] = NZE;
|
|
last_var = var;
|
|
}
|
|
eq = it4->first.second;
|
|
int index = it4->second;
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > Size + Size*Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, index (" << index << ") out of range for u vector max = " << Size+Size*Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= max_nze)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, exceeds the capacity of A_m sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
(*Ax)[NZE] = u[index];
|
|
(*Ai)[NZE] = eq;
|
|
NZE++;
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var < 0 || var >= Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, index (" << var << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var] < 0 || index_vara[var] >= y_size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse_Simple, index (" << index_vara[var] << ") out of range for y vector max=" << y_size << " (0)\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
it4++;
|
|
}
|
|
(*Ap)[Size] = NZE;
|
|
}
|
|
|
|
#ifdef CUDA
|
|
void
|
|
dynSparseMatrix::Init_CUDA_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, int **Ap, int **Ai, double **Ax, int **Ap_tild, int **Ai_tild, double **A_tild, double **b, double **x0, mxArray *x0_m, int *nnz, int *nnz_tild, int preconditioner)
|
|
{
|
|
//cudaError_t cuda_error;
|
|
int t, eq, var, lag, ti_y_kmin, ti_y_kmax;
|
|
int n = periods * Size;
|
|
size_t prior_nz = IM.size() * periods;
|
|
size_t preconditioner_size = 0;
|
|
map<pair<int, int>, int> jacob_struct;
|
|
|
|
/* ask cuda how many devices it can find */
|
|
int device_count;
|
|
cudaGetDeviceCount(&device_count);
|
|
|
|
cudaSetDevice(CUDA_device);
|
|
|
|
double *Host_b = (double *) mxMalloc(n * sizeof(double));
|
|
test_mxMalloc(Host_b, __LINE__, __FILE__, __func__, n * sizeof(double));
|
|
cudaChk(cudaMalloc((void **) b, n * sizeof(double)), " in Init_Cuda_Sparse, not enought memory to allocate b vector on the graphic card\n");
|
|
|
|
double *Host_x0 = mxGetPr(x0_m);
|
|
if (!Host_x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Cuda_Sparse, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
cudaChk(cudaMalloc((void **) x0, n * sizeof(double)), " in Init_Cuda_Sparse, not enought memory to allocate x0 vector on the graphic card\n");
|
|
|
|
int *Host_Ap = (int *) mxMalloc((n+1) * sizeof(int));
|
|
test_mxMalloc(Host_Ap, __LINE__, __FILE__, __func__, (n+1) * sizeof(int));
|
|
|
|
int *Host_Ai = (int *) mxMalloc(prior_nz * sizeof(int));
|
|
test_mxMalloc(Host_Ai, __LINE__, __FILE__, __func__, prior_nz * sizeof(int));
|
|
|
|
double *Host_Ax = (double *) mxMalloc(prior_nz * sizeof(double));
|
|
test_mxMalloc(Host_Ax, __LINE__, __FILE__, __func__, prior_nz * sizeof(double));
|
|
|
|
int *Host_Ai_tild, *Host_Ap_tild;
|
|
if (preconditioner == 3)
|
|
{
|
|
Host_Ap_tild = (int *) mxMalloc((n+1)*sizeof(int));
|
|
test_mxMalloc(Host_Ap_tild, __LINE__, __FILE__, __func__, (n+1)*sizeof(int));
|
|
Host_Ai_tild = (int *) mxMalloc(prior_nz*sizeof(int));
|
|
test_mxMalloc(Host_Ai_tild, __LINE__, __FILE__, __func__, prior_nz*sizeof(int));
|
|
Host_Ap_tild[0] = 0;
|
|
}
|
|
|
|
if (preconditioner == 0)
|
|
preconditioner_size = n;
|
|
else if (preconditioner == 1 || preconditioner == 2 || preconditioner == 3)
|
|
preconditioner_size = prior_nz;
|
|
|
|
double *Host_A_tild = (double *) mxMalloc(preconditioner_size * sizeof(double));
|
|
test_mxMalloc(Host_A_tild, __LINE__, __FILE__, __func__, preconditioner_size * sizeof(double));
|
|
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
ya[i] = y[i];
|
|
# ifdef DEBUG
|
|
unsigned int max_nze = mxGetNzmax(A_m);
|
|
# endif
|
|
unsigned int NZE = 0, NZE_tild = 0;
|
|
int last_eq = 0;
|
|
for (int i = 0; i < periods*Size; i++)
|
|
{
|
|
Host_b[i] = 0;
|
|
Host_x0[i] = y[index_vara[Size*y_kmin+i]];
|
|
}
|
|
|
|
//Ordered in CSR and not in CSC
|
|
|
|
Host_Ap[0] = 0;
|
|
for (t = 0; t < periods; t++)
|
|
{
|
|
last_eq = -1;
|
|
it4 = IM.begin();
|
|
while (it4 != IM.end())
|
|
{
|
|
eq = it4->first.first.first;
|
|
if (eq != last_eq)
|
|
{
|
|
# ifdef DEBUG
|
|
if (1+last_eq + t * Size > (n + 1))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, 1+last_eq + t * Size (" << 1+last_eq + t * Size << ") out of range for Host_Ap vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
# endif
|
|
Host_Ap[1+last_eq + t * Size] = NZE;
|
|
if (preconditioner == 3 && t == 0)
|
|
Host_Ap_tild[1+last_eq ] = NZE_tild;
|
|
last_eq = eq;
|
|
}
|
|
var = it4->first.second+Size*t;
|
|
lag = it4->first.first.second;
|
|
int index = it4->second+ (t /*+ lag*/) * u_count_init;
|
|
if (eq < (periods+y_kmax)*Size)
|
|
{
|
|
ti_y_kmin = -min(t, y_kmin);
|
|
ti_y_kmax = min(periods-(t + 1), y_kmax);
|
|
if ((lag <= ti_y_kmax && lag >= ti_y_kmin) || preconditioner == 3) /*Build the index for sparse matrix containing the jacobian : u*/
|
|
{
|
|
# ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > (periods-1)* IM.size() + Size * Size + periods * Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << index << ") out of range for u vector max = " << (periods-1)* IM.size() + Size * Size + periods * Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= prior_nz)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, exceeds the capacity of A_i or A_x sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
# endif
|
|
bool to_store = true;
|
|
if (preconditioner == 0)
|
|
{
|
|
if (lag == 0 && it4->first.second == eq)
|
|
Host_A_tild[var] = u[index];
|
|
}
|
|
else if (preconditioner == 1 || preconditioner == 2)
|
|
Host_A_tild[NZE] = u[index];
|
|
else if (preconditioner == 3)
|
|
{
|
|
if (lag > ti_y_kmax || lag < ti_y_kmin)
|
|
{
|
|
Host_b[eq + t * Size] += u[index]*y[index_vara[var+Size*(y_kmin+lag)]];
|
|
to_store = false;
|
|
}
|
|
if (t == 0)
|
|
{
|
|
map<pair<int, int>, int>::const_iterator it = jacob_struct.find(make_pair(eq + t * Size, var));
|
|
if (it != jacob_struct.end())
|
|
Host_A_tild[it->second] += u[index];
|
|
else
|
|
{
|
|
jacob_struct[make_pair(eq, var)] = NZE_tild;
|
|
Host_A_tild[NZE_tild] = u[index];
|
|
Host_Ai_tild[NZE_tild] = var;
|
|
NZE_tild++;
|
|
}
|
|
}
|
|
}
|
|
if (to_store)
|
|
{
|
|
Host_Ax[NZE] = u[index];
|
|
Host_Ai[NZE] = var + lag * Size;
|
|
NZE++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
# ifdef DEBUG
|
|
if (var < 0 || var >= Size * periods)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << var << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var+Size*(y_kmin+t+lag) < 0 || var+Size*(y_kmin+lag) >= Size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << var+Size*(y_kmin+lag) << ") out of range for index_vara vector max=" << Size*(periods+y_kmin+y_kmax) << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var+Size*(y_kmin+lag)] < 0 || index_vara[var+Size*(y_kmin+lag)] >= y_size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << index_vara[var+Size*(y_kmin+lag)] << ") out of range for y vector max=" << y_size*(periods+y_kmin+y_kmax) << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
# endif
|
|
Host_b[eq + t * Size] += u[index]*y[index_vara[var+Size*(y_kmin+lag)]];
|
|
}
|
|
}
|
|
else // ...and store it in the u vector
|
|
{
|
|
# ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << index << ") out of range for u vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var < 0 || var >= (Size*periods))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_CUDA_Sparse, index (" << var << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
# endif
|
|
Host_b[var] += u[index];
|
|
}
|
|
it4++;
|
|
}
|
|
}
|
|
Host_Ap[Size*periods] = NZE;
|
|
if (preconditioner == 3)
|
|
{
|
|
int *tmp_Ap_tild = (int *) mxMalloc((Size + 1) * sizeof(int));
|
|
test_mxMalloc(tmp_Ap_tild, __LINE__, __FILE__, __func__, (Size + 1) * sizeof(int));
|
|
int *tmp_Ai_tild = (int *) mxMalloc(NZE_tild * sizeof(int));
|
|
test_mxMalloc(tmp_Ai_tild, __LINE__, __FILE__, __func__, NZE_tild * sizeof(int));
|
|
double *tmp_A_tild = (double *) mxMalloc(NZE_tild * sizeof(double));
|
|
test_mxMalloc(tmp_A_tild, __LINE__, __FILE__, __func__, NZE_tild * sizeof(double));
|
|
memcpy(tmp_Ap_tild, Host_Ap_tild, (Size + 1) * sizeof(int));
|
|
memcpy(tmp_Ai_tild, Host_Ai_tild, NZE_tild * sizeof(int));
|
|
memcpy(tmp_A_tild, Host_A_tild, NZE_tild * sizeof(double));
|
|
//int NZE_tild_old = NZE_tild;
|
|
NZE_tild = 0;
|
|
Host_Ap_tild[0] = NZE_tild;
|
|
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
for (int j = tmp_Ap_tild[i]; j < tmp_Ap_tild[i+1]; j++)
|
|
if (abs(tmp_A_tild[j]) > 1.0e-20)
|
|
{
|
|
Host_A_tild[NZE_tild] = tmp_A_tild[j];
|
|
Host_Ai_tild[NZE_tild] = tmp_Ai_tild[j];
|
|
NZE_tild++;
|
|
}
|
|
Host_Ap_tild[i+1] = NZE_tild;
|
|
}
|
|
mxFree(tmp_Ap_tild);
|
|
mxFree(tmp_Ai_tild);
|
|
mxFree(tmp_A_tild);
|
|
}
|
|
|
|
*nnz = NZE;
|
|
*nnz_tild = NZE_tild;
|
|
if (preconditioner == 1 || preconditioner == 2 || preconditioner == 3)
|
|
preconditioner_size = NZE;
|
|
|
|
# ifdef DEBUG
|
|
mexPrintf("Host_Ax = [");
|
|
for (int i = 0; i < NZE; i++)
|
|
mexPrintf("%f ", Host_Ax[i]);
|
|
mexPrintf("]\n");
|
|
|
|
mexPrintf("Host_Ap = [");
|
|
for (int i = 0; i < n+1; i++)
|
|
mexPrintf("%d ", Host_Ap[i]);
|
|
mexPrintf("]\n");
|
|
|
|
mexPrintf("Host_Ai = [");
|
|
for (int i = 0; i < NZE; i++)
|
|
mexPrintf("%d ", Host_Ai[i]);
|
|
mexPrintf("]\n");
|
|
# endif
|
|
cudaChk(cudaMalloc((void **) Ai, NZE * sizeof(int)), " in Init_Cuda_Sparse, can't allocate Ai index vector on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) Ax, NZE * sizeof(double)), " in Init_Cuda_Sparse, can't allocate Ax on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) Ap, (n+1) * sizeof(int)), " in Init_Cuda_Sparse, can't allocate Ap index vector on the graphic card\n");
|
|
if (preconditioner == 3)
|
|
{
|
|
cudaChk(cudaMalloc((void **) Ai_tild, NZE_tild * sizeof(int)), " in Init_Cuda_Sparse, can't allocate Ai_tild index vector on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) Ap_tild, (n+1) * sizeof(int)), " in Init_Cuda_Sparse, can't allocate Ap_tild index vector on the graphic card\n");
|
|
}
|
|
cudaChk(cudaMalloc((void **) A_tild, preconditioner_size * sizeof(double)), " in Init_Cuda_Sparse, can't allocate A_tild on the graphic card\n");
|
|
|
|
cudaChk(cudaMemcpy(*x0, Host_x0, n * sizeof(double), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy x0 = Host_x0 failed");
|
|
cudaChk(cudaMemcpy(*b, Host_b, n * sizeof(double), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy b = Host_b failed");
|
|
cudaChk(cudaMemcpy(*Ap, Host_Ap, (n + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy Ap = Host_Ap failed");
|
|
cudaChk(cudaMemcpy(*Ai, Host_Ai, NZE * sizeof(int), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy Ai = Host_Ai failed");
|
|
cudaChk(cudaMemcpy(*Ax, Host_Ax, NZE * sizeof(double), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy Ax = Host_Ax failed");
|
|
if (preconditioner == 3)
|
|
{
|
|
cudaChk(cudaMemcpy(*Ap_tild, Host_Ap_tild, (n + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy Ap_tild = Host_Ap_tild failed");
|
|
cudaChk(cudaMemcpy(*Ai_tild, Host_Ai_tild, NZE_tild * sizeof(int), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy Ai_tild = Host_Ai_til failed");
|
|
}
|
|
cudaChk(cudaMemcpy(*A_tild, Host_A_tild, preconditioner_size * sizeof(double), cudaMemcpyHostToDevice), " in Init_CUDA_Sparse, cudaMemcpy A_tild = Host_A_tild failed");
|
|
}
|
|
#endif
|
|
|
|
void
|
|
dynSparseMatrix::PrintM(int n, double *Ax, mwIndex *Ap, mwIndex *Ai)
|
|
{
|
|
int nnz = Ap[n];
|
|
double *A = (double *) mxMalloc(n * n * sizeof(double));
|
|
test_mxMalloc(A, __LINE__, __FILE__, __func__, n * n * sizeof(double));
|
|
memset(A, 0, n * n * sizeof(double));
|
|
int k = 0;
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
for (int j = Ap[i]; j < (int) Ap[i + 1]; j++)
|
|
{
|
|
int row = Ai[j];
|
|
A[row *n + i] = Ax[j];
|
|
k++;
|
|
}
|
|
}
|
|
if (nnz != k)
|
|
mexPrintf("Problem nnz(%d) != number of elements(%d)\n", nnz, k);
|
|
mexPrintf("----------------------\n");
|
|
//mexEvalString("drawnow;");
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
for (int j = 0; j < n; j++)
|
|
mexPrintf("%-6.3f ", A[i * n + j]);
|
|
mexPrintf("\n");
|
|
}
|
|
mxFree(A);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_Matlab_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, mxArray *x0_m)
|
|
{
|
|
int t, eq, var, lag, ti_y_kmin, ti_y_kmax;
|
|
double *b = mxGetPr(b_m);
|
|
|
|
if (!b)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, can't retrieve b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *x0 = mxGetPr(x0_m);
|
|
if (!x0)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve x0 vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *Aj = mxGetJc(A_m);
|
|
if (!Aj)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, can't allocate Aj index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *Ai = mxGetIr(A_m);
|
|
if (!Ai)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *A = mxGetPr(A_m);
|
|
if (!A)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, can't retrieve A matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
ya[i] = y[i];
|
|
#ifdef DEBUG
|
|
unsigned int max_nze = mxGetNzmax(A_m);
|
|
#endif
|
|
unsigned int NZE = 0;
|
|
int last_var = 0;
|
|
for (int i = 0; i < periods*Size; i++)
|
|
{
|
|
b[i] = 0;
|
|
x0[i] = y[index_vara[Size*y_kmin+i]];
|
|
}
|
|
Aj[0] = 0;
|
|
for (t = 0; t < periods; t++)
|
|
{
|
|
last_var = 0;
|
|
it4 = IM.begin();
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.first;
|
|
if (var != last_var)
|
|
{
|
|
Aj[1+last_var + t * Size] = NZE;
|
|
last_var = var;
|
|
}
|
|
eq = it4->first.second+Size*t;
|
|
lag = -it4->first.first.second;
|
|
int index = it4->second+ (t-lag) * u_count_init;
|
|
if (var < (periods+y_kmax)*Size)
|
|
{
|
|
ti_y_kmin = -min(t, y_kmin);
|
|
ti_y_kmax = min(periods-(t +1), y_kmax);
|
|
int ti_new_y_kmax = min(t, y_kmax);
|
|
int ti_new_y_kmin = -min(periods-(t+1), y_kmin);
|
|
if (lag <= ti_new_y_kmax && lag >= ti_new_y_kmin) /*Build the index for sparse matrix containing the jacobian : u*/
|
|
{
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc || index > Size + Size*Size)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << index << ") out of range for u vector max = " << Size+Size*Size << " allocated = " << u_count_alloc << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (NZE >= max_nze)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, exceeds the capacity of A_m sparse matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
A[NZE] = u[index];
|
|
Ai[NZE] = eq - lag * Size;
|
|
NZE++;
|
|
}
|
|
if (lag > ti_y_kmax || lag < ti_y_kmin)
|
|
{
|
|
#ifdef DEBUG
|
|
if (eq < 0 || eq >= Size * periods)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (var+Size*(y_kmin+t+lag) < 0 || var+Size*(y_kmin+t+lag) >= Size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << var+Size*(y_kmin+t+lag) << ") out of range for index_vara vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (index_vara[var+Size*(y_kmin+t+lag)] < 0 || index_vara[var+Size*(y_kmin+t+lag)] >= y_size*(periods+y_kmin+y_kmax))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << index_vara[var+Size*(y_kmin+t+lag)] << ") out of range for y vector max=" << y_size*(periods+y_kmin+y_kmax) << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
b[eq] += u[index+lag*u_count_init]*y[index_vara[var+Size*(y_kmin+t+lag)]];
|
|
}
|
|
}
|
|
else /* ...and store it in the u vector*/
|
|
{
|
|
#ifdef DEBUG
|
|
if (index < 0 || index >= u_count_alloc)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << index << ") out of range for u vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (eq < 0 || eq >= (Size*periods))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse, index (" << eq << ") out of range for b vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
#endif
|
|
b[eq] += u[index];
|
|
}
|
|
it4++;
|
|
}
|
|
}
|
|
Aj[Size*periods] = NZE;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM)
|
|
{
|
|
int t, i, eq, var, lag, ti_y_kmin, ti_y_kmax;
|
|
double tmp_b = 0.0;
|
|
map<pair<pair<int, int>, int>, int>::iterator it4;
|
|
NonZeroElem *first;
|
|
pivot = (int *) mxMalloc(Size*periods*sizeof(int));
|
|
test_mxMalloc(pivot, __LINE__, __FILE__, __func__, Size*periods*sizeof(int));
|
|
pivot_save = (int *) mxMalloc(Size*periods*sizeof(int));
|
|
test_mxMalloc(pivot_save, __LINE__, __FILE__, __func__, Size*periods*sizeof(int));
|
|
pivotk = (int *) mxMalloc(Size*periods*sizeof(int));
|
|
test_mxMalloc(pivotk, __LINE__, __FILE__, __func__, Size*periods*sizeof(int));
|
|
pivotv = (double *) mxMalloc(Size*periods*sizeof(double));
|
|
test_mxMalloc(pivotv, __LINE__, __FILE__, __func__, Size*periods*sizeof(double));
|
|
pivotva = (double *) mxMalloc(Size*periods*sizeof(double));
|
|
test_mxMalloc(pivotva, __LINE__, __FILE__, __func__, Size*periods*sizeof(double));
|
|
b = (int *) mxMalloc(Size*periods*sizeof(int));
|
|
test_mxMalloc(b, __LINE__, __FILE__, __func__, Size*periods*sizeof(int));
|
|
line_done = (bool *) mxMalloc(Size*periods*sizeof(bool));
|
|
test_mxMalloc(line_done, __LINE__, __FILE__, __func__, Size*periods*sizeof(bool));
|
|
mem_mngr.init_CHUNK_BLCK_SIZE(u_count);
|
|
g_save_op = NULL;
|
|
g_nop_all = 0;
|
|
i = (periods+y_kmax+1)*Size*sizeof(NonZeroElem *);
|
|
FNZE_R = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(FNZE_R, __LINE__, __FILE__, __func__, i);
|
|
FNZE_C = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(FNZE_C, __LINE__, __FILE__, __func__, i);
|
|
NonZeroElem **temp_NZE_R = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(temp_NZE_R, __LINE__, __FILE__, __func__, i);
|
|
NonZeroElem **temp_NZE_C = (NonZeroElem **) mxMalloc(i);
|
|
test_mxMalloc(temp_NZE_C, __LINE__, __FILE__, __func__, i);
|
|
i = (periods+y_kmax+1)*Size*sizeof(int);
|
|
NbNZRow = (int *) mxMalloc(i);
|
|
test_mxMalloc(NbNZRow, __LINE__, __FILE__, __func__, i);
|
|
NbNZCol = (int *) mxMalloc(i);
|
|
test_mxMalloc(NbNZCol, __LINE__, __FILE__, __func__, i);
|
|
|
|
for (int i = 0; i < periods*Size; i++)
|
|
{
|
|
b[i] = 0;
|
|
line_done[i] = 0;
|
|
}
|
|
for (int i = 0; i < (periods+y_kmax+1)*Size; i++)
|
|
{
|
|
FNZE_C[i] = NULL;
|
|
FNZE_R[i] = NULL;
|
|
temp_NZE_C[i] = NULL;
|
|
temp_NZE_R[i] = NULL;
|
|
NbNZRow[i] = 0;
|
|
NbNZCol[i] = 0;
|
|
}
|
|
int nnz = 0;
|
|
//pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) ordered private(it4, ti_y_kmin, ti_y_kmax, eq, var, lag) schedule(dynamic)
|
|
for (t = 0; t < periods; t++)
|
|
{
|
|
ti_y_kmin = -min(t, y_kmin);
|
|
ti_y_kmax = min(periods-(t+1), y_kmax);
|
|
it4 = IM.begin();
|
|
eq = -1;
|
|
//pragma omp ordered
|
|
while (it4 != IM.end())
|
|
{
|
|
var = it4->first.first.second;
|
|
if (eq != it4->first.first.first+Size*t)
|
|
tmp_b = 0;
|
|
eq = it4->first.first.first+Size*t;
|
|
lag = it4->first.second;
|
|
if (var < (periods+y_kmax)*Size)
|
|
{
|
|
lag = it4->first.second;
|
|
if (lag <= ti_y_kmax && lag >= ti_y_kmin) /*Build the index for sparse matrix containing the jacobian : u*/
|
|
{
|
|
nnz++;
|
|
var += Size*t;
|
|
NbNZRow[eq]++;
|
|
NbNZCol[var]++;
|
|
first = mem_mngr.mxMalloc_NZE();
|
|
first->NZE_C_N = NULL;
|
|
first->NZE_R_N = NULL;
|
|
first->u_index = it4->second+u_count_init*t;
|
|
first->r_index = eq;
|
|
first->c_index = var;
|
|
first->lag_index = lag;
|
|
if (FNZE_R[eq] == NULL)
|
|
FNZE_R[eq] = first;
|
|
if (FNZE_C[var] == NULL)
|
|
FNZE_C[var] = first;
|
|
if (temp_NZE_R[eq] != NULL)
|
|
temp_NZE_R[eq]->NZE_R_N = first;
|
|
if (temp_NZE_C[var] != NULL)
|
|
temp_NZE_C[var]->NZE_C_N = first;
|
|
temp_NZE_R[eq] = first;
|
|
temp_NZE_C[var] = first;
|
|
}
|
|
else /*Build the additive terms ooutside the simulation periods related to the first lags and the last leads...*/
|
|
{
|
|
if (lag < ti_y_kmin)
|
|
{
|
|
tmp_b += u[it4->second+u_count_init*t]*y[index_vara[var+Size*(y_kmin+t)]];
|
|
}
|
|
else
|
|
{
|
|
tmp_b += u[it4->second+u_count_init*t]*y[index_vara[var+Size*(y_kmin+t)]];
|
|
|
|
}
|
|
}
|
|
}
|
|
else /* ...and store it in the u vector*/
|
|
{
|
|
b[eq] = it4->second+u_count_init*t;
|
|
u[b[eq]] += tmp_b;
|
|
tmp_b = 0;
|
|
}
|
|
it4++;
|
|
}
|
|
}
|
|
mxFree(temp_NZE_R);
|
|
mxFree(temp_NZE_C);
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::Get_u()
|
|
{
|
|
if (!u_liste.empty())
|
|
{
|
|
int i = u_liste.back();
|
|
u_liste.pop_back();
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
if (u_count < u_count_alloc)
|
|
{
|
|
int i = u_count;
|
|
u_count++;
|
|
return i;
|
|
}
|
|
else
|
|
{
|
|
u_count_alloc += 5*u_count_alloc_save;
|
|
u = (double *) mxRealloc(u, u_count_alloc*sizeof(double));
|
|
if (!u)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Get_u, memory exhausted (realloc(" << u_count_alloc*sizeof(double) << "))\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
int i = u_count;
|
|
u_count++;
|
|
return i;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Delete_u(int pos)
|
|
{
|
|
u_liste.push_back(pos);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Clear_u()
|
|
{
|
|
u_liste.clear();
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Print_u()
|
|
{
|
|
for (unsigned int i = 0; i < u_liste.size(); i++)
|
|
mexPrintf("%d ", u_liste[i]);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::End_GE(int Size)
|
|
{
|
|
mem_mngr.Free_All();
|
|
mxFree(FNZE_R);
|
|
mxFree(FNZE_C);
|
|
mxFree(NbNZRow);
|
|
mxFree(NbNZCol);
|
|
mxFree(b);
|
|
mxFree(line_done);
|
|
mxFree(pivot);
|
|
mxFree(pivot_save);
|
|
mxFree(pivotk);
|
|
mxFree(pivotv);
|
|
mxFree(pivotva);
|
|
}
|
|
|
|
bool
|
|
dynSparseMatrix::compare(int *save_op, int *save_opa, int *save_opaa, int beg_t, int periods, long int nop4, int Size)
|
|
{
|
|
long int i, j, nop = nop4/2;
|
|
double r = 0.0;
|
|
bool OK = true;
|
|
t_save_op_s *save_op_s, *save_opa_s, *save_opaa_s;
|
|
int *diff1, *diff2;
|
|
diff1 = (int *) mxMalloc(nop*sizeof(int));
|
|
test_mxMalloc(diff1, __LINE__, __FILE__, __func__, nop*sizeof(int));
|
|
diff2 = (int *) mxMalloc(nop*sizeof(int));
|
|
test_mxMalloc(diff2, __LINE__, __FILE__, __func__, nop*sizeof(int));
|
|
int max_save_ops_first = -1;
|
|
j = i = 0;
|
|
while (i < nop4 && OK)
|
|
{
|
|
save_op_s = (t_save_op_s *) &(save_op[i]);
|
|
save_opa_s = (t_save_op_s *) &(save_opa[i]);
|
|
save_opaa_s = (t_save_op_s *) &(save_opaa[i]);
|
|
diff1[j] = save_op_s->first-save_opa_s->first;
|
|
if (max_save_ops_first < save_op_s->first+diff1[j]*(periods-beg_t))
|
|
{
|
|
max_save_ops_first = save_op_s->first+diff1[j]*(periods-beg_t);
|
|
}
|
|
switch (save_op_s->operat)
|
|
{
|
|
case IFLD:
|
|
case IFDIV:
|
|
OK = (save_op_s->operat == save_opa_s->operat && save_opa_s->operat == save_opaa_s->operat
|
|
&& diff1[j] == (save_opa_s->first-save_opaa_s->first));
|
|
i += 2;
|
|
break;
|
|
case IFLESS:
|
|
case IFSUB:
|
|
diff2[j] = save_op_s->second-save_opa_s->second;
|
|
OK = (save_op_s->operat == save_opa_s->operat && save_opa_s->operat == save_opaa_s->operat
|
|
&& diff1[j] == (save_opa_s->first-save_opaa_s->first)
|
|
&& diff2[j] == (save_opa_s->second-save_opaa_s->second));
|
|
i += 3;
|
|
break;
|
|
default:
|
|
ostringstream tmp;
|
|
tmp << " in compare, unknown operator = " << save_op_s->operat << "\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
j++;
|
|
}
|
|
// the same pivot for all remaining periods
|
|
if (OK)
|
|
{
|
|
for (int i = beg_t; i < periods; i++)
|
|
{
|
|
for (int j = 0; j < Size; j++)
|
|
pivot[i*Size+j] = pivot[(i-1)*Size+j]+Size;
|
|
}
|
|
if (max_save_ops_first >= u_count_alloc)
|
|
{
|
|
u_count_alloc += max_save_ops_first;
|
|
u = (double *) mxRealloc(u, u_count_alloc*sizeof(double));
|
|
if (!u)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in compare, memory exhausted (realloc(" << u_count_alloc*sizeof(double) << "))\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
for (int t = 1; t < periods-beg_t-y_kmax; t++)
|
|
{
|
|
int i = j = 0;
|
|
double *up;
|
|
while (i < nop4)
|
|
{
|
|
t_save_op_s *save_op_s = (t_save_op_s *) (&(save_op[i]));
|
|
up = &u[save_op_s->first+t*diff1[j]];
|
|
switch (save_op_s->operat)
|
|
{
|
|
case IFLD:
|
|
r = *up;
|
|
i += 2;
|
|
break;
|
|
case IFDIV:
|
|
*up /= r;
|
|
i += 2;
|
|
break;
|
|
case IFSUB:
|
|
*up -= u[save_op_s->second+t*diff2[j]]*r;;
|
|
i += 3;
|
|
break;
|
|
case IFLESS:
|
|
*up = -u[save_op_s->second+t*diff2[j]]*r;
|
|
i += 3;
|
|
break;
|
|
}
|
|
j++;
|
|
}
|
|
}
|
|
int t1 = max(1, periods-beg_t-y_kmax);
|
|
int periods_beg_t = periods-beg_t;
|
|
for (int t = t1; t < periods_beg_t; t++)
|
|
{
|
|
int i = j = 0;
|
|
int gap = periods_beg_t-t;
|
|
while (i < nop4)
|
|
{
|
|
t_save_op_s *save_op_s = (t_save_op_s *) (&(save_op[i]));
|
|
if (save_op_s->lag < gap)
|
|
{
|
|
double *up = &u[save_op_s->first+t*diff1[j]];
|
|
switch (save_op_s->operat)
|
|
{
|
|
case IFLD:
|
|
r = *up;
|
|
i += 2;
|
|
break;
|
|
case IFDIV:
|
|
*up /= r;
|
|
i += 2;
|
|
break;
|
|
case IFSUB:
|
|
*up -= u[save_op_s->second+t*diff2[j]]*r;
|
|
i += 3;
|
|
break;
|
|
case IFLESS:
|
|
*up = -u[save_op_s->second+t*diff2[j]]*r;
|
|
i += 3;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
switch (save_op_s->operat)
|
|
{
|
|
case IFLD:
|
|
case IFDIV:
|
|
i += 2;
|
|
break;
|
|
case IFSUB:
|
|
case IFLESS:
|
|
i += 3;
|
|
break;
|
|
}
|
|
}
|
|
j++;
|
|
}
|
|
}
|
|
}
|
|
mxFree(diff1);
|
|
mxFree(diff2);
|
|
return OK;
|
|
}
|
|
|
|
int
|
|
dynSparseMatrix::complete(int beg_t, int Size, int periods, int *b)
|
|
{
|
|
long int i, j, k, nop, nopa, nop1, cal_y, nb_var, pos, max_var, min_var;
|
|
NonZeroElem *first;
|
|
int *save_code;
|
|
int *diff;
|
|
double yy = 0.0, err;
|
|
|
|
int size_of_save_code = (1+y_kmax)*Size*(Size+1+4)/2*4;
|
|
save_code = (int *) mxMalloc(size_of_save_code*sizeof(int));
|
|
test_mxMalloc(save_code, __LINE__, __FILE__, __func__, size_of_save_code*sizeof(int));
|
|
int size_of_diff = (1+y_kmax)*Size*(Size+1+4);
|
|
diff = (int *) mxMalloc(size_of_diff*sizeof(int));
|
|
test_mxMalloc(diff, __LINE__, __FILE__, __func__, size_of_diff*sizeof(int));
|
|
cal_y = y_size*y_kmin;
|
|
|
|
i = (beg_t+1)*Size-1;
|
|
nop = 0;
|
|
for (j = i; j > i-Size; j--)
|
|
{
|
|
pos = pivot[j];
|
|
nb_var = At_Row(pos, &first);
|
|
first = first->NZE_R_N;
|
|
nb_var--;
|
|
save_code[nop] = IFLDZ;
|
|
save_code[nop+1] = 0;
|
|
save_code[nop+2] = 0;
|
|
save_code[nop+3] = 0;
|
|
#ifdef DEBUG
|
|
if ((nop+3) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop+2, size_of_save_code);
|
|
#endif
|
|
nop += 4;
|
|
for (k = 0; k < nb_var; k++)
|
|
{
|
|
save_code[nop] = IFMUL;
|
|
save_code[nop+1] = index_vara[first->c_index]+cal_y;
|
|
save_code[nop+2] = first->u_index;
|
|
save_code[nop+3] = first->lag_index;
|
|
#ifdef DEBUG
|
|
if ((nop+3) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop+2, size_of_save_code);
|
|
#endif
|
|
nop += 4;
|
|
first = first->NZE_R_N;
|
|
}
|
|
save_code[nop] = IFADD;
|
|
save_code[nop+1] = b[pos];
|
|
save_code[nop+2] = 0;
|
|
save_code[nop+3] = 0;
|
|
#ifdef DEBUG
|
|
if ((nop+3) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop+2, size_of_save_code);
|
|
#endif
|
|
nop += 4;
|
|
save_code[nop] = IFSTP;
|
|
save_code[nop+1] = index_vara[j]+y_size*y_kmin;
|
|
save_code[nop+2] = 0;
|
|
save_code[nop+3] = 0;
|
|
#ifdef DEBUG
|
|
if ((nop+2) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop+2, size_of_save_code);
|
|
#endif
|
|
nop += 4;
|
|
}
|
|
i = beg_t*Size-1;
|
|
nop1 = nopa = 0;
|
|
for (j = i; j > i-Size; j--)
|
|
{
|
|
pos = pivot[j];
|
|
nb_var = At_Row(pos, &first);
|
|
first = first->NZE_R_N;
|
|
nb_var--;
|
|
diff[nopa] = 0;
|
|
diff[nopa+1] = 0;
|
|
nopa += 2;
|
|
nop1 += 4;
|
|
for (k = 0; k < nb_var; k++)
|
|
{
|
|
diff[nopa] = save_code[nop1+1]-(index_vara[first->c_index]+cal_y);
|
|
diff[nopa+1] = save_code[nop1+2]-(first->u_index);
|
|
#ifdef DEBUG
|
|
if ((nop1+2) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop1+2, size_of_save_code);
|
|
if ((nopa+1) >= size_of_diff)
|
|
mexPrintf("out of diff[%d] (bound=%d)\n", nopa+2, size_of_diff);
|
|
#endif
|
|
nopa += 2;
|
|
nop1 += 4;
|
|
first = first->NZE_R_N;
|
|
}
|
|
diff[nopa] = save_code[nop1+1]-(b[pos]);
|
|
diff[nopa+1] = 0;
|
|
#ifdef DEBUG
|
|
if ((nop1+3) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop1+2, size_of_save_code);
|
|
if ((nopa+1) >= size_of_diff)
|
|
mexPrintf("out of diff[%d] (bound=%d)\n", nopa+2, size_of_diff);
|
|
#endif
|
|
nopa += 2;
|
|
nop1 += 4;
|
|
diff[nopa] = save_code[nop1+1]-(index_vara[j]+y_size*y_kmin);
|
|
diff[nopa+1] = 0;
|
|
#ifdef DEBUG
|
|
if ((nop1+4) >= size_of_save_code)
|
|
mexPrintf("out of save_code[%d] (bound=%d)\n", nop1+2, size_of_save_code);
|
|
if ((nopa+1) >= size_of_diff)
|
|
mexPrintf("out of diff[%d] (bound=%d)\n", nopa+2, size_of_diff);
|
|
#endif
|
|
nopa += 2;
|
|
nop1 += 4;
|
|
}
|
|
max_var = (periods+y_kmin)*y_size;
|
|
min_var = y_kmin*y_size;
|
|
for (int t = periods+y_kmin-1; t >= beg_t+y_kmin; t--)
|
|
{
|
|
int j = 0, k;
|
|
int ti = t-y_kmin-beg_t;
|
|
for (int i = 0; i < nop; i += 4)
|
|
{
|
|
switch (save_code[i])
|
|
{
|
|
case IFLDZ:
|
|
yy = 0;
|
|
break;
|
|
case IFMUL:
|
|
k = save_code[i+1]+ti*diff[j];
|
|
if (k < max_var && k > min_var)
|
|
{
|
|
yy += y[k]*u[save_code[i+2]+ti*diff[j+1]];
|
|
}
|
|
break;
|
|
case IFADD:
|
|
yy = -(yy+u[save_code[i+1]+ti*diff[j]]);
|
|
break;
|
|
case IFSTP:
|
|
k = save_code[i+1]+ti*diff[j];
|
|
err = yy - y[k];
|
|
y[k] += slowc*(err);
|
|
break;
|
|
}
|
|
j += 2;
|
|
}
|
|
}
|
|
mxFree(save_code);
|
|
mxFree(diff);
|
|
return (beg_t);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::bksub(int tbreak, int last_period, int Size, double slowc_l)
|
|
{
|
|
NonZeroElem *first;
|
|
int i, j, k;
|
|
double yy;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
y[i] = ya[i];
|
|
if (symbolic && tbreak)
|
|
last_period = complete(tbreak, Size, periods, b);
|
|
else
|
|
last_period = periods;
|
|
for (int t = last_period+y_kmin-1; t >= y_kmin; t--)
|
|
{
|
|
int ti = (t-y_kmin)*Size;
|
|
int cal = y_kmin*Size;
|
|
int cal_y = y_size*y_kmin;
|
|
for (i = ti-1; i >= ti-Size; i--)
|
|
{
|
|
j = i+cal;
|
|
int pos = pivot[i+Size];
|
|
int nb_var = At_Row(pos, &first);
|
|
first = first->NZE_R_N;
|
|
nb_var--;
|
|
int eq = index_vara[j]+y_size;
|
|
yy = 0;
|
|
for (k = 0; k < nb_var; k++)
|
|
{
|
|
yy += y[index_vara[first->c_index]+cal_y]*u[first->u_index];
|
|
first = first->NZE_R_N;
|
|
}
|
|
yy = -(yy+y[eq]+u[b[pos]]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l*yy;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::simple_bksub(int it_, int Size, double slowc_l)
|
|
{
|
|
int i, k;
|
|
double yy;
|
|
NonZeroElem *first;
|
|
for (int i = 0; i < y_size; i++)
|
|
y[i+it_*y_size] = ya[i+it_*y_size];
|
|
for (i = Size-1; i >= 0; i--)
|
|
{
|
|
int pos = pivot[i];
|
|
int nb_var = At_Row(pos, &first);
|
|
first = first->NZE_R_N;
|
|
nb_var--;
|
|
int eq = index_vara[i];
|
|
yy = 0;
|
|
for (k = 0; k < nb_var; k++)
|
|
{
|
|
yy += y[index_vara[first->c_index]+it_*y_size]*u[first->u_index];
|
|
first = first->NZE_R_N;
|
|
}
|
|
yy = -(yy+y[eq+it_*y_size]+u[b[pos]]);
|
|
direction[eq+it_*y_size] = yy;
|
|
y[eq+it_*y_size] += slowc_l*yy;
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::CheckIt(int y_size, int y_kmin, int y_kmax, int Size, int periods)
|
|
{
|
|
const double epsilon = 1e-7;
|
|
fstream SaveResult;
|
|
ostringstream out;
|
|
out << "Result" << iter;
|
|
SaveResult.open(out.str().c_str(), ios::in);
|
|
if (!SaveResult.is_open())
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in CheckIt, Result file cannot be opened\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mexPrintf("Reading Result...");
|
|
int row, col;
|
|
SaveResult >> row;
|
|
mexPrintf("row=%d\n", row);
|
|
SaveResult >> col;
|
|
mexPrintf("col=%d\n", col);
|
|
double G1a;
|
|
mexPrintf("Allocated\n");
|
|
NonZeroElem *first;
|
|
for (int j = 0; j < col; j++)
|
|
{
|
|
mexPrintf("j=%d ", j);
|
|
int nb_equ = At_Col(j, &first);
|
|
mexPrintf("nb_equ=%d\n", nb_equ);
|
|
int line;
|
|
if (first)
|
|
line = first->r_index;
|
|
else
|
|
line = -9999999;
|
|
for (int i = 0; i < row; i++)
|
|
{
|
|
SaveResult >> G1a;
|
|
if (line == i)
|
|
{
|
|
if (abs(u[first->u_index]/G1a-1) > epsilon)
|
|
mexPrintf("Problem at r=%d c=%d u[first->u_index]=%5.14f G1a[i][j]=%5.14f %f\n", i, j, u[first->u_index], G1a, u[first->u_index]/G1a-1);
|
|
first = first->NZE_C_N;
|
|
if (first)
|
|
line = first->r_index;
|
|
else
|
|
line = -9999999;
|
|
}
|
|
else
|
|
{
|
|
if (G1a != 0.0)
|
|
mexPrintf("Problem at r=%d c=%d G1a[i][j]=%f\n", i, j, G1a);
|
|
}
|
|
}
|
|
}
|
|
SaveResult >> row;
|
|
mexPrintf("row(2)=%d\n", row);
|
|
double *B;
|
|
B = (double *) mxMalloc(row*sizeof(double));
|
|
test_mxMalloc(B, __LINE__, __FILE__, __func__, row*sizeof(double));
|
|
for (int i = 0; i < row; i++)
|
|
SaveResult >> B[i];
|
|
SaveResult.close();
|
|
mexPrintf("done\n");
|
|
mexPrintf("Comparing...");
|
|
for (int i = 0; i < row; i++)
|
|
{
|
|
if (abs(u[b[i]]+B[i]) > epsilon)
|
|
mexPrintf("Problem at i=%d u[b[i]]=%f B[i]=%f\n", i, u[b[i]], B[i]);
|
|
}
|
|
mxFree(B);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Check_the_Solution(int periods, int y_kmin, int y_kmax, int Size, double *u, int *pivot, int *b)
|
|
{
|
|
const double epsilon = 1e-10;
|
|
Init_GE(periods, y_kmin, y_kmax, Size, IM_i);
|
|
NonZeroElem *first;
|
|
int cal_y = y_kmin*Size;
|
|
mexPrintf(" ");
|
|
for (int i = 0; i < Size; i++)
|
|
mexPrintf(" %8d", i);
|
|
mexPrintf("\n");
|
|
for (int t = y_kmin; t < periods+y_kmin; t++)
|
|
{
|
|
mexPrintf("t=%5d", t);
|
|
for (int i = 0; i < Size; i++)
|
|
mexPrintf(" %d %1.6f", t*y_size+index_vara[i], y[t*y_size+index_vara[i]]);
|
|
mexPrintf("\n");
|
|
}
|
|
for (int i = 0; i < Size*periods; i++)
|
|
{
|
|
double res = 0;
|
|
int pos = pivot[i];
|
|
mexPrintf("pos[%d]=%d", i, pos);
|
|
int nb_var = At_Row(pos, &first);
|
|
mexPrintf(" nb_var=%d\n", nb_var);
|
|
for (int j = 0; j < nb_var; j++)
|
|
{
|
|
mexPrintf("(y[%d]=%f)*(u[%d]=%f)(r=%d, c=%d)\n", index_vara[first->c_index]+cal_y, y[index_vara[first->c_index]+cal_y], first->u_index, u[first->u_index], first->r_index, first->c_index);
|
|
res += y[index_vara[first->c_index]+cal_y]*u[first->u_index];
|
|
first = first->NZE_R_N;
|
|
}
|
|
double tmp_ = res;
|
|
res += u[b[pos]];
|
|
if (abs(res) > epsilon)
|
|
mexPrintf("Error for equation %d => res=%f y[%d]=%f u[b[%d]]=%f somme(y*u)=%f\n", pos, res, pos, y[index_vara[pos]], pos, u[b[pos]], tmp_);
|
|
}
|
|
}
|
|
|
|
/* Returns a new full matrix C with C(k) = A(k) - B(k) for every element k of
   A (column-major order). C is created with the row count of A and the
   column count of B.
   NOTE(review): A and B are presumably expected to have the same
   dimensions; nothing checks it here. */
mxArray *
dynSparseMatrix::substract_A_B(mxArray *A_m, mxArray *B_m)
{
  const size_t n_A = mxGetN(A_m);
  const size_t m_A = mxGetM(A_m);
  const size_t n_B = mxGetN(B_m);
  const double *A_d = mxGetPr(A_m);
  const double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxCreateDoubleMatrix(m_A, n_B, mxREAL);
  double *C_d = mxGetPr(C_m);

  // Element-wise difference over the n_A*m_A entries of A
  const size_t nb_elements = n_A*m_A;
  for (size_t k = 0; k < nb_elements; k++)
    C_d[k] = A_d[k] - B_d[k];
  return C_m;
}
|
|
|
|
/* Returns a new full matrix equal to A minus the sparse matrix B.
   The result starts as a copy of A; each stored element of B is then
   subtracted at its (row, column) position, using the row count of B as the
   column stride. */
mxArray *
dynSparseMatrix::Sparse_substract_A_SB(mxArray *A_m, mxArray *B_m)
{
  const size_t n_B = mxGetN(B_m);
  const size_t m_B = mxGetM(B_m);
  mwIndex *B_i = mxGetIr(B_m);  // row index of each stored element
  mwIndex *B_j = mxGetJc(B_m);  // start of each column in B_i/B_d
  double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxDuplicateArray(A_m);
  double *C_d = mxGetPr(C_m);

  for (size_t col = 0; col < n_B; col++)
    {
      const size_t col_end = B_j[col+1];
      for (size_t k = B_j[col]; k < col_end; k++)
        C_d[col*m_B+B_i[k]] -= B_d[k];
    }
  return C_m;
}
|
|
|
|
/* Returns a new sparse matrix C = A - B, A and B being sparse.

   C has the row count of A and the column count of B, and is built column by
   column by merging the (row-sorted) entries of A and B:
    - a row present only in A contributes A's value,
    - a row present only in B contributes minus B's value,
    - a row present in both contributes the difference (stored even when it
      is zero).

   The merge is done independently for each column, so running out of
   elements in one operand can never consume entries of the other operand or
   read past the end of an index array (which the former global two-cursor
   loop did once A was exhausted before B). Columns of A beyond the column
   count of B are ignored.

   The result is allocated for m_A*n_B elements and nzmax is then set to the
   number of elements actually stored. */
mxArray *
dynSparseMatrix::Sparse_substract_SA_SB(mxArray *A_m, mxArray *B_m)
{
  const size_t n_A = mxGetN(A_m);
  const size_t m_A = mxGetM(A_m);
  mwIndex *A_i = mxGetIr(A_m);
  mwIndex *A_j = mxGetJc(A_m);
  double *A_d = mxGetPr(A_m);
  const size_t n_B = mxGetN(B_m);
  mwIndex *B_i = mxGetIr(B_m);
  mwIndex *B_j = mxGetJc(B_m);
  double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxCreateSparse(m_A, n_B, m_A*n_B, mxREAL);
  mwIndex *C_i = mxGetIr(C_m);
  mwIndex *C_j = mxGetJc(C_m);
  double *C_d = mxGetPr(C_m);

  mwIndex nze_C = 0;
  for (size_t col = 0; col < n_B; col++)
    {
      C_j[col] = nze_C;

      // Range of the column in A (empty when A has no such column) and in B
      mwIndex pa = 0, pa_end = 0;
      if (col < n_A)
        {
          pa = A_j[col];
          pa_end = A_j[col+1];
        }
      mwIndex pb = B_j[col];
      const mwIndex pb_end = B_j[col+1];

      while (pa < pa_end || pb < pb_end)
        {
          if (pb >= pb_end || (pa < pa_end && A_i[pa] < B_i[pb]))
            {
              // Row only in A
              C_i[nze_C] = A_i[pa];
              C_d[nze_C] = A_d[pa++];
            }
          else if (pa >= pa_end || B_i[pb] < A_i[pa])
            {
              // Row only in B
              C_i[nze_C] = B_i[pb];
              C_d[nze_C] = -B_d[pb++];
            }
          else
            {
              // Row in both operands
              C_i[nze_C] = A_i[pa];
              C_d[nze_C] = A_d[pa++] - B_d[pb++];
            }
          nze_C++;
        }
    }
  C_j[n_B] = nze_C;

  mxSetNzmax(C_m, nze_C);
  return C_m;
}
|
|
|
|
/* Returns a new full matrix C = A' * B, where A is sparse and B is full.

   Element (i, j) of C is the dot product of column i of A with column j of
   B, so C has as many rows as A has columns and as many columns as B.

   Compared with the former version, column j of B is now actually addressed
   (the column offset was ignored, which was only right for a single-column
   B) and C is created with n_A rows, matching the stride used to fill it.
   Results are unchanged for a square A and a column vector B. */
mxArray *
dynSparseMatrix::mult_SAT_B(mxArray *A_m, mxArray *B_m)
{
  const size_t n_A = mxGetN(A_m);
  mwIndex *A_i = mxGetIr(A_m);
  mwIndex *A_j = mxGetJc(A_m);
  double *A_d = mxGetPr(A_m);
  const size_t n_B = mxGetN(B_m);
  const size_t m_B = mxGetM(B_m);
  double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxCreateDoubleMatrix(n_A, n_B, mxREAL);
  double *C_d = mxGetPr(C_m);

  for (size_t j = 0; j < n_B; j++)
    {
      const double *B_col = B_d + j*m_B;
      double *C_col = C_d + j*n_A;
      for (size_t i = 0; i < n_A; i++)
        {
          double sum = 0;
          const size_t col_end = A_j[i+1];
          for (size_t k = A_j[i]; k < col_end; k++)
            sum += A_d[k] * B_col[A_i[k]];
          C_col[i] = sum;
        }
    }
  return C_m;
}
|
|
|
|
/* Returns a new sparse matrix C = A' * B, where A is sparse and B is full.

   Element (i, j) of C is the dot product of column i of A with column j of
   B; it is stored only when its absolute value exceeds 1e-10.

   Fixes with respect to the former version:
    - the column pointers of C are completed up to n_B (the column count of
      C), not up to the row count of B;
    - column j of B is actually addressed (the column offset was ignored);
    - C is created with n_A rows and room for n_A*n_B elements, which is what
      the row indices written into it require.
   Results are unchanged for a square A and a column vector B. */
mxArray *
dynSparseMatrix::Sparse_mult_SAT_B(mxArray *A_m, mxArray *B_m)
{
  const size_t n_A = mxGetN(A_m);
  mwIndex *A_i = mxGetIr(A_m);
  mwIndex *A_j = mxGetJc(A_m);
  double *A_d = mxGetPr(A_m);
  const size_t n_B = mxGetN(B_m);
  const size_t m_B = mxGetM(B_m);
  double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxCreateSparse(n_A, n_B, n_A*n_B, mxREAL);
  mwIndex *C_i = mxGetIr(C_m);
  mwIndex *C_j = mxGetJc(C_m);
  double *C_d = mxGetPr(C_m);

  mwIndex nze_C = 0;
  for (size_t j = 0; j < n_B; j++)
    {
      C_j[j] = nze_C;
      const double *B_col = B_d + j*m_B;
      for (size_t i = 0; i < n_A; i++)
        {
          double sum = 0;
          const size_t col_end = A_j[i+1];
          for (size_t k = A_j[i]; k < col_end; k++)
            sum += A_d[k] * B_col[A_i[k]];
          // Negligible products are not stored
          if (fabs(sum) > 1e-10)
            {
              C_d[nze_C] = sum;
              C_i[nze_C] = i;
              nze_C++;
            }
        }
    }
  C_j[n_B] = nze_C;

  mxSetNzmax(C_m, nze_C);
  return C_m;
}
|
|
|
|
/* Returns a new sparse matrix C = A' * B, A and B being sparse.

   Element (i, j) of C is the dot product of column i of A with column j of
   B, computed by merging their row-sorted index lists; it is stored only
   when its absolute value exceeds 1e-10.

   C is created with n_A rows and room for n_A*n_B elements: the row indices
   written into it range over the columns of A, so sizing it from the row
   count of A (as before) could overflow the buffers for a non-square A.
   Results are unchanged for a square A. */
mxArray *
dynSparseMatrix::Sparse_mult_SAT_SB(mxArray *A_m, mxArray *B_m)
{
  const size_t n_A = mxGetN(A_m);
  mwIndex *A_i = mxGetIr(A_m);
  mwIndex *A_j = mxGetJc(A_m);
  double *A_d = mxGetPr(A_m);
  const size_t n_B = mxGetN(B_m);
  mwIndex *B_i = mxGetIr(B_m);
  mwIndex *B_j = mxGetJc(B_m);
  double *B_d = mxGetPr(B_m);

  mxArray *C_m = mxCreateSparse(n_A, n_B, n_A*n_B, mxREAL);
  mwIndex *C_i = mxGetIr(C_m);
  mwIndex *C_j = mxGetJc(C_m);
  double *C_d = mxGetPr(C_m);

  mwIndex nze_C = 0;
  for (size_t j = 0; j < n_B; j++)
    {
      C_j[j] = nze_C;
      const size_t pb_end = B_j[j+1];
      for (size_t i = 0; i < n_A; i++)
        {
          double sum = 0;
          size_t pa = A_j[i];
          size_t pb = B_j[j];
          const size_t pa_end = A_j[i+1];
          // Only rows present in both columns contribute to the product
          while (pa < pa_end && pb < pb_end)
            {
              const size_t i_A = A_i[pa];
              const size_t i_B = B_i[pb];
              if (i_A == i_B)
                sum += A_d[pa++] * B_d[pb++];
              else if (i_A < i_B)
                pa++;
              else
                pb++;
            }
          // Negligible products are not stored
          if (fabs(sum) > 1e-10)
            {
              C_d[nze_C] = sum;
              C_i[nze_C] = i;
              nze_C++;
            }
        }
    }
  C_j[n_B] = nze_C;

  mxSetNzmax(C_m, nze_C);
  return C_m;
}
|
|
|
|
/* Returns a new sparse matrix equal to the transpose of the sparse matrix A.

   The transpose is built with a counting pass:
    1. the number of stored elements in each row of A is accumulated in
       C_j[row+1];
    2. a running sum turns these counts into the column pointers of C;
    3. every element of A is copied to the next free position of its
       destination column.
   Because the columns of A are scanned in increasing order, the row indices
   inside each column of C come out sorted, exactly as the former
   map-based implementation produced them.

   The column-pointer array is now cleared over its full (m_A+1) entries; the
   former memset only cleared m_A bytes. */
mxArray *
dynSparseMatrix::Sparse_transpose(mxArray *A_m)
{
  const size_t n_A = mxGetN(A_m);
  const size_t m_A = mxGetM(A_m);
  mwIndex *A_i = mxGetIr(A_m);
  mwIndex *A_j = mxGetJc(A_m);
  const size_t total_nze_A = A_j[n_A];
  double *A_d = mxGetPr(A_m);

  mxArray *C_m = mxCreateSparse(n_A, m_A, total_nze_A, mxREAL);
  mwIndex *C_i = mxGetIr(C_m);
  mwIndex *C_j = mxGetJc(C_m);
  double *C_d = mxGetPr(C_m);

  // 1. Count the elements of each row of A (= each column of C)
  memset(C_j, 0, (m_A+1)*sizeof(mwIndex));
  for (size_t k = 0; k < total_nze_A; k++)
    C_j[A_i[k]+1]++;

  // 2. Counts -> column pointers
  for (size_t r = 0; r < m_A; r++)
    C_j[r+1] += C_j[r];

  // 3. Scatter: next_slot[r] is the next free position in column r of C
  vector<mwIndex> next_slot(C_j, C_j+m_A);
  for (size_t col = 0; col < n_A; col++)
    {
      const size_t col_end = A_j[col+1];
      for (size_t k = A_j[col]; k < col_end; k++)
        {
          const mwIndex dest = next_slot[A_i[k]]++;
          C_i[dest] = col;
          C_d[dest] = A_d[k];
        }
    }
  return C_m;
}
|
|
|
|
/* sign(a, b): the absolute value of a, carrying the sign of b
   (b >= 0 gives +|a|, b < 0 gives -|a|). */
#define sign(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
|
|
bool
|
|
dynSparseMatrix::mnbrak(double *ax, double *bx, double *cx, double *fa, double *fb, double *fc)
|
|
{
|
|
const double GOLD = 1.618034;
|
|
const double GLIMIT = 100.0;
|
|
const double TINY = 1.0e-20;
|
|
|
|
double tmp;
|
|
mexPrintf("bracketing *ax=%f, *bx=%f\n", *ax, *bx);
|
|
//mexEvalString("drawnow;");
|
|
double ulim, u, r, q, fu;
|
|
if (!compute_complete(*ax, fa))
|
|
return false;
|
|
if (!compute_complete(*bx, fb))
|
|
return false;
|
|
if (*fb > *fa)
|
|
{
|
|
tmp = *ax;
|
|
*ax = *bx;
|
|
*bx = tmp;
|
|
|
|
tmp = *fa;
|
|
*fa = *fb;
|
|
*fb = tmp;
|
|
}
|
|
*cx = (*bx)+GOLD*(*bx-*ax);
|
|
if (!compute_complete(*cx, fc))
|
|
return false;
|
|
while (*fb > *fc)
|
|
{
|
|
r = (*bx-*ax)*(*fb-*fc);
|
|
q = (*bx-*cx)*(*fb-*fa);
|
|
u = (*bx)-((*bx-*cx)*q-(*bx-*ax)*r)
|
|
/(2.0*sign(fmax(fabs(q-r), TINY), q-r));
|
|
ulim = (*bx)+GLIMIT*(*cx-*bx);
|
|
if ((*bx-u)*(u-*cx) > 0.0)
|
|
{
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
if (fu < *fc)
|
|
{
|
|
*ax = (*bx);
|
|
*bx = u;
|
|
*fa = (*fb);
|
|
*fb = fu;
|
|
return true;
|
|
}
|
|
else if (fu > *fb)
|
|
{
|
|
*cx = u;
|
|
*fc = fu;
|
|
return true;
|
|
}
|
|
u = (*cx)+GOLD*(*cx-*bx);
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
}
|
|
else if ((*cx-u)*(u-ulim) > 0.0)
|
|
{
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
if (fu < *fc)
|
|
{
|
|
*bx = *cx;
|
|
*cx = u;
|
|
u = *cx+GOLD*(*cx-*bx);
|
|
*fb = *fc;
|
|
*fc = fu;
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
}
|
|
}
|
|
else if ((u-ulim)*(ulim-*cx) >= 0.0)
|
|
{
|
|
u = ulim;
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
u = (*cx)+GOLD*(*cx-*bx);
|
|
if (!compute_complete(u, &fu))
|
|
return false;
|
|
}
|
|
*ax = *bx;
|
|
*bx = *cx;
|
|
*cx = u;
|
|
*fa = *fb;
|
|
*fb = *fc;
|
|
*fc = fu;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
dynSparseMatrix::golden(double ax, double bx, double cx, double tol, double solve_tolf, double *xmin)
|
|
{
|
|
const double R = 0.61803399;
|
|
const double C = (1.0-R);
|
|
mexPrintf("golden\n");
|
|
//mexEvalString("drawnow;");
|
|
double f1, f2, x0, x1, x2, x3;
|
|
int iter = 0, max_iter = 100;
|
|
x0 = ax;
|
|
x3 = cx;
|
|
if (fabs(cx-bx) > fabs(bx-ax))
|
|
{
|
|
x1 = bx;
|
|
x2 = bx+C*(cx-bx);
|
|
}
|
|
else
|
|
{
|
|
x2 = bx;
|
|
x1 = bx-C*(bx-ax);
|
|
}
|
|
if (!compute_complete(x1, &f1))
|
|
return false;
|
|
if (!compute_complete(x2, &f2))
|
|
return false;
|
|
while ((fabs(x3-x0) > tol*(fabs(x1)+fabs(x2)) && (f1 > solve_tolf && f2 > solve_tolf)) && (iter < max_iter) && (abs(x1 - x2) > 1e-4))
|
|
{
|
|
if (f2 < f1)
|
|
{
|
|
x0 = x1;
|
|
x1 = x2;
|
|
x2 = R*x1+C*x3;
|
|
f1 = f2;
|
|
if (!compute_complete(x2, &f2))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
x3 = x2;
|
|
x2 = x1;
|
|
x1 = R*x2+C*x0;
|
|
f2 = f1;
|
|
if (!compute_complete(x1, &f1))
|
|
return false;
|
|
}
|
|
iter++;
|
|
}
|
|
if (f1 < f2)
|
|
{
|
|
*xmin = x1;
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
*xmin = x2;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_Matlab_Relaxation(mxArray *A_m, mxArray *b_m, unsigned int Size, double slowc_l, bool is_two_boundaries, int it_)
|
|
{
|
|
mxArray *B1, *C1, *A2, *B2, *A3, *b1, *b2;
|
|
double *b_m_d = mxGetPr(b_m);
|
|
if (!b_m_d)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Solve_Matlab_Relaxation, can't retrieve b_m vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *A_m_i = mxGetIr(A_m);
|
|
if (!A_m_i)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Solve_Matlab_Relaxation, can't allocate A_m_i index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *A_m_j = mxGetJc(A_m);
|
|
if (!A_m_j)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Solve_Matlab_Relaxation, can't allocate A_m_j index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *A_m_d = mxGetPr(A_m);
|
|
if (!A_m_d)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Solve_Matlab_Relaxation, can't retrieve A matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
size_t max_nze = A_m_j[Size*periods];
|
|
unsigned int nze = 0;
|
|
size_t var = A_m_j[nze];
|
|
B1 = mxCreateSparse(Size, Size, Size*Size, mxREAL);
|
|
mwIndex *B1_i = mxGetIr(B1);
|
|
mwIndex *B1_j = mxGetJc(B1);
|
|
double *B1_d = mxGetPr(B1);
|
|
unsigned int B1_nze = 0;
|
|
unsigned int B1_var = 0;
|
|
B1_i[B1_nze] = 0;
|
|
B1_j[B1_var] = 0;
|
|
C1 = mxCreateSparse(Size, Size, Size*Size, mxREAL);
|
|
mwIndex *C1_i = mxGetIr(C1);
|
|
mwIndex *C1_j = mxGetJc(C1);
|
|
double *C1_d = mxGetPr(C1);
|
|
unsigned int C1_nze = 0;
|
|
unsigned int C1_var = 0;
|
|
C1_i[C1_nze] = 0;
|
|
C1_j[C1_var] = 0;
|
|
A2 = mxCreateSparse(Size, Size, Size*Size, mxREAL);
|
|
mwIndex *A2_i = mxGetIr(A2);
|
|
mwIndex *A2_j = mxGetJc(A2);
|
|
double *A2_d = mxGetPr(A2);
|
|
unsigned int A2_nze = 0;
|
|
unsigned int A2_var = 0;
|
|
A2_i[A2_nze] = 0;
|
|
A2_j[A2_var] = 0;
|
|
B2 = mxCreateSparse(Size, Size, Size*Size, mxREAL);
|
|
mwIndex *B2_i = mxGetIr(B2);
|
|
mwIndex *B2_j = mxGetJc(B2);
|
|
double *B2_d = mxGetPr(B2);
|
|
unsigned int B2_nze = 0;
|
|
unsigned int B2_var = 0;
|
|
B2_i[B2_nze] = 0;
|
|
B2_j[B2_var] = 0;
|
|
A3 = mxCreateSparse(Size, Size, Size*Size, mxREAL);
|
|
mwIndex *A3_i = mxGetIr(A3);
|
|
mwIndex *A3_j = mxGetJc(A3);
|
|
double *A3_d = mxGetPr(A3);
|
|
unsigned int A3_nze = 0;
|
|
unsigned int A3_var = 0;
|
|
A3_i[A3_nze] = 0;
|
|
A3_j[A3_var] = 0;
|
|
b1 = mxCreateDoubleMatrix(Size, 1, mxREAL);
|
|
double *b1_d = mxGetPr(b1);
|
|
b2 = mxCreateDoubleMatrix(Size, 1, mxREAL);
|
|
double *b2_d = mxGetPr(b2);
|
|
size_t eq = 0;
|
|
/*B1 C1
|
|
A2 B2
|
|
A3*/
|
|
while (var < 2*Size && nze < max_nze)
|
|
{
|
|
if ((unsigned int) A_m_j[var+1] <= nze)
|
|
{
|
|
if (var < Size)
|
|
b1_d[var] = b_m_d[var];
|
|
else
|
|
b2_d[var - Size] = b_m_d[var];
|
|
var++;
|
|
}
|
|
eq = A_m_i[nze];
|
|
if (var < Size)
|
|
{
|
|
if (eq < Size)
|
|
{
|
|
while (B1_var < var)
|
|
B1_j[++B1_var] = B1_nze;
|
|
B1_i[B1_nze] = eq;
|
|
B1_d[B1_nze] = A_m_d[nze];
|
|
B1_nze++;
|
|
}
|
|
else
|
|
{
|
|
while (A2_var < var)
|
|
A2_j[++A2_var] = A2_nze;
|
|
A2_i[A2_nze] = eq - Size;
|
|
A2_d[A2_nze] = A_m_d[nze];
|
|
A2_nze++;
|
|
}
|
|
}
|
|
else if (var < 2*Size)
|
|
{
|
|
if (eq < Size)
|
|
{
|
|
while (C1_var < var - Size)
|
|
C1_j[++C1_var] = C1_nze;
|
|
C1_i[C1_nze] = eq;
|
|
C1_d[C1_nze] = A_m_d[nze];
|
|
C1_nze++;
|
|
}
|
|
else if (eq < 2*Size)
|
|
{
|
|
while (B2_var < var - Size)
|
|
B2_j[++B2_var] = B2_nze;
|
|
B2_i[B2_nze] = eq - Size;
|
|
B2_d[B2_nze] = A_m_d[nze];
|
|
B2_nze++;
|
|
}
|
|
else
|
|
{
|
|
while (A3_var < var - Size)
|
|
A3_j[++A3_var] = A3_nze;
|
|
A3_i[A3_nze] = eq - 2*Size;
|
|
A3_d[A3_nze] = A_m_d[nze];
|
|
A3_nze++;
|
|
}
|
|
}
|
|
nze++;
|
|
}
|
|
while (B1_var < Size)
|
|
B1_j[++B1_var] = B1_nze;
|
|
while (C1_var < Size)
|
|
C1_j[++C1_var] = C1_nze;
|
|
while (A2_var < Size)
|
|
A2_j[++A2_var] = A2_nze;
|
|
while (B2_var < Size)
|
|
B2_j[++B2_var] = B2_nze;
|
|
while (A3_var < Size)
|
|
A3_j[++A3_var] = A3_nze;
|
|
mxArray *d1 = NULL;
|
|
vector<pair<mxArray *, mxArray *> > triangular_form;
|
|
double sumc = 0, C_sumc = 1000;
|
|
mxArray *B1_inv = NULL;
|
|
mxArray *B1_inv_t = NULL;
|
|
for (int t = 1; t <= periods; t++)
|
|
{
|
|
if (abs(sumc / C_sumc -1) > 1e-10*res1)
|
|
{
|
|
C_sumc = sumc;
|
|
if (B1_inv)
|
|
mxDestroyArray(B1_inv);
|
|
mexCallMATLAB(1, &B1_inv, 1, &B1, "inv");
|
|
mwIndex *B_inv_j = mxGetJc(B1_inv);
|
|
size_t B_inv_nze = B_inv_j[Size];
|
|
double *B_inv_d = mxGetPr(B1_inv);
|
|
sumc = 0;
|
|
for (unsigned int i = 0; i < B_inv_nze; i++)
|
|
sumc += fabs(B_inv_d[i]);
|
|
}
|
|
B1_inv_t = Sparse_transpose(B1_inv);
|
|
mxArray *S1 = Sparse_mult_SAT_SB(B1_inv_t, C1);
|
|
|
|
d1 = mult_SAT_B(B1_inv_t, b1);
|
|
if (t < periods)
|
|
//Computation for the next lines
|
|
{
|
|
mxDestroyArray(B1_inv_t);
|
|
mxArray *A2_t = Sparse_transpose(A2);
|
|
mxDestroyArray(A2);
|
|
|
|
mxArray *tmp = Sparse_mult_SAT_SB(A2_t, S1);
|
|
mxDestroyArray(B1);
|
|
B1 = Sparse_substract_SA_SB(B2, tmp);
|
|
mxDestroyArray(tmp);
|
|
|
|
tmp = mult_SAT_B(A2_t, d1);
|
|
b1 = substract_A_B(b2, tmp);
|
|
mxDestroyArray(tmp);
|
|
|
|
triangular_form.push_back(make_pair(S1, d1));
|
|
mxDestroyArray(A2_t);
|
|
}
|
|
A2 = mxDuplicateArray(A3);
|
|
|
|
//I S1
|
|
//0 B1 C1 =>B1 =
|
|
// A2 B2 => A2 = A3
|
|
// A3
|
|
C1_nze = B2_nze = A3_nze = 0;
|
|
C1_var = B2_var = A3_var = 0;
|
|
|
|
if (nze < max_nze)
|
|
nze--;
|
|
while (var < (t+2)*Size && nze < max_nze)
|
|
{
|
|
if ((unsigned int) A_m_j[var+1] <= nze)
|
|
{
|
|
b2_d[var - (t+1) * Size] = b_m_d[var];
|
|
var++;
|
|
}
|
|
eq = A_m_i[nze];
|
|
if (eq < (t+1) * Size)
|
|
{
|
|
C1_d[C1_nze] = A_m_d[nze];
|
|
C1_nze++;
|
|
}
|
|
else if (eq < (t+2)*Size)
|
|
{
|
|
B2_d[B2_nze] = A_m_d[nze];
|
|
B2_nze++;
|
|
}
|
|
else
|
|
{
|
|
A3_d[A3_nze] = A_m_d[nze];
|
|
A3_nze++;
|
|
}
|
|
nze++;
|
|
}
|
|
}
|
|
double *d1_d = mxGetPr(d1);
|
|
for (unsigned i = 0; i < Size; i++)
|
|
{
|
|
int eq = index_vara[i+Size*(y_kmin+periods-1)];
|
|
double yy = -(d1_d[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
|
|
pair<mxArray *, mxArray *> tf;
|
|
for (int t = periods-2; t >= 0; t--)
|
|
{
|
|
mxArray *tmp;
|
|
tf = triangular_form.back();
|
|
triangular_form.pop_back();
|
|
mxArray *tf_first_t = Sparse_transpose(tf.first);
|
|
mxDestroyArray(tf.first);
|
|
tmp = mult_SAT_B(tf_first_t, d1);
|
|
d1 = substract_A_B(tf.second, tmp);
|
|
d1_d = mxGetPr(d1);
|
|
mxDestroyArray(tmp);
|
|
for (unsigned i = 0; i < Size; i++)
|
|
{
|
|
int eq = index_vara[i+Size*(y_kmin+t)];
|
|
double yy = -(d1_d[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
mxDestroyArray(tf_first_t);
|
|
mxDestroyArray(tf.second);
|
|
}
|
|
mxDestroyArray(B1);
|
|
mxDestroyArray(C1);
|
|
mxDestroyArray(A2);
|
|
mxDestroyArray(B2);
|
|
mxDestroyArray(A3);
|
|
mxDestroyArray(b1);
|
|
mxDestroyArray(b2);
|
|
mxDestroyArray(A_m);
|
|
mxDestroyArray(b_m);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_Matlab_LU_UMFPack(mxArray *A_m, mxArray *b_m, int Size, double slowc_l, bool is_two_boundaries, int it_)
|
|
{
|
|
size_t n = mxGetM(A_m);
|
|
mxArray *z;
|
|
mxArray *rhs[2];
|
|
rhs[0] = A_m;
|
|
rhs[1] = b_m;
|
|
mexCallMATLAB(1, &z, 2, rhs, "mldivide");
|
|
double *res = mxGetPr(z);
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc_l * yy;
|
|
}
|
|
mxDestroyArray(A_m);
|
|
mxDestroyArray(b_m);
|
|
mxDestroyArray(z);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::End_Matlab_LU_UMFPack()
|
|
{
|
|
if (Symbolic)
|
|
umfpack_dl_free_symbolic(&Symbolic);
|
|
if (Numeric)
|
|
umfpack_dl_free_numeric(&Numeric);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::End_Solver()
|
|
{
|
|
if (((stack_solve_algo == 0 || stack_solve_algo == 4) && !steady_state) || (solve_algo == 6 && steady_state))
|
|
End_Matlab_LU_UMFPack();
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Printfull_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n)
|
|
{
|
|
double A[n*n];
|
|
for (int i = 0; i < n*n; i++)
|
|
A[i] = 0;
|
|
int k = 0;
|
|
for (int i = 0; i < n; i++)
|
|
for (int j = Ap[i]; j < Ap[i+1]; j++)
|
|
A[Ai[j] * n + i] = Ax[k++];
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
for (int j = 0; j < n; j++)
|
|
mexPrintf("%4.1f ", A[i*n+j]);
|
|
mexPrintf(" %6.3f\n", b[i]);
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Print_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, int n)
|
|
{
|
|
int k = 0;
|
|
for (int i = 0; i < n; i++)
|
|
for (int j = Ap[i]; j < Ap[i+1]; j++)
|
|
mexPrintf("(%d, %d) %f\n", Ai[j]+1, i+1, Ax[k++]);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_LU_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n, int Size, double slowc_l, bool is_two_boundaries, int it_, vector_table_conditional_local_type vector_table_conditional_local)
|
|
{
|
|
SuiteSparse_long status, sys = 0;
|
|
#ifndef _MSC_VER
|
|
double Control [UMFPACK_CONTROL], Info [UMFPACK_INFO], res [n];
|
|
#else
|
|
double *Control, *Info, *res;
|
|
Control = (double *) mxMalloc(UMFPACK_CONTROL * sizeof(double));
|
|
test_mxMalloc(Control, __LINE__, __FILE__, __func__, UMFPACK_CONTROL * sizeof(double));
|
|
Info = (double *) mxMalloc(UMFPACK_INFO * sizeof(double));
|
|
test_mxMalloc(Info, __LINE__, __FILE__, __func__, UMFPACK_INFO * sizeof(double));
|
|
res = (double *) mxMalloc(n * sizeof(double));
|
|
test_mxMalloc(res, __LINE__, __FILE__, __func__, n * sizeof(double));
|
|
#endif
|
|
|
|
umfpack_dl_defaults(Control);
|
|
Control [UMFPACK_PRL] = 5;
|
|
status = 0;
|
|
if (iter == 0)
|
|
{
|
|
status = umfpack_dl_symbolic(n, n, Ap, Ai, Ax, &Symbolic, Control, Info);
|
|
if (status < 0)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_symbolic failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
}
|
|
if (iter > 0)
|
|
umfpack_dl_free_numeric(&Numeric);
|
|
status = umfpack_dl_numeric(Ap, Ai, Ax, Symbolic, &Numeric, Control, Info);
|
|
if (status < 0)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_numeric failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
status = umfpack_dl_solve(sys, Ap, Ai, Ax, res, b, Numeric, Control, Info);
|
|
if (status != UMFPACK_OK)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_solve failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
|
|
if (vector_table_conditional_local.size())
|
|
{
|
|
if (is_two_boundaries)
|
|
for (int t = 0; t < n / Size; t++)
|
|
if (t == 0)
|
|
{
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
bool fliped = vector_table_conditional_local[i].is_cond;
|
|
if (fliped)
|
|
{
|
|
int eq = index_vara[i+Size*(y_kmin)];
|
|
int flip_exo = vector_table_conditional_local[i].var_exo;
|
|
double yy = -(res[i] + x[y_kmin + flip_exo*nb_row_x]);
|
|
direction[eq] = 0;
|
|
x[flip_exo*nb_row_x + y_kmin] += slowc_l * yy;
|
|
}
|
|
else
|
|
{
|
|
int eq = index_vara[i+Size*(y_kmin)];
|
|
double yy = -(res[i ] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
int eq = index_vara[i+Size*(t + y_kmin)];
|
|
double yy = -(res[i + Size * t] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc_l * yy;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc_l * yy;
|
|
}
|
|
}
|
|
|
|
mxFree(Ap);
|
|
mxFree(Ai);
|
|
mxFree(Ax);
|
|
mxFree(b);
|
|
#ifdef _MSC_VER
|
|
mxFree(Control);
|
|
mxFree(Info);
|
|
mxFree(res);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_LU_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n, int Size, double slowc_l, bool is_two_boundaries, int it_)
|
|
{
|
|
SuiteSparse_long status, sys = 0;
|
|
#ifndef _MSC_VER
|
|
double Control [UMFPACK_CONTROL], Info [UMFPACK_INFO], res [n];
|
|
#else
|
|
double *Control, *Info, *res;
|
|
Control = (double *) mxMalloc(UMFPACK_CONTROL * sizeof(double));
|
|
test_mxMalloc(Control, __LINE__, __FILE__, __func__, UMFPACK_CONTROL * sizeof(double));
|
|
Info = (double *) mxMalloc(UMFPACK_INFO * sizeof(double));
|
|
test_mxMalloc(Info, __LINE__, __FILE__, __func__, UMFPACK_INFO * sizeof(double));
|
|
res = (double *) mxMalloc(n * sizeof(double));
|
|
test_mxMalloc(res, __LINE__, __FILE__, __func__, n * sizeof(double));
|
|
#endif
|
|
|
|
umfpack_dl_defaults(Control);
|
|
Control [UMFPACK_PRL] = 5;
|
|
status = 0;
|
|
if (iter == 0)
|
|
{
|
|
status = umfpack_dl_symbolic(n, n, Ap, Ai, Ax, &Symbolic, Control, Info);
|
|
if (status < 0)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_symbolic failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
}
|
|
if (iter > 0)
|
|
umfpack_dl_free_numeric(&Numeric);
|
|
status = umfpack_dl_numeric(Ap, Ai, Ax, Symbolic, &Numeric, Control, Info);
|
|
if (status < 0)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_numeric failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
status = umfpack_dl_solve(sys, Ap, Ai, Ax, res, b, Numeric, Control, Info);
|
|
if (status != UMFPACK_OK)
|
|
{
|
|
umfpack_dl_report_info(Control, Info);
|
|
umfpack_dl_report_status(Control, status);
|
|
ostringstream Error;
|
|
Error << " umfpack_dl_solve failed\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc_l * yy;
|
|
}
|
|
mxFree(Ap);
|
|
mxFree(Ai);
|
|
mxFree(Ax);
|
|
mxFree(b);
|
|
#ifdef _MSC_VER
|
|
mxFree(Control);
|
|
mxFree(Info);
|
|
mxFree(res);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_LU_UMFPack(mxArray *A_m, mxArray *b_m, int Size, double slowc_l, bool is_two_boundaries, int it_)
|
|
{
|
|
SuiteSparse_long n = mxGetM(A_m);
|
|
|
|
SuiteSparse_long *Ap = (SuiteSparse_long *) mxGetJc(A_m);
|
|
|
|
SuiteSparse_long *Ai = (SuiteSparse_long *) mxGetIr(A_m);
|
|
double *Ax = mxGetPr(A_m);
|
|
double *B = mxGetPr(b_m);
|
|
SuiteSparse_long status, sys = 0;
|
|
#ifndef _MSC_VER
|
|
double Control [UMFPACK_CONTROL], Info [UMFPACK_INFO], res [n];
|
|
#else
|
|
double *Control, *Info, *res;
|
|
Control = (double *) mxMalloc(UMFPACK_CONTROL * sizeof(double));
|
|
test_mxMalloc(Control, __LINE__, __FILE__, __func__, UMFPACK_CONTROL * sizeof(double));
|
|
Info = (double *) mxMalloc(UMFPACK_INFO * sizeof(double));
|
|
test_mxMalloc(Info, __LINE__, __FILE__, __func__, UMFPACK_INFO * sizeof(double));
|
|
res = (double *) mxMalloc(n * sizeof(double));
|
|
test_mxMalloc(res, __LINE__, __FILE__, __func__, n * sizeof(double));
|
|
#endif
|
|
void *Symbolic, *Numeric;
|
|
umfpack_dl_defaults(Control);
|
|
|
|
status = umfpack_dl_symbolic(n, n, Ap, Ai, Ax, &Symbolic, Control, Info);
|
|
if (status != UMFPACK_OK)
|
|
umfpack_dl_report_info((double *) NULL, Info);
|
|
|
|
status = umfpack_dl_numeric(Ap, Ai, Ax, Symbolic, &Numeric, Control, Info);
|
|
if (status != UMFPACK_OK)
|
|
umfpack_dl_report_info((double *) NULL, Info);
|
|
|
|
status = umfpack_dl_solve(sys, Ap, Ai, Ax, res, B, Numeric, Control, Info);
|
|
if (status != UMFPACK_OK)
|
|
umfpack_dl_report_info((double *) NULL, Info);
|
|
//double *res = mxGetPr(z);
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc_l * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc_l * yy;
|
|
}
|
|
mxDestroyArray(A_m);
|
|
mxDestroyArray(b_m);
|
|
#ifdef _MSC_VER
|
|
mxFree(Control);
|
|
mxFree(Info);
|
|
mxFree(res);
|
|
#endif
|
|
|
|
}
|
|
|
|
#ifdef CUDA
|
|
void
|
|
printM(int n, double *Ax, int *Ap, int *Ai, cusparseMatDescr_t descrA, cusparseHandle_t cusparse_handle)
|
|
{
|
|
//cudaError_t cuda_error;
|
|
//cusparseStatus_t cusparse_status;
|
|
double *A_dense;
|
|
cudaChk(cudaMalloc((void **) &A_dense, n * n *sizeof(double)), "A_dense cudaMalloc has failed\n");
|
|
|
|
cusparseChk(cusparseDcsr2dense(cusparse_handle, n, n, descrA,
|
|
Ax, Ap, Ai, A_dense, n), "cusparseDcsr2dense has failed\n");
|
|
double *A_dense_hoste = (double *) mxMalloc(n * n * sizeof(double));
|
|
test_mxMalloc(A_dense_hoste, __LINE__, __FILE__, __func__, n * n * sizeof(double));
|
|
cudaChk(cudaMemcpy(A_dense_hoste, A_dense, n * n * sizeof(double), cudaMemcpyDeviceToHost), " cudaMemcpy(A_dense_hoste, A_dense) has failed\n");
|
|
mexPrintf("----------------------\n");
|
|
mexPrintf("FillMode=%d, IndexBase=%d, MatType=%d, DiagType=%d\n", cusparseGetMatFillMode(descrA), cusparseGetMatIndexBase(descrA), cusparseGetMatType(descrA), cusparseGetMatDiagType(descrA));
|
|
//mexEvalString("drawnow;");
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
for (int j = 0; j < n; j++)
|
|
mexPrintf("%-6.3f ", A_dense_hoste[i + j * n]);
|
|
mexPrintf("\n");
|
|
}
|
|
mxFree(A_dense_hoste);
|
|
cudaChk(cudaFree(A_dense), "cudaFree(A_dense) has failed\n");
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_CUDA_BiCGStab_Free(double *tmp_vect_host, double *p, double *r, double *v, double *s, double *t, double *y_, double *z, double *tmp_,
|
|
int *Ai, double *Ax, int *Ap, double *x0, double *b, double *A_tild, int *A_tild_i, int *A_tild_p/*, double* Lx, int* Li, int* Lp,
|
|
double* Ux, int* Ui, int* Up, int* device_n*/, cusparseSolveAnalysisInfo_t infoL, cusparseSolveAnalysisInfo_t infoU,
|
|
cusparseMatDescr_t descrL, cusparseMatDescr_t descrU, int preconditioner)
|
|
{
|
|
//cudaError_t cuda_error;
|
|
//cusparseStatus_t cusparse_status;
|
|
mxFree(tmp_vect_host);
|
|
cudaChk(cudaFree(p), " in Solve_Cuda_BiCGStab, can't free p\n");
|
|
cudaChk(cudaFree(r), " in Solve_Cuda_BiCGStab, can't free r\n");
|
|
cudaChk(cudaFree(v), " in Solve_Cuda_BiCGStab, can't free v\n");
|
|
cudaChk(cudaFree(s), " in Solve_Cuda_BiCGStab, can't free s\n");
|
|
cudaChk(cudaFree(t), " in Solve_Cuda_BiCGStab, can't free t\n");
|
|
cudaChk(cudaFree(y_), " in Solve_Cuda_BiCGStab, can't free y_\n");
|
|
cudaChk(cudaFree(z), " in Solve_Cuda_BiCGStab, can't free z\n");
|
|
cudaChk(cudaFree(tmp_), " in Solve_Cuda_BiCGStab, can't free tmp_\n");
|
|
cudaChk(cudaFree(Ai), " in Solve_Cuda_BiCGStab, can't free Ai\n");
|
|
cudaChk(cudaFree(Ax), " in Solve_Cuda_BiCGStab, can't free Ax\n");
|
|
cudaChk(cudaFree(Ap), " in Solve_Cuda_BiCGStab, can't free Ap\n");
|
|
cudaChk(cudaFree(x0), " in Solve_Cuda_BiCGStab, can't free x0\n");
|
|
cudaChk(cudaFree(b), " in Solve_Cuda_BiCGStab, can't free b\n");
|
|
/*if (preconditioner == 0)
|
|
{*/
|
|
cudaChk(cudaFree(A_tild), " in Solve_Cuda_BiCGStab, can't free A_tild (1)\n");
|
|
cudaChk(cudaFree(A_tild_i), " in Solve_Cuda_BiCGStab, can't free A_tild_i (1)\n");
|
|
cudaChk(cudaFree(A_tild_p), " in Solve_Cuda_BiCGStab, can't free A_tild_p (1)\n");
|
|
/*}
|
|
else
|
|
{
|
|
cudaChk(cudaFree(Lx), " in Solve_Cuda_BiCGStab, can't free Lx\n");
|
|
cudaChk(cudaFree(Li), " in Solve_Cuda_BiCGStab, can't free Li\n");
|
|
cudaChk(cudaFree(Lp), " in Solve_Cuda_BiCGStab, can't free Lp\n");
|
|
cudaChk(cudaFree(Ux), " in Solve_Cuda_BiCGStab, can't free Ux\n");
|
|
cudaChk(cudaFree(Ui), " in Solve_Cuda_BiCGStab, can't free Ui\n");
|
|
cudaChk(cudaFree(Up), " in Solve_Cuda_BiCGStab, can't free Up\n");
|
|
}*/
|
|
//cudaChk(cudaFree(device_n), " in Solve_Cuda_BiCGStab, can't free device_n\n");
|
|
if (preconditioner == 1 || preconditioner == 2 || preconditioner == 3)
|
|
{
|
|
cusparseChk(cusparseDestroySolveAnalysisInfo(infoL),
|
|
" in Solve_Cuda_BiCGStab, cusparseDestroySolveAnalysisInfo has failed for infoL\n");
|
|
cusparseChk(cusparseDestroySolveAnalysisInfo(infoU),
|
|
" in Solve_Cuda_BiCGStab, cusparseDestroySolveAnalysisInfo has failed for infoU\n");
|
|
}
|
|
cusparseChk(cusparseDestroyMatDescr(descrL),
|
|
" in Solve_Cuda_BiCGStab, matrix descriptor destruction failed for descrL\n");
|
|
cusparseChk(cusparseDestroyMatDescr(descrU),
|
|
" in Solve_Cuda_BiCGStab, matrix descriptor destruction failed for descrU\n");
|
|
}
|
|
#endif
|
|
|
|
/*
 * Triangular solve on a sparse matrix stored row by row: row i owns the
 * entries Ap[i] .. Ap[i+1]-1, with column indices in Ai and values in Ax.
 *
 *   Lower == true   forward substitution using only the entries strictly
 *                   below the diagonal; the diagonal is taken to be 1
 *   Lower == false  backward substitution using the entries strictly above
 *                   the diagonal, then division by the diagonal entry
 *                   (1 when the row stores no diagonal entry)
 *
 * The solution is written to x; b is the right-hand side.
 */
void
Solve(double *Ax, int *Ap, int *Ai, double *b, int n, bool Lower, double *x)
{
  if (!Lower)
    {
      // Backward substitution, last row first
      int row = n;
      while (row-- > 0)
        {
          double acc = 0, pivot = 1;
          for (int pos = Ap[row]; pos < Ap[row+1]; pos++)
            {
              const int col = Ai[pos];
              if (col > row)
                acc += x[col] * Ax[pos];
              else if (col == row)
                pivot = Ax[pos];
            }
          x[row] = (b[row] - acc) / pivot;
        }
      return;
    }

  // Forward substitution, first row first
  for (int row = 0; row < n; row++)
    {
      double acc = 0;
      for (int pos = Ap[row]; pos < Ap[row+1]; pos++)
        {
          const int col = Ai[pos];
          if (col < row)
            acc += x[col] * Ax[pos];
        }
      x[row] = b[row] - acc;
    }
}
|
|
|
|
void
|
|
Check(int n, double *Ax, int *Ap, int *Ai, double *b, double *x, bool Lower)
|
|
{
|
|
if (Lower)
|
|
{
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
double sum = 0;
|
|
for (int j = Ap[i]; j < Ap[i+1]; j++)
|
|
{
|
|
int k = Ai[j];
|
|
if (k < i)
|
|
sum += x[k] * Ax[j];
|
|
}
|
|
double err = b[i] - sum - x[i];
|
|
if (abs(err) > 1e-10)
|
|
mexPrintf("error at i=%d\n", i);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = n-1; i >= 0; i--)
|
|
{
|
|
double sum = 0;
|
|
for (int j = Ap[i]; j < Ap[i+1]; j++)
|
|
{
|
|
int k = Ai[j];
|
|
if (k >= i)
|
|
sum += x[k] * Ax[j];
|
|
}
|
|
double err = b[i] - sum;
|
|
if (abs(err) > 1e-10)
|
|
mexPrintf("error at i=%d\n", i);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef CUDA
|
|
int
|
|
dynSparseMatrix::Solve_CUDA_BiCGStab(int *Ap, int *Ai, double *Ax, int *Ap_tild, int *Ai_tild, double *A_tild, double *b, double *x0, int n, int Size, double slowc_l, bool is_two_boundaries,
|
|
int it_, int nnz, int nnz_tild, int preconditioner, int max_iterations, int block)
|
|
{
|
|
cusparseSolveAnalysisInfo_t info, infoL, infoU;
|
|
cusparseMatDescr_t descrL, descrU;
|
|
const double tol = 1.0e-6;//1.0e-6;
|
|
const double eps = 1.0e-16;
|
|
double *p, *r, *r0, *v, *s, *t, *y_, *z, *tmp_;
|
|
int *A_tild_i, *A_tild_p;
|
|
double *Qx;
|
|
int *Qi, *Qj;
|
|
double *Px;
|
|
int *Pi, *Pj;
|
|
int Q_nnz, P_nnz;
|
|
int W_nnz;
|
|
double bnorm;
|
|
double tmp1, tmp2;
|
|
int refinement_needed = 0, stagnation = 0;
|
|
int max_refinement = min(min(int (floor(double (n)/50)), 10), n-max_iterations), max_stagnation = 3;
|
|
int nblocks = ceil(double (n) / double (1024));
|
|
int n_threads;
|
|
if (nblocks == 0)
|
|
n_threads = n;
|
|
else
|
|
n_threads = 1024;
|
|
int periods = n / Size;
|
|
|
|
double *tmp_vect_host = (double *) mxMalloc(n * sizeof(double));
|
|
test_mxMalloc(tmp_vect_host, __LINE__, __FILE__, __func__, n * sizeof(double));
|
|
|
|
cublasChk(cublasDnrm2(cublas_handle, n, b, 1, &bnorm),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(b) has failed\n");
|
|
|
|
double tolb = tol * bnorm;
|
|
|
|
if (bnorm == 0.0)
|
|
{
|
|
// if b = 0 the A.x = 0 => x = 0
|
|
cudaChk(cudaFree(Ai), " in Solve_Cuda_BiCGStab, can't free Ai\n");
|
|
cudaChk(cudaFree(Ax), " in Solve_Cuda_BiCGStab, can't free Ax\n");
|
|
cudaChk(cudaFree(Ap), " in Solve_Cuda_BiCGStab, can't free Ap\n");
|
|
if (preconditioner == 3)
|
|
{
|
|
cudaChk(cudaFree(Ai_tild), " in Solve_Cuda_BiCGStab, can't free Ai_tild\n");
|
|
cudaChk(cudaFree(Ap_tild), " in Solve_Cuda_BiCGStab, can't free Ap_tild\n");
|
|
}
|
|
cudaChk(cudaFree(A_tild), " in Solve_Cuda_BiCGStab, can't free A_tild\n");
|
|
cudaChk(cudaFree(x0), " in Solve_Cuda_BiCGStab, can't free x0\n");
|
|
cudaChk(cudaFree(b), " in Solve_Cuda_BiCGStab, can't free b\n");
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -y[eq];
|
|
direction[eq] = yy;
|
|
y[eq] += slowc * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -y[eq+it_*y_size];
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc * yy;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int iteration = 0;
|
|
bool convergence = false;
|
|
double zeros = 0.0, one = 1.0, m_one = -1.0;
|
|
|
|
cudaChk(cudaMalloc((void **) &tmp_, n * sizeof(double)), " in Solve_Cuda_Sparse, can't allocate tmp_ on the graphic card\n");
|
|
|
|
cudaChk(cudaMalloc((void **) &r, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate r on the graphic card\n");
|
|
|
|
cudaChk(cudaMemcpy(r, b, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy r = b has failed\n");
|
|
|
|
//r = b - A * x0
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n,
|
|
n, nnz, &m_one,
|
|
CUDA_descr, Ax,
|
|
Ap, Ai,
|
|
x0, &one,
|
|
r), "in Solve_Cuda_BiCGStab, cusparseDcsrmv A * x0 has failed");
|
|
|
|
cudaChk(cudaMemcpy(tmp_vect_host, r, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = p_tild has failed\n");
|
|
/*mexPrintf("r\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
|
|
cudaChk(cudaMalloc((void **) &r0, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate r0 on the graphic card\n");
|
|
cudaChk(cudaMemcpy(r0, r, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy r0 = r has failed\n");
|
|
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
r, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(r) has failed\n");
|
|
double conv_criteria = tmp1;
|
|
|
|
convergence = conv_criteria < tolb;
|
|
if (convergence)
|
|
{
|
|
/* the initial value (x0) is solution of A x = b*/
|
|
cudaChk(cudaFree(Ai), " in Solve_Cuda_BiCGStab, can't free Ai\n");
|
|
cudaChk(cudaFree(Ax), " in Solve_Cuda_BiCGStab, can't free Ax\n");
|
|
cudaChk(cudaFree(Ap), " in Solve_Cuda_BiCGStab, can't free Ap\n");
|
|
if (preconditioner == 3)
|
|
{
|
|
cudaChk(cudaFree(Ai_tild), " in Solve_Cuda_BiCGStab, can't free Ai_tild\n");
|
|
cudaChk(cudaFree(Ap_tild), " in Solve_Cuda_BiCGStab, can't free Ap_tild\n");
|
|
}
|
|
cudaChk(cudaFree(A_tild), " in Solve_Cuda_BiCGStab, can't free A_tild\n");
|
|
cudaChk(cudaFree(x0), " in Solve_Cuda_BiCGStab, can't free x0\n");
|
|
cudaChk(cudaFree(b), " in Solve_Cuda_BiCGStab, can't free b\n");
|
|
return 0;
|
|
}
|
|
|
|
if (preconditioner == 0)
|
|
{
|
|
//Apply the Jacobi preconditioner
|
|
/*VecDiv<<<nblocks, n_threads>>>(r_, A_tild, z_, n);
|
|
cuda_error = cudaMemcpy(zz_, z_, n * sizeof(double), cudaMemcpyDeviceToDevice);*/
|
|
}
|
|
else if (preconditioner == 1)
|
|
{
|
|
//Apply an incomplete LU decomposition of A as preconditioner
|
|
cusparseChk(cusparseCreateSolveAnalysisInfo(&info), " in Solve_Cuda_BiCGStab, cusparseCreateSolveAnalysisInfo for info has failed\n");
|
|
|
|
cusparseChk(cusparseDcsrsv_analysis(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, nnz, CUDA_descr,
|
|
A_tild, Ap, Ai,
|
|
info),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsm_analysis(info) has failed\n");
|
|
|
|
cusparseChk(cusparseDcsrilu0(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, CUDA_descr,
|
|
A_tild, Ap, Ai,
|
|
info),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrilu0 has failed\n");
|
|
|
|
//Make a copy of the indexes in A_tild_i and A_tild_p to use it the Bicgstab algorithm
|
|
cudaChk(cudaMalloc((void **) &A_tild_i, nnz * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate A_tild_i on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_i, Ai, nnz * sizeof(int), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_i = Ai has failed\n");
|
|
cudaChk(cudaMalloc((void **) &A_tild_p, (n + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate A_tild_p on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_p, Ap, (n + 1) * sizeof(int), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_p = Ap has failed\n");
|
|
}
|
|
else if (preconditioner == 2)
|
|
{
|
|
//Because the Jacobian matrix A is store in CSC format in matlab
|
|
// we have to transpose it to get a CSR format used by CUDA
|
|
mwIndex *Awi, *Awp;
|
|
double *A_tild_host = (double *) mxMalloc(nnz*sizeof(double));
|
|
test_mxMalloc(A_tild_host, __LINE__, __FILE__, __func__, nnz*sizeof(double));
|
|
Awi = (mwIndex *) mxMalloc(nnz * sizeof(mwIndex));
|
|
test_mxMalloc(Awi, __LINE__, __FILE__, __func__, nnz * sizeof(mwIndex));
|
|
Awp = (mwIndex *) mxMalloc((n + 1) * sizeof(mwIndex));
|
|
test_mxMalloc(Awp, __LINE__, __FILE__, __func__, (n + 1) * sizeof(mwIndex));
|
|
int *Aii = (int *) mxMalloc(nnz * sizeof(int));
|
|
test_mxMalloc(Aii, __LINE__, __FILE__, __func__, nnz * sizeof(int));
|
|
int *Aip = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Aip, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
cudaChk(cudaMemcpy(A_tild_host, A_tild, nnz*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_host = A_tild has failed\n");
|
|
cudaChk(cudaMemcpy(Aii, Ai, nnz*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aii = Ai has failed\n");
|
|
cudaChk(cudaMemcpy(Aip, Ap, (n+1)*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aip = Ai has failed\n");
|
|
for (int i = 0; i < nnz; i++)
|
|
Awi[i] = Aii[i];
|
|
for (int i = 0; i < n + 1; i++)
|
|
Awp[i] = Aip[i];
|
|
mxFree(Aii);
|
|
mxFree(Aip);
|
|
mxArray *At_m = mxCreateSparse(n, n, nnz, mxREAL);
|
|
mxSetIr(At_m, Awi);
|
|
mxSetJc(At_m, Awp);
|
|
mxSetPr(At_m, A_tild_host);
|
|
mxArray *A_m;
|
|
mexCallMATLAB(1, &A_m, 1, &At_m, "transpose");
|
|
mxDestroyArray(At_m);
|
|
|
|
/*mexPrintf("A_m\n");
|
|
mexCallMATLAB(0, NULL, 1, &A_m, "disp_dense");*/
|
|
/*mxFree(Awi);
|
|
mxFree(Awp);*/
|
|
|
|
/*[L1, U1] = ilu(g1a=;*/
|
|
const char *field_names[] = {"type", "droptol", "milu", "udiag", "thresh"};
|
|
const int type = 0;
|
|
const int droptol = 1;
|
|
const int milu = 2;
|
|
const int udiag = 3;
|
|
const int thresh = 4;
|
|
mwSize dims[1] = {(mwSize) 1 };
|
|
mxArray *Setup = mxCreateStructArray(1, dims, 5, field_names);
|
|
mxSetFieldByNumber(Setup, 0, type, mxCreateString("ilutp"));
|
|
//mxSetFieldByNumber(Setup, 0, type, mxCreateString("nofill"));
|
|
mxSetFieldByNumber(Setup, 0, droptol, mxCreateDoubleScalar(lu_inc_tol));
|
|
mxSetFieldByNumber(Setup, 0, milu, mxCreateString("off"));
|
|
mxSetFieldByNumber(Setup, 0, udiag, mxCreateDoubleScalar(0));
|
|
mxSetFieldByNumber(Setup, 0, thresh, mxCreateDoubleScalar(1));
|
|
//mxSetFieldByNumber(Setup, 0, thresh, mxCreateDoubleScalar(1));
|
|
mxArray *lhs0[2];
|
|
mxArray *rhs0[2];
|
|
rhs0[0] = A_m;
|
|
rhs0[1] = Setup;
|
|
ostringstream tmp;
|
|
if (mexCallMATLAB(2, lhs0, 2, rhs0, "ilu"))
|
|
{
|
|
tmp << " In BiCGStab, the incomplet LU decomposition (ilu) ahs failed.\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mxDestroyArray(Setup);
|
|
|
|
/* //ILUT preconditionner computed by Matlab (todo: in futur version of cuda replace it by a new equivalent cuda function)
|
|
const char *field_names[] = {"type", "droptol", "milu", "udiag", "thresh"};
|
|
const int type = 0;
|
|
const int droptol = 1;
|
|
const int milu = 2;
|
|
const int udiag = 3;
|
|
const int thresh = 4;
|
|
mwSize dims[1] = {(mwSize)1 };
|
|
mxArray *Setup = mxCreateStructArray(1, dims, 5, field_names);
|
|
mxSetFieldByNumber(Setup, 0, type, mxCreateString("ilutp"));
|
|
mxSetFieldByNumber(Setup, 0, droptol, mxCreateDoubleScalar(lu_inc_tol));
|
|
mxSetFieldByNumber(Setup, 0, milu, mxCreateString("off"));
|
|
mxSetFieldByNumber(Setup, 0, udiag, mxCreateDoubleScalar(0));
|
|
mxSetFieldByNumber(Setup, 0, thresh, mxCreateDoubleScalar(0));
|
|
mxArray *lhs0[2], *rhs0[2];
|
|
rhs0[0] = A_m;
|
|
rhs0[1] = Setup;
|
|
mexCallMATLAB(1, lhs0, 2, rhs0, "ilu");
|
|
*/
|
|
// To store the resultng matrix in a CSR format we have to transpose it
|
|
mxArray *Wt = lhs0[0];
|
|
mwIndex *Wtj = mxGetJc(Wt);
|
|
nnz = Wtj[n];
|
|
mxArray *W;
|
|
mexCallMATLAB(1, &W, 1, &Wt, "transpose");
|
|
mxDestroyArray(Wt);
|
|
double *pW = mxGetPr(W);
|
|
mwIndex *Wi = mxGetIr(W);
|
|
mwIndex *Wp = mxGetJc(W);
|
|
int *Wii = (int *) mxMalloc(nnz * sizeof(int));
|
|
test_mxMalloc(Wii, __LINE__, __FILE__, __func__, nnz * sizeof(int));
|
|
int *Wip = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Wip, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
for (int i = 0; i < nnz; i++)
|
|
Wii[i] = Wi[i];
|
|
for (int i = 0; i < n + 1; i++)
|
|
Wip[i] = Wp[i];
|
|
|
|
//mxFree(A_tild_host);
|
|
|
|
cudaChk(cudaFree(A_tild), "cudaFree(A_tild) has failed\n");
|
|
|
|
cudaChk(cudaMalloc((void **) &A_tild, nnz * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate A_tild on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild, pW, nnz * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild = pW has failed\n");
|
|
cudaChk(cudaMalloc((void **) &A_tild_i, nnz * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Ai on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_i, Wii, nnz * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_i = A_tild_i_host has failed\n");
|
|
cudaChk(cudaMalloc((void **) &A_tild_p, (n + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate A_tild_p on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_p, Wip, (n + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_p = A_tild_j_host has failed\n");
|
|
/*mxFree(pW);
|
|
mxFree(Wi);
|
|
mxFree(Wj);*/
|
|
mxDestroyArray(W);
|
|
mxFree(Wii);
|
|
mxFree(Wip);
|
|
}
|
|
else if (preconditioner == 3)
|
|
{
|
|
mwIndex *Aowi, *Aowp;
|
|
double *A_host = (double *) mxMalloc(nnz*sizeof(double));
|
|
test_mxMalloc(A_host, __LINE__, __FILE__, __func__, nnz*sizeof(double));
|
|
Aowi = (mwIndex *) mxMalloc(nnz * sizeof(mwIndex));
|
|
test_mxMalloc(Aowi, __LINE__, __FILE__, __func__, nnz * sizeof(mwIndex));
|
|
Aowp = (mwIndex *) mxMalloc((n + 1) * sizeof(mwIndex));
|
|
test_mxMalloc(Aowp, __LINE__, __FILE__, __func__, (n + 1) * sizeof(mwIndex));
|
|
int *Aoii = (int *) mxMalloc(nnz * sizeof(int));
|
|
test_mxMalloc(Aoii, __LINE__, __FILE__, __func__, nnz * sizeof(int));
|
|
int *Aoip = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Aoip, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
cudaChk(cudaMemcpy(A_host, Ax, nnz*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_host = A_tild has failed\n");
|
|
cudaChk(cudaMemcpy(Aoii, Ai, nnz*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aii = Ai_tild has failed\n");
|
|
cudaChk(cudaMemcpy(Aoip, Ap, (n+1)*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aip = Ap_tild has failed\n");
|
|
for (int i = 0; i < nnz; i++)
|
|
Aowi[i] = Aoii[i];
|
|
for (int i = 0; i < n + 1; i++)
|
|
Aowp[i] = Aoip[i];
|
|
mxFree(Aoii);
|
|
mxFree(Aoip);
|
|
mxArray *Ao_m = mxCreateSparse(n, n, nnz, mxREAL);
|
|
mxSetIr(Ao_m, Aowi);
|
|
mxSetJc(Ao_m, Aowp);
|
|
mxSetPr(Ao_m, A_host);
|
|
/*mexPrintf("A_m\n");
|
|
mxArray *Aoo;
|
|
mexCallMATLAB(1, &Aoo, 1, &Ao_m, "transpose");
|
|
mexCallMATLAB(0, NULL, 1, &Aoo, "disp_dense");
|
|
mxDestroyArray(Ao_m);
|
|
mxDestroyArray(Aoo);*/
|
|
|
|
//Because the Jacobian matrix A is store in CSC format in matlab
|
|
// we have to transpose it to get a CSR format used by CUDA
|
|
mwIndex *Awi, *Awp;
|
|
double *A_tild_host = (double *) mxMalloc(nnz_tild*sizeof(double));
|
|
test_mxMalloc(A_tild_host, __LINE__, __FILE__, __func__, nnz_tild*sizeof(double));
|
|
Awi = (mwIndex *) mxMalloc(nnz_tild * sizeof(mwIndex));
|
|
test_mxMalloc(Awi, __LINE__, __FILE__, __func__, nnz_tild * sizeof(mwIndex));
|
|
Awp = (mwIndex *) mxMalloc((Size + 1) * sizeof(mwIndex));
|
|
test_mxMalloc(Awp, __LINE__, __FILE__, __func__, (Size + 1) * sizeof(mwIndex));
|
|
int *Aii = (int *) mxMalloc(nnz_tild * sizeof(int));
|
|
test_mxMalloc(Aii, __LINE__, __FILE__, __func__, nnz_tild * sizeof(int));
|
|
int *Aip = (int *) mxMalloc((Size + 1) * sizeof(int));
|
|
test_mxMalloc(Aip, __LINE__, __FILE__, __func__, (Size + 1) * sizeof(int));
|
|
cudaChk(cudaMemcpy(A_tild_host, A_tild, nnz_tild*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_host = A_tild has failed\n");
|
|
cudaChk(cudaMemcpy(Aii, Ai_tild, nnz_tild*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aii = Ai_tild has failed\n");
|
|
cudaChk(cudaMemcpy(Aip, Ap_tild, (Size+1)*sizeof(int), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy Aip = Ap_tild has failed\n");
|
|
for (int i = 0; i < nnz_tild; i++)
|
|
Awi[i] = Aii[i];
|
|
for (int i = 0; i < Size + 1; i++)
|
|
Awp[i] = Aip[i];
|
|
/*for (int i = 0; i < nnz_tild; i++)
|
|
mexPrintf("%20.17f\n",A_tild_host[i]);*/
|
|
mxFree(Aii);
|
|
mxFree(Aip);
|
|
mxArray *At_m = mxCreateSparse(Size, Size, nnz_tild, mxREAL);
|
|
mxSetIr(At_m, Awi);
|
|
mxSetJc(At_m, Awp);
|
|
mxSetPr(At_m, A_tild_host);
|
|
mxArray *A_m;
|
|
mexCallMATLAB(1, &A_m, 1, &At_m, "transpose");
|
|
/*mexPrintf("A_tild_m\n");
|
|
mexCallMATLAB(0, NULL, 1, &A_m, "disp_dense");*/
|
|
mxDestroyArray(At_m);
|
|
mxArray *P, *Q, *L, *U;
|
|
mxArray *lhs0[4];
|
|
mexCallMATLAB(4, lhs0, 1, &A_m, "lu");
|
|
|
|
mxArray *P0, *Q0, *L0, *U0;
|
|
L0 = lhs0[0];
|
|
U0 = lhs0[1];
|
|
P0 = lhs0[2];
|
|
Q0 = lhs0[3];
|
|
mexCallMATLAB(1, &P, 1, &P0, "transpose");
|
|
mexCallMATLAB(1, &Q, 1, &Q0, "transpose");
|
|
mexCallMATLAB(1, &L, 1, &L0, "transpose");
|
|
mexCallMATLAB(1, &U, 1, &U0, "transpose");
|
|
mxDestroyArray(P0);
|
|
mxDestroyArray(Q0);
|
|
mxDestroyArray(L0);
|
|
mxDestroyArray(U0);
|
|
/*L = lhs0[0];
|
|
U = lhs0[1];
|
|
P = lhs0[2];
|
|
Q = lhs0[3];*/
|
|
|
|
/*mexPrintf("L\n");
|
|
mexCallMATLAB(0, NULL, 1, &L, "disp_dense");
|
|
|
|
mexPrintf("U\n");
|
|
mexCallMATLAB(0, NULL, 1, &U, "disp_dense");
|
|
|
|
mexPrintf("P\n");
|
|
mexCallMATLAB(0, NULL, 1, &P, "disp_dense");
|
|
|
|
mexPrintf("Q\n");
|
|
mexCallMATLAB(0, NULL, 1, &Q, "disp_dense");*/
|
|
|
|
mwIndex *Qiw_host = mxGetIr(Q);
|
|
mwIndex *Qjw_host = mxGetJc(Q);
|
|
double *Qx_host = mxGetPr(Q);
|
|
Q_nnz = Qjw_host[Size];
|
|
mexPrintf("Q_nnz=%d\n", Q_nnz);
|
|
int *Qi_host = (int *) mxMalloc(Q_nnz * periods * sizeof(int));
|
|
test_mxMalloc(Qi_host, __LINE__, __FILE__, __func__, Q_nnz * periods * sizeof(int));
|
|
double *Q_x_host = (double *) mxMalloc(Q_nnz * periods * sizeof(double));
|
|
test_mxMalloc(Q_x_host, __LINE__, __FILE__, __func__, Q_nnz * periods * sizeof(double));
|
|
int *Qj_host = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Qj_host, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
for (int t = 0; t < periods; t++)
|
|
{
|
|
for (int i = 0; i < Q_nnz; i++)
|
|
{
|
|
Qi_host[i + t * Q_nnz] = Qiw_host[i] + t * Size;
|
|
Q_x_host[i + t * Q_nnz] = Qx_host[i];
|
|
}
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
Qj_host[i + t * Size] = Qjw_host[i] + t * Q_nnz;
|
|
}
|
|
}
|
|
Qj_host[periods * Size] = periods * Q_nnz;
|
|
|
|
/*mwIndex *Qtiw_host = (mwIndex*) mxMalloc(Q_nnz * periods * sizeof(mwIndex));
|
|
double *Qt_x_host = (double*)mxMalloc(Q_nnz * periods * sizeof(double));
|
|
mwIndex *Qtjw_host = (mwIndex*)mxMalloc((n + 1) * sizeof(mwIndex));
|
|
mexPrintf("n = %d\n",n);
|
|
for (int i = 0; i < n + 1; i++)
|
|
Qtjw_host[i] = Qj_host[i];
|
|
for (int i = 0; i < Q_nnz * periods; i++)
|
|
{
|
|
Qtiw_host[i] = Qi_host[i];
|
|
Qt_x_host[i] = Q_x_host[i];
|
|
}
|
|
mxArray* Qt_m = mxCreateSparse(n,n,Q_nnz * periods,mxREAL);
|
|
mxSetIr(Qt_m, Qtiw_host);
|
|
mxSetJc(Qt_m, Qtjw_host);
|
|
mxSetPr(Qt_m, Qt_x_host);
|
|
mexPrintf("Qt_m\n");
|
|
mexCallMATLAB(0, NULL, 1, &Qt_m, "disp_dense");*/
|
|
|
|
/*mexPrintf("Qtjw_host[periods * Size=%d]=%d\n", periods * Size, Qtjw_host[periods * Size]);
|
|
for (int i = 0; i < n; i++)
|
|
for (int j = Qtjw_host[i]; j < Qtjw_host[i+1]; j++)
|
|
mexPrintf("(i=%d, j=%d) = %f\n", i, Qtiw_host[j], Qt_x_host[j]);*/
|
|
//mxDestroyArray(Qt_m);
|
|
|
|
cudaChk(cudaMalloc((void **) &Qx, Q_nnz * periods * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate Qx on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Qx, Q_x_host, Q_nnz * periods * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Qx = Qx_host has failed\n");
|
|
cudaChk(cudaMalloc((void **) &Qi, Q_nnz * periods * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Qi on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Qi, Qi_host, Q_nnz * periods * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Qi = Qi_host has failed\n");
|
|
cudaChk(cudaMalloc((void **) &Qj, (Size * periods + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Qj on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Qj, Qj_host, (Size * periods + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Qj = Qj_host has failed\n");
|
|
mxFree(Qi_host);
|
|
mxFree(Qj_host);
|
|
mxFree(Q_x_host);
|
|
mxDestroyArray(Q);
|
|
|
|
mwIndex *Piw_host = mxGetIr(P);
|
|
mwIndex *Pjw_host = mxGetJc(P);
|
|
double *Px_host = mxGetPr(P);
|
|
P_nnz = Pjw_host[Size];
|
|
int *Pi_host = (int *) mxMalloc(P_nnz * periods * sizeof(int));
|
|
test_mxMalloc(Pi_host, __LINE__, __FILE__, __func__, P_nnz * periods * sizeof(int));
|
|
double *P_x_host = (double *) mxMalloc(P_nnz * periods * sizeof(double));
|
|
test_mxMalloc(P_x_host, __LINE__, __FILE__, __func__, P_nnz * periods * sizeof(double));
|
|
int *Pj_host = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Pj_host, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
for (int t = 0; t < periods; t++)
|
|
{
|
|
for (int i = 0; i < P_nnz; i++)
|
|
{
|
|
Pi_host[i + t * P_nnz] = Piw_host[i] + t * Size;
|
|
P_x_host[i + t * P_nnz] = Px_host[i];
|
|
}
|
|
for (int i = 0; i < Size; i++)
|
|
Pj_host[i + t * Size] = Pjw_host[i] + t * P_nnz;
|
|
}
|
|
Pj_host[periods * Size] = periods * P_nnz;
|
|
|
|
/*mwIndex *Ptiw_host = (mwIndex*) mxMalloc(P_nnz * periods * sizeof(mwIndex));
|
|
double *Pt_x_host = (double*)mxMalloc(P_nnz * periods * sizeof(double));
|
|
mwIndex *Ptjw_host = (mwIndex*)mxMalloc((n + 1) * sizeof(mwIndex));
|
|
for (int i = 0; i < n + 1; i++)
|
|
Ptjw_host[i] = Pj_host[i];
|
|
for (int i = 0; i < P_nnz * periods; i++)
|
|
{
|
|
Ptiw_host[i] = Pi_host[i];
|
|
Pt_x_host[i] = P_x_host[i];
|
|
}
|
|
mxArray* Pt_m = mxCreateSparse(n,n,P_nnz * periods,mxREAL);
|
|
mxSetIr(Pt_m, Ptiw_host);
|
|
mxSetJc(Pt_m, Ptjw_host);
|
|
mxSetPr(Pt_m, Pt_x_host);
|
|
mexPrintf("Pt_m\n");
|
|
mexCallMATLAB(0, NULL, 1, &Pt_m, "disp_dense");
|
|
mxDestroyArray(Pt_m);*/
|
|
|
|
cudaChk(cudaMalloc((void **) &Px, P_nnz * periods * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate Px on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Px, P_x_host, P_nnz * periods * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Px = Px_host has failed\n");
|
|
cudaChk(cudaMalloc((void **) &Pi, P_nnz * periods * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pi on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Pi, Pi_host, P_nnz * periods * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Pi = Pi_host has failed\n");
|
|
cudaChk(cudaMalloc((void **) &Pj, (Size * periods + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pj on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Pj, Pj_host, (Size * periods + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Pj = Pj_host has failed\n");
|
|
mxFree(Pi_host);
|
|
mxFree(Pj_host);
|
|
mxFree(P_x_host);
|
|
mxDestroyArray(P);
|
|
|
|
/*mwIndex* Piw_host = mxGetIr(P);
|
|
mwIndex* Pjw_host = mxGetJc(P);
|
|
double* Px_host = mxGetPr(P);
|
|
P_nnz = Pjw_host[Size];
|
|
int *Pi_host = (int*)mxMalloc(P_nnz * sizeof(int));
|
|
int *Pj_host = (int*)mxMalloc((Size + 1) * sizeof(int));
|
|
for (int i = 0; i < P_nnz; i++)
|
|
Pi_host[i] = Piw_host[i];
|
|
for (int i = 0; i < Size + 1; i++)
|
|
Pj_host[i] = Pjw_host[i];
|
|
|
|
cudaChk(cudaMalloc((void**)&Px, P_nnz * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate Px on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Px, Px_host, P_nnz * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Px = Px_host has failed\n");
|
|
cudaChk(cudaMalloc((void**)&Pi, P_nnz * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pi on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Pi, Pi_host, P_nnz * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Pi = Pi_host has failed\n");
|
|
cudaChk(cudaMalloc((void**)&Pj, (Size + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pj on the graphic card\n");
|
|
cudaChk(cudaMemcpy(Pj, Pj_host, (Size + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy Pj = Pj_host has failed\n");
|
|
mxFree(Pi_host);
|
|
mxFree(Pj_host);
|
|
mxDestroyArray(P);*/
|
|
|
|
/*mexPrintf("L\n");
|
|
mexCallMATLAB(0, NULL, 1, &L, "disp_dense");
|
|
|
|
mexPrintf("U\n");
|
|
mexCallMATLAB(0, NULL, 1, &U, "disp_dense");*/
|
|
|
|
mwIndex *Liw_host = mxGetIr(L);
|
|
mwIndex *Ljw_host = mxGetJc(L);
|
|
double *Lx_host = mxGetPr(L);
|
|
int L_nnz = Ljw_host[Size];
|
|
|
|
mwIndex *Uiw_host = mxGetIr(U);
|
|
mwIndex *Ujw_host = mxGetJc(U);
|
|
double *Ux_host = mxGetPr(U);
|
|
int U_nnz = Ujw_host[Size];
|
|
|
|
double *pW = (double *) mxMalloc((L_nnz + U_nnz - Size) * periods * sizeof(double));
|
|
test_mxMalloc(pW, __LINE__, __FILE__, __func__, (L_nnz + U_nnz - Size) * periods * sizeof(double));
|
|
int *Wi = (int *) mxMalloc((L_nnz + U_nnz - Size) * periods * sizeof(int));
|
|
test_mxMalloc(Wi, __LINE__, __FILE__, __func__, (L_nnz + U_nnz - Size) * periods * sizeof(int));
|
|
int *Wj = (int *) mxMalloc((n + 1) * sizeof(int));
|
|
test_mxMalloc(Wj, __LINE__, __FILE__, __func__, (n + 1) * sizeof(int));
|
|
Wj[0] = 0;
|
|
W_nnz = 0;
|
|
for (int t = 0; t < periods; t++)
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
for (mwIndex l = Ujw_host[i]; l < Ujw_host[i+1]; l++)
|
|
{
|
|
Wi[W_nnz] = Uiw_host[l] + t * Size;
|
|
pW[W_nnz] = Ux_host[l];
|
|
//mexPrintf("Wj[%d] = %d, Wi[%d] = Uiw_host[%d] + t * Size = %d, pW[%d]=%f\n", i + t * Size, Wj[i + t * Size], W_nnz, l, Uiw_host[l] + t * Size, W_nnz, Ux_host[l]);
|
|
W_nnz++;
|
|
}
|
|
for (mwIndex l = Ljw_host[i]; l < Ljw_host[i+1]; l++)
|
|
{
|
|
if (Liw_host[l] > i)
|
|
{
|
|
Wi[W_nnz] = Liw_host[l] + t * Size;
|
|
pW[W_nnz] = Lx_host[l];
|
|
//mexPrintf("Wj[%d] = %d, Wi[%d] = Liw_host[%d] + t * Size = %d, pW[%d]=%f\n", i + t * Size, Wj[i + t * Size], W_nnz, l, Liw_host[l] + t * Size, W_nnz, Lx_host[l]);
|
|
W_nnz++;
|
|
}
|
|
}
|
|
Wj[i + 1 + t * Size] = W_nnz;
|
|
}
|
|
//mexPrintf("Wj[%d] = %d, n=%d\n", periods * Size, Wj[periods * Size], n);
|
|
cudaChk(cudaMalloc((void **) &A_tild, W_nnz * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate Px on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild, pW, W_nnz * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild = pW has failed\n");
|
|
cudaChk(cudaMalloc((void **) &A_tild_i, W_nnz * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pi on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_i, Wi, W_nnz * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_i = Wi has failed\n");
|
|
cudaChk(cudaMalloc((void **) &A_tild_p, (n + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Pj on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_p, Wj, (n + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_p = Wj has failed\n");
|
|
|
|
/*mwIndex *Wwi = (mwIndex*)mxMalloc(W_nnz * sizeof(mwIndex));
|
|
mwIndex *Wwj = (mwIndex*)mxMalloc((n + 1) * sizeof(mwIndex));
|
|
for (int i = 0; i < W_nnz; i++)
|
|
Wwi[i] = Wi[i];
|
|
for (int i = 0; i < n + 1; i++)
|
|
Wwj[i] = Wj[i];
|
|
mxFree(Wi);
|
|
mxFree(Wj);
|
|
mxArray* Ao_tild = mxCreateSparse(n,n,W_nnz,mxREAL);
|
|
mxSetIr(Ao_tild, Wwi);
|
|
mxSetJc(Ao_tild, Wwj);
|
|
mxSetPr(Ao_tild, pW);
|
|
mexPrintf("Ao_tild\n");
|
|
mexCallMATLAB(0, NULL, 1, &Ao_tild, "disp_dense");
|
|
mxDestroyArray(Ao_tild);*/
|
|
|
|
/*ostringstream tmp;
|
|
tmp << "debugging";
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 4;*/
|
|
|
|
/* Apply the permutation matrices (P and Q) to the b vector of system to solve :
|
|
b_tild = P-1 . b = P' . b */
|
|
/*cudaChk(cudaMalloc((void**)&b_tild, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate b_tild on the graphic card\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
|
n, n, nnz, &one, CUDA_descr,
|
|
Px, Pj, Pi,
|
|
b, &zeros,
|
|
b_tild),
|
|
" in Solve_Cuda_BiCGStab, b_tild = cusparseDcsrmv(P', b) has failed\n");
|
|
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
|
n, n, nnz, &one, CUDA_descr,
|
|
Px, Pj, Pi,
|
|
b, &zeros,
|
|
b),
|
|
" in Solve_Cuda_BiCGStab, b = cusparseDcsrmv(P', b) has failed\n");
|
|
*/
|
|
/*mexPrintf("Wt = lu(A_m)\n");
|
|
mexCallMATLAB(0, NULL, 1, &Wt, "disp_dense");*/
|
|
/*ostringstream tmp;
|
|
tmp << "debugging";
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 4;*/
|
|
// To store the resulting matrix in a CSR format we have to transpose it
|
|
/*mwIndex* Wtj = mxGetJc(Wt);
|
|
nnz = Wtj[n];
|
|
mxArray* W;
|
|
mexCallMATLAB(1, &W, 1, &Wt, "transpose");
|
|
mxDestroyArray(Wt);
|
|
pW = mxGetPr(W);
|
|
Wwi = mxGetIr(W);
|
|
mwIndex* Wp = mxGetJc(W);
|
|
int *Wii = (int*)mxMalloc(nnz * sizeof(int));
|
|
int *Wip = (int*)mxMalloc((n + 1) * sizeof(int));
|
|
for (int i = 0; i < nnz; i++)
|
|
Wii[i] = Wi[i];
|
|
for (int i = 0; i < n + 1; i++)
|
|
Wip[i] = Wp[i];
|
|
|
|
//mxFree(A_tild_host);
|
|
|
|
cudaChk(cudaFree(Ai_tild), " in Solve_Cuda_BiCGStab, cudaFree(Ai_tild) has failed\n");
|
|
cudaChk(cudaFree(Ap_tild), " in Solve_Cuda_BiCGStab, cudaFree(Ap_tild) has failed\n");
|
|
cudaChk(cudaFree(A_tild), " in Solve_Cuda_BiCGStab, cudaFree(A_tild) has failed\n");
|
|
|
|
cudaChk(cudaMalloc((void**)&A_tild, nnz * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate A_tild on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild, pW, nnz * sizeof(double), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild = pW has failed\n");
|
|
cudaChk(cudaMalloc((void**)&A_tild_i, nnz * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate Ai on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_i, Wii, nnz * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_i = A_tild_i_host has failed\n");
|
|
cudaChk(cudaMalloc((void**)&A_tild_p, (n + 1) * sizeof(int)), " in Solve_Cuda_BiCGStab, can't allocate A_tild_p on the graphic card\n");
|
|
cudaChk(cudaMemcpy(A_tild_p, Wip, (n + 1) * sizeof(int), cudaMemcpyHostToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy A_tild_p = A_tild_j_host has failed\n");
|
|
mxDestroyArray(W);
|
|
mxFree(Wii);
|
|
mxFree(Wip);*/
|
|
}
|
|
if (preconditioner == 1 || preconditioner == 2 || preconditioner == 3)
|
|
{
|
|
cusparseChk(cusparseCreateMatDescr(&descrL),
|
|
" in Solve_Cuda_BiCGStab, cusparseCreateMatDescr has failed for descrL\n");
|
|
cusparseChk(cusparseSetMatIndexBase(descrL, CUSPARSE_INDEX_BASE_ZERO),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatIndexBase has failed for descrL\n");
|
|
cusparseChk(cusparseSetMatType(descrL, CUSPARSE_MATRIX_TYPE_GENERAL),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatType has failed for descrL\n");
|
|
cusparseChk(cusparseSetMatFillMode(descrL, CUSPARSE_FILL_MODE_LOWER),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetFillMod has failed for descrL\n");
|
|
cusparseChk(cusparseSetMatDiagType(descrL, CUSPARSE_DIAG_TYPE_UNIT),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatDiagType has failed for descrL\n");
|
|
|
|
cusparseChk(cusparseCreateMatDescr(&descrU),
|
|
" in Solve_Cuda_BiCGStab, cusparseCreateMatDescr has failed for descrU\n");
|
|
cusparseChk(cusparseSetMatIndexBase(descrU, CUSPARSE_INDEX_BASE_ZERO),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatIndexBase has failed for descrU\n");
|
|
cusparseChk(cusparseSetMatType(descrU, CUSPARSE_MATRIX_TYPE_GENERAL),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatType has failed for descrU\n");
|
|
cusparseChk(cusparseSetMatFillMode(descrU, CUSPARSE_FILL_MODE_UPPER),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetFillMod has failed for descrU\n");
|
|
cusparseChk(cusparseSetMatDiagType(descrU, CUSPARSE_DIAG_TYPE_NON_UNIT),
|
|
" in Solve_Cuda_BiCGStab, cusparseSetMatDiagType has failed for descrU\n");
|
|
|
|
int host_nnz_tild;
|
|
if (preconditioner == 3)
|
|
host_nnz_tild = W_nnz;
|
|
else
|
|
host_nnz_tild = nnz;
|
|
|
|
if (preconditioner == 1)
|
|
cusparseChk(cusparseDestroySolveAnalysisInfo(info),
|
|
" in Solve_Cuda_BiCGStab, cusparseDestroySolveAnalysisInfo has failed for info\n");
|
|
|
|
cusparseChk(cusparseCreateSolveAnalysisInfo(&infoL),
|
|
" in Solve_Cuda_BiCGStab, cusparseCreateSolveAnalysisInfo has failed for infoL\n");
|
|
cusparseChk(cusparseDcsrsv_analysis(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, host_nnz_tild, descrL,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoL),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsm_analysis for infoL has failed\n");
|
|
|
|
cusparseChk(cusparseCreateSolveAnalysisInfo(&infoU),
|
|
" in Solve_Cuda_BiCGStab, cusparseCreateSolveAnalysisInfo has failed for infoU\n");
|
|
cusparseChk(cusparseDcsrsv_analysis(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, host_nnz_tild, descrU,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoU),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsm_analysis for infoU has failed\n");
|
|
}
|
|
|
|
cudaChk(cudaMalloc((void **) &v, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate v on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) &p, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate p on the graphic card\n");
|
|
//cudaChk(cudaMemset(p, 0, n * sizeof(double)), " in Solve_Cuda_BiCGStab, cudaMemset p = 0 has failed\n");
|
|
cudaChk(cudaMalloc((void **) &s, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate s on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) &t, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate t on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) &y_, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate y_ on the graphic card\n");
|
|
cudaChk(cudaMalloc((void **) &z, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate z on the graphic card\n");
|
|
|
|
double rho = 1.0, alpha = 1.0, omega = 1.0;
|
|
|
|
//residual = P*B*Q - L*U;
|
|
//norm(Z,1) should be close to 0
|
|
|
|
while (iteration < 50 /*max_iterations*/ && !convergence)
|
|
{
|
|
double rho_prev = rho;
|
|
/**store in s previous value of r*/
|
|
cudaChk(cudaMemcpy(s, r, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy s = r has failed\n");
|
|
|
|
/**rho = r0 . r*/
|
|
cublasChk(cublasDdot(cublas_handle, n, // numerator
|
|
r0, 1,
|
|
r, 1,
|
|
&rho),
|
|
" in Solve_Cuda_BiCGStab, rho = cublasDdot(r0, r) has failed\n");
|
|
|
|
mexPrintf("rho=%f\n", rho);
|
|
|
|
double beta;
|
|
|
|
if (iteration == 0)
|
|
{
|
|
cudaChk(cudaMemcpy(p, r, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy p = r has failed\n");
|
|
}
|
|
else
|
|
{
|
|
/**beta = (rho / rho_prev) . (alpha / omega);*/
|
|
beta = rho / rho_prev * alpha / omega;
|
|
|
|
/**p = r + beta * (p - omega * v)*/
|
|
// tmp_ = p - omega * v
|
|
VecAdd<<< nblocks, n_threads>>> (tmp_, p, -omega, v, n);
|
|
//p = r + beta * tmp_
|
|
VecAdd<<< nblocks, n_threads>>> (p, r, beta, tmp_, n);
|
|
}
|
|
|
|
/**y_ solution of A_tild * y_ = p <=> L . U . y_ = p*/
|
|
// L tmp_ = p => tmp_ = L^-1 p, with tmp_ = U . y_
|
|
|
|
if (preconditioner == 3)
|
|
{
|
|
double *p_tild;
|
|
|
|
cudaChk(cudaMemcpy(tmp_vect_host, p, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = p has failed\n");
|
|
/*mexPrintf("p\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
|
|
cudaChk(cudaMalloc((void **) &p_tild, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate b_tild on the graphic card\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, P_nnz * periods, &one, CUDA_descr,
|
|
Px, Pj, Pi,
|
|
p, &zeros,
|
|
p_tild),
|
|
" in Solve_Cuda_BiCGStab, p_tild = cusparseDcsrmv(P', p) has failed\n");
|
|
|
|
/*mexPrintf("P\n");
|
|
printM(n, Px, Pj, Pi, CUDA_descr, cusparse_handle);*/
|
|
|
|
cudaChk(cudaMemcpy(tmp_vect_host, p_tild, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = p_tild has failed\n");
|
|
/*mexPrintf("p_tild\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrL,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoL, p_tild,
|
|
tmp_),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for L . tmp_ = p_tild has failed\n");
|
|
cudaChk(cudaFree(p_tild), " in Solve_Cuda_BiCGStab, can't free p_tild\n");
|
|
|
|
cudaChk(cudaMemcpy(tmp_vect_host, tmp_, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = v has failed\n");
|
|
/*mexPrintf("tmp_\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
}
|
|
else
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrL,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoL, p,
|
|
tmp_),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for L . tmp_ = p has failed\n");
|
|
|
|
// U . y_ = L^-1 p <=> U . y_ = tmp_ => y_ = U^-1 L^-1 p
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrU,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoU, tmp_,
|
|
y_),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for U . y_ = tmp_ has failed\n");
|
|
|
|
/*cudaChk(cudaMemcpy(tmp_vect_host, y_, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = v has failed\n");
|
|
mexPrintf("y_\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
|
|
if (preconditioner == 3)
|
|
{
|
|
double *y_tild;
|
|
cudaChk(cudaMalloc((void **) &y_tild, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate b_tild on the graphic card\n");
|
|
cudaChk(cudaMemcpy(y_tild, y_, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy y_tild = y_ has failed\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, Q_nnz * periods, &one, CUDA_descr,
|
|
Qx, Qj, Qi,
|
|
y_tild, &zeros,
|
|
y_),
|
|
" in Solve_Cuda_BiCGStab, y_ = cusparseDcsrmv(Q', y_tild) has failed\n");
|
|
cudaChk(cudaFree(y_tild), " in Solve_Cuda_BiCGStab, can't free y_tild\n");
|
|
}
|
|
/*cudaChk(cudaMemcpy(tmp_vect_host, y_, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = v has failed\n");
|
|
mexPrintf("y_\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
/**v = A*y_*/
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, nnz, &one, CUDA_descr,
|
|
Ax, Ap, Ai,
|
|
y_, &zeros,
|
|
v),
|
|
" in Solve_Cuda_BiCGStab, v = cusparseDcsrmv(A, y_) has failed\n");
|
|
cudaChk(cudaMemcpy(tmp_vect_host, v, n*sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = v has failed\n");
|
|
/*mexPrintf("v\n");
|
|
for (int i = 0; i < n; i++)
|
|
mexPrintf("%f\n",tmp_vect_host[i]);*/
|
|
|
|
/**alpha = rho / (rr0 . v) with rr0 = r0*/
|
|
cublasChk(cublasDdot(cublas_handle, n, // numerator
|
|
r0, 1,
|
|
v, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDdot(r0, v) has failed\n");
|
|
|
|
alpha = rho / tmp1;
|
|
mexPrintf("rho = %f, tmp1 = %f\n", rho, tmp1);
|
|
mexPrintf("alpha = %f\n", alpha);
|
|
|
|
if (alpha == 0 || isinf(alpha) || isnan(alpha))
|
|
{
|
|
Solve_CUDA_BiCGStab_Free(tmp_vect_host, p, r, v, s, t, y_, z, tmp_, Ai, Ax, Ap, x0, b, A_tild, A_tild_i, A_tild_p, infoL, infoU, descrL, descrU, preconditioner);
|
|
ostringstream tmp;
|
|
tmp << "one of the scalar quantities (alpha=" << alpha << ") calculated during BICGSTAB became too small or too large to continue computing, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 4;
|
|
}
|
|
|
|
/** Check for potential stagnation*/
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
y_, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(y_) has failed\n");
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // denominator
|
|
x0, 1,
|
|
&tmp2),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(y_) has failed\n");
|
|
mexPrintf("abs(alpha)*tmp1 = %f, alpha = %f, tmp1 = %f, tmp2 = %f, eps = %f\n", abs(alpha)*tmp1, alpha, tmp1, tmp2, eps);
|
|
if (abs(alpha)*tmp1 < eps * tmp2)
|
|
stagnation++;
|
|
else
|
|
stagnation = 0;
|
|
|
|
/**x = x + alpha * y_*/
|
|
VecInc<<< nblocks, n_threads>>> (x0, alpha, y_, n);
|
|
|
|
/**s = r_prev - alpha *v with r_prev = s*/
|
|
VecInc<<< nblocks, n_threads>>> (s, -alpha, v, n);
|
|
|
|
/**Has BiCGStab converged?*/
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
s, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(s) has failed\n");
|
|
conv_criteria = tmp1;
|
|
mexPrintf("conv_criteria = %f, tolb = %f\n", conv_criteria, tolb);
|
|
convergence = conv_criteria < tolb;
|
|
|
|
if (convergence || stagnation >= max_stagnation || refinement_needed)
|
|
{
|
|
/**s = b - A * x0*/
|
|
cudaChk(cudaMemcpy(s, b, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy s = b has failed\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, nnz, &m_one, CUDA_descr,
|
|
Ax, Ap, Ai,
|
|
x0, &one,
|
|
s),
|
|
" in Solve_Cuda_BiCGStab, s = b - cusparseDcsrmv(A, x0) has failed\n");
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
s, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(s) has failed\n");
|
|
conv_criteria = tmp1;
|
|
convergence = conv_criteria < tolb;
|
|
if (convergence)
|
|
{
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if (stagnation >= max_stagnation && refinement_needed == 0)
|
|
stagnation = 0;
|
|
refinement_needed++;
|
|
if (refinement_needed > max_refinement)
|
|
{
|
|
Solve_CUDA_BiCGStab_Free(tmp_vect_host, p, r, v, s, t, y_, z, tmp_, Ai, Ax, Ap, x0, b, A_tild, A_tild_i, A_tild_p, infoL, infoU, descrL, descrU, preconditioner);
|
|
ostringstream tmp;
|
|
tmp << "Error in bytecode: BiCGStab stagnated (Two consecutive iterates were the same.), in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**z solution of A_tild * z = s*/
|
|
// L tmp_ = s => tmp_ = L^-1 s, with tmp_ = U . z
|
|
if (preconditioner == 3)
|
|
{
|
|
double *s_tild;
|
|
cudaChk(cudaMalloc((void **) &s_tild, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate b_tild on the graphic card\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, P_nnz * periods, &one, CUDA_descr,
|
|
Px, Pj, Pi,
|
|
s, &zeros,
|
|
s_tild),
|
|
" in Solve_Cuda_BiCGStab, s_tild = cusparseDcsrmv(P', s) has failed\n");
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrL,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoL, s_tild,
|
|
tmp_),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for L . tmp_ = s_tild has failed\n");
|
|
cudaChk(cudaFree(s_tild), " in Solve_Cuda_BiCGStab, can't free s_tild\n");
|
|
}
|
|
else
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrL,
|
|
//Lx, Lp, Li,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoL, s,
|
|
tmp_),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for L . tmp_ = s has failed\n");
|
|
// U . z = L^-1 s <=> U . z = tmp_ => z = U^-1 L^-1 s
|
|
cusparseChk(cusparseDcsrsv_solve(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, &one,
|
|
descrU,
|
|
//Ux, Up, Ui,
|
|
A_tild, A_tild_p, A_tild_i,
|
|
infoU, tmp_,
|
|
z),
|
|
" in Solve_Cuda_BiCGStab, cusparseDcsrsv_solve for U . z = tmp_ has failed\n");
|
|
if (preconditioner == 3)
|
|
{
|
|
double *z_tild;
|
|
cudaChk(cudaMalloc((void **) &z_tild, n * sizeof(double)), " in Solve_Cuda_BiCGStab, can't allocate z_tild on the graphic card\n");
|
|
cudaChk(cudaMemcpy(z_tild, z, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy z_tild = z has failed\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, Q_nnz * periods, &one, CUDA_descr,
|
|
Qx, Qj, Qi,
|
|
z_tild, &zeros,
|
|
z),
|
|
" in Solve_Cuda_BiCGStab, z = cusparseDcsrmv(Q, z_tild) has failed\n");
|
|
cudaChk(cudaFree(z_tild), " in Solve_Cuda_BiCGStab, can't free x_tild\n");
|
|
}
|
|
/**t = A * z*/
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, nnz, &one, CUDA_descr,
|
|
Ax, Ap, Ai,
|
|
z, &zeros,
|
|
t),
|
|
" in Solve_Cuda_BiCGStab, t = cusparseDcsrmv(A, z) has failed\n");
|
|
|
|
/** omega = (t' s) / (t' t)*/
|
|
cublasChk(cublasDdot(cublas_handle, n, // numerator
|
|
t, 1,
|
|
s, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDdot(t, s) has failed\n");
|
|
|
|
cublasChk(cublasDdot(cublas_handle, n, // numerator
|
|
t, 1,
|
|
t, 1,
|
|
&tmp2),
|
|
" in Solve_Cuda_BiCGStab, cublasDdot(t, t) has failed\n");
|
|
|
|
omega = tmp1 / tmp2;
|
|
|
|
if (omega == 0 || isinf(omega) || isnan(omega))
|
|
{
|
|
Solve_CUDA_BiCGStab_Free(tmp_vect_host, p, r, v, s, t, y_, z, tmp_, Ai, Ax, Ap, x0, b, A_tild, A_tild_i, A_tild_p, infoL, infoU, descrL, descrU, preconditioner);
|
|
ostringstream tmp;
|
|
mexEvalString("diary off;");
|
|
tmp << "one of the scalar quantities (omega=" << omega << ") calculated during BICGSTAB became too small or too large to continue computing, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 4;
|
|
}
|
|
|
|
/**x = x + omega * z*/
|
|
VecInc<<< nblocks, n_threads>>> (x0, omega, z, n);
|
|
|
|
/**r = s - omega * t*/
|
|
VecAdd<<< nblocks, n_threads>>> (r, s, -omega, t, n);
|
|
|
|
/**Has BiCGStab converged?*/
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
r, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(r) has failed\n");
|
|
conv_criteria = tmp1;
|
|
|
|
convergence = conv_criteria < tolb;
|
|
|
|
if (convergence || stagnation >= max_stagnation || refinement_needed)
|
|
{
|
|
/**r = b - A * x0*/
|
|
cudaChk(cudaMemcpy(r, b, n * sizeof(double), cudaMemcpyDeviceToDevice), " in Solve_Cuda_BiCGStab, cudaMemcpy r = b has failed\n");
|
|
cusparseChk(cusparseDcsrmv(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
|
n, n, nnz, &m_one, CUDA_descr,
|
|
Ax, Ap, Ai,
|
|
x0, &one,
|
|
r),
|
|
" in Solve_Cuda_BiCGStab, r = b - cusparseDcsrmv(A, x0) has failed\n");
|
|
cublasChk(cublasDnrm2(cublas_handle, n, // numerator
|
|
r, 1,
|
|
&tmp1),
|
|
" in Solve_Cuda_BiCGStab, cublasDnrm2(r) has failed\n");
|
|
conv_criteria = tmp1;
|
|
convergence = conv_criteria < tolb;
|
|
if (convergence)
|
|
{
|
|
mexPrintf("convergence achieved\n");
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if (stagnation >= max_stagnation && refinement_needed == 0)
|
|
stagnation = 0;
|
|
refinement_needed++;
|
|
if (refinement_needed > max_refinement)
|
|
{
|
|
Solve_CUDA_BiCGStab_Free(tmp_vect_host, p, r, v, s, t, y_, z, tmp_, Ai, Ax, Ap, x0, b, A_tild, A_tild_i, A_tild_p, /*Lx, Li, Lp, Ux, Ui, Up, device_n, */ infoL, infoU, descrL, descrU, preconditioner);
|
|
ostringstream tmp;
|
|
mexEvalString("diary off;");
|
|
tmp << "Error in bytecode: BiCGStab stagnated (Two consecutive iterates were the same.), in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
iteration++;
|
|
}
|
|
cudaChk(cudaMemcpy(tmp_vect_host, x0, n * sizeof(double), cudaMemcpyDeviceToHost), " in Solve_Cuda_BiCGStab, cudaMemcpy tmp_vect_host = x0 has failed\n");
|
|
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(tmp_vect_host[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(tmp_vect_host[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc * yy;
|
|
}
|
|
Solve_CUDA_BiCGStab_Free(tmp_vect_host, p, r, v, s, t, y_, z, tmp_, Ai, Ax, Ap, x0, b, A_tild, A_tild_i, A_tild_p, infoL, infoU, descrL, descrU, preconditioner);
|
|
|
|
if (iteration >= max_iterations)
|
|
{
|
|
ostringstream tmp;
|
|
mexEvalString("diary off;");
|
|
tmp << "Error in bytecode: No convergence inside BiCGStab, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
return 1;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
void
|
|
dynSparseMatrix::Solve_Matlab_GMRES(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m)
|
|
{
|
|
#ifdef OCTAVE_MEX_FILE
|
|
ostringstream tmp;
|
|
if (steady_state)
|
|
tmp << " GMRES method is not implemented in Octave. You cannot use solve_algo=7, change solve_algo.\n";
|
|
else
|
|
tmp << " GMRES method is not implemented in Octave. You cannot use stack_solve_algo=2, change stack_solve_algo.\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
#endif
|
|
size_t n = mxGetM(A_m);
|
|
const char *field_names[] = {"droptol", "type"};
|
|
mwSize dims[1] = { 1 };
|
|
mxArray *Setup = mxCreateStructArray(1, dims, 2, field_names);
|
|
mxSetFieldByNumber(Setup, 0, 0, mxCreateDoubleScalar(lu_inc_tol));
|
|
mxSetFieldByNumber(Setup, 0, 1, mxCreateString("ilutp"));
|
|
mxArray *lhs0[2];
|
|
mxArray *rhs0[2];
|
|
rhs0[0] = A_m;
|
|
rhs0[1] = Setup;
|
|
if (mexCallMATLAB(2, lhs0, 2, rhs0, "ilu"))
|
|
throw FatalExceptionHandling("In GMRES, the incomplet LU decomposition (ilu) ahs failed.");
|
|
mxArray *L1 = lhs0[0];
|
|
mxArray *U1 = lhs0[1];
|
|
/*[za,flag1] = gmres(g1a,b,Blck_size,1e-6,Blck_size*periods,L1,U1);*/
|
|
mxArray *rhs[8];
|
|
rhs[0] = A_m;
|
|
rhs[1] = b_m;
|
|
rhs[2] = mxCreateDoubleScalar(Size);
|
|
rhs[3] = mxCreateDoubleScalar(1e-6);
|
|
rhs[4] = mxCreateDoubleScalar((double) n);
|
|
rhs[5] = L1;
|
|
rhs[6] = U1;
|
|
rhs[7] = x0_m;
|
|
mxArray *lhs[2];
|
|
mexCallMATLAB(2, lhs, 8, rhs, "gmres");
|
|
mxArray *z = lhs[0];
|
|
mxArray *flag = lhs[1];
|
|
double *flag1 = mxGetPr(flag);
|
|
mxDestroyArray(rhs0[1]);
|
|
mxDestroyArray(rhs[2]);
|
|
mxDestroyArray(rhs[3]);
|
|
mxDestroyArray(rhs[4]);
|
|
mxDestroyArray(rhs[5]);
|
|
mxDestroyArray(rhs[6]);
|
|
if (*flag1 > 0)
|
|
{
|
|
ostringstream tmp;
|
|
if (*flag1 == 1)
|
|
{
|
|
tmp << "Error in bytecode: No convergence inside GMRES, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
else if (*flag1 == 2)
|
|
{
|
|
tmp << "Error in bytecode: Preconditioner is ill-conditioned, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
else if (*flag1 == 3)
|
|
{
|
|
tmp << "Error in bytecode: GMRES stagnated (Two consecutive iterates were the same.), in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
lu_inc_tol /= 10;
|
|
}
|
|
else
|
|
{
|
|
double *res = mxGetPr(z);
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc * yy;
|
|
}
|
|
}
|
|
mxDestroyArray(A_m);
|
|
mxDestroyArray(b_m);
|
|
mxDestroyArray(z);
|
|
mxDestroyArray(flag);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_Matlab_BiCGStab(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m, int preconditioner)
|
|
{
|
|
/* precond = 0 => Jacobi
|
|
precond = 1 => Incomplet LU decomposition*/
|
|
size_t n = mxGetM(A_m);
|
|
mxArray *L1, *U1, *Diag;
|
|
L1 = NULL;
|
|
U1 = NULL;
|
|
Diag = NULL;
|
|
|
|
mxArray *rhs0[4];
|
|
if (preconditioner == 0)
|
|
{
|
|
mxArray *lhs0[1];
|
|
rhs0[0] = A_m;
|
|
rhs0[1] = mxCreateDoubleScalar(0);
|
|
mexCallMATLAB(1, lhs0, 2, rhs0, "spdiags");
|
|
mxArray *tmp = lhs0[0];
|
|
double *tmp_val = mxGetPr(tmp);
|
|
Diag = mxCreateSparse(n, n, n, mxREAL);
|
|
mwIndex *Diag_i = mxGetIr(Diag);
|
|
mwIndex *Diag_j = mxGetJc(Diag);
|
|
double *Diag_val = mxGetPr(Diag);
|
|
for (size_t i = 0; i < n; i++)
|
|
{
|
|
Diag_val[i] = tmp_val[i];
|
|
Diag_j[i] = i;
|
|
Diag_i[i] = i;
|
|
}
|
|
Diag_j[n] = n;
|
|
}
|
|
else if (preconditioner == 1)
|
|
{
|
|
/*[L1, U1] = ilu(g1a=;*/
|
|
const char *field_names[] = {"type", "droptol", "milu", "udiag", "thresh"};
|
|
const int type = 0;
|
|
const int droptol = 1;
|
|
const int milu = 2;
|
|
const int udiag = 3;
|
|
const int thresh = 4;
|
|
mwSize dims[1] = {(mwSize) 1 };
|
|
mxArray *Setup = mxCreateStructArray(1, dims, 5, field_names);
|
|
mxSetFieldByNumber(Setup, 0, type, mxCreateString("ilutp"));
|
|
mxSetFieldByNumber(Setup, 0, droptol, mxCreateDoubleScalar(lu_inc_tol));
|
|
mxSetFieldByNumber(Setup, 0, milu, mxCreateString("off"));
|
|
mxSetFieldByNumber(Setup, 0, udiag, mxCreateDoubleScalar(0));
|
|
mxSetFieldByNumber(Setup, 0, thresh, mxCreateDoubleScalar(1));
|
|
mxArray *lhs0[2];
|
|
mxArray *rhs0[2];
|
|
rhs0[0] = A_m;
|
|
rhs0[1] = Setup;
|
|
if (mexCallMATLAB(2, lhs0, 2, rhs0, "ilu"))
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " In BiCGStab, the incomplet LU decomposition (ilu) ahs failed.\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
L1 = lhs0[0];
|
|
U1 = lhs0[1];
|
|
mxDestroyArray(Setup);
|
|
}
|
|
double flags = 2;
|
|
mxArray *z;
|
|
z = NULL;
|
|
if (steady_state) /*Octave BicStab algorihtm involves a 0 division in case of a preconditionner equal to the LU decomposition of A matrix*/
|
|
{
|
|
mxArray *res = mult_SAT_B(Sparse_transpose(A_m), x0_m);
|
|
double *resid = mxGetPr(res);
|
|
double *b = mxGetPr(b_m);
|
|
for (int i = 0; i < (int) n; i++)
|
|
resid[i] = b[i] - resid[i];
|
|
mxArray *rhs[2];
|
|
mxArray *lhs[1];
|
|
rhs[0] = L1;
|
|
rhs[1] = res;
|
|
mexCallMATLAB(1, lhs, 2, rhs, "mldivide");
|
|
rhs[0] = U1;
|
|
rhs[1] = lhs[0];
|
|
mexCallMATLAB(1, lhs, 2, rhs, "mldivide");
|
|
z = lhs[0];
|
|
double *phat = mxGetPr(z);
|
|
double *x0 = mxGetPr(x0_m);
|
|
for (int i = 0; i < (int) n; i++)
|
|
phat[i] = x0[i] + phat[i];
|
|
|
|
/*Check the solution*/
|
|
res = mult_SAT_B(Sparse_transpose(A_m), z);
|
|
resid = mxGetPr(res);
|
|
double cum_abs = 0;
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
resid[i] = b[i] - resid[i];
|
|
cum_abs += fabs(resid[i]);
|
|
}
|
|
if (cum_abs > 1e-7)
|
|
flags = 2;
|
|
else
|
|
flags = 0;
|
|
mxDestroyArray(res);
|
|
}
|
|
//else
|
|
|
|
if (flags == 2)
|
|
{
|
|
if (preconditioner == 0)
|
|
{
|
|
/*[za,flag1] = bicgstab(g1a,b,1e-6,Blck_size*periods,L1,U1);*/
|
|
mxArray *rhs[5];
|
|
rhs[0] = A_m;
|
|
rhs[1] = b_m;
|
|
rhs[2] = mxCreateDoubleScalar(1e-6);
|
|
rhs[3] = mxCreateDoubleScalar((double) n);
|
|
rhs[4] = Diag;
|
|
//rhs[5] = x0_m;
|
|
mxArray *lhs[2];
|
|
mexCallMATLAB(2, lhs, 5, rhs, "bicgstab");
|
|
z = lhs[0];
|
|
mxArray *flag = lhs[1];
|
|
double *flag1 = mxGetPr(flag);
|
|
flags = flag1[0];
|
|
mxDestroyArray(flag);
|
|
mxDestroyArray(rhs[2]);
|
|
mxDestroyArray(rhs[3]);
|
|
mxDestroyArray(rhs[4]);
|
|
}
|
|
else if (preconditioner == 1)
|
|
{
|
|
/*[za,flag1] = bicgstab(g1a,b,1e-6,Blck_size*periods,L1,U1);*/
|
|
mxArray *rhs[7];
|
|
rhs[0] = A_m;
|
|
rhs[1] = b_m;
|
|
rhs[2] = mxCreateDoubleScalar(1e-6);
|
|
rhs[3] = mxCreateDoubleScalar((double) n);
|
|
rhs[4] = L1;
|
|
rhs[5] = U1;
|
|
rhs[6] = x0_m;
|
|
mxArray *lhs[2];
|
|
mexCallMATLAB(2, lhs, 7, rhs, "bicgstab");
|
|
z = lhs[0];
|
|
mxArray *flag = lhs[1];
|
|
double *flag1 = mxGetPr(flag);
|
|
flags = flag1[0];
|
|
mxDestroyArray(flag);
|
|
mxDestroyArray(rhs[2]);
|
|
mxDestroyArray(rhs[3]);
|
|
mxDestroyArray(rhs[4]);
|
|
mxDestroyArray(rhs[5]);
|
|
}
|
|
}
|
|
|
|
if (flags > 0)
|
|
{
|
|
ostringstream tmp;
|
|
if (flags == 1)
|
|
{
|
|
tmp << "Error in bytecode: No convergence inside BiCGStab, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
else if (flags == 2)
|
|
{
|
|
tmp << "Error in bytecode: Preconditioner is ill-conditioned, in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
else if (flags == 3)
|
|
{
|
|
tmp << "Error in bytecode: BiCGStab stagnated (Two consecutive iterates were the same.), in block " << block+1;
|
|
mexWarnMsgTxt(tmp.str().c_str());
|
|
}
|
|
lu_inc_tol /= 10;
|
|
}
|
|
else
|
|
{
|
|
double *res = mxGetPr(z);
|
|
if (is_two_boundaries)
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i+Size*y_kmin];
|
|
double yy = -(res[i] + y[eq]);
|
|
direction[eq] = yy;
|
|
y[eq] += slowc * yy;
|
|
}
|
|
else
|
|
for (int i = 0; i < (int) n; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(res[i] + y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc * yy;
|
|
}
|
|
}
|
|
mxDestroyArray(A_m);
|
|
mxDestroyArray(b_m);
|
|
mxDestroyArray(z);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Singular_display(int block, int Size)
|
|
{
|
|
bool zero_solution;
|
|
Simple_Init(Size, IM_i, zero_solution);
|
|
NonZeroElem *first;
|
|
mxArray *rhs[1];
|
|
rhs[0] = mxCreateDoubleMatrix(Size, Size, mxREAL);
|
|
double *pind;
|
|
pind = mxGetPr(rhs[0]);
|
|
for (int j = 0; j < Size * Size; j++)
|
|
pind[j] = 0.0;
|
|
for (int ii = 0; ii < Size; ii++)
|
|
{
|
|
int nb_eq = At_Col(ii, &first);
|
|
for (int j = 0; j < nb_eq; j++)
|
|
{
|
|
int k = first->u_index;
|
|
int jj = first->r_index;
|
|
pind[ii * Size + jj ] = u[k];
|
|
first = first->NZE_C_N;
|
|
}
|
|
}
|
|
mxArray *lhs[3];
|
|
mexCallMATLAB(3, lhs, 1, rhs, "svd");
|
|
mxArray *SVD_u = lhs[0];
|
|
mxArray *SVD_s = lhs[1];
|
|
//mxArray* SVD_v = lhs[2];
|
|
double *SVD_ps = mxGetPr(SVD_s);
|
|
double *SVD_pu = mxGetPr(SVD_u);
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
if (abs(SVD_ps[i * (1 + Size)]) < 1e-12)
|
|
{
|
|
mexPrintf(" The following equations form a linear combination:\n ");
|
|
double max_u = 0;
|
|
for (int j = 0; j < Size; j++)
|
|
if (abs(SVD_pu[j + i * Size]) > abs(max_u))
|
|
max_u = SVD_pu[j + i * Size];
|
|
vector<int> equ_list;
|
|
for (int j = 0; j < Size; j++)
|
|
{
|
|
double rr = SVD_pu[j + i * Size] / max_u;
|
|
if (rr < -1e-10)
|
|
{
|
|
equ_list.push_back(j);
|
|
if (rr != -1)
|
|
mexPrintf(" - %3.2f*Dequ_%d_dy", abs(rr), j+1);
|
|
else
|
|
mexPrintf(" - Dequ_%d_dy", j+1);
|
|
}
|
|
else if (rr > 1e-10)
|
|
{
|
|
equ_list.push_back(j);
|
|
if (j > 0)
|
|
if (rr != 1)
|
|
mexPrintf(" + %3.2f*Dequ_%d_dy", rr, j+1);
|
|
else
|
|
mexPrintf(" + Dequ_%d_dy", j+1);
|
|
else if (rr != 1)
|
|
mexPrintf(" %3.2f*Dequ_%d_dy", rr, j+1);
|
|
else
|
|
mexPrintf(" Dequ_%d_dy", j+1);
|
|
}
|
|
}
|
|
mexPrintf(" = 0\n");
|
|
/*mexPrintf(" with:\n");
|
|
it_code = get_begin_block(block);
|
|
for (int j=0; j < Size; j++)
|
|
{
|
|
if (find(equ_list.begin(), equ_list.end(), j) != equ_list.end())
|
|
mexPrintf(" equ_%d: %s\n",j, print_expression(it_code_expr, false, Size, block, steady_state, 0, 0, it_code, true).c_str());
|
|
}*/
|
|
}
|
|
}
|
|
mxDestroyArray(lhs[0]);
|
|
mxDestroyArray(lhs[1]);
|
|
mxDestroyArray(lhs[2]);
|
|
ostringstream tmp;
|
|
if (block > 1)
|
|
tmp << " in Solve_ByteCode_Sparse_GaussianElimination, singular system in block " << block+1 << "\n";
|
|
else
|
|
tmp << " in Solve_ByteCode_Sparse_GaussianElimination, singular system\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
|
|
bool
|
|
dynSparseMatrix::Solve_ByteCode_Sparse_GaussianElimination(int Size, int blck, int it_)
|
|
{
|
|
bool one;
|
|
int pivj = 0, pivk = 0;
|
|
double *piv_v;
|
|
int *pivj_v, *pivk_v, *NR;
|
|
int l, N_max;
|
|
NonZeroElem *first, *firsta, *first_suba;
|
|
double piv_abs;
|
|
NonZeroElem **bc;
|
|
bc = (NonZeroElem **) mxMalloc(Size*sizeof(*bc));
|
|
test_mxMalloc(bc, __LINE__, __FILE__, __func__, Size*sizeof(*bc));
|
|
piv_v = (double *) mxMalloc(Size*sizeof(double));
|
|
test_mxMalloc(piv_v, __LINE__, __FILE__, __func__, Size*sizeof(double));
|
|
pivj_v = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivj_v, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
pivk_v = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivk_v, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
NR = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(NR, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
|
|
for (int i = 0; i < Size; i++)
|
|
{
|
|
/*finding the max-pivot*/
|
|
double piv = piv_abs = 0;
|
|
int nb_eq = At_Col(i, &first);
|
|
l = 0;
|
|
N_max = 0;
|
|
one = false;
|
|
piv_abs = 0;
|
|
for (int j = 0; j < nb_eq; j++)
|
|
{
|
|
if (!line_done[first->r_index])
|
|
{
|
|
int k = first->u_index;
|
|
int jj = first->r_index;
|
|
int NRow_jj = NRow(jj);
|
|
|
|
piv_v[l] = u[k];
|
|
double piv_fabs = fabs(u[k]);
|
|
pivj_v[l] = jj;
|
|
pivk_v[l] = k;
|
|
NR[l] = NRow_jj;
|
|
if (NRow_jj == 1 && !one)
|
|
{
|
|
one = true;
|
|
piv_abs = piv_fabs;
|
|
N_max = NRow_jj;
|
|
}
|
|
if (!one)
|
|
{
|
|
if (piv_fabs > piv_abs)
|
|
piv_abs = piv_fabs;
|
|
if (NRow_jj > N_max)
|
|
N_max = NRow_jj;
|
|
}
|
|
else
|
|
{
|
|
if (NRow_jj == 1)
|
|
{
|
|
if (piv_fabs > piv_abs)
|
|
piv_abs = piv_fabs;
|
|
if (NRow_jj > N_max)
|
|
N_max = NRow_jj;
|
|
}
|
|
}
|
|
l++;
|
|
}
|
|
first = first->NZE_C_N;
|
|
}
|
|
if (piv_abs < eps)
|
|
{
|
|
mxFree(piv_v);
|
|
mxFree(pivj_v);
|
|
mxFree(pivk_v);
|
|
mxFree(NR);
|
|
mxFree(bc);
|
|
if (steady_state)
|
|
{
|
|
if (blck > 1)
|
|
mexPrintf("Error: singular system in Simulate_NG in block %d\n", blck+1);
|
|
else
|
|
mexPrintf("Error: singular system in Simulate_NG\n");
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
ostringstream tmp;
|
|
if (blck > 1)
|
|
tmp << " in Solve_ByteCode_Sparse_GaussianElimination, singular system in block " << blck+1 << "\n";
|
|
else
|
|
tmp << " in Solve_ByteCode_Sparse_GaussianElimination, singular system\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
double markovitz = 0, markovitz_max = -9e70;
|
|
if (!one)
|
|
{
|
|
for (int j = 0; j < l; j++)
|
|
{
|
|
if (N_max > 0 && NR[j] > 0)
|
|
{
|
|
if (fabs(piv_v[j]) > 0)
|
|
{
|
|
if (markowitz_c > 0)
|
|
markovitz = exp(log(fabs(piv_v[j])/piv_abs)-markowitz_c*log(double (NR[j])/double (N_max)));
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
}
|
|
else
|
|
markovitz = 0;
|
|
}
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
if (markovitz > markovitz_max)
|
|
{
|
|
piv = piv_v[j];
|
|
pivj = pivj_v[j]; //Line number
|
|
pivk = pivk_v[j]; //positi
|
|
markovitz_max = markovitz;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int j = 0; j < l; j++)
|
|
{
|
|
if (N_max > 0 && NR[j] > 0)
|
|
{
|
|
if (fabs(piv_v[j]) > 0)
|
|
{
|
|
if (markowitz_c > 0)
|
|
markovitz = exp(log(fabs(piv_v[j])/piv_abs)-markowitz_c*log(double (NR[j])/double (N_max)));
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
}
|
|
else
|
|
markovitz = 0;
|
|
}
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
if (NR[j] == 1)
|
|
{
|
|
piv = piv_v[j];
|
|
pivj = pivj_v[j]; //Line number
|
|
pivk = pivk_v[j]; //positi
|
|
markovitz_max = markovitz;
|
|
}
|
|
}
|
|
}
|
|
pivot[i] = pivj;
|
|
pivotk[i] = pivk;
|
|
pivotv[i] = piv;
|
|
line_done[pivj] = true;
|
|
|
|
/*divide all the non zeros elements of the line pivj by the max_pivot*/
|
|
int nb_var = At_Row(pivj, &first);
|
|
for (int j = 0; j < nb_var; j++)
|
|
{
|
|
u[first->u_index] /= piv;
|
|
first = first->NZE_R_N;
|
|
}
|
|
u[b[pivj]] /= piv;
|
|
/*substract the elements on the non treated lines*/
|
|
nb_eq = At_Col(i, &first);
|
|
NonZeroElem *first_piva;
|
|
int nb_var_piva = At_Row(pivj, &first_piva);
|
|
int nb_eq_todo = 0;
|
|
for (int j = 0; j < nb_eq && first; j++)
|
|
{
|
|
if (!line_done[first->r_index])
|
|
bc[nb_eq_todo++] = first;
|
|
first = first->NZE_C_N;
|
|
}
|
|
//pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS")))
|
|
for (int j = 0; j < nb_eq_todo; j++)
|
|
{
|
|
first = bc[j];
|
|
int row = first->r_index;
|
|
double first_elem = u[first->u_index];
|
|
|
|
int nb_var_piv = nb_var_piva;
|
|
NonZeroElem *first_piv = first_piva;
|
|
NonZeroElem *first_sub;
|
|
int nb_var_sub = At_Row(row, &first_sub);
|
|
int l_sub = 0, l_piv = 0;
|
|
int sub_c_index = first_sub->c_index, piv_c_index = first_piv->c_index;
|
|
while (l_sub < nb_var_sub || l_piv < nb_var_piv)
|
|
{
|
|
if (l_sub < nb_var_sub && (sub_c_index < piv_c_index || l_piv >= nb_var_piv))
|
|
{
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size;
|
|
l_sub++;
|
|
}
|
|
else if (sub_c_index > piv_c_index || l_sub >= nb_var_sub)
|
|
{
|
|
int tmp_u_count = Get_u();
|
|
Insert(row, first_piv->c_index, tmp_u_count, 0);
|
|
u[tmp_u_count] = -u[first_piv->u_index]*first_elem;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size;
|
|
l_piv++;
|
|
}
|
|
else
|
|
{
|
|
if (i == sub_c_index)
|
|
{
|
|
firsta = first;
|
|
first_suba = first_sub->NZE_R_N;
|
|
Delete(first_sub->r_index, first_sub->c_index);
|
|
first = firsta->NZE_C_N;
|
|
first_sub = first_suba;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size;
|
|
l_piv++;
|
|
}
|
|
else
|
|
{
|
|
u[first_sub->u_index] -= u[first_piv->u_index]*first_elem;
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size;
|
|
l_piv++;
|
|
}
|
|
}
|
|
}
|
|
u[b[row]] -= u[b[pivj]]*first_elem;
|
|
}
|
|
}
|
|
double slowc_lbx = slowc;
|
|
for (int i = 0; i < y_size; i++)
|
|
ya[i+it_*y_size] = y[i+it_*y_size];
|
|
|
|
slowc_save = slowc;
|
|
simple_bksub(it_, Size, slowc_lbx);
|
|
End_GE(Size);
|
|
mxFree(piv_v);
|
|
mxFree(pivj_v);
|
|
mxFree(pivk_v);
|
|
mxFree(NR);
|
|
mxFree(bc);
|
|
return false;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Solve_ByteCode_Symbolic_Sparse_GaussianElimination(int Size, bool symbolic, int Block_number)
|
|
{
|
|
/*Triangularisation at each period of a block using a simple gaussian Elimination*/
|
|
t_save_op_s *save_op_s;
|
|
int *save_op = NULL, *save_opa = NULL, *save_opaa = NULL;
|
|
long int nop = 0, nopa = 0;
|
|
bool record = false;
|
|
double *piv_v;
|
|
double piv_abs;
|
|
int *pivj_v, *pivk_v, *NR;
|
|
int pivj = 0, pivk = 0;
|
|
NonZeroElem *first;
|
|
int tmp_u_count, lag;
|
|
int tbreak = 0, last_period = periods;
|
|
|
|
piv_v = (double *) mxMalloc(Size*sizeof(double));
|
|
test_mxMalloc(piv_v, __LINE__, __FILE__, __func__, Size*sizeof(double));
|
|
pivj_v = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivj_v, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
pivk_v = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(pivk_v, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
NR = (int *) mxMalloc(Size*sizeof(int));
|
|
test_mxMalloc(NR, __LINE__, __FILE__, __func__, Size*sizeof(int));
|
|
//clock_t time00 = clock();
|
|
NonZeroElem **bc;
|
|
bc = (NonZeroElem **) mxMalloc(Size*sizeof(first));
|
|
test_mxMalloc(bc, __LINE__, __FILE__, __func__, Size*sizeof(first));
|
|
|
|
for (int t = 0; t < periods; t++)
|
|
{
|
|
/*clock_t time11 = clock();
|
|
mexPrintf("t=%d, record = %d\n",t, record);*/
|
|
#ifdef MATLAB_MEX_FILE
|
|
if (utIsInterruptPending())
|
|
throw UserExceptionHandling();
|
|
#endif
|
|
|
|
if (record && symbolic)
|
|
{
|
|
/*if (save_op)
|
|
{
|
|
mxFree(save_op);
|
|
save_op = NULL;
|
|
}*/
|
|
save_op = (int *) mxMalloc(nop*sizeof(int));
|
|
test_mxMalloc(save_op, __LINE__, __FILE__, __func__, nop*sizeof(int));
|
|
nopa = nop;
|
|
}
|
|
nop = 0;
|
|
Clear_u();
|
|
int ti = t*Size;
|
|
for (int i = ti; i < Size+ti; i++)
|
|
{
|
|
/*finding the max-pivot*/
|
|
double piv = piv_abs = 0;
|
|
int nb_eq = At_Col(i, 0, &first);
|
|
if ((symbolic && t <= start_compare) || !symbolic)
|
|
{
|
|
int l = 0, N_max = 0;
|
|
bool one = false;
|
|
piv_abs = 0;
|
|
for (int j = 0; j < nb_eq; j++)
|
|
{
|
|
if (!line_done[first->r_index])
|
|
{
|
|
int k = first->u_index;
|
|
int jj = first->r_index;
|
|
int NRow_jj = NRow(jj);
|
|
piv_v[l] = u[k];
|
|
double piv_fabs = fabs(u[k]);
|
|
pivj_v[l] = jj;
|
|
pivk_v[l] = k;
|
|
NR[l] = NRow_jj;
|
|
if (NRow_jj == 1 && !one)
|
|
{
|
|
one = true;
|
|
piv_abs = piv_fabs;
|
|
N_max = NRow_jj;
|
|
}
|
|
if (!one)
|
|
{
|
|
if (piv_fabs > piv_abs)
|
|
piv_abs = piv_fabs;
|
|
if (NRow_jj > N_max)
|
|
N_max = NRow_jj;
|
|
}
|
|
else
|
|
{
|
|
if (NRow_jj == 1)
|
|
{
|
|
if (piv_fabs > piv_abs)
|
|
piv_abs = piv_fabs;
|
|
if (NRow_jj > N_max)
|
|
N_max = NRow_jj;
|
|
}
|
|
}
|
|
l++;
|
|
}
|
|
first = first->NZE_C_N;
|
|
}
|
|
double markovitz = 0, markovitz_max = -9e70;
|
|
int NR_max = 0;
|
|
if (!one)
|
|
{
|
|
for (int j = 0; j < l; j++)
|
|
{
|
|
if (N_max > 0 && NR[j] > 0)
|
|
{
|
|
if (fabs(piv_v[j]) > 0)
|
|
{
|
|
if (markowitz_c > 0)
|
|
markovitz = exp(log(fabs(piv_v[j])/piv_abs)-markowitz_c*log(double (NR[j])/double (N_max)));
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
}
|
|
else
|
|
markovitz = 0;
|
|
}
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
if (markovitz > markovitz_max)
|
|
{
|
|
piv = piv_v[j];
|
|
pivj = pivj_v[j]; //Line number
|
|
pivk = pivk_v[j]; //positi
|
|
markovitz_max = markovitz;
|
|
NR_max = NR[j];
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int j = 0; j < l; j++)
|
|
{
|
|
if (N_max > 0 && NR[j] > 0)
|
|
{
|
|
if (fabs(piv_v[j]) > 0)
|
|
{
|
|
if (markowitz_c > 0)
|
|
markovitz = exp(log(fabs(piv_v[j])/piv_abs)-markowitz_c*log(double (NR[j])/double (N_max)));
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
}
|
|
else
|
|
markovitz = 0;
|
|
}
|
|
else
|
|
markovitz = fabs(piv_v[j])/piv_abs;
|
|
if (NR[j] == 1)
|
|
{
|
|
piv = piv_v[j];
|
|
pivj = pivj_v[j]; //Line number
|
|
pivk = pivk_v[j]; //positi
|
|
markovitz_max = markovitz;
|
|
NR_max = NR[j];
|
|
}
|
|
}
|
|
}
|
|
if (fabs(piv) < eps)
|
|
mexPrintf("==> Error NR_max=%d, N_max=%d and piv=%f, piv_abs=%f, markovitz_max=%f\n", NR_max, N_max, piv, piv_abs, markovitz_max);
|
|
if (NR_max == 0)
|
|
mexPrintf("==> Error NR_max=0 and piv=%f, markovitz_max=%f\n", piv, markovitz_max);
|
|
pivot[i] = pivj;
|
|
pivot_save[i] = pivj;
|
|
pivotk[i] = pivk;
|
|
pivotv[i] = piv;
|
|
}
|
|
else
|
|
{
|
|
pivj = pivot[i-Size]+Size;
|
|
pivot[i] = pivj;
|
|
At_Pos(pivj, i, &first);
|
|
pivk = first->u_index;
|
|
piv = u[pivk];
|
|
piv_abs = fabs(piv);
|
|
}
|
|
line_done[pivj] = true;
|
|
|
|
if (record && symbolic)
|
|
{
|
|
if (nop+1 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s->operat = IFLD;
|
|
save_op_s->first = pivk;
|
|
save_op_s->lag = 0;
|
|
nop += 2;
|
|
if (piv_abs < eps)
|
|
{
|
|
ostringstream tmp;
|
|
if (Block_number > 1)
|
|
tmp << " in Solve_ByteCode_Symbolic_Sparse_GaussianElimination, singular system in block " << Block_number+1 << "\n";
|
|
else
|
|
tmp << " in Solve_ByteCode_Symbolic_Sparse_GaussianElimination, singular system\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
/*divide all the non zeros elements of the line pivj by the max_pivot*/
|
|
int nb_var = At_Row(pivj, &first);
|
|
for (int j = 0; j < nb_var; j++)
|
|
{
|
|
u[first->u_index] /= piv;
|
|
if (nop+j*2+1 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s = (t_save_op_s *) (&(save_op[nop+j*2]));
|
|
save_op_s->operat = IFDIV;
|
|
save_op_s->first = first->u_index;
|
|
save_op_s->lag = first->lag_index;
|
|
first = first->NZE_R_N;
|
|
}
|
|
nop += nb_var*2;
|
|
u[b[pivj]] /= piv;
|
|
if (nop+1 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s->operat = IFDIV;
|
|
save_op_s->first = b[pivj];
|
|
save_op_s->lag = 0;
|
|
nop += 2;
|
|
/*substract the elements on the non treated lines*/
|
|
nb_eq = At_Col(i, &first);
|
|
NonZeroElem *first_piva;
|
|
int nb_var_piva = At_Row(pivj, &first_piva);
|
|
|
|
int nb_eq_todo = 0;
|
|
for (int j = 0; j < nb_eq && first; j++)
|
|
{
|
|
if (!line_done[first->r_index])
|
|
bc[nb_eq_todo++] = first;
|
|
first = first->NZE_C_N;
|
|
}
|
|
//#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
|
|
for (int j = 0; j < nb_eq_todo; j++)
|
|
{
|
|
t_save_op_s *save_op_s_l;
|
|
NonZeroElem *first = bc[j];
|
|
int row = first->r_index;
|
|
double first_elem = u[first->u_index];
|
|
if (nop+1 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s_l = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s_l->operat = IFLD;
|
|
save_op_s_l->first = first->u_index;
|
|
save_op_s_l->lag = abs(first->lag_index);
|
|
nop += 2;
|
|
|
|
int nb_var_piv = nb_var_piva;
|
|
NonZeroElem *first_piv = first_piva;
|
|
NonZeroElem *first_sub;
|
|
int nb_var_sub = At_Row(row, &first_sub);
|
|
int l_sub = 0;
|
|
int l_piv = 0;
|
|
int sub_c_index = first_sub->c_index;
|
|
int piv_c_index = first_piv->c_index;
|
|
int tmp_lag = first_sub->lag_index;
|
|
while (l_sub < (nb_var_sub /*=NRow(row)*/) || l_piv < nb_var_piv)
|
|
{
|
|
if (l_sub < nb_var_sub && (sub_c_index < piv_c_index || l_piv >= nb_var_piv))
|
|
{
|
|
//There is no nonzero element at row pivot for this column=> Nothing to do for the current element got to next column
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
}
|
|
else if (sub_c_index > piv_c_index || l_sub >= nb_var_sub)
|
|
{
|
|
// There is an nonzero element at row pivot but not at the current row=> insert a negative element in the current row
|
|
tmp_u_count = Get_u();
|
|
lag = first_piv->c_index/Size-row/Size;
|
|
//#pragma omp critical
|
|
{
|
|
Insert(row, first_piv->c_index, tmp_u_count, lag);
|
|
}
|
|
u[tmp_u_count] = -u[first_piv->u_index]*first_elem;
|
|
if (nop+2 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s_l = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s_l->operat = IFLESS;
|
|
save_op_s_l->first = tmp_u_count;
|
|
save_op_s_l->second = first_piv->u_index;
|
|
save_op_s_l->lag = max(first_piv->lag_index, abs(tmp_lag));
|
|
nop += 3;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
else /*first_sub->c_index==first_piv->c_index*/
|
|
{
|
|
if (i == sub_c_index)
|
|
{
|
|
NonZeroElem *firsta = first;
|
|
NonZeroElem *first_suba = first_sub->NZE_R_N;
|
|
//#pragma omp critical
|
|
{
|
|
Delete(first_sub->r_index, first_sub->c_index);
|
|
}
|
|
first = firsta->NZE_C_N;
|
|
first_sub = first_suba;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
else
|
|
{
|
|
u[first_sub->u_index] -= u[first_piv->u_index]*first_elem;
|
|
if (nop+3 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s_l = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s_l->operat = IFSUB;
|
|
save_op_s_l->first = first_sub->u_index;
|
|
save_op_s_l->second = first_piv->u_index;
|
|
save_op_s_l->lag = max(abs(tmp_lag), first_piv->lag_index);
|
|
nop += 3;
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
}
|
|
}
|
|
u[b[row]] -= u[b[pivj]]*first_elem;
|
|
|
|
if (nop+3 >= nopa)
|
|
{
|
|
nopa = long (mem_increasing_factor*(double) nopa);
|
|
save_op = (int *) mxRealloc(save_op, nopa*sizeof(int));
|
|
}
|
|
save_op_s_l = (t_save_op_s *) (&(save_op[nop]));
|
|
save_op_s_l->operat = IFSUB;
|
|
save_op_s_l->first = b[row];
|
|
save_op_s_l->second = b[pivj];
|
|
save_op_s_l->lag = abs(tmp_lag);
|
|
nop += 3;
|
|
}
|
|
}
|
|
else if (symbolic)
|
|
{
|
|
nop += 2;
|
|
if (piv_abs < eps)
|
|
{
|
|
ostringstream tmp;
|
|
if (Block_number > 1)
|
|
tmp << " in Solve_ByteCode_Symbolic_Sparse_GaussianElimination, singular system in block " << Block_number+1 << "\n";
|
|
else
|
|
tmp << " in Solve_ByteCode_Symbolic_Sparse_GaussianElimination, singular system\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
/*divide all the non zeros elements of the line pivj by the max_pivot*/
|
|
int nb_var = At_Row(pivj, &first);
|
|
for (int j = 0; j < nb_var; j++)
|
|
{
|
|
u[first->u_index] /= piv;
|
|
first = first->NZE_R_N;
|
|
}
|
|
nop += nb_var*2;
|
|
u[b[pivj]] /= piv;
|
|
nop += 2;
|
|
/*substract the elements on the non treated lines*/
|
|
nb_eq = At_Col(i, &first);
|
|
NonZeroElem *first_piva;
|
|
int nb_var_piva = At_Row(pivj, &first_piva);
|
|
|
|
int nb_eq_todo = 0;
|
|
for (int j = 0; j < nb_eq && first; j++)
|
|
{
|
|
if (!line_done[first->r_index])
|
|
bc[nb_eq_todo++] = first;
|
|
first = first->NZE_C_N;
|
|
}
|
|
//#pragma omp parallel for num_threads(atoi(getenv("DYNARE_NUM_THREADS"))) shared(nb_var_piva, first_piva, nopa, save_op) reduction(+:nop)
|
|
for (int j = 0; j < nb_eq_todo; j++)
|
|
{
|
|
NonZeroElem *first = bc[j];
|
|
int row = first->r_index;
|
|
double first_elem = u[first->u_index];
|
|
nop += 2;
|
|
int nb_var_piv = nb_var_piva;
|
|
NonZeroElem *first_piv = first_piva;
|
|
NonZeroElem *first_sub;
|
|
int nb_var_sub = At_Row(row, &first_sub);
|
|
int l_sub = 0;
|
|
int l_piv = 0;
|
|
int sub_c_index = first_sub->c_index;
|
|
int piv_c_index = first_piv->c_index;
|
|
while (l_sub < (nb_var_sub /*= NRow(row)*/) || l_piv < nb_var_piv)
|
|
{
|
|
if (l_sub < nb_var_sub && (sub_c_index < piv_c_index || l_piv >= nb_var_piv))
|
|
{
|
|
//There is no nonzero element at row pivot for this column=> Nothing to do for the current element got to next column
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
}
|
|
else if (sub_c_index > piv_c_index || l_sub >= nb_var_sub)
|
|
{
|
|
// There is an nonzero element at row pivot but not at the current row=> insert a negative element in the current row
|
|
tmp_u_count = Get_u();
|
|
lag = first_piv->c_index/Size-row/Size;
|
|
//#pragma omp critical
|
|
{
|
|
Insert(row, first_piv->c_index, tmp_u_count, lag);
|
|
}
|
|
u[tmp_u_count] = -u[first_piv->u_index]*first_elem;
|
|
nop += 3;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
else /*first_sub->c_index==first_piv->c_index*/
|
|
{
|
|
if (i == sub_c_index)
|
|
{
|
|
NonZeroElem *firsta = first;
|
|
NonZeroElem *first_suba = first_sub->NZE_R_N;
|
|
//#pragma omp critical
|
|
{
|
|
Delete(first_sub->r_index, first_sub->c_index);
|
|
}
|
|
first = firsta->NZE_C_N;
|
|
first_sub = first_suba;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
else
|
|
{
|
|
u[first_sub->u_index] -= u[first_piv->u_index]*first_elem;
|
|
nop += 3;
|
|
first_sub = first_sub->NZE_R_N;
|
|
if (first_sub)
|
|
sub_c_index = first_sub->c_index;
|
|
else
|
|
sub_c_index = Size*periods;
|
|
l_sub++;
|
|
first_piv = first_piv->NZE_R_N;
|
|
if (first_piv)
|
|
piv_c_index = first_piv->c_index;
|
|
else
|
|
piv_c_index = Size*periods;
|
|
l_piv++;
|
|
}
|
|
}
|
|
}
|
|
u[b[row]] -= u[b[pivj]]*first_elem;
|
|
nop += 3;
|
|
}
|
|
}
|
|
}
|
|
if (symbolic)
|
|
{
|
|
if (t > int (periods*0.35))
|
|
{
|
|
symbolic = false;
|
|
mxFree(save_opaa);
|
|
mxFree(save_opa);
|
|
mxFree(save_op);
|
|
}
|
|
else if (record && (nop == nop1))
|
|
{
|
|
if (t > int (periods*0.35))
|
|
{
|
|
symbolic = false;
|
|
if (save_opaa)
|
|
{
|
|
mxFree(save_opaa);
|
|
save_opaa = NULL;
|
|
}
|
|
if (save_opa)
|
|
{
|
|
mxFree(save_opa);
|
|
save_opa = NULL;
|
|
}
|
|
if (save_op)
|
|
{
|
|
mxFree(save_op);
|
|
save_op = NULL;
|
|
}
|
|
}
|
|
else if (save_opa && save_opaa)
|
|
{
|
|
if (compare(save_op, save_opa, save_opaa, t, periods, nop, Size))
|
|
{
|
|
tbreak = t;
|
|
tbreak_g = tbreak;
|
|
//mexPrintf("time=%f\n",(1000.0*(double (clock())-double (time11)))/double (CLOCKS_PER_SEC));
|
|
break;
|
|
}
|
|
}
|
|
if (save_opa)
|
|
{
|
|
if (save_opaa)
|
|
{
|
|
mxFree(save_opaa);
|
|
save_opaa = NULL;
|
|
}
|
|
save_opaa = save_opa;
|
|
}
|
|
save_opa = save_op;
|
|
}
|
|
else
|
|
{
|
|
if (nop == nop1)
|
|
record = true;
|
|
else
|
|
{
|
|
record = false;
|
|
if (save_opa)
|
|
{
|
|
mxFree(save_opa);
|
|
save_opa = NULL;
|
|
}
|
|
if (save_opaa)
|
|
{
|
|
mxFree(save_opaa);
|
|
save_opaa = NULL;
|
|
}
|
|
}
|
|
}
|
|
nop2 = nop1;
|
|
nop1 = nop;
|
|
}
|
|
//mexPrintf("time=%f\n",(1000.0*(double (clock())-double (time11)))/double (CLOCKS_PER_SEC));
|
|
}
|
|
mxFree(bc);
|
|
mxFree(piv_v);
|
|
mxFree(pivj_v);
|
|
mxFree(pivk_v);
|
|
mxFree(NR);
|
|
/*mexPrintf("tbreak=%d, periods=%d time required=%f\n",tbreak,periods, (1000.0*(double (clock())-double (time00)))/double (CLOCKS_PER_SEC));
|
|
mexEvalString("drawnow;");
|
|
time00 = clock();*/
|
|
nop_all += nop;
|
|
if (symbolic)
|
|
{
|
|
if (save_op)
|
|
mxFree(save_op);
|
|
if (save_opa)
|
|
mxFree(save_opa);
|
|
if (save_opaa)
|
|
mxFree(save_opaa);
|
|
}
|
|
|
|
/*The backward substitution*/
|
|
double slowc_lbx = slowc;
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
ya[i] = y[i];
|
|
slowc_save = slowc;
|
|
bksub(tbreak, last_period, Size, slowc_lbx);
|
|
/*mexPrintf("remaining operations and bksub time required=%f\n",tbreak,periods, (1000.0*(double (clock())-double (time00)))/double (CLOCKS_PER_SEC));
|
|
mexEvalString("drawnow;");*/
|
|
End_GE(Size);
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Grad_f_product(int n, mxArray *b_m, double *vectr, mxArray *A_m, SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b_)
|
|
{
|
|
if ((solve_algo == 5 && steady_state) || (stack_solve_algo == 5 && !steady_state))
|
|
{
|
|
NonZeroElem *first;
|
|
for (int i = 0; i < n; i++)
|
|
{
|
|
double sum = 0;
|
|
first = FNZE_R[i];
|
|
if (first)
|
|
for (int k = 0; k < NbNZRow[i]; k++)
|
|
{
|
|
sum += u[first->u_index] * u[b[first->c_index]];
|
|
first = first->NZE_R_N;
|
|
}
|
|
vectr[i] = sum;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (!((solve_algo == 6 && steady_state) || ((stack_solve_algo == 0 || stack_solve_algo == 1 || stack_solve_algo == 4) && !steady_state)))
|
|
{
|
|
mwIndex *Ai = mxGetIr(A_m);
|
|
if (!Ai)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't allocate Ai index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
mwIndex *Aj = mxGetJc(A_m);
|
|
if (!Aj)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't allocate Aj index vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
double *A = mxGetPr(A_m);
|
|
if (!A)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve A matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
b_ = mxGetPr(b_m);
|
|
if (!b_)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Init_Matlab_Sparse_Simple, can't retrieve b matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
memset(vectr, 0, n * sizeof(double));
|
|
for (int i = 0; i < n; i++)
|
|
for (SuiteSparse_long j = Ap[i]; j < Ap[i+1]; j++)
|
|
vectr[Ai[j]] += Ax[j] * b_[i];
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Check_and_Correct_Previous_Iteration(int block_num, int y_size, int size, double crit_opt_old)
|
|
{
|
|
double top = 1.0;
|
|
double bottom = 0.1;
|
|
if (isnan(res1) || isinf(res1) || (res2 > g0 && iter > 0))
|
|
{
|
|
while ((isnan(res1) || isinf(res1)))
|
|
{
|
|
prev_slowc_save = slowc_save;
|
|
slowc_save /= 1.1;
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
y[eq+it_*y_size] = ya[eq+it_*y_size] + slowc_save * direction[eq+it_*y_size];
|
|
}
|
|
/*mexPrintf("reducing solwc_save = %e, it_=%d, y_size=%d, size=%d, y[%d]=%e, ya[%d]=%e,\n y[%d]=%e, ya[%d]=%e\n",slowc_save, it_, y_size, size-1, index_vara[0]+it_*y_size, y[index_vara[0]+it_*y_size], index_vara[0]+it_*y_size, ya[index_vara[0]+it_*y_size]
|
|
, index_vara[size-1]+it_*y_size, y[index_vara[size-1]+it_*y_size], index_vara[size-1]+it_*y_size, ya[index_vara[size-1]+it_*y_size]);*/
|
|
//mexPrintf("->slowc_save=%f\n",slowc_save);
|
|
compute_complete(true, res1, res2, max_res, max_res_idx);
|
|
}
|
|
|
|
while (res2 > g0 && slowc_save > 1e-1)
|
|
{
|
|
prev_slowc_save = slowc_save;
|
|
slowc_save /= 1.5;
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
y[eq+it_*y_size] = ya[eq+it_*y_size] + slowc_save * direction[eq+it_*y_size];
|
|
}
|
|
/*mexPrintf("reducing solwc_save = %e, it_=%d, y_size=%d, size=%d, y[%d]=%e, ya[%d]=%e,\n y[%d]=%e, ya[%d]=%e\n",slowc_save, it_, y_size, size-1, index_vara[0]+it_*y_size, y[index_vara[0]+it_*y_size], index_vara[0]+it_*y_size, ya[index_vara[0]+it_*y_size] , index_vara[size-1]+it_*y_size, y[index_vara[size-1]+it_*y_size], index_vara[size-1]+it_*y_size, ya[index_vara[size-1]+it_*y_size]);*/
|
|
//mexPrintf("->slowc_save=%f\n",slowc_save);
|
|
compute_complete(true, res1, res2, max_res, max_res_idx);
|
|
}
|
|
double ax = slowc_save-0.001, bx = slowc_save+0.001, cx = slowc_save, fa, fb, fc, xmin;
|
|
if (false /*slowc_save > 2e-1*/)
|
|
if (mnbrak(&ax, &bx, &cx, &fa, &fb, &fc))
|
|
if (golden(ax, bx, cx, 1e-1, solve_tolf, &xmin))
|
|
slowc_save = xmin;
|
|
//mexPrintf("cx=%f\n", cx);
|
|
//mexPrintf("ax= %f, bx=%f, cx=%f, fa=%f, fb=%f, fc=%d\n", ax, bx, cx, fa, fb, fc);
|
|
|
|
//if (!(isnan(res1) || isinf(res1))/* && !(isnan(g0) || isinf(g0))*//*|| (res2 > g0 && iter > 1)*/)
|
|
if (false)
|
|
{
|
|
|
|
double *p = (double *) mxMalloc(size * sizeof(double));
|
|
test_mxMalloc(p, __LINE__, __FILE__, __func__, size * sizeof(double));
|
|
Grad_f_product(size, b_m_save, p, A_m_save, Ap_save, Ai_save, Ax_save, b_save);
|
|
double slope = 0.0;
|
|
for (int i = 1; i < size; i++)
|
|
slope += -direction[i] * p[i];
|
|
/*if (slope > 0)
|
|
mexPrintf("Roundoff in lnsearch\n");
|
|
else*/
|
|
{
|
|
prev_slowc_save = 1;
|
|
double crit_opt = res2/2;
|
|
double max_try_iteration = 100;
|
|
double small_ = 1.0e-4;
|
|
bool try_at_cvg = false;
|
|
while ((try_at_iteration < max_try_iteration) && (!try_at_cvg) && (abs(prev_slowc_save - slowc_save) > 1e-10))
|
|
{
|
|
crit_opt = res2 / 2;
|
|
if (slowc_save < 1e-7)
|
|
{
|
|
try_at_cvg = true;
|
|
continue;
|
|
}
|
|
else if ((crit_opt <= crit_opt_old + small_ * slowc_save * slope) && !(isnan(res1) || isinf(res1)))
|
|
{
|
|
try_at_cvg = true;
|
|
continue;
|
|
}
|
|
else if (try_at_iteration == 0)
|
|
{
|
|
prev_slowc_save = slowc_save;
|
|
//slowc_save = max(- top * slope / ( (crit_opt - crit_opt_old - slope)), bottom);
|
|
slowc_save /= 1.2;
|
|
}
|
|
else
|
|
{
|
|
double t1 = crit_opt - slope * slowc_save - crit_opt_old;
|
|
double t2 = glambda2 - slope * prev_slowc_save - crit_opt_old;
|
|
double a = (1/(slowc_save * slowc_save) * t1 - 1/(prev_slowc_save * prev_slowc_save) * t2) / (slowc_save - prev_slowc_save);
|
|
double b = (-prev_slowc_save/(slowc_save * slowc_save) * t1 + slowc_save/(prev_slowc_save * prev_slowc_save) * t2) / (slowc_save - prev_slowc_save);
|
|
if (a == 0)
|
|
slowc_save = max(min(-slope/(2 * b), top * slowc_save), bottom * slowc_save);
|
|
else
|
|
{
|
|
double delta = b*b - 3 * a * slope;
|
|
if (delta <= 0)
|
|
slowc_save = top * slowc_save;
|
|
else if (b <= 0)
|
|
slowc_save = max(min(-b + sqrt(delta) / (3 * a), top * slowc_save), bottom * slowc_save);
|
|
else
|
|
slowc_save = max(min(-slope / (b + sqrt(delta)), top * slowc_save), bottom * slowc_save);
|
|
}
|
|
}
|
|
if (abs(prev_slowc_save - slowc_save) < 1e-10)
|
|
slowc_save /= 1.1;
|
|
//mexPrintf("=>slowc_save=%f, prev_slowc_save=%f\n",slowc_save, prev_slowc_save);
|
|
prev_slowc_save = slowc_save;
|
|
glambda2 = crit_opt;
|
|
try_at_iteration++;
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
y[eq+it_*y_size] = ya[eq+it_*y_size] + slowc_save * direction[eq+it_*y_size];
|
|
}
|
|
compute_complete(true, res1, res2, max_res, max_res_idx);
|
|
}
|
|
}
|
|
mxFree(p);
|
|
}
|
|
//if (print_it)
|
|
mexPrintf("Error: Simulation diverging, trying to correct it using slowc=%f\n", slowc_save);
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
y[eq+it_*y_size] = ya[eq+it_*y_size] + slowc_save * direction[eq+it_*y_size];
|
|
}
|
|
compute_complete(false, res1, res2, max_res, max_res_idx);
|
|
}
|
|
else
|
|
{
|
|
//mexPrintf("slowc_save=%f res1=%f\n",slowc_save, res1);
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
y[eq+it_*y_size] = ya[eq+it_*y_size] + slowc_save * direction[eq+it_*y_size];
|
|
}
|
|
}
|
|
slowc_save = slowc;
|
|
}
|
|
|
|
bool
|
|
dynSparseMatrix::Simulate_One_Boundary(int block_num, int y_size, int y_kmin, int y_kmax, int size, bool cvg)
|
|
{
|
|
//int i;
|
|
mxArray *b_m = NULL, *A_m = NULL, *x0_m = NULL;
|
|
SuiteSparse_long *Ap = NULL, *Ai = NULL;
|
|
double *Ax = NULL, *b = NULL;
|
|
int preconditioner = 1;
|
|
|
|
try_at_iteration = 0;
|
|
Clear_u();
|
|
bool singular_system = false;
|
|
u_count_alloc_save = u_count_alloc;
|
|
|
|
if (isnan(res1) || isinf(res1))
|
|
{
|
|
#ifdef DEBUG
|
|
for (int j = 0; j < y_size; j++)
|
|
{
|
|
bool select = false;
|
|
for (int i = 0; i < size; i++)
|
|
if (j == index_vara[i])
|
|
{
|
|
select = true;
|
|
break;
|
|
}
|
|
if (select)
|
|
mexPrintf("-> variable %s (%d) at time %d = %f direction = %f\n", get_variable(eEndogenous, j).c_str(), j+1, it_, y[j+it_*y_size], direction[j+it_*y_size]);
|
|
else
|
|
mexPrintf(" variable %s (%d) at time %d = %f direction = %f\n", get_variable(eEndogenous, j).c_str(), j+1, it_, y[j+it_*y_size], direction[j+it_*y_size]);
|
|
}
|
|
#endif
|
|
if (steady_state)
|
|
{
|
|
if (iter == 0)
|
|
mexPrintf(" the initial values of endogenous variables are too far from the solution.\nChange them!\n");
|
|
else
|
|
mexPrintf(" dynare cannot improve the simulation in block %d at time %d (variable %d)\n", block_num+1, it_+1, index_vara[max_res_idx]+1);
|
|
mexEvalString("drawnow;");
|
|
//return singular_system;
|
|
}
|
|
else
|
|
{
|
|
ostringstream tmp;
|
|
if (iter == 0)
|
|
tmp << " in Simulate_One_Boundary, The initial values of endogenous variables are too far from the solution.\nChange them!\n";
|
|
else
|
|
tmp << " in Simulate_One_Boundary, Dynare cannot improve the simulation in block " << block_num+1 << " at time " << it_+1 << " (variable " << index_vara[max_res_idx]+1 << "%d)\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
if (print_it)
|
|
{
|
|
if (steady_state)
|
|
{
|
|
switch (solve_algo)
|
|
{
|
|
case 0:
|
|
mexPrintf("MODEL STEADY STATE: MATLAB fsolve\n");
|
|
break;
|
|
case 1:
|
|
mexPrintf("MODEL STEADY STATE: MATLAB solve1\n");
|
|
break;
|
|
case 2:
|
|
case 4:
|
|
mexPrintf("MODEL STEADY STATE: block decomposition + MATLAB solve1\n");
|
|
break;
|
|
case 3:
|
|
mexPrintf("MODEL STEADY STATE: MATLAB csolve\n");
|
|
break;
|
|
case 5:
|
|
mexPrintf("MODEL STEADY STATE: (method=ByteCode own solver)\n");
|
|
break;
|
|
case 6:
|
|
mexPrintf("MODEL STEADY STATE: Sparse LU\n");
|
|
break;
|
|
case 7:
|
|
mexPrintf(preconditioner_print_out("MODEL STEADY STATE: (method=GMRES)\n", preconditioner, true).c_str());
|
|
//mexPrintf("MODEL STEADY STATE: (method=GMRES)\n");
|
|
break;
|
|
case 8:
|
|
mexPrintf(preconditioner_print_out("MODEL STEADY STATE: (method=BiCGStab)\n", preconditioner, true).c_str());
|
|
//mexPrintf("MODEL STEADY STATE: (method=BiCGStab)\n");
|
|
break;
|
|
default:
|
|
mexPrintf("MODEL STEADY STATE: (method=Unknown - %d - )\n", stack_solve_algo);
|
|
}
|
|
}
|
|
|
|
mexPrintf("-----------------------------------\n");
|
|
mexPrintf(" Simulate iteration no %d \n", iter+1);
|
|
mexPrintf(" max. error=%.10e \n", double (max_res));
|
|
mexPrintf(" sqr. error=%.10e \n", double (res2));
|
|
mexPrintf(" abs. error=%.10e \n", double (res1));
|
|
mexPrintf("-----------------------------------\n");
|
|
}
|
|
bool zero_solution;
|
|
|
|
if ((solve_algo == 5 && steady_state) || (stack_solve_algo == 5 && !steady_state))
|
|
Simple_Init(size, IM_i, zero_solution);
|
|
else
|
|
{
|
|
b_m = mxCreateDoubleMatrix(size, 1, mxREAL);
|
|
if (!b_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_One_Boundary, can't allocate b_m vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
A_m = mxCreateSparse(size, size, min(int (IM_i.size()*2), size * size), mxREAL);
|
|
if (!A_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_One_Boundary, can't allocate A_m matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
x0_m = mxCreateDoubleMatrix(size, 1, mxREAL);
|
|
if (!x0_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_One_Boundary, can't allocate x0_m vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (!((solve_algo == 6 && steady_state) || ((stack_solve_algo == 0 || stack_solve_algo == 4) && !steady_state)))
|
|
{
|
|
Init_Matlab_Sparse_Simple(size, IM_i, A_m, b_m, zero_solution, x0_m);
|
|
A_m_save = mxDuplicateArray(A_m);
|
|
b_m_save = mxDuplicateArray(b_m);
|
|
}
|
|
else
|
|
{
|
|
Init_UMFPACK_Sparse_Simple(size, IM_i, &Ap, &Ai, &Ax, &b, zero_solution, x0_m);
|
|
if (Ap_save[size] != Ap[size])
|
|
{
|
|
mxFree(Ai_save);
|
|
mxFree(Ax_save);
|
|
Ai_save = (SuiteSparse_long *) mxMalloc(Ap[size] * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(Ai_save, __LINE__, __FILE__, __func__, Ap[size] * sizeof(SuiteSparse_long));
|
|
Ax_save = (double *) mxMalloc(Ap[size] * sizeof(double));
|
|
test_mxMalloc(Ax_save, __LINE__, __FILE__, __func__, Ap[size] * sizeof(double));
|
|
}
|
|
memcpy(Ap_save, Ap, (size + 1) * sizeof(SuiteSparse_long));
|
|
memcpy(Ai_save, Ai, Ap[size] * sizeof(SuiteSparse_long));
|
|
memcpy(Ax_save, Ax, Ap[size] * sizeof(double));
|
|
memcpy(b_save, b, size * sizeof(double));
|
|
}
|
|
}
|
|
if (zero_solution)
|
|
{
|
|
for (int i = 0; i < size; i++)
|
|
{
|
|
int eq = index_vara[i];
|
|
double yy = -(y[eq+it_*y_size]);
|
|
direction[eq] = yy;
|
|
y[eq+it_*y_size] += slowc * yy;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((solve_algo == 5 && steady_state) || (stack_solve_algo == 5 && !steady_state))
|
|
singular_system = Solve_ByteCode_Sparse_GaussianElimination(size, block_num, it_);
|
|
else if ((solve_algo == 7 && steady_state) || (stack_solve_algo == 2 && !steady_state))
|
|
Solve_Matlab_GMRES(A_m, b_m, size, slowc, block_num, false, it_, x0_m);
|
|
else if ((solve_algo == 8 && steady_state) || (stack_solve_algo == 3 && !steady_state))
|
|
Solve_Matlab_BiCGStab(A_m, b_m, size, slowc, block_num, false, it_, x0_m, preconditioner);
|
|
else if ((solve_algo == 6 && steady_state) || ((stack_solve_algo == 0 || stack_solve_algo == 1 || stack_solve_algo == 4) && !steady_state))
|
|
Solve_LU_UMFPack(Ap, Ai, Ax, b, size, size, slowc, true, 0);
|
|
}
|
|
return singular_system;
|
|
}
|
|
|
|
bool
|
|
dynSparseMatrix::solve_linear(const int block_num, const int y_size, const int y_kmin, const int y_kmax, const int size, const int iter)
|
|
{
|
|
bool cvg = false;
|
|
double crit_opt_old = res2/2;
|
|
compute_complete(false, res1, res2, max_res, max_res_idx);
|
|
cvg = (max_res < solve_tolf);
|
|
if (!cvg || isnan(res1) || isinf(res1))
|
|
{
|
|
if (iter)
|
|
Check_and_Correct_Previous_Iteration(block_num, y_size, size, crit_opt_old);
|
|
bool singular_system = Simulate_One_Boundary(block_num, y_size, y_kmin, y_kmax, size, cvg);
|
|
if (singular_system)
|
|
Singular_display(block_num, size);
|
|
}
|
|
return cvg;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::solve_non_linear(const int block_num, const int y_size, const int y_kmin, const int y_kmax, const int size)
|
|
|
|
{
|
|
max_res_idx = 0;
|
|
bool cvg = false;
|
|
iter = 0;
|
|
glambda2 = g0 = very_big;
|
|
//try_at_iteration = 0;
|
|
while ((!cvg) && (iter < maxit_))
|
|
{
|
|
cvg = solve_linear(block_num, y_size, y_kmin, y_kmax, size, iter);
|
|
g0 = res2;
|
|
iter++;
|
|
}
|
|
if (!cvg)
|
|
{
|
|
ostringstream tmp;
|
|
if (steady_state)
|
|
tmp << " in Solve Forward complete, convergence not achieved in block " << block_num+1 << ", after " << iter << " iterations\n";
|
|
else
|
|
tmp << " in Solve Forward complete, convergence not achieved in block " << block_num+1 << ", at time " << it_ << ", after " << iter << " iterations\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::Simulate_Newton_One_Boundary(const bool forward)
|
|
{
|
|
g1 = (double *) mxMalloc(size*size*sizeof(double));
|
|
test_mxMalloc(g1, __LINE__, __FILE__, __func__, size*size*sizeof(double));
|
|
r = (double *) mxMalloc(size*sizeof(double));
|
|
test_mxMalloc(r, __LINE__, __FILE__, __func__, size*sizeof(double));
|
|
iter = 0;
|
|
if ((solve_algo == 6 && steady_state) || ((stack_solve_algo == 0 || stack_solve_algo == 1 || stack_solve_algo == 4) && !steady_state))
|
|
{
|
|
Ap_save = (SuiteSparse_long *) mxMalloc((size + 1) * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(Ap_save, __LINE__, __FILE__, __func__, (size + 1) * sizeof(SuiteSparse_long));
|
|
Ap_save[size] = 0;
|
|
Ai_save = (SuiteSparse_long *) mxMalloc(1 * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(Ai_save, __LINE__, __FILE__, __func__, 1 * sizeof(SuiteSparse_long));
|
|
Ax_save = (double *) mxMalloc(1 * sizeof(double));
|
|
test_mxMalloc(Ax_save, __LINE__, __FILE__, __func__, 1 * sizeof(double));
|
|
b_save = (double *) mxMalloc((size) * sizeof(SuiteSparse_long));
|
|
test_mxMalloc(b_save, __LINE__, __FILE__, __func__, (size) * sizeof(SuiteSparse_long));
|
|
}
|
|
if (steady_state)
|
|
{
|
|
it_ = 0;
|
|
if (!is_linear)
|
|
solve_non_linear(block_num, y_size, 0, 0, size);
|
|
else
|
|
solve_linear(block_num, y_size, 0, 0, size, 0);
|
|
}
|
|
else if (forward)
|
|
{
|
|
if (!is_linear)
|
|
{
|
|
for (it_ = y_kmin; it_ < periods+y_kmin; it_++)
|
|
solve_non_linear(block_num, y_size, y_kmin, y_kmax, size);
|
|
}
|
|
else
|
|
{
|
|
for (int it_ = y_kmin; it_ < periods+y_kmin; it_++)
|
|
solve_linear(block_num, y_size, y_kmin, y_kmax, size, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (!is_linear)
|
|
{
|
|
for (it_ = periods+y_kmin-1; it_ >= y_kmin; it_--)
|
|
solve_non_linear(block_num, y_size, y_kmin, y_kmax, size);
|
|
}
|
|
else
|
|
{
|
|
for (it_ = periods+y_kmin-1; it_ >= y_kmin; it_--)
|
|
solve_linear(block_num, y_size, y_kmin, y_kmax, size, 0);
|
|
}
|
|
}
|
|
if ((solve_algo == 6 && steady_state) || ((stack_solve_algo == 0 || stack_solve_algo == 1 || stack_solve_algo == 4) && !steady_state))
|
|
{
|
|
mxFree(Ap_save);
|
|
mxFree(Ai_save);
|
|
mxFree(Ax_save);
|
|
mxFree(b_save);
|
|
}
|
|
mxFree(g1);
|
|
mxFree(r);
|
|
}
|
|
|
|
/*
 * Returns a copy of the banner `s` with ", preconditioner=<name>" inserted
 * two characters before its end, i.e. ahead of the closing ")\n" of the
 * banners passed by the callers in this file.
 *
 *  s               banner text
 *  preconditioner  0..3 selects the name; any other value leaves it empty
 *  ss              true for the steady-state wording, false for the dynamic one
 *
 * A banner shorter than two characters gets the text appended at its end
 * instead of triggering an out-of-range insertion.
 */
string
dynSparseMatrix::preconditioner_print_out(string s, int preconditioner, bool ss)
{
  string tmp = ", preconditioner=";
  switch (preconditioner)
    {
    case 0:
      if (ss)
        tmp.append("Jacobi on static jacobian");
      else
        tmp.append("Jacobi on dynamic jacobian");
      break;
    case 1:
      if (ss)
        tmp.append("incomplet lutp on static jacobian");
      else
        tmp.append("incomplet lu0 on dynamic jacobian");
      break;
    case 2:
      tmp.append("incomplet lutp on dynamic jacobian");
      break;
    case 3:
      tmp.append("lu on static jacobian");
      break;
    }
  // Unsigned arithmetic: never form length - 2 when the string is too short
  const string::size_type n = s.length();
  const string::size_type where = (n >= 2) ? n - 2 : n;
  s.insert(where, tmp);
  return s;
}
|
|
|
|
void
|
|
dynSparseMatrix::Simulate_Newton_Two_Boundaries(int blck, int y_size, int y_kmin, int y_kmax, int Size, int periods, bool cvg, int minimal_solving_periods, int stack_solve_algo, unsigned int endo_name_length, char *P_endo_names, vector_table_conditional_local_type vector_table_conditional_local)
|
|
{
|
|
double top = 0.5;
|
|
double bottom = 0.1;
|
|
#ifdef CUDA
|
|
int nnz, nnz_tild;
|
|
int *Ap_i, *Ai_i;
|
|
int *Ap_i_tild, *Ai_i_tild;
|
|
double *x0, *A_tild;
|
|
|
|
#endif
|
|
int preconditioner = 2;
|
|
if (start_compare == 0)
|
|
start_compare = y_kmin;
|
|
u_count_alloc_save = u_count_alloc;
|
|
clock_t t1 = clock();
|
|
nop1 = 0;
|
|
mxArray *b_m = NULL, *A_m = NULL, *x0_m = NULL;
|
|
double *Ax = NULL, *b;
|
|
SuiteSparse_long *Ap = NULL, *Ai = NULL;
|
|
|
|
if (iter > 0)
|
|
{
|
|
if (print_it)
|
|
{
|
|
mexPrintf("Sim : %f ms\n", (1000.0*(double (clock())-double (time00)))/double (CLOCKS_PER_SEC));
|
|
mexEvalString("drawnow;");
|
|
}
|
|
time00 = clock();
|
|
}
|
|
if (isnan(res1) || isinf(res1) || (res2 > 12*g0 && iter > 0))
|
|
{
|
|
if (iter == 0 || fabs(slowc_save) < 1e-8)
|
|
{
|
|
mexPrintf("res1 = %f, res2 = %f g0 = %f iter = %d\n", res1, res2, g0, iter);
|
|
for (int j = 0; j < y_size; j++)
|
|
{
|
|
ostringstream res;
|
|
for (unsigned int i = 0; i < endo_name_length; i++)
|
|
if (P_endo_names[CHAR_LENGTH*(j+i*y_size)] != ' ')
|
|
res << P_endo_names[CHAR_LENGTH*(j+i*y_size)];
|
|
bool select = false;
|
|
for (int i = 0; i < Size; i++)
|
|
if (j == index_vara[i])
|
|
{
|
|
select = true;
|
|
break;
|
|
}
|
|
if (select)
|
|
mexPrintf("-> variable %s (%d) at time %d = %f direction = %f\n", res.str().c_str(), j+1, it_, y[j+it_*y_size], direction[j+it_*y_size]);
|
|
else
|
|
mexPrintf(" variable %s (%d) at time %d = %f direction = %f\n", res.str().c_str(), j+1, it_, y[j+it_*y_size], direction[j+it_*y_size]);
|
|
}
|
|
ostringstream Error;
|
|
if (iter == 0)
|
|
Error << " in Simulate_Newton_Two_Boundaries, the initial values of endogenous variables are too far from the solution.\nChange them!\n";
|
|
else
|
|
Error << " in Simulate_Newton_Two_Boundaries, dynare cannot improve the simulation in block " << blck+1 << " at time " << it_+1 << " (variable " << index_vara[max_res_idx]+1 << " = " << max_res << ")\n";
|
|
throw FatalExceptionHandling(Error.str());
|
|
}
|
|
if (!(isnan(res1) || isinf(res1)) && !(isnan(g0) || isinf(g0)) && (stack_solve_algo == 4 || stack_solve_algo == 5))
|
|
{
|
|
if (try_at_iteration == 0)
|
|
{
|
|
prev_slowc_save = slowc_save;
|
|
slowc_save = max(-gp0 / (2 * (res2 - g0 - gp0)), bottom);
|
|
}
|
|
else
|
|
{
|
|
double t1 = res2 - gp0 * slowc_save - g0;
|
|
double t2 = glambda2 - gp0 * prev_slowc_save - g0;
|
|
double a = (1/(slowc_save * slowc_save) * t1 - 1/(prev_slowc_save * prev_slowc_save) * t2) / (slowc_save - prev_slowc_save);
|
|
double b = (-prev_slowc_save/(slowc_save * slowc_save) * t1 + slowc_save/(prev_slowc_save * prev_slowc_save) * t2) / (slowc_save - prev_slowc_save);
|
|
prev_slowc_save = slowc_save;
|
|
slowc_save = max(min(-b + sqrt(b*b - 3 * a * gp0) / (3 * a), top * slowc_save), bottom * slowc_save);
|
|
}
|
|
glambda2 = res2;
|
|
try_at_iteration++;
|
|
if (slowc_save <= bottom)
|
|
{
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
y[i] = ya[i]+direction[i];
|
|
g0 = res2;
|
|
gp0 = -res2;
|
|
try_at_iteration = 0;
|
|
iter--;
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
prev_slowc_save = slowc_save;
|
|
slowc_save /= 1.05;
|
|
}
|
|
if (print_it)
|
|
{
|
|
if (isnan(res1) || isinf(res1))
|
|
mexPrintf("The model cannot be evaluated, trying to correct it using slowc=%f\n", slowc_save);
|
|
else
|
|
mexPrintf("Simulation diverging, trying to correct it using slowc=%f\n", slowc_save);
|
|
}
|
|
for (int i = 0; i < y_size*(periods+y_kmin); i++)
|
|
y[i] = ya[i]+slowc_save*direction[i];
|
|
iter--;
|
|
return;
|
|
}
|
|
u_count += u_count_init;
|
|
if (stack_solve_algo == 5)
|
|
{
|
|
if (alt_symbolic && alt_symbolic_count < alt_symbolic_count_max)
|
|
{
|
|
mexPrintf("Pivoting method will be applied only to the first periods.\n");
|
|
alt_symbolic = false;
|
|
symbolic = true;
|
|
markowitz_c = markowitz_c_s;
|
|
alt_symbolic_count++;
|
|
}
|
|
if (((res1/res1a-1) > -0.3) && symbolic && iter > 0)
|
|
{
|
|
if (restart > 2)
|
|
{
|
|
mexPrintf("Divergence or slowdown occurred during simulation.\nIn the next iteration, pivoting method will be applied to all periods.\n");
|
|
symbolic = false;
|
|
alt_symbolic = true;
|
|
markowitz_c_s = markowitz_c;
|
|
markowitz_c = 0;
|
|
}
|
|
else
|
|
{
|
|
mexPrintf("Divergence or slowdown occurred during simulation.\nIn the next iteration, pivoting method will be applied for a longer period.\n");
|
|
start_compare = min(tbreak_g, periods);
|
|
restart++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
start_compare = max(y_kmin, minimal_solving_periods);
|
|
restart = 0;
|
|
}
|
|
}
|
|
res1a = res1;
|
|
if (print_it)
|
|
{
|
|
if (iter == 0)
|
|
{
|
|
switch (stack_solve_algo)
|
|
{
|
|
case 0:
|
|
mexPrintf("MODEL SIMULATION: (method=Sparse LU)\n");
|
|
break;
|
|
case 1:
|
|
mexPrintf("MODEL SIMULATION: (method=Relaxation)\n");
|
|
break;
|
|
case 2:
|
|
mexPrintf(preconditioner_print_out("MODEL SIMULATION: (method=GMRES)\n", preconditioner, false).c_str());
|
|
break;
|
|
case 3:
|
|
mexPrintf(preconditioner_print_out("MODEL SIMULATION: (method=BiCGStab)\n", preconditioner, false).c_str());
|
|
break;
|
|
case 4:
|
|
mexPrintf("MODEL SIMULATION: (method=Sparse LU & optimal path length)\n");
|
|
break;
|
|
case 5:
|
|
mexPrintf("MODEL SIMULATION: (method=ByteCode own solver)\n");
|
|
break;
|
|
case 7:
|
|
mexPrintf(preconditioner_print_out("MODEL SIMULATION: (method=GPU BiCGStab)\n", preconditioner, false).c_str());
|
|
break;
|
|
default:
|
|
mexPrintf("MODEL SIMULATION: (method=Unknown - %d - )\n", stack_solve_algo);
|
|
}
|
|
}
|
|
mexPrintf("-----------------------------------\n");
|
|
mexPrintf(" Simulate iteration no %d \n", iter+1);
|
|
mexPrintf(" max. error=%.10e \n", double (max_res));
|
|
mexPrintf(" sqr. error=%.10e \n", double (res2));
|
|
mexPrintf(" abs. error=%.10e \n", double (res1));
|
|
mexPrintf("-----------------------------------\n");
|
|
mexEvalString("drawnow;");
|
|
}
|
|
if (cvg)
|
|
{
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
if (stack_solve_algo == 5)
|
|
Init_GE(periods, y_kmin, y_kmax, Size, IM_i);
|
|
else
|
|
{
|
|
b_m = mxCreateDoubleMatrix(periods*Size, 1, mxREAL);
|
|
if (!b_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_Newton_Two_Boundaries, can't allocate b_m vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
x0_m = mxCreateDoubleMatrix(periods*Size, 1, mxREAL);
|
|
if (!x0_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_Newton_Two_Boundaries, can't allocate x0_m vector\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
if (stack_solve_algo != 0 && stack_solve_algo != 4 && stack_solve_algo != 7)
|
|
{
|
|
A_m = mxCreateSparse(periods*Size, periods*Size, IM_i.size()* periods*2, mxREAL);
|
|
if (!A_m)
|
|
{
|
|
ostringstream tmp;
|
|
tmp << " in Simulate_Newton_Two_Boundaries, can't allocate A_m matrix\n";
|
|
throw FatalExceptionHandling(tmp.str());
|
|
}
|
|
}
|
|
if (stack_solve_algo == 0 || stack_solve_algo == 4)
|
|
Init_UMFPACK_Sparse(periods, y_kmin, y_kmax, Size, IM_i, &Ap, &Ai, &Ax, &b, x0_m, vector_table_conditional_local, blck);
|
|
#ifdef CUDA
|
|
else if (stack_solve_algo == 7)
|
|
Init_CUDA_Sparse(periods, y_kmin, y_kmax, Size, IM_i, &Ap_i, &Ai_i, &Ax, &Ap_i_tild, &Ai_i_tild, &A_tild, &b, &x0, x0_m, &nnz, &nnz_tild, preconditioner);
|
|
#endif
|
|
else
|
|
Init_Matlab_Sparse(periods, y_kmin, y_kmax, Size, IM_i, A_m, b_m, x0_m);
|
|
|
|
}
|
|
if (stack_solve_algo == 0 || stack_solve_algo == 4)
|
|
Solve_LU_UMFPack(Ap, Ai, Ax, b, Size * periods, Size, slowc, true, 0, vector_table_conditional_local);
|
|
else if (stack_solve_algo == 1)
|
|
Solve_Matlab_Relaxation(A_m, b_m, Size, slowc, true, 0);
|
|
else if (stack_solve_algo == 2)
|
|
Solve_Matlab_GMRES(A_m, b_m, Size, slowc, blck, true, 0, x0_m);
|
|
else if (stack_solve_algo == 3)
|
|
Solve_Matlab_BiCGStab(A_m, b_m, Size, slowc, blck, true, 0, x0_m, 1);
|
|
else if (stack_solve_algo == 5)
|
|
Solve_ByteCode_Symbolic_Sparse_GaussianElimination(Size, symbolic, blck);
|
|
#ifdef CUDA
|
|
else if (stack_solve_algo == 7)
|
|
Solve_CUDA_BiCGStab(Ap_i, Ai_i, Ax, Ap_i_tild, Ai_i_tild, A_tild, b, x0, Size * periods, Size, slowc, true, 0, nnz, nnz_tild, preconditioner, Size * periods, blck);
|
|
#endif
|
|
}
|
|
if (print_it)
|
|
{
|
|
clock_t t2 = clock();
|
|
mexPrintf("(** %f milliseconds **)\n", 1000.0*(double (t2) - double (t1))/double (CLOCKS_PER_SEC));
|
|
mexEvalString("drawnow;");
|
|
}
|
|
if ((!steady_state && (stack_solve_algo == 4 /*|| stack_solve_algo == 0*/)) /* || steady_state*/)
|
|
{
|
|
clock_t t2 = clock();
|
|
double ax = -0.1, bx = 1.1, cx = 0.5, fa, fb, fc, xmin;
|
|
|
|
if (!mnbrak(&ax, &bx, &cx, &fa, &fb, &fc))
|
|
return;
|
|
//mexPrintf("ax= %f, bx=%f, cx=%f, fa=%f, fb=%f, fc=%d\n", ax, bx, cx, fa, fb, fc);
|
|
if (!golden(ax, bx, cx, 1e-1, solve_tolf, &xmin))
|
|
return;
|
|
slowc = xmin;
|
|
clock_t t3 = clock();
|
|
mexPrintf("(** %f milliseconds **)\n", 1000.0*(double (t3) - double (t2))/double (CLOCKS_PER_SEC));
|
|
mexEvalString("drawnow;");
|
|
}
|
|
time00 = clock();
|
|
if (tbreak_g == 0)
|
|
tbreak_g = periods;
|
|
return;
|
|
}
|
|
|
|
void
|
|
dynSparseMatrix::fixe_u(double **u, int u_count_int, int max_lag_plus_max_lead_plus_1)
|
|
{
|
|
u_count = u_count_int * periods;
|
|
u_count_alloc = 2*u_count;
|
|
#ifdef DEBUG
|
|
mexPrintf("fixe_u : alloc(%d double)\n", u_count_alloc);
|
|
#endif
|
|
(*u) = (double *) mxMalloc(u_count_alloc*sizeof(double));
|
|
test_mxMalloc(*u, __LINE__, __FILE__, __func__, u_count_alloc*sizeof(double));
|
|
#ifdef DEBUG
|
|
mexPrintf("*u=%d\n", *u);
|
|
#endif
|
|
memset((*u), 0, u_count_alloc*sizeof(double));
|
|
u_count_init = max_lag_plus_max_lead_plus_1;
|
|
}
|