Bytecode: remove CUDA-specific stuff
parent
5ef440ad9b
commit
eb29dc003a
|
@ -33,16 +33,8 @@ Interpreter::Interpreter(double *params_arg, double *y_arg, double *ya_arg, doub
|
|||
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
||||
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
||||
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg
|
||||
#ifdef CUDA
|
||||
, const int CUDA_device_arg, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
||||
#endif
|
||||
)
|
||||
: dynSparseMatrix(y_size_arg, y_kmin_arg, y_kmax_arg, print_it_arg, steady_state_arg, periods_arg, minimal_solving_periods_arg, slowc_arg
|
||||
#ifdef CUDA
|
||||
, CUDA_device_arg, cublas_handle_arg, cusparse_handle_arg, descr_arg
|
||||
#endif
|
||||
)
|
||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg)
|
||||
: dynSparseMatrix(y_size_arg, y_kmin_arg, y_kmax_arg, print_it_arg, steady_state_arg, periods_arg, minimal_solving_periods_arg, slowc_arg)
|
||||
{
|
||||
params = params_arg;
|
||||
y = y_arg;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright © 2007-2017 Dynare Team
|
||||
* Copyright © 2007-2021 Dynare Team
|
||||
*
|
||||
* This file is part of Dynare.
|
||||
*
|
||||
|
@ -58,11 +58,7 @@ public:
|
|||
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
||||
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
||||
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg
|
||||
#ifdef CUDA
|
||||
, const int CUDA_device, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
||||
#endif
|
||||
);
|
||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg);
|
||||
bool extended_path(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks, int nb_periods, vector<s_plan> sextended_path, vector<s_plan> sconstrained_extended_path, vector<string> dates, table_conditional_global_type table_conditional_global);
|
||||
bool compute_blocks(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks);
|
||||
void check_for_controlled_exo_validity(FBEGINBLOCK_ *fb, vector<s_plan> sconstrained_extended_path);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,51 +28,11 @@
|
|||
#include "dynblas.h"
|
||||
#include "dynumfpack.h"
|
||||
|
||||
#ifdef CUDA
|
||||
# include "cuda.h"
|
||||
# include "cuda_runtime_api.h"
|
||||
# include "cublas_v2.h"
|
||||
# include "cusparse_v2.h"
|
||||
#endif
|
||||
|
||||
#include "Mem_Mngr.hh"
|
||||
#include "ErrorHandling.hh"
|
||||
//#include "Interpreter.hh"
|
||||
#include "Evaluate.hh"
|
||||
|
||||
#define cudaChk(x, y) \
|
||||
{ \
|
||||
cudaError_t cuda_error = x; \
|
||||
if (cuda_error != cudaSuccess) \
|
||||
{ \
|
||||
ostringstream tmp; \
|
||||
tmp << y; \
|
||||
throw FatalExceptionHandling(tmp.str()); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define cusparseChk(x, y) \
|
||||
{ \
|
||||
cusparseStatus_t cusparse_status = x; \
|
||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS) \
|
||||
{ \
|
||||
ostringstream tmp; \
|
||||
tmp << y; \
|
||||
throw FatalExceptionHandling(tmp.str()); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define cublasChk(x, y) \
|
||||
{ \
|
||||
cublasStatus_t cublas_status = x; \
|
||||
if (cublas_status != CUBLAS_STATUS_SUCCESS) \
|
||||
{ \
|
||||
ostringstream tmp; \
|
||||
tmp << y; \
|
||||
throw FatalExceptionHandling(tmp.str()); \
|
||||
} \
|
||||
};
|
||||
|
||||
#define NEW_ALLOC
|
||||
#define MARKOVITZ
|
||||
|
||||
|
@ -101,11 +61,7 @@ class dynSparseMatrix : public Evaluate
|
|||
{
|
||||
public:
|
||||
dynSparseMatrix();
|
||||
dynSparseMatrix(const int y_size_arg, const int y_kmin_arg, const int y_kmax_arg, const bool print_it_arg, const bool steady_state_arg, const int periods_arg, const int minimal_solving_periods_arg, const double slowc_arg
|
||||
#ifdef CUDA
|
||||
, const int CUDA_device_arg, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
||||
#endif
|
||||
);
|
||||
dynSparseMatrix(const int y_size_arg, const int y_kmin_arg, const int y_kmax_arg, const bool print_it_arg, const bool steady_state_arg, const int periods_arg, const int minimal_solving_periods_arg, const double slowc_arg);
|
||||
void Simulate_Newton_Two_Boundaries(int blck, int y_size, int y_kmin, int y_kmax, int Size, int periods, bool cvg, int minimal_solving_periods, int stack_solve_algo, unsigned int endo_name_length, char *P_endo_names, vector_table_conditional_local_type vector_table_conditional_local);
|
||||
void Simulate_Newton_One_Boundary(bool forward);
|
||||
void fixe_u(double **u, int u_count_int, int max_lag_plus_max_lead_plus_1);
|
||||
|
@ -123,12 +79,8 @@ private:
|
|||
void Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM);
|
||||
void Init_Matlab_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, mxArray *x0_m);
|
||||
void Init_UMFPACK_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, mxArray *x0_m, vector_table_conditional_local_type vector_table_conditional_local, int block_num);
|
||||
#ifdef CUDA
|
||||
void Init_CUDA_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, int **Ap, int **Ai, double **Ax, int **Ap_tild, int **Ai_tild, double **A_tild, double **b, double **x0, mxArray *x0_m, int *nnz, int *nnz_tild, int preconditioner);
|
||||
#endif
|
||||
void Init_Matlab_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, bool &zero_solution, mxArray *x0_m);
|
||||
void Init_UMFPACK_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, bool &zero_solution, mxArray *x0_m);
|
||||
void Init_CUDA_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, double **x0, bool &zero_solution, mxArray *x0_m);
|
||||
void Simple_Init(int Size, std::map<std::pair<std::pair<int, int>, int>, int> &IM, bool &zero_solution);
|
||||
void End_GE(int Size);
|
||||
bool mnbrak(double *ax, double *bx, double *cx, double *fa, double *fb, double *fc);
|
||||
|
@ -145,13 +97,6 @@ private:
|
|||
void Solve_LU_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n, int Size, double slowc_l, bool is_two_boundaries, int it_);
|
||||
|
||||
void End_Matlab_LU_UMFPack();
|
||||
#ifdef CUDA
|
||||
void Solve_CUDA_BiCGStab_Free(double *tmp_vect_host, double *p, double *r, double *v, double *s, double *t, double *y_, double *z, double *tmp_,
|
||||
int *Ai, double *Ax, int *Ap, double *x0, double *b, double *A_tild, int *A_tild_i, int *A_tild_p,
|
||||
cusparseSolveAnalysisInfo_t infoL, cusparseSolveAnalysisInfo_t infoU,
|
||||
cusparseMatDescr_t descrL, cusparseMatDescr_t descrU, int preconditioner);
|
||||
int Solve_CUDA_BiCGStab(int *Ap, int *Ai, double *Ax, int *Ap_tild, int *Ai_tild, double *A_tild, double *b, double *x0, int n, int Size, double slowc_l, bool is_two_boundaries, int it_, int nnz, int nnz_tild, int preconditioner, int max_iterations, int block);
|
||||
#endif
|
||||
void Solve_Matlab_GMRES(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m);
|
||||
void Solve_Matlab_BiCGStab(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m, int precond);
|
||||
void Check_and_Correct_Previous_Iteration(int block_num, int y_size, int size, double crit_opt_old);
|
||||
|
@ -196,12 +141,6 @@ private:
|
|||
mxArray *Sparse_substract_SA_SB(mxArray *A_m, mxArray *B_m);
|
||||
mxArray *Sparse_substract_A_SB(mxArray *A_m, mxArray *B_m);
|
||||
mxArray *substract_A_B(mxArray *A_m, mxArray *B_m);
|
||||
#ifdef CUDA
|
||||
int CUDA_device;
|
||||
cublasHandle_t cublas_handle;
|
||||
cusparseHandle_t cusparse_handle;
|
||||
cusparseMatDescr_t CUDA_descr;
|
||||
#endif
|
||||
protected:
|
||||
stack<double> Stack;
|
||||
int nb_prologue_table_u, nb_first_table_u, nb_middle_table_u, nb_last_table_u;
|
||||
|
|
|
@ -1,121 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2012 Dynare Team
|
||||
*
|
||||
* This file is part of Dynare.
|
||||
*
|
||||
* Dynare is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Dynare is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Dynare. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef SPARMATRIX_KERNEL
|
||||
#define SPARMATRIX_KERNEL
|
||||
|
||||
// Kernel definition of vector division
|
||||
__global__ void
|
||||
VecDiv(double* A, double* B, double* C, int n)
|
||||
{
|
||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
||||
if (i < n)
|
||||
C[i] = (B[i] != 0.0 ? A[i] / B[i] : A[i]);
|
||||
}
|
||||
|
||||
__global__ void
|
||||
VecAdd(double* res, double* r, double alpha, double* x, int n)
|
||||
{
|
||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
||||
if (i < n)
|
||||
res[i] = r[i] + alpha * x[i];
|
||||
}
|
||||
|
||||
__global__ void
|
||||
VecInc(double* res, double alpha, double* x, int n)
|
||||
{
|
||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
||||
if (i < n)
|
||||
res[i] += alpha * x[i];
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
update_x(double* x, double alpha, double* y, double omega, double *z)
|
||||
{
|
||||
int i = threadIdx.x;
|
||||
x[i] += alpha * y[i] + omega * z[i];
|
||||
}
|
||||
|
||||
__global__ void
|
||||
Get_LU_dim(int *n, int* A_tild_i, int *A_tild_p, int *nnz_l, int *nnz_u)
|
||||
{
|
||||
nnz_u[0] = 0;
|
||||
nnz_l[0] = 0;
|
||||
for (int i = 0; i < n[0]; i++)
|
||||
{
|
||||
for (int j = A_tild_p[i]; j < A_tild_p[i+1]; j++)
|
||||
{
|
||||
if (A_tild_i[j] < i)
|
||||
nnz_l[0]++;
|
||||
else if (A_tild_i[j] == i)
|
||||
{
|
||||
nnz_u[0]++;
|
||||
//nnz_l[0]++;
|
||||
}
|
||||
else
|
||||
nnz_u[0]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
Get_LU1_dim(int* n, int *nnz_l, int *nnz_u)
|
||||
{
|
||||
nnz_u[0] = 3+n[0];
|
||||
nnz_l[0] = 1+n[0];
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
Get_L_and_U(int *n, double* A_tild_x, int* A_tild_i, int *A_tild_p, double* Lx, int* Li, int *Lp, double* Ux, int* Ui, int* Up)
|
||||
{
|
||||
int nnz_u = 0, nnz_l = 0;
|
||||
Lp[0] = 0;
|
||||
Up[0] = 0;
|
||||
for (int i = 0; i < n[0]; i++)
|
||||
{
|
||||
for (int j = A_tild_p[i]; j < A_tild_p[i+1]; j++)
|
||||
{
|
||||
if (A_tild_i[j] < i)
|
||||
{
|
||||
Lx[nnz_l] = A_tild_x[j];
|
||||
Li[nnz_l] = A_tild_i[j];
|
||||
nnz_l++;
|
||||
}
|
||||
else if (A_tild_i[j] == i)
|
||||
{
|
||||
Ux[nnz_u] = A_tild_x[j];
|
||||
Lx[nnz_l] = 1.0;
|
||||
Li[nnz_l] = Ui[nnz_u] = A_tild_i[j];
|
||||
nnz_u++;
|
||||
//nnz_l++;
|
||||
}
|
||||
else
|
||||
{
|
||||
Ux[nnz_u] = A_tild_x[j];
|
||||
Ui[nnz_u] = A_tild_i[j];
|
||||
nnz_u++;
|
||||
}
|
||||
}
|
||||
Lp[i+1] = nnz_l;
|
||||
Up[i+1] = nnz_u;
|
||||
}
|
||||
}
|
||||
#endif
|
|
@ -57,164 +57,6 @@ Get_Argument(const mxArray *prhs)
|
|||
//#include <windows.h>
|
||||
#include <cstdio>
|
||||
|
||||
#ifdef CUDA
|
||||
int
|
||||
GPU_Test_and_Info(cublasHandle_t *cublas_handle, cusparseHandle_t *cusparse_handle, cusparseMatDescr_t *descr)
|
||||
{
|
||||
cudaDeviceProp deviceProp;
|
||||
int device_count, device, version, version_max = 0;
|
||||
cublasStatus_t cublas_status;
|
||||
cudaError_t cuda_error;
|
||||
*descr = 0;
|
||||
|
||||
/* ask cuda how many devices it can find */
|
||||
cudaGetDeviceCount(&device_count);
|
||||
if (device_count < 1)
|
||||
{
|
||||
/* if it couldn't find any fail out */
|
||||
ostringstream tmp;
|
||||
tmp << " Unable to find a CUDA device. Unable to implement CUDA solvers\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
else
|
||||
{
|
||||
mexPrintf("-----------------------------------------\n");
|
||||
for (int i = 0; i < device_count; i++)
|
||||
{
|
||||
cudaSetDevice(i);
|
||||
// Statistics about the GPU device
|
||||
cuda_error = cudaGetDeviceProperties(&deviceProp, i);
|
||||
if (cuda_error != cudaSuccess)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " bytecode cudaGetDeviceProperties failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
mexPrintf("> GPU device %d: \"%s\" has:\n - %d Multi-Processors,\n - %d threads per multiprocessor,\n", i, deviceProp.name, deviceProp.multiProcessorCount, deviceProp.maxThreadsPerMultiProcessor);
|
||||
mexEvalString("drawnow;");
|
||||
version = (deviceProp.major * 0x10 + deviceProp.minor);
|
||||
if (version >= version_max)
|
||||
{
|
||||
device = i;
|
||||
version_max = version;
|
||||
}
|
||||
mexPrintf(" - %4.2fMhz clock rate,\n - %2.0fMb of memory,\n - %d.%d compute capabilities.\n", double (deviceProp.clockRate) / (1024 * 1024), double (deviceProp.totalGlobalMem) / (1024 * 1024), deviceProp.major, deviceProp.minor);
|
||||
mexEvalString("drawnow;");
|
||||
}
|
||||
}
|
||||
mexPrintf("> Device %d selected\n", device);
|
||||
mexEvalString("drawnow;");
|
||||
|
||||
cuda_error = cudaSetDevice(device);
|
||||
if (cuda_error != cudaSuccess)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " bytecode cudaSetDevice failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
|
||||
if (version_max < 0x11)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " bytecode requires a minimum CUDA compute 1.1 capability\n";
|
||||
cudaDeviceReset();
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
|
||||
// Initialize CuBlas library
|
||||
cublas_status = cublasCreate(cublas_handle);
|
||||
if (cublas_status != CUBLAS_STATUS_SUCCESS)
|
||||
{
|
||||
ostringstream tmp;
|
||||
switch (cublas_status)
|
||||
{
|
||||
case CUBLAS_STATUS_NOT_INITIALIZED:
|
||||
tmp << " the CUBLAS initialization failed.\n";
|
||||
break;
|
||||
case CUBLAS_STATUS_ALLOC_FAILED:
|
||||
tmp << " the resources could not be allocated.\n";
|
||||
break;
|
||||
default:
|
||||
tmp << " unknown error during the initialization of cusparse library.\n";
|
||||
}
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
|
||||
// Initialize the CuSparse library
|
||||
cusparseStatus_t cusparse_status;
|
||||
cusparse_status = cusparseCreate(cusparse_handle);
|
||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
||||
{
|
||||
ostringstream tmp;
|
||||
switch (cusparse_status)
|
||||
{
|
||||
case CUSPARSE_STATUS_NOT_INITIALIZED:
|
||||
tmp << " the CUDA Runtime initialization failed.\n";
|
||||
break;
|
||||
case CUSPARSE_STATUS_ALLOC_FAILED:
|
||||
tmp << " the resources could not be allocated.\n";
|
||||
break;
|
||||
case CUSPARSE_STATUS_ARCH_MISMATCH:
|
||||
tmp << " the device compute capability (CC) is less than 1.1. The CC of at least 1.1 is required.\n";
|
||||
break;
|
||||
default:
|
||||
tmp << " unknown error during the initialization of cusparse library.\n";
|
||||
}
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
|
||||
// Create and setup matrix descriptor
|
||||
cusparse_status = cusparseCreateMatDescr(descr);
|
||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " Matrix descriptor initialization failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
cusparseSetMatType(*descr, CUSPARSE_MATRIX_TYPE_GENERAL);
|
||||
cusparseSetMatIndexBase(*descr, CUSPARSE_INDEX_BASE_ZERO);
|
||||
|
||||
mexPrintf("> Driver version:\n");
|
||||
int cuda_version;
|
||||
cuda_error = cudaDriverGetVersion(&cuda_version);
|
||||
if (cuda_error != cudaSuccess)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " cudaGetVersion has failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
mexPrintf(" - CUDA version %5.3f\n", double (cuda_version) / 1000);
|
||||
int cublas_version;
|
||||
cublas_status = cublasGetVersion(*cublas_handle, &cublas_version);
|
||||
if (cublas_status != CUBLAS_STATUS_SUCCESS)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " cublasGetVersion has failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
mexPrintf(" - CUBLAS version %5.3f\n", double (cublas_version) / 1000);
|
||||
int cusparse_version;
|
||||
cusparse_status = cusparseGetVersion(*cusparse_handle, &cusparse_version);
|
||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
||||
{
|
||||
ostringstream tmp;
|
||||
tmp << " cusparseGetVersion has failed\n";
|
||||
throw FatalExceptionHandling(tmp.str());
|
||||
}
|
||||
mexPrintf(" - CUSPARSE version %5.3f\n", double (cusparse_version) / 1000);
|
||||
mexPrintf("-----------------------------------------\n");
|
||||
return device;
|
||||
}
|
||||
|
||||
void
|
||||
GPU_close(cublasHandle_t cublas_handle, cusparseHandle_t cusparse_handle, cusparseMatDescr_t descr)
|
||||
{
|
||||
cublasChk(cublasDestroy(cublas_handle), "in bytecode cublasDestroy failed\n");
|
||||
cusparseChk(cusparseDestroyMatDescr(descr), "in bytecode cusparseDestroyMatDescr failed\n");
|
||||
cusparseChk(cusparseDestroy(cusparse_handle), "in bytecode cusparseDestroy failed\n");
|
||||
}
|
||||
|
||||
#endif
|
||||
string
|
||||
deblank(string x)
|
||||
{
|
||||
|
@ -437,12 +279,6 @@ main(int nrhs, const char *prhs[])
|
|||
|
||||
int max_periods = 0;
|
||||
|
||||
#ifdef CUDA
|
||||
int CUDA_device = -1;
|
||||
cublasHandle_t cublas_handle;
|
||||
cusparseHandle_t cusparse_handle;
|
||||
cusparseMatDescr_t descr;
|
||||
#endif
|
||||
try
|
||||
{
|
||||
Get_Arguments_and_global_variables(nrhs, prhs, count_array_argument,
|
||||
|
@ -1005,20 +841,9 @@ main(int nrhs, const char *prhs[])
|
|||
mexWarnMsgTxt("Not enough space. Filename is truncated.");
|
||||
string file_name = fname;
|
||||
|
||||
#ifdef CUDA
|
||||
try
|
||||
{
|
||||
if (stack_solve_algo == 7 && !steady_state)
|
||||
CUDA_device = GPU_Test_and_Info(&cublas_handle, &cusparse_handle, &descr);
|
||||
}
|
||||
catch (GeneralExceptionHandling &feh)
|
||||
{
|
||||
mexErrMsgTxt(feh.GetErrorMsg().c_str());
|
||||
}
|
||||
#else
|
||||
if (stack_solve_algo == 7 && !steady_state)
|
||||
mexErrMsgTxt("bytecode has not been compiled with CUDA option. Bytecode Can't use options_.stack_solve_algo=7\n");
|
||||
#endif
|
||||
mexErrMsgTxt("Bytecode: Can't use option stack_solve_algo=7\n");
|
||||
|
||||
size_t size_of_direction = col_y*row_y*sizeof(double);
|
||||
auto *y = static_cast<double *>(mxMalloc(size_of_direction));
|
||||
error_msg.test_mxMalloc(y, __LINE__, __FILE__, __func__, size_of_direction);
|
||||
|
@ -1045,11 +870,7 @@ main(int nrhs, const char *prhs[])
|
|||
clock_t t0 = clock();
|
||||
Interpreter interprete(params, y, ya, x, steady_yd, steady_xd, direction, y_size, nb_row_x, nb_row_xd, periods, y_kmin, y_kmax, maxit_, solve_tolf, size_of_direction, slowc, y_decal,
|
||||
markowitz_c, file_name, minimal_solving_periods, stack_solve_algo, solve_algo, global_temporary_terms, print, print_error, GlobalTemporaryTerms, steady_state,
|
||||
print_it, col_x, col_y
|
||||
#ifdef CUDA
|
||||
, CUDA_device, cublas_handle, cusparse_handle, descr
|
||||
#endif
|
||||
);
|
||||
print_it, col_x, col_y);
|
||||
string f(fname);
|
||||
mxFree(fname);
|
||||
int nb_blocks = 0;
|
||||
|
@ -1078,11 +899,6 @@ main(int nrhs, const char *prhs[])
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef CUDA
|
||||
if (stack_solve_algo == 7 && !steady_state)
|
||||
GPU_close(cublas_handle, cusparse_handle, descr);
|
||||
#endif
|
||||
|
||||
clock_t t1 = clock();
|
||||
if (!steady_state && !evaluate && print)
|
||||
mexPrintf("Simulation Time=%f milliseconds\n", 1000.0*(double (t1)-double (t0))/double (CLOCKS_PER_SEC));
|
||||
|
|
Loading…
Reference in New Issue