Bytecode: remove CUDA-specific stuff
parent
5ef440ad9b
commit
eb29dc003a
|
@ -33,16 +33,8 @@ Interpreter::Interpreter(double *params_arg, double *y_arg, double *ya_arg, doub
|
||||||
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
||||||
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
||||||
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
||||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg
|
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg)
|
||||||
#ifdef CUDA
|
: dynSparseMatrix(y_size_arg, y_kmin_arg, y_kmax_arg, print_it_arg, steady_state_arg, periods_arg, minimal_solving_periods_arg, slowc_arg)
|
||||||
, const int CUDA_device_arg, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
|
||||||
#endif
|
|
||||||
)
|
|
||||||
: dynSparseMatrix(y_size_arg, y_kmin_arg, y_kmax_arg, print_it_arg, steady_state_arg, periods_arg, minimal_solving_periods_arg, slowc_arg
|
|
||||||
#ifdef CUDA
|
|
||||||
, CUDA_device_arg, cublas_handle_arg, cusparse_handle_arg, descr_arg
|
|
||||||
#endif
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
params = params_arg;
|
params = params_arg;
|
||||||
y = y_arg;
|
y = y_arg;
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright © 2007-2017 Dynare Team
|
* Copyright © 2007-2021 Dynare Team
|
||||||
*
|
*
|
||||||
* This file is part of Dynare.
|
* This file is part of Dynare.
|
||||||
*
|
*
|
||||||
|
@ -58,11 +58,7 @@ public:
|
||||||
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
int maxit_arg_, double solve_tolf_arg, size_t size_of_direction_arg, double slowc_arg, int y_decal_arg, double markowitz_c_arg,
|
||||||
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
string &filename_arg, int minimal_solving_periods_arg, int stack_solve_algo_arg, int solve_algo_arg,
|
||||||
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
bool global_temporary_terms_arg, bool print_arg, bool print_error_arg, mxArray *GlobalTemporaryTerms_arg,
|
||||||
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg
|
bool steady_state_arg, bool print_it_arg, int col_x_arg, int col_y_arg);
|
||||||
#ifdef CUDA
|
|
||||||
, const int CUDA_device, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
bool extended_path(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks, int nb_periods, vector<s_plan> sextended_path, vector<s_plan> sconstrained_extended_path, vector<string> dates, table_conditional_global_type table_conditional_global);
|
bool extended_path(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks, int nb_periods, vector<s_plan> sextended_path, vector<s_plan> sconstrained_extended_path, vector<string> dates, table_conditional_global_type table_conditional_global);
|
||||||
bool compute_blocks(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks);
|
bool compute_blocks(string file_name, string bin_basename, bool evaluate, int block, int &nb_blocks);
|
||||||
void check_for_controlled_exo_validity(FBEGINBLOCK_ *fb, vector<s_plan> sconstrained_extended_path);
|
void check_for_controlled_exo_validity(FBEGINBLOCK_ *fb, vector<s_plan> sconstrained_extended_path);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,51 +28,11 @@
|
||||||
#include "dynblas.h"
|
#include "dynblas.h"
|
||||||
#include "dynumfpack.h"
|
#include "dynumfpack.h"
|
||||||
|
|
||||||
#ifdef CUDA
|
|
||||||
# include "cuda.h"
|
|
||||||
# include "cuda_runtime_api.h"
|
|
||||||
# include "cublas_v2.h"
|
|
||||||
# include "cusparse_v2.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "Mem_Mngr.hh"
|
#include "Mem_Mngr.hh"
|
||||||
#include "ErrorHandling.hh"
|
#include "ErrorHandling.hh"
|
||||||
//#include "Interpreter.hh"
|
//#include "Interpreter.hh"
|
||||||
#include "Evaluate.hh"
|
#include "Evaluate.hh"
|
||||||
|
|
||||||
#define cudaChk(x, y) \
|
|
||||||
{ \
|
|
||||||
cudaError_t cuda_error = x; \
|
|
||||||
if (cuda_error != cudaSuccess) \
|
|
||||||
{ \
|
|
||||||
ostringstream tmp; \
|
|
||||||
tmp << y; \
|
|
||||||
throw FatalExceptionHandling(tmp.str()); \
|
|
||||||
} \
|
|
||||||
};
|
|
||||||
|
|
||||||
#define cusparseChk(x, y) \
|
|
||||||
{ \
|
|
||||||
cusparseStatus_t cusparse_status = x; \
|
|
||||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS) \
|
|
||||||
{ \
|
|
||||||
ostringstream tmp; \
|
|
||||||
tmp << y; \
|
|
||||||
throw FatalExceptionHandling(tmp.str()); \
|
|
||||||
} \
|
|
||||||
};
|
|
||||||
|
|
||||||
#define cublasChk(x, y) \
|
|
||||||
{ \
|
|
||||||
cublasStatus_t cublas_status = x; \
|
|
||||||
if (cublas_status != CUBLAS_STATUS_SUCCESS) \
|
|
||||||
{ \
|
|
||||||
ostringstream tmp; \
|
|
||||||
tmp << y; \
|
|
||||||
throw FatalExceptionHandling(tmp.str()); \
|
|
||||||
} \
|
|
||||||
};
|
|
||||||
|
|
||||||
#define NEW_ALLOC
|
#define NEW_ALLOC
|
||||||
#define MARKOVITZ
|
#define MARKOVITZ
|
||||||
|
|
||||||
|
@ -101,11 +61,7 @@ class dynSparseMatrix : public Evaluate
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
dynSparseMatrix();
|
dynSparseMatrix();
|
||||||
dynSparseMatrix(const int y_size_arg, const int y_kmin_arg, const int y_kmax_arg, const bool print_it_arg, const bool steady_state_arg, const int periods_arg, const int minimal_solving_periods_arg, const double slowc_arg
|
dynSparseMatrix(const int y_size_arg, const int y_kmin_arg, const int y_kmax_arg, const bool print_it_arg, const bool steady_state_arg, const int periods_arg, const int minimal_solving_periods_arg, const double slowc_arg);
|
||||||
#ifdef CUDA
|
|
||||||
, const int CUDA_device_arg, cublasHandle_t cublas_handle_arg, cusparseHandle_t cusparse_handle_arg, cusparseMatDescr_t descr_arg
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
void Simulate_Newton_Two_Boundaries(int blck, int y_size, int y_kmin, int y_kmax, int Size, int periods, bool cvg, int minimal_solving_periods, int stack_solve_algo, unsigned int endo_name_length, char *P_endo_names, vector_table_conditional_local_type vector_table_conditional_local);
|
void Simulate_Newton_Two_Boundaries(int blck, int y_size, int y_kmin, int y_kmax, int Size, int periods, bool cvg, int minimal_solving_periods, int stack_solve_algo, unsigned int endo_name_length, char *P_endo_names, vector_table_conditional_local_type vector_table_conditional_local);
|
||||||
void Simulate_Newton_One_Boundary(bool forward);
|
void Simulate_Newton_One_Boundary(bool forward);
|
||||||
void fixe_u(double **u, int u_count_int, int max_lag_plus_max_lead_plus_1);
|
void fixe_u(double **u, int u_count_int, int max_lag_plus_max_lead_plus_1);
|
||||||
|
@ -123,12 +79,8 @@ private:
|
||||||
void Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM);
|
void Init_GE(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM);
|
||||||
void Init_Matlab_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, mxArray *x0_m);
|
void Init_Matlab_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, mxArray *x0_m);
|
||||||
void Init_UMFPACK_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, mxArray *x0_m, vector_table_conditional_local_type vector_table_conditional_local, int block_num);
|
void Init_UMFPACK_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, mxArray *x0_m, vector_table_conditional_local_type vector_table_conditional_local, int block_num);
|
||||||
#ifdef CUDA
|
|
||||||
void Init_CUDA_Sparse(int periods, int y_kmin, int y_kmax, int Size, map<pair<pair<int, int>, int>, int> &IM, int **Ap, int **Ai, double **Ax, int **Ap_tild, int **Ai_tild, double **A_tild, double **b, double **x0, mxArray *x0_m, int *nnz, int *nnz_tild, int preconditioner);
|
|
||||||
#endif
|
|
||||||
void Init_Matlab_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, bool &zero_solution, mxArray *x0_m);
|
void Init_Matlab_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, mxArray *A_m, mxArray *b_m, bool &zero_solution, mxArray *x0_m);
|
||||||
void Init_UMFPACK_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, bool &zero_solution, mxArray *x0_m);
|
void Init_UMFPACK_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, bool &zero_solution, mxArray *x0_m);
|
||||||
void Init_CUDA_Sparse_Simple(int Size, map<pair<pair<int, int>, int>, int> &IM, SuiteSparse_long **Ap, SuiteSparse_long **Ai, double **Ax, double **b, double **x0, bool &zero_solution, mxArray *x0_m);
|
|
||||||
void Simple_Init(int Size, std::map<std::pair<std::pair<int, int>, int>, int> &IM, bool &zero_solution);
|
void Simple_Init(int Size, std::map<std::pair<std::pair<int, int>, int>, int> &IM, bool &zero_solution);
|
||||||
void End_GE(int Size);
|
void End_GE(int Size);
|
||||||
bool mnbrak(double *ax, double *bx, double *cx, double *fa, double *fb, double *fc);
|
bool mnbrak(double *ax, double *bx, double *cx, double *fa, double *fb, double *fc);
|
||||||
|
@ -145,13 +97,6 @@ private:
|
||||||
void Solve_LU_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n, int Size, double slowc_l, bool is_two_boundaries, int it_);
|
void Solve_LU_UMFPack(SuiteSparse_long *Ap, SuiteSparse_long *Ai, double *Ax, double *b, int n, int Size, double slowc_l, bool is_two_boundaries, int it_);
|
||||||
|
|
||||||
void End_Matlab_LU_UMFPack();
|
void End_Matlab_LU_UMFPack();
|
||||||
#ifdef CUDA
|
|
||||||
void Solve_CUDA_BiCGStab_Free(double *tmp_vect_host, double *p, double *r, double *v, double *s, double *t, double *y_, double *z, double *tmp_,
|
|
||||||
int *Ai, double *Ax, int *Ap, double *x0, double *b, double *A_tild, int *A_tild_i, int *A_tild_p,
|
|
||||||
cusparseSolveAnalysisInfo_t infoL, cusparseSolveAnalysisInfo_t infoU,
|
|
||||||
cusparseMatDescr_t descrL, cusparseMatDescr_t descrU, int preconditioner);
|
|
||||||
int Solve_CUDA_BiCGStab(int *Ap, int *Ai, double *Ax, int *Ap_tild, int *Ai_tild, double *A_tild, double *b, double *x0, int n, int Size, double slowc_l, bool is_two_boundaries, int it_, int nnz, int nnz_tild, int preconditioner, int max_iterations, int block);
|
|
||||||
#endif
|
|
||||||
void Solve_Matlab_GMRES(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m);
|
void Solve_Matlab_GMRES(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m);
|
||||||
void Solve_Matlab_BiCGStab(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m, int precond);
|
void Solve_Matlab_BiCGStab(mxArray *A_m, mxArray *b_m, int Size, double slowc, int block, bool is_two_boundaries, int it_, mxArray *x0_m, int precond);
|
||||||
void Check_and_Correct_Previous_Iteration(int block_num, int y_size, int size, double crit_opt_old);
|
void Check_and_Correct_Previous_Iteration(int block_num, int y_size, int size, double crit_opt_old);
|
||||||
|
@ -196,12 +141,6 @@ private:
|
||||||
mxArray *Sparse_substract_SA_SB(mxArray *A_m, mxArray *B_m);
|
mxArray *Sparse_substract_SA_SB(mxArray *A_m, mxArray *B_m);
|
||||||
mxArray *Sparse_substract_A_SB(mxArray *A_m, mxArray *B_m);
|
mxArray *Sparse_substract_A_SB(mxArray *A_m, mxArray *B_m);
|
||||||
mxArray *substract_A_B(mxArray *A_m, mxArray *B_m);
|
mxArray *substract_A_B(mxArray *A_m, mxArray *B_m);
|
||||||
#ifdef CUDA
|
|
||||||
int CUDA_device;
|
|
||||||
cublasHandle_t cublas_handle;
|
|
||||||
cusparseHandle_t cusparse_handle;
|
|
||||||
cusparseMatDescr_t CUDA_descr;
|
|
||||||
#endif
|
|
||||||
protected:
|
protected:
|
||||||
stack<double> Stack;
|
stack<double> Stack;
|
||||||
int nb_prologue_table_u, nb_first_table_u, nb_middle_table_u, nb_last_table_u;
|
int nb_prologue_table_u, nb_first_table_u, nb_middle_table_u, nb_last_table_u;
|
||||||
|
|
|
@ -1,121 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2007-2012 Dynare Team
|
|
||||||
*
|
|
||||||
* This file is part of Dynare.
|
|
||||||
*
|
|
||||||
* Dynare is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* Dynare is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with Dynare. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef SPARMATRIX_KERNEL
|
|
||||||
#define SPARMATRIX_KERNEL
|
|
||||||
|
|
||||||
// Kernel definition of vector division
|
|
||||||
__global__ void
|
|
||||||
VecDiv(double* A, double* B, double* C, int n)
|
|
||||||
{
|
|
||||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
|
||||||
if (i < n)
|
|
||||||
C[i] = (B[i] != 0.0 ? A[i] / B[i] : A[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
VecAdd(double* res, double* r, double alpha, double* x, int n)
|
|
||||||
{
|
|
||||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
|
||||||
if (i < n)
|
|
||||||
res[i] = r[i] + alpha * x[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
VecInc(double* res, double alpha, double* x, int n)
|
|
||||||
{
|
|
||||||
int i = blockIdx.x * 1024 + threadIdx.x;
|
|
||||||
if (i < n)
|
|
||||||
res[i] += alpha * x[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
update_x(double* x, double alpha, double* y, double omega, double *z)
|
|
||||||
{
|
|
||||||
int i = threadIdx.x;
|
|
||||||
x[i] += alpha * y[i] + omega * z[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
Get_LU_dim(int *n, int* A_tild_i, int *A_tild_p, int *nnz_l, int *nnz_u)
|
|
||||||
{
|
|
||||||
nnz_u[0] = 0;
|
|
||||||
nnz_l[0] = 0;
|
|
||||||
for (int i = 0; i < n[0]; i++)
|
|
||||||
{
|
|
||||||
for (int j = A_tild_p[i]; j < A_tild_p[i+1]; j++)
|
|
||||||
{
|
|
||||||
if (A_tild_i[j] < i)
|
|
||||||
nnz_l[0]++;
|
|
||||||
else if (A_tild_i[j] == i)
|
|
||||||
{
|
|
||||||
nnz_u[0]++;
|
|
||||||
//nnz_l[0]++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nnz_u[0]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
Get_LU1_dim(int* n, int *nnz_l, int *nnz_u)
|
|
||||||
{
|
|
||||||
nnz_u[0] = 3+n[0];
|
|
||||||
nnz_l[0] = 1+n[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void
|
|
||||||
Get_L_and_U(int *n, double* A_tild_x, int* A_tild_i, int *A_tild_p, double* Lx, int* Li, int *Lp, double* Ux, int* Ui, int* Up)
|
|
||||||
{
|
|
||||||
int nnz_u = 0, nnz_l = 0;
|
|
||||||
Lp[0] = 0;
|
|
||||||
Up[0] = 0;
|
|
||||||
for (int i = 0; i < n[0]; i++)
|
|
||||||
{
|
|
||||||
for (int j = A_tild_p[i]; j < A_tild_p[i+1]; j++)
|
|
||||||
{
|
|
||||||
if (A_tild_i[j] < i)
|
|
||||||
{
|
|
||||||
Lx[nnz_l] = A_tild_x[j];
|
|
||||||
Li[nnz_l] = A_tild_i[j];
|
|
||||||
nnz_l++;
|
|
||||||
}
|
|
||||||
else if (A_tild_i[j] == i)
|
|
||||||
{
|
|
||||||
Ux[nnz_u] = A_tild_x[j];
|
|
||||||
Lx[nnz_l] = 1.0;
|
|
||||||
Li[nnz_l] = Ui[nnz_u] = A_tild_i[j];
|
|
||||||
nnz_u++;
|
|
||||||
//nnz_l++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Ux[nnz_u] = A_tild_x[j];
|
|
||||||
Ui[nnz_u] = A_tild_i[j];
|
|
||||||
nnz_u++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Lp[i+1] = nnz_l;
|
|
||||||
Up[i+1] = nnz_u;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
|
@ -57,164 +57,6 @@ Get_Argument(const mxArray *prhs)
|
||||||
//#include <windows.h>
|
//#include <windows.h>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
#ifdef CUDA
|
|
||||||
int
|
|
||||||
GPU_Test_and_Info(cublasHandle_t *cublas_handle, cusparseHandle_t *cusparse_handle, cusparseMatDescr_t *descr)
|
|
||||||
{
|
|
||||||
cudaDeviceProp deviceProp;
|
|
||||||
int device_count, device, version, version_max = 0;
|
|
||||||
cublasStatus_t cublas_status;
|
|
||||||
cudaError_t cuda_error;
|
|
||||||
*descr = 0;
|
|
||||||
|
|
||||||
/* ask cuda how many devices it can find */
|
|
||||||
cudaGetDeviceCount(&device_count);
|
|
||||||
if (device_count < 1)
|
|
||||||
{
|
|
||||||
/* if it couldn't find any fail out */
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " Unable to find a CUDA device. Unable to implement CUDA solvers\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
mexPrintf("-----------------------------------------\n");
|
|
||||||
for (int i = 0; i < device_count; i++)
|
|
||||||
{
|
|
||||||
cudaSetDevice(i);
|
|
||||||
// Statistics about the GPU device
|
|
||||||
cuda_error = cudaGetDeviceProperties(&deviceProp, i);
|
|
||||||
if (cuda_error != cudaSuccess)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " bytecode cudaGetDeviceProperties failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
mexPrintf("> GPU device %d: \"%s\" has:\n - %d Multi-Processors,\n - %d threads per multiprocessor,\n", i, deviceProp.name, deviceProp.multiProcessorCount, deviceProp.maxThreadsPerMultiProcessor);
|
|
||||||
mexEvalString("drawnow;");
|
|
||||||
version = (deviceProp.major * 0x10 + deviceProp.minor);
|
|
||||||
if (version >= version_max)
|
|
||||||
{
|
|
||||||
device = i;
|
|
||||||
version_max = version;
|
|
||||||
}
|
|
||||||
mexPrintf(" - %4.2fMhz clock rate,\n - %2.0fMb of memory,\n - %d.%d compute capabilities.\n", double (deviceProp.clockRate) / (1024 * 1024), double (deviceProp.totalGlobalMem) / (1024 * 1024), deviceProp.major, deviceProp.minor);
|
|
||||||
mexEvalString("drawnow;");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mexPrintf("> Device %d selected\n", device);
|
|
||||||
mexEvalString("drawnow;");
|
|
||||||
|
|
||||||
cuda_error = cudaSetDevice(device);
|
|
||||||
if (cuda_error != cudaSuccess)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " bytecode cudaSetDevice failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (version_max < 0x11)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " bytecode requires a minimum CUDA compute 1.1 capability\n";
|
|
||||||
cudaDeviceReset();
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize CuBlas library
|
|
||||||
cublas_status = cublasCreate(cublas_handle);
|
|
||||||
if (cublas_status != CUBLAS_STATUS_SUCCESS)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
switch (cublas_status)
|
|
||||||
{
|
|
||||||
case CUBLAS_STATUS_NOT_INITIALIZED:
|
|
||||||
tmp << " the CUBLAS initialization failed.\n";
|
|
||||||
break;
|
|
||||||
case CUBLAS_STATUS_ALLOC_FAILED:
|
|
||||||
tmp << " the resources could not be allocated.\n";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
tmp << " unknown error during the initialization of cusparse library.\n";
|
|
||||||
}
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize the CuSparse library
|
|
||||||
cusparseStatus_t cusparse_status;
|
|
||||||
cusparse_status = cusparseCreate(cusparse_handle);
|
|
||||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
switch (cusparse_status)
|
|
||||||
{
|
|
||||||
case CUSPARSE_STATUS_NOT_INITIALIZED:
|
|
||||||
tmp << " the CUDA Runtime initialization failed.\n";
|
|
||||||
break;
|
|
||||||
case CUSPARSE_STATUS_ALLOC_FAILED:
|
|
||||||
tmp << " the resources could not be allocated.\n";
|
|
||||||
break;
|
|
||||||
case CUSPARSE_STATUS_ARCH_MISMATCH:
|
|
||||||
tmp << " the device compute capability (CC) is less than 1.1. The CC of at least 1.1 is required.\n";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
tmp << " unknown error during the initialization of cusparse library.\n";
|
|
||||||
}
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create and setup matrix descriptor
|
|
||||||
cusparse_status = cusparseCreateMatDescr(descr);
|
|
||||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " Matrix descriptor initialization failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
cusparseSetMatType(*descr, CUSPARSE_MATRIX_TYPE_GENERAL);
|
|
||||||
cusparseSetMatIndexBase(*descr, CUSPARSE_INDEX_BASE_ZERO);
|
|
||||||
|
|
||||||
mexPrintf("> Driver version:\n");
|
|
||||||
int cuda_version;
|
|
||||||
cuda_error = cudaDriverGetVersion(&cuda_version);
|
|
||||||
if (cuda_error != cudaSuccess)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " cudaGetVersion has failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
mexPrintf(" - CUDA version %5.3f\n", double (cuda_version) / 1000);
|
|
||||||
int cublas_version;
|
|
||||||
cublas_status = cublasGetVersion(*cublas_handle, &cublas_version);
|
|
||||||
if (cublas_status != CUBLAS_STATUS_SUCCESS)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " cublasGetVersion has failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
mexPrintf(" - CUBLAS version %5.3f\n", double (cublas_version) / 1000);
|
|
||||||
int cusparse_version;
|
|
||||||
cusparse_status = cusparseGetVersion(*cusparse_handle, &cusparse_version);
|
|
||||||
if (cusparse_status != CUSPARSE_STATUS_SUCCESS)
|
|
||||||
{
|
|
||||||
ostringstream tmp;
|
|
||||||
tmp << " cusparseGetVersion has failed\n";
|
|
||||||
throw FatalExceptionHandling(tmp.str());
|
|
||||||
}
|
|
||||||
mexPrintf(" - CUSPARSE version %5.3f\n", double (cusparse_version) / 1000);
|
|
||||||
mexPrintf("-----------------------------------------\n");
|
|
||||||
return device;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
GPU_close(cublasHandle_t cublas_handle, cusparseHandle_t cusparse_handle, cusparseMatDescr_t descr)
|
|
||||||
{
|
|
||||||
cublasChk(cublasDestroy(cublas_handle), "in bytecode cublasDestroy failed\n");
|
|
||||||
cusparseChk(cusparseDestroyMatDescr(descr), "in bytecode cusparseDestroyMatDescr failed\n");
|
|
||||||
cusparseChk(cusparseDestroy(cusparse_handle), "in bytecode cusparseDestroy failed\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
string
|
string
|
||||||
deblank(string x)
|
deblank(string x)
|
||||||
{
|
{
|
||||||
|
@ -437,12 +279,6 @@ main(int nrhs, const char *prhs[])
|
||||||
|
|
||||||
int max_periods = 0;
|
int max_periods = 0;
|
||||||
|
|
||||||
#ifdef CUDA
|
|
||||||
int CUDA_device = -1;
|
|
||||||
cublasHandle_t cublas_handle;
|
|
||||||
cusparseHandle_t cusparse_handle;
|
|
||||||
cusparseMatDescr_t descr;
|
|
||||||
#endif
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
Get_Arguments_and_global_variables(nrhs, prhs, count_array_argument,
|
Get_Arguments_and_global_variables(nrhs, prhs, count_array_argument,
|
||||||
|
@ -1005,20 +841,9 @@ main(int nrhs, const char *prhs[])
|
||||||
mexWarnMsgTxt("Not enough space. Filename is truncated.");
|
mexWarnMsgTxt("Not enough space. Filename is truncated.");
|
||||||
string file_name = fname;
|
string file_name = fname;
|
||||||
|
|
||||||
#ifdef CUDA
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (stack_solve_algo == 7 && !steady_state)
|
if (stack_solve_algo == 7 && !steady_state)
|
||||||
CUDA_device = GPU_Test_and_Info(&cublas_handle, &cusparse_handle, &descr);
|
mexErrMsgTxt("Bytecode: Can't use option stack_solve_algo=7\n");
|
||||||
}
|
|
||||||
catch (GeneralExceptionHandling &feh)
|
|
||||||
{
|
|
||||||
mexErrMsgTxt(feh.GetErrorMsg().c_str());
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (stack_solve_algo == 7 && !steady_state)
|
|
||||||
mexErrMsgTxt("bytecode has not been compiled with CUDA option. Bytecode Can't use options_.stack_solve_algo=7\n");
|
|
||||||
#endif
|
|
||||||
size_t size_of_direction = col_y*row_y*sizeof(double);
|
size_t size_of_direction = col_y*row_y*sizeof(double);
|
||||||
auto *y = static_cast<double *>(mxMalloc(size_of_direction));
|
auto *y = static_cast<double *>(mxMalloc(size_of_direction));
|
||||||
error_msg.test_mxMalloc(y, __LINE__, __FILE__, __func__, size_of_direction);
|
error_msg.test_mxMalloc(y, __LINE__, __FILE__, __func__, size_of_direction);
|
||||||
|
@ -1045,11 +870,7 @@ main(int nrhs, const char *prhs[])
|
||||||
clock_t t0 = clock();
|
clock_t t0 = clock();
|
||||||
Interpreter interprete(params, y, ya, x, steady_yd, steady_xd, direction, y_size, nb_row_x, nb_row_xd, periods, y_kmin, y_kmax, maxit_, solve_tolf, size_of_direction, slowc, y_decal,
|
Interpreter interprete(params, y, ya, x, steady_yd, steady_xd, direction, y_size, nb_row_x, nb_row_xd, periods, y_kmin, y_kmax, maxit_, solve_tolf, size_of_direction, slowc, y_decal,
|
||||||
markowitz_c, file_name, minimal_solving_periods, stack_solve_algo, solve_algo, global_temporary_terms, print, print_error, GlobalTemporaryTerms, steady_state,
|
markowitz_c, file_name, minimal_solving_periods, stack_solve_algo, solve_algo, global_temporary_terms, print, print_error, GlobalTemporaryTerms, steady_state,
|
||||||
print_it, col_x, col_y
|
print_it, col_x, col_y);
|
||||||
#ifdef CUDA
|
|
||||||
, CUDA_device, cublas_handle, cusparse_handle, descr
|
|
||||||
#endif
|
|
||||||
);
|
|
||||||
string f(fname);
|
string f(fname);
|
||||||
mxFree(fname);
|
mxFree(fname);
|
||||||
int nb_blocks = 0;
|
int nb_blocks = 0;
|
||||||
|
@ -1078,11 +899,6 @@ main(int nrhs, const char *prhs[])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CUDA
|
|
||||||
if (stack_solve_algo == 7 && !steady_state)
|
|
||||||
GPU_close(cublas_handle, cusparse_handle, descr);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
clock_t t1 = clock();
|
clock_t t1 = clock();
|
||||||
if (!steady_state && !evaluate && print)
|
if (!steady_state && !evaluate && print)
|
||||||
mexPrintf("Simulation Time=%f milliseconds\n", 1000.0*(double (t1)-double (t0))/double (CLOCKS_PER_SEC));
|
mexPrintf("Simulation Time=%f milliseconds\n", 1000.0*(double (t1)-double (t0))/double (CLOCKS_PER_SEC));
|
||||||
|
|
Loading…
Reference in New Issue