diff --git a/matlab/swz/c-code/utilities/DWCcode/matrix/blas_lapack.h b/matlab/swz/c-code/utilities/DWCcode/matrix/blas_lapack.h index d3998606d..0c77a0e14 100644 --- a/matlab/swz/c-code/utilities/DWCcode/matrix/blas_lapack.h +++ b/matlab/swz/c-code/utilities/DWCcode/matrix/blas_lapack.h @@ -1,7 +1,16 @@ +#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE) +#include +#include +#include +#else + #ifndef __BLAS_LAPACK__ #define __BLAS_LAPACK__ +typedef int lapack_int; +typedef int blas_int; + #ifdef __cplusplus extern "C" { @@ -41,6 +50,11 @@ extern "C" #define dgeev dgeev_ #define dpotrf dpotrf_ #define dpotri dpotri_ +#define dtrtri dtrtri_ +#define dgetri dgetri_ +#define dgeqp3 dgeqp3_ +#define dormqr dormqr_ +#define dgesv dgesv_ /*******************************************************************************/ @@ -81,6 +95,11 @@ void dsyev(char*,char*,int*,double*,int*,double*,double*,int*,int*); void dgeev(char*,char*,int*,double*,int*,double*,double*,double*,int*,double*,int*,double*,int*,int*); void dpotrf(char*,int*,double*,int*,int*); void dpotri(char*,int*,double*,int*,int*); +void dgeqp3(int*,int*,double*,int*,int*,double*,double*,int*,int*); +void dtrtri(char*,char*,int*,double*,int*,int*); +void dgetri(int*,double*,int*,int*,double*,int*,int*); +void dormqr(char*,char*,int*,int*,int*,double*,int*,double*,double*,int*,double*,int*,int*); +void dgesv(int*,int*,double*,int*,int*,double*,int*,int*); /*******************************************************************************/ #ifdef __cplusplus @@ -88,3 +107,5 @@ void dpotri(char*,int*,double*,int*,int*); #endif #endif + +#endif diff --git a/matlab/swz/c-code/utilities/DWCcode/matrix/bmatrix_blas_lapack.c b/matlab/swz/c-code/utilities/DWCcode/matrix/bmatrix_blas_lapack.c index 9ef981562..8e697a2ca 100644 --- a/matlab/swz/c-code/utilities/DWCcode/matrix/bmatrix_blas_lapack.c +++ b/matlab/swz/c-code/utilities/DWCcode/matrix/bmatrix_blas_lapack.c @@ -207,11 +207,12 @@ int bSubtract(PRECISION *x, PRECISION *y, PRECISION *z, int n) */ int bLinearUpdateScalar(PRECISION *x, PRECISION *y, PRECISION a, int m) { - int inc=1; + blas_int inc = 1; + blas_int m2 = m; #if (PRECISION_SIZE == 4) - saxpy(&m,&a,y,&inc,x,&inc); + saxpy(&m2,&a,y,&inc,x,&inc); #else - daxpy(&m,&a,y,&inc,x,&inc); + daxpy(&m2,&a,y,&inc,x,&inc); #endif return NO_ERR; } @@ -392,33 +393,38 @@ int bMultiply(PRECISION *x, PRECISION *y, PRECISION s, int n) int bMatrixMultiply(PRECISION *x, PRECISION *y, PRECISION *z, int m, int n, int p, int xt, int yt, int zt) { char transy, transz; - int dy, dz; + blas_int dy, dz; PRECISION beta=0.0, alpha=1.0; + + blas_int m2 = m; + blas_int n2 = n; + blas_int p2 = p; + #if PRECISION_SIZE == 4 if (xt) { if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;} if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;} - sgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m); + sgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2); } else { if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;} if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;} - sgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n); + sgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2); } #else if (xt) { if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;} if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;} - dgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m); + dgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2); } else { if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;} if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;} - dgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n); + dgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2); } #endif return NO_ERR; @@ -499,21 +505,40 @@ int bLU(int *p, PRECISION *x, int m, int n, int xt) #endif PRECISION *y; - int i, info; + int i; + + lapack_int m2 = m; + lapack_int n2 = n; + lapack_int info; + lapack_int *p2; + + int minmn = (m < n) ? m : n; + + if(!(p2 = (lapack_int *)calloc(minmn, sizeof(lapack_int)))) + return MEM_ERR; + + for(i=0; i= 0; i--) p[i]--; return (info < 0) ? SING_ERR : NO_ERR; @@ -930,9 +955,11 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int #endif char jobu, jobv; - int qu, qv, k, info, err=NO_ERR; + int qu, qv, err=NO_ERR; PRECISION *A_, *U_, *V_, *work, opt_size; + lapack_int k, m2, n2, qv2, info; + if (!(A_=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR; if (at) memcpy(A_,A,m*n*sizeof(PRECISION)); @@ -998,7 +1025,10 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int // compute singular value decomposition k=-1; - gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,&opt_size,&k,&info); + m2 = m; + n2 = n; + qv2 = qv; + gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,&opt_size,&k,&info); if (info) err=BLAS_LAPACK_ERR; else @@ -1006,7 +1036,7 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int err=MEM_ERR; else { - gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,work,&k,&info); + gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,work,&k,&info); free(work); if (info) err=BLAS_LAPACK_ERR; @@ -1231,8 +1261,11 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i #endif char jobz='A'; - int k, *iwork, info; + int *iwork; PRECISION *X, *work, opt_size; + + lapack_int m2, n2, k, info; + if (!(X=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR; memcpy(X,A,m*n*sizeof(PRECISION)); if (!(iwork=(int*)malloc(8*((m < n) ? m : n)*sizeof(int)))) @@ -1241,18 +1274,20 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i return MEM_ERR; } k=-1; + m2 = m; + n2 = n; if (at) { memcpy(X,A,m*n*sizeof(PRECISION)); k=-1; - gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,&opt_size,&k,&info); + gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,&opt_size,&k,&info); if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION)))) { free(iwork); free(X); return info ? BLAS_LAPACK_ERR : MEM_ERR; } - gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,work,&k,&info); + gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,work,&k,&info); if (info) { free(work); @@ -1269,14 +1304,14 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i { memcpy(X,A,m*n*sizeof(PRECISION)); k=-1; - gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,&opt_size,&k,&info); + gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,&opt_size,&k,&info); if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION)))) { free(iwork); free(X); return info ? BLAS_LAPACK_ERR : MEM_ERR; } - gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,work,&k,&info); + gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,work,&k,&info); if (info) { free(work); @@ -1443,14 +1478,18 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i #define orglq dorglq #endif - int i, j, k, l, lwork, info, p=(m < n) ? m : n; + int i, j, k, l, p=(m < n) ? m : n; PRECISION *tau, *work, *ptr, opt_size; + + lapack_int m2, n2, p2, q2, lwork, info; + if (!(tau=(PRECISION*)malloc(p*sizeof(PRECISION)))) return MEM_ERR; if (xt) { lwork=-1; - - geqrf(&m,&n,X,&m,tau,&opt_size,&lwork,&info); + m2 = m; + n2 = n; + geqrf(&m2,&n2,X,&m2,tau,&opt_size,&lwork,&info); if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION)))) { @@ -1458,7 +1497,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i return MEM_ERR; } - geqrf(&m,&n,X,&m,tau,work,&lwork,&info); + geqrf(&m2,&n2,X,&m2,tau,work,&lwork,&info); free(work); if (info) @@ -1479,7 +1518,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i memcpy(ptr,X,m*p*sizeof(PRECISION)); lwork=-1; - orgqr(&m,&q,&p,ptr,&m,tau,&opt_size,&lwork,&info); + p2 = p; + q2 = q; + orgqr(&m2,&q2,&p2,ptr,&m2,tau,&opt_size,&lwork,&info); if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION)))) { @@ -1488,7 +1529,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i return MEM_ERR; } - orgqr(&m,&q,&p,ptr,&m,tau,work,&lwork,&info); + orgqr(&m2,&q2,&p2,ptr,&m2,tau,work,&lwork,&info); free(work); if (!qt) @@ -1523,8 +1564,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i else { lwork=-1; - - gelqf(&n,&m,X,&n,tau,&opt_size,&lwork,&info); + m2 = m; + n2 = n; + gelqf(&n2,&m2,X,&n2,tau,&opt_size,&lwork,&info); if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION)))) { @@ -1532,7 +1574,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i return MEM_ERR; } - gelqf(&n,&m,X,&n,tau,work,&lwork,&info); + gelqf(&n2,&m2,X,&n2,tau,work,&lwork,&info); free(work); if (info) @@ -1563,7 +1605,10 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i ptr[--k]=X[--l]; lwork=-1; - orglq(&q,&m,&p,ptr,&q,tau,&opt_size,&lwork,&info); + m2 = m; + p2 = p; + q2 = q; + orglq(&q2,&m2,&p2,ptr,&q2,tau,&opt_size,&lwork,&info); if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION)))) { @@ -1572,7 +1617,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i return MEM_ERR; } - orglq(&q,&m,&p,ptr,&q,tau,work,&lwork,&info); + orglq(&q2,&m2,&p2,ptr,&q2,tau,work,&lwork,&info); free(work); if (qt) @@ -1666,9 +1711,11 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION * #endif char jobvsl, jobvsr, sort='N'; - int lwork, simd, info, rtrn; + int rtrn; PRECISION *work, size, *palpha_r, *palpha_i, *pbeta; + lapack_int n2, simd, lwork, info; + jobvsl=Q ? 'V' : 'N'; jobvsr=Z ? 'V' : 'N'; palpha_r=alpha_r ? alpha_r : (PRECISION*)malloc(n*sizeof(PRECISION)); @@ -1694,13 +1741,14 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION * if (!bt) bTransposeInPlace(B,n); lwork=-1; - gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,&size,&lwork,(void*)NULL,&info); + n2 = n; + gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,&size,&lwork,(void*)NULL,&info); if (!info) if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION)))) rtrn=MEM_ERR; else { - gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,work,&lwork,(void*)NULL,&info); + gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,work,&lwork,(void*)NULL,&info); if (!info) { if (Q && !qt) bTransposeInPlace(Q,n); @@ -1784,9 +1832,19 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR #define tgsen dtgsen #endif - int ijob=0, wantq, wantz, lwork, liwork=1, m=n, info, rtrn, iwork; + int wantq, wantz, m=n, rtrn, i; PRECISION size, *palpha_r, *palpha_i, *pbeta, *work; + lapack_int ijob, wantq2, wantz2, *select2, n2, m2, lwork, iwork, liwork, info; + ijob = 0; + liwork=1; + + if(!(select2 = (lapack_int *)calloc(n, sizeof(lapack_int)))) + return MEM_ERR; + + for(i=0; inrows) != lu_dm->nrows) || ( (ncols=x_dm->ncols) != lu_dm->ncols) ) @@ -46,16 +48,21 @@ int lurgen(TSdmatrix *lu_dm, TSivector *pivot_dv, TSdmatrix *x_dm) { lu_dm->flag = M_UT; //To make the lower part of lu_dm available, one must create another matrix and explicitly make it a unit lower matrix. //=== Calling the MKL function. - if (!pivot_dv) { - pivot_p = tzMalloc(mindim, int); - dgetrf_(&nrows, &ncols, LU, &nrows, pivot_p, &errflag); - free(pivot_p); //Frees the memory belonging to this function. - } + nrows2 = nrows; + ncols2 = ncols; + errflag2 = errflag; + pivot_p = tzMalloc(mindim, lapack_int); + if (!pivot_dv) + dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2); else { if ( pivot_dv->n != mindim) fn_DisplayError("Make sure the dimension of the input vector pivot_dv is the minimum number of row number and column number of the input matrix x_dm"); - dgetrf_(&nrows, &ncols, LU, &nrows, pivot_dv->v, &errflag); - } + dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2); + for(i=0; iv[i] = pivot_p[i]; + } + free(pivot_p); //Frees the memory belonging to this function. + errflag = errflag2; return( errflag ); //(1) If errflag = 0, success. (2) If errorflag = -i, the ith parameter has an illegal value. //(3) If errflag = i, u_{ii}=0.0. The factorization is completed, but U is exactly singular. Dividing @@ -85,6 +92,8 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm) double *tmpd0_m = NULL, *work_p = NULL; + lapack_int n2, lwork2, errflag2; + if ( !S_dm || !(S_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/eigrsym(): input matrix (1) must be created (memory-alloacted) and (2) must be symmetric (either M_SU or M_SL)"); if ( !eval_dv ) fn_DisplayError(".../mathlib.c/eigrsym(): input eigenvalue vector must be created (memory-allocated)"); lwork = (n1=_n=S_dm->nrows)*BLOCKSIZE_FOR_INTEL_MKL; @@ -99,7 +108,12 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm) // Obtains eigenvalues and, optionally, eigenvectors. //--------------------------- memcpy(tmpd0_m, S_dm->M, square(_n)*sizeof(double)); - dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n1, tmpd0_m, &n1, eval_dv->v, work_p, &lwork, &errflag); + + n2 = n1; + lwork2 = lwork; + errflag2 = errflag; + dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n2, tmpd0_m, &n2, eval_dv->v, work_p, &lwork2, &errflag2); + errflag = errflag2; if (work_p[0]>lwork) printf("Warning for /mathlib.c/eigrsym(): needs at least %d workspace for good performance " "but lwork is allocated with only %d space!\n", (int)work_p[0], lwork); eval_dv->flag = V_DEF; @@ -154,6 +168,8 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm, *revecr_m=NULL, *reveci_m=NULL, //NULL means that by default we dont' compute eigenvectors. *levecr_m=NULL, *leveci_m=NULL; + lapack_int n2, lwork2, errflag2; + //--------------------------- // Checking dimensions, etc. //--------------------------- @@ -197,8 +213,12 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm, //--------------------------- // Obtains eigenvalues and, optionally, eigenvectors. //--------------------------- - dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v, - levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag); + n2 = n1; + lwork2 = lwork; + errflag2 = errflag; + dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v, + levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2); + errflag = errflag2; vals_dzv->real->flag = V_DEF; //--------------------------- @@ -258,6 +278,7 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) { int errflag=2, loc, nrows, _m, _i, _j; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag. double *D, *S; + lapack_int _m2, errflag2; if ( !D_dm || !S_dm ) fn_DisplayError(".../mathlib.c/chol(): L and R input square matricies must be created (memory allocated)"); else { @@ -293,7 +314,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) { printf("\n ------- .../mathlib.c/chol(): R input square matrix must be symmetric!-------\n"); return (-6); } - dpotrf_("U", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_("U", &_m2, D, &_m2, &errflag2); + errflag = errflag2; break; case 'L': case 'l': if (S_dm->flag & M_SL) { @@ -320,7 +344,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) { return (-6); } //??????NOT tested yet. - dpotrf_("L", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_("L", &_m2, D, &_m2, &errflag2); + errflag = errflag2; break; default: fn_DisplayError(".../mathlib.c/chol(): Input ul must be either 'U' or 'L'"); @@ -328,12 +355,18 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) { } else { if ( (ul=='U' || ul=='u') && (D_dm->flag & M_SU) ) { - dpotrf_("U", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_("U", &_m2, D, &_m2, &errflag2); + errflag = errflag2; D_dm->flag = M_UT; } else if ( (ul=='L' || ul=='l') && (D_dm->flag & M_SL) ) { //Tested. It works! - dpotrf_("L", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_("L", &_m2, D, &_m2, &errflag2); + errflag = errflag2; D_dm->flag = M_LT; } else { @@ -370,6 +403,7 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un) int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag. double *X, *A; + lapack_int _n2, errflag2; if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrtri(): Both input matrices must be created (memory-allocated)"); else if ( !(A_dm->flag & (M_UT | M_LT)) ) fn_DisplayError(".../mathlib.c/invrtri(): (1) R input matrix A must be given legal values; (2) A must be a real triangular matrix, i.e., M_UT or M_LT"); @@ -381,14 +415,17 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un) if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) ) fn_DisplayError(".../mathlib.c/invrtri(): both input and output matrices (1) must be square and (2) must have the same dimension"); - + _n2 = _n; + errflag2 = errflag; if (X==A) { - dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag); + dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2); + errflag = errflag2; if (errflag) return (errflag); } else { memcpy(X, A, _n*_n*sizeof(double)); - dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag); + dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2); + errflag = errflag2; if (errflag) return (errflag); else X_dm->flag = A_dm->flag; } @@ -419,6 +456,7 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul) int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag. double *X, *A; + lapack_int _n2, errflag2; if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invspd(): Both input matrices must be created (memory-allocated)"); else if ( !(A_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/invspd(): (1) R input matrix A must be given legal values; (2) A must be symmetric, positive-definite, i.e., M_SU or M_SL"); @@ -433,10 +471,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul) if ( (_n != A_dm->ncols) ) fn_DisplayError(".../mathlib.c/invspd(): input matrix (1) must be square and (2) must have the same dimension"); //=== Choleski decomposition. - dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag); + _n2 = _n; + errflag2 = errflag; + dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2); + errflag = errflag2; if (errflag) return (errflag); //=== Takes inverse. - dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag); + dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2); + errflag = errflag2; A_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL; return (errflag); //--- @@ -448,10 +490,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul) fn_DisplayError(".../mathlib.c/invspd(): both input and output matrices (1) must be square and (2) must have the same dimension"); memcpy(X, A, _n*_n*sizeof(double)); //=== Choleski decomposition. - dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag); + _n2 = _n; + errflag2 = errflag; + dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2); + errflag = errflag2; if (errflag) return (errflag); //=== Takes inverse. - dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag); + dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2); + errflag = errflag2; X_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL; return (errflag); //--- @@ -478,17 +524,18 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm) //Inputs: // A: _n-by-_n real general matrix. int _n, errflag=2, //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag. - lwork, *ipivot; //Used when calling LAPACK. + lwork; //Used when calling LAPACK. double *X, *A, *work; //Used when calling LAPACK. + lapack_int _n2, errflag2, *ipivot, lwork2; if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrgen(): Both input matrices must be created (memory-allocated)"); else if ( !(A_dm->flag & M_GE) ) fn_DisplayError(".../mathlib.c/invrgen(): (1) R input matrix A must be given legal values; (2) A must be a general matrix, i.e., M_GE"); else { X = X_dm->M; A = A_dm->M; - ipivot = tzMalloc((_n=A_dm->nrows), int); + ipivot = tzMalloc((_n=A_dm->nrows), lapack_int); work = tzMalloc((lwork=_n*BLOCKSIZE_FOR_INTEL_MKL), double); } if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) ) @@ -496,14 +543,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm) if (X==A) { - dgetrf_(&_n, &_n, A, &_n, ipivot, &errflag); + _n2 = _n; + errflag2 = errflag; + dgetrf_(&_n2, &_n2, A, &_n2, ipivot, &errflag2); + errflag = errflag2; if (errflag) { // A_dm->flag = M_UNDEF; free(ipivot); free(work); return errflag; } - dgetri_(&_n, A, &_n, ipivot, work, &lwork, &errflag); + lwork2 = lwork; + dgetri_(&_n2, A, &_n2, ipivot, work, &lwork2, &errflag2); + errflag = errflag2; if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance " "but lwork is allocated with only %d space!\n", (int)work[0], lwork); if (errflag) { @@ -514,14 +566,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm) } else { memcpy(X, A, _n*_n*sizeof(double)); - dgetrf_(&_n, &_n, X, &_n, ipivot, &errflag); + _n2 = _n; + errflag2 = errflag; + dgetrf_(&_n2, &_n2, X, &_n2, ipivot, &errflag2); + errflag = errflag2; if (errflag) { // X_dm->flag = M_UNDEF; free(ipivot); free(work); return errflag; } - dgetri_(&_n, X, &_n, ipivot, work, &lwork, &errflag); + lwork2 = lwork; + dgetri_(&_n2, X, &_n2, ipivot, work, &lwork2, &errflag2); + errflag = errflag2; if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance " "but lwork is allocated with only %d space!\n", (int)work[0], lwork); if (errflag) { @@ -565,7 +622,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd // _r-by-_m real rectangular (rrect) matrix if / (XA=B). // lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed. - int _m, _n, _r, //mn_max, mn_min, + int _m, _n, _r, i, //mn_max, mn_min, lwork, _i, info = -2, *jpvt_p = NULL; double *A, *B, *X, @@ -574,6 +631,9 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd *tau_p = NULL, *work_p = NULL; + lapack_int _m2, _n2, _r2, lwork2, info2; + lapack_int *jpvt_p2 = NULL; + if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag &M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rrect(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)"); if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rrect(): output matrix X_dm must be created (allocated memory)"); if (lr=='/') { @@ -603,6 +663,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd // qrb_p = tzMalloc((mn_max = _m>_n?_m:_n)*_r, double); //DDDDebug: seems requiring _m>_n, but this may not be the case. qrb_p = tzMalloc(_m*_r, double); //Note that _m>=_n. jpvt_p = tzMalloc(_n, int); + jpvt_p2 = tzMalloc(_n, lapack_int); tau_p = tzMalloc(_n, double); // work_p = tzMalloc(lwork, double); @@ -622,7 +683,16 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd //=== Computes the QR factorization of a general m by n matrix with column pivoting using Level 3 BLAS. work_p = tzMalloc(lwork=_n*BLOCKSIZE_FOR_INTEL_MKL, double); - dgeqp3_(&_m,&_n,qra_p,&_m,jpvt_p,tau_p,work_p,&lwork,&info); + _m2 = _m; + _n2 = _n; + lwork2 = lwork; + info2 = info; + for(i=0; i<_n; i++) + jpvt_p2[i] = jpvt_p[i]; + dgeqp3_(&_m2,&_n2,qra_p,&_m2,jpvt_p2,tau_p,work_p,&lwork2,&info2); + info = info2; + for(i=0; i<_n; i++) + jpvt_p[i] = jpvt_p2[i]; if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dgeqp3_(), we need at least %d workspace for good performance " "but lwork is allocated with only %d space!\n", (int)work_p[0], lwork); tzDestroy(work_p); @@ -630,7 +700,13 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd //=== Multiplies a real matrix by the orthogonal matrix Q of the QR factorization formed by dgeqp3_. work_p = tzMalloc(lwork=_r*BLOCKSIZE_FOR_INTEL_MKL, double); - dormqr_("L","T",&_m,&_r,&_n,qra_p,&_m,tau_p,qrb_p,&_m,work_p,&lwork,&info); + _m2 = _m; + _n2 = _n; + _r2 = _r; + lwork2 = lwork; + info2 = info; + dormqr_("L","T",&_m2,&_r2,&_n2,qra_p,&_m2,tau_p,qrb_p,&_m2,work_p,&lwork2,&info2); + info = info2; if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dormqr_(), we need at least %d workspace for good performance " "but lwork is allocated with only %d space!\n", (int)work_p[0], lwork); //dormqr_("L","T",&_m,&_r,&mn_min,qra_p,&_m,tau_p,qrb_p,&mn_max,work_p,&lwork,&info); @@ -653,6 +729,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd tzDestroy(qra_p); tzDestroy(qrb_p); tzDestroy(jpvt_p); + tzDestroy(jpvt_p2); tzDestroy(tau_p); // tzDestroy(work_p); @@ -683,8 +760,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd // lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed. int _m, _r, m2, - _i, info = -2, - *ipiv_p = NULL; + _i, info = -2; double *A, *B, *X, *Atran_p = NULL, //Transpose of A if right division / takes place. *Btran_p = NULL, //Transpose of B if right division / takes place. @@ -692,6 +768,10 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd // *tau_p = NULL, // *work_p = NULL; + + lapack_int _mlap, _rlap, info2, *ipiv_p = NULL; + + if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag & M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rgens(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)"); if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rgens(): output matrix X_dm must be created (allocated memory)"); if (lr=='/') { @@ -717,7 +797,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd if (lr=='/') { //Right divistion /. //=== Memory allocation for this function only. - ipiv_p = tzMalloc(_m, int); + ipiv_p = tzMalloc(_m, lapack_int); Atran_p = tzMalloc(square(_m), double); Btran_p = tzMalloc(_m*_r, double); @@ -725,7 +805,11 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran. cblas_dcopy(_r, B+_i*_r, 1, Btran_p+_i, _m); //Copying the transpose of B (_r-by-_m) to Btran (_m-by-_r); } - dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, Btran_p, &_m, &info); + _mlap = _m; + _rlap = _r; + info2 = info; + dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, Btran_p, &_mlap, &info2); + info = info2; for (_i=0; _i<_r; _i++) cblas_dcopy(_m, Btran_p+_i*_m, 1, X+_i, _r); //Copying the transpose of Btran(_m-by-_r) to X (_r-by-_m); X_dm->flag = M_GE; @@ -739,12 +823,16 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd else { //Left division \. //=== Memory allocation for this function only. - ipiv_p = tzMalloc(_m, int); + ipiv_p = tzMalloc(_m, lapack_int); W = tzMalloc(m2=square(_m), double); memcpy(X, B, _m*_r*sizeof(double)); memcpy(W, A, m2*sizeof(double)); - dgesv_(&_m, &_r, W, &_m, ipiv_p, X, &_m, &info); + _mlap = _m; + _rlap = _r; + info2 = info; + dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, X, &_mlap, &info2); + info = info2; X_dm->flag = M_GE; //=== Destroyes memory allocated for this function only. @@ -778,12 +866,13 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd int _m, m2, _r = 1, - _i, info = -2, - *ipiv_p = NULL; + _i, info = -2; double *A, *b, *x, *Atran_p = NULL, //Transpose of A if right division / takes place. *W = NULL; //Duplicate copy of A when left division \ is used. This will be replaced by LU decomposition. + lapack_int _mlap, _rlap, info2, *ipiv_p = NULL; + if (!A_dm || !(A_dm->flag & M_GE) || !b_dv || !b_dv->flag) fn_DisplayError("mathlib.c/bdivA_rgens(): Both A_dm and b_dv must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)"); if (!x_dv) fn_DisplayError("mathlib.c/bdivA_rgens(): output vector x_dv must be created (allocated memory)"); if ( b_dv->n != x_dv->n ) fn_DisplayError("mathlib.c/bdivA_rgens(): The dim of b_dv and the dim of x_dv must exactly match"); @@ -797,13 +886,17 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd if (lr=='/') { //Right divistion /. //=== Memory allocation for this function only. - ipiv_p = tzMalloc(_m, int); + ipiv_p = tzMalloc(_m, lapack_int); Atran_p = tzMalloc(square(_m), double); for (_i=0; _i<_m; _i++) cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran. if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double)); - dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, x, &_m, &info); + _mlap = _m; + _rlap = _r; + info2 = info; + dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, x, &_mlap, &info2); + info = info2; x_dv->flag = V_DEF; //=== Destroyes memory allocated for this function only. @@ -815,12 +908,16 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd else { //Left division \. //=== Memory allocation for this function only. - ipiv_p = tzMalloc(_m, int); + ipiv_p = tzMalloc(_m, lapack_int); W = tzMalloc(m2=square(_m), double); if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double)); memcpy(W, A, m2*sizeof(double)); - dgesv_(&_m, &_r, W, &_m, ipiv_p, x, &_m, &info); + _mlap = _m; + _rlap = _r; + info2 = info; + dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, x, &_mlap, &info2); + info = info2; x_dv->flag = V_DEF; //=== Destroyes memory allocated for this function only. @@ -859,6 +956,8 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) { int errflag=2, nrows, nels; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag. double *A, *W=NULL, *x, *b; + lapack_int nrows2, errflag2; + if ( !A_dm || !b_dv || !x_dv ) fn_DisplayError(".../mathlib.c/Aldivb_spd(): All input matrices or vectors must be created (memory allocated)"); nrows = A_dm->nrows; nels = square(nrows); @@ -876,7 +975,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) { if (A_dm->flag & M_SU) { - dpotrf_("U", &nrows, W, &nrows, &errflag); //Choleski. U'*U = W where W will be replaced by upper triangular U. + nrows2 = nrows; + errflag2 = errflag; + dpotrf_("U", &nrows2, W, &nrows2, &errflag2); //Choleski. U'*U = W where W will be replaced by upper triangular U. + errflag = errflag2; if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values"); if (x==b) { //=== Solving for A*x=b. @@ -892,7 +994,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) { if ( (an!='N') && (an!='n') ) A_dm->flag = M_UT; } else if (A_dm->flag & M_SL) { //?????????? Not tested yet. - dpotrf_("L", &nrows, W, &nrows, &errflag); //Choleski. L*L' = W where W will be replaced by lower triangular L. + nrows2 = nrows; + errflag2 = errflag; + dpotrf_("L", &nrows2, W, &nrows2, &errflag2); //Choleski. L*L' = W where W will be replaced by lower triangular L. + errflag = errflag2; if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values"); if (x==b) { //=== Solving for A*x=b. @@ -1053,12 +1158,18 @@ int eigrsym_decomp(double *eval_v, double *evec_m, const double *s_m, const int double *tmpd0_m=tzMalloc(square(_n), double), *work_p=tzMalloc(lwork, double); + lapack_int n2, lwork2, errflag2; //--------------------------- // Obtains eigenvalues and, optionally, eigenvectors. //--------------------------- memcpy(tmpd0_m, s_m, square(_n)*sizeof(double)); - dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n1, tmpd0_m, &n1, eval_v, work_p, &lwork, &errflag); + + n2 = n1; + lwork2 = lwork; + errflag2 = errflag; + dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n2, tmpd0_m, &n2, eval_v, work_p, &lwork2, &errflag2); + errflag = errflag2; if (evec_m) memcpy(evec_m, tmpd0_m, square(_n)*sizeof(double)); @@ -1096,6 +1207,8 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r double *tmpd0_m=tzMalloc(square(_n), double), //@@Must be freed in this function.@@ *work_p=tzMalloc(lwork, double); //@@Must be freed in this function.@@ + lapack_int n2, lwork2, errflag2; + //--------------------------- // Starts with x_m -- the matrix to be decomposed. //--------------------------- @@ -1104,8 +1217,12 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r //--------------------------- // Obtains eigenvalues and, optionally, eigenvectors. //--------------------------- - dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v, - levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag); + n2 = n1; + lwork2 = lwork; + errflag2 = errflag; + dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v, + levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2); + errflag = errflag2; //--------------------------- // Frees the allocated memory. @@ -1164,6 +1281,8 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) { #ifdef INTELCMATHLIBRARY //Intel MKL Lapack dependent code. int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag. + lapack_int _m2, errflag2; + //=== Fills the triangular part that is used for Choleski decomposition. switch (ul) { @@ -1188,7 +1307,10 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) { return (-1); } //=== Choleski decomposition. - dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2); + errflag = errflag2; //--- // if (errflag<0) fn_DisplayError("Some element has an illegal value"); // else if (errflag>0) fn_DisplayError("The leadding minor of some order, hence the entire matrix, is not positive definite"); @@ -1227,6 +1349,8 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) { int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag. + lapack_int _m2, errflag2; + //=== Fills the triangular part that is used for Choleski decomposition. switch (ul) { case 'u': case 'U': @@ -1251,10 +1375,14 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) { return (-1); } //=== Choleski decomposition. - dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag); + _m2 = _m; + errflag2 = errflag; + dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2); + errflag = errflag2; if (errflag) return (errflag); //=== Takes inverse. - dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag); + dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2); + errflag = errflag2; return (errflag); //--- // if (errflag<0) fn_DisplayError("Some element has an illegal value"); @@ -2891,7 +3019,7 @@ void ScalarTimesColofMatrix(TSdvector *y_dv, double _alpha, TSdmatrix *X_dm, int y_dv->flag = V_DEF; } #else - Need to be tested for the following. + // Need to be tested for the following. // // M = X_dm->M + (_j+1)*(nrows=X_dm->nrows) - 1; //Points to the end of the jth column. // if (!y_dv) @@ -3896,7 +4024,7 @@ void CopySubrowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSdmatrix else fn_DisplayError(".../mathlib.c/CopySubrowmatrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm"); } #else - Havent got time to code up the default using Linux BLAS. +// Havent got time to code up the default using Linux BLAS. #endif @@ -3930,7 +4058,7 @@ void CopySubmatrix2rowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd else fn_DisplayError(".../mathlib.c/CopySubmatrix2rowmatrix(): the submatrix of x2_dm must be within the range of x2_dm and its transpose must be within the range of x1_dm"); } #else - Havent got time to code up the default using Linux BLAS. +// Havent got time to code up the default using Linux BLAS. #endif @@ -3965,7 +4093,7 @@ void CopySubrowmatrix2matrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd else fn_DisplayError(".../mathlib.c/CopySubrowmatrix2matrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm"); } #else - Havent got time to code up the default using Linux BLAS. +// Havent got time to code up the default using Linux BLAS. #endif @@ -4208,7 +4336,7 @@ void CopySubvector2rowmatrix(TSdmatrix *x1_dm, const int br, const int bc, const else fn_DisplayError(".../mathlib.c/CopySubvector2rowmatrix(): Copying (copied) elements are outside the (row) dimension of the copying vector x2_dv (the copied matrix x1_dm)"); } #else - Havent got time to code up the default using Linux BLAS. +// Havent got time to code up the default using Linux BLAS. #endif diff --git a/mex/sources/dynblas.h b/mex/sources/dynblas.h index aea257b6c..860eecc18 100644 --- a/mex/sources/dynblas.h +++ b/mex/sources/dynblas.h @@ -54,7 +54,9 @@ extern "C" { typedef const char *BLCHAR; typedef const blas_int *CONST_BLINT; typedef const double *CONST_BLDOU; + typedef const float *CONST_BLFLT; typedef double *BLDOU; + typedef float *BLFLT; #define dgemm FORTRAN_WRAPPER(dgemm) void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n, @@ -62,6 +64,12 @@ extern "C" { CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta, BLDOU c, CONST_BLINT ldc); +#define sgemm FORTRAN_WRAPPER(sgemm) + void sgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n, + CONST_BLINT k, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda, + CONST_BLFLT b, CONST_BLINT ldb, CONST_BLFLT beta, + BLFLT c, CONST_BLINT ldc); + #define dsymm FORTRAN_WRAPPER(dsymm) void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda, @@ -85,6 +93,10 @@ extern "C" { void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx, BLDOU y, CONST_BLINT incy); +#define saxpy FORTRAN_WRAPPER(saxpy) + void saxpy(CONST_BLINT n, CONST_BLFLT a, CONST_BLFLT x, CONST_BLINT incx, + BLFLT y, CONST_BLINT incy); + #define dcopy FORTRAN_WRAPPER(dcopy) void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx, BLDOU y, CONST_BLINT incy); @@ -96,6 +108,9 @@ extern "C" { #define dscal FORTRAN_WRAPPER(dscal) void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx); +#define sscal FORTRAN_WRAPPER(sscal) + void sscal(CONST_BLINT n, CONST_BLDOU a, BLFLT x, CONST_BLINT incx); + #define dtrsm FORTRAN_WRAPPER(dtrsm) void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m, CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda, diff --git a/mex/sources/dynlapack.h b/mex/sources/dynlapack.h index 6d782c941..6ea7215f2 100644 --- a/mex/sources/dynlapack.h +++ b/mex/sources/dynlapack.h @@ -57,6 +57,10 @@ extern "C" { typedef const double *CONST_LADOU; typedef double *LADOU; typedef lapack_int (*DGGESCRIT)(const double *, const double *, const double *); + typedef lapack_int (*SGGESCRIT)(const float *, const float *, const float *); + typedef float *LAFLT; + typedef const float *CONST_LAFLT; + typedef lapack_int *CONST_LALOG; //logical #define dgetrs FORTRAN_WRAPPER(dgetrs) void dgetrs(LACHAR trans, CONST_LAINT n, CONST_LAINT nrhs, CONST_LADOU a, CONST_LAINT lda, CONST_LAINT ipiv, @@ -66,6 +70,14 @@ extern "C" { void dgetrf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT ipiv, LAINT info); +#define dgetri FORTRAN_WRAPPER(dgetri) + void dgetri(CONST_LAINT n, LADOU a, CONST_LAINT lda, CONST_LAINT ipiv, LADOU work, + CONST_LAINT lwork, LAINT info); + +#define sgetrf FORTRAN_WRAPPER(sgetrf) + void sgetrf(CONST_LAINT m, CONST_LAINT n, LAFLT a, + CONST_LAINT lda, LAINT ipiv, LAINT info); + #define dgees FORTRAN_WRAPPER(dgees) void dgees(LACHAR jobvs, LACHAR sort, const void *select, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT sdim, @@ -92,6 +104,14 @@ extern "C" { void dpotrf(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT info); +#define dpotri FORTRAN_WRAPPER(dpotri) + void dpotri(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LAINT info); + +#define dtrtri FORTRAN_WRAPPER(dtrtri) + void dtrtri(LACHAR uplo, LACHAR diag, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LAINT info); + #define dgges FORTRAN_WRAPPER(dgges) void dgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, DGGESCRIT delztg, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb, @@ -99,6 +119,13 @@ extern "C" { LADOU vsl, CONST_LAINT ldvsl, LADOU vsr, CONST_LAINT ldvsr, LADOU work, CONST_LAINT lwork, LAINT bwork, LAINT info); +#define sgges FORTRAN_WRAPPER(sgges) + void sgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, SGGESCRIT delztg, + CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb, + LAINT sdim, LAFLT alphar, LAFLT alphai, LAFLT beta, + LAFLT vsl, CONST_LAINT ldvsl, LAFLT vsr, CONST_LAINT ldvsr, + LAFLT work, CONST_LAINT lwork, LAINT bwork, LAINT info); + #define dsyev FORTRAN_WRAPPER(dsyev) void dsyev(LACHAR jobz, LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU w, LADOU work, CONST_LAINT lwork, LAINT info); @@ -114,11 +141,96 @@ extern "C" { void dgeqrf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info); +#define sgeqrf FORTRAN_WRAPPER(sgeqrf) + void sgeqrf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, + LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info); + #define dormqr FORTRAN_WRAPPER(dormqr) void dormqr(LACHAR side, LACHAR trans, CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, CONST_LADOU a, CONST_LAINT lda, CONST_LADOU tau, LADOU c, CONST_LAINT ldc, LADOU work, CONST_LAINT lwork, LAINT info); +#define dorgqr FORTRAN_WRAPPER(dorgqr) + void dorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda, + CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info); + +#define sorgqr FORTRAN_WRAPPER(sorgqr) + void sorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda, + CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info); + +#define dgelqf FORTRAN_WRAPPER(dgelqf) + void dgelqf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU tau, + LADOU work, CONST_LAINT lwork, LAINT info); + +#define sgelqf FORTRAN_WRAPPER(sgelqf) + void sgelqf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT tau, + LAFLT work, CONST_LAINT lwork, LAINT info); + +#define dorglq FORTRAN_WRAPPER(dorglq) + void dorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda, + CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info); + +#define sorglq FORTRAN_WRAPPER(sorglq) + void sorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda, + CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info); + +#define dgesv FORTRAN_WRAPPER(dgesv) + void dgesv(CONST_LAINT n, CONST_LAINT nrhs, LADOU a, CONST_LAINT lda, LAINT ipiv, + LADOU b, CONST_LAINT ldb, LAINT info); + +#define dgesvd FORTRAN_WRAPPER(dgesvd) + void dgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work, + CONST_LAINT lwork, LAINT info); + +#define sgesvd FORTRAN_WRAPPER(sgesvd) + void sgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, + LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work, + CONST_LAINT lwork, LAINT info); + +#define dtgsen FORTRAN_WRAPPER(dtgsen) + void dtgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select, + CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb, + LADOU alphar, LADOU alphai, LADOU beta, LADOU q, CONST_LAINT ldq, + LADOU z, CONST_LAINT ldz, LAINT m, LADOU pl, LADOU pr, LADOU dif, LADOU work, + CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info); + +#define stgsen FORTRAN_WRAPPER(stgsen) + void stgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select, + CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb, + LAFLT alphar, LAFLT alphai, LAFLT beta, LAFLT q, CONST_LAINT ldq, + LAFLT z, CONST_LAINT ldz, LAINT m, LAFLT pl, LAFLT pr, LAFLT dif, LAFLT work, + CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info); + +#define dtgexc FORTRAN_WRAPPER(dtgexc) + void dtgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LADOU b, CONST_LAINT ldb, LADOU q, CONST_LAINT ldq, LADOU z, CONST_LAINT ldz, + LAINT ifst, LAINT ilst, LADOU work, CONST_LAINT lwork, LAINT info); + +#define stgexc FORTRAN_WRAPPER(stgexc) + void stgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LAFLT a, CONST_LAINT lda, + LAFLT b, CONST_LAINT ldb, LAFLT q, CONST_LAINT ldq, LAFLT z, CONST_LAINT ldz, + LAINT ifst, LAINT ilst, LAFLT work, CONST_LAINT lwork, LAINT info); + +#define dgesdd FORTRAN_WRAPPER(dgesdd) + void dgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work, + CONST_LAINT lwork, LAINT iwork, LAINT info); + +#define sgesdd FORTRAN_WRAPPER(sgesdd) + void sgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, + LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work, + CONST_LAINT lwork, LAINT iwork, LAINT info); + +#define dgeev FORTRAN_WRAPPER(dgeev) + void dgeev(LACHAR jobvl, LACHAR jobvr, CONST_LAINT n, LADOU a, CONST_LAINT lda, + LADOU wr, LADOU wi, LADOU vl, CONST_LAINT ldvl, LADOU vr, CONST_LAINT ldvr, + LADOU work, CONST_LAINT lwork, LAINT info); + +#define dgeqp3 FORTRAN_WRAPPER(dgeqp3) + void dgeqp3(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT jpvt, LADOU tau, + LADOU work, CONST_LAINT lwork, LAINT info); + #ifdef __cplusplus } /* extern "C" */ #endif