make blas and lapack functions work with matlab

time-shift
Houtan Bastani 2010-04-29 16:05:34 +02:00
parent 14a081c0f5
commit 1bc7bf7fbb
5 changed files with 453 additions and 96 deletions

View File

@ -1,7 +1,16 @@
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
#include <dynmex.h>
#include <dynblas.h>
#include <dynlapack.h>
#else
#ifndef __BLAS_LAPACK__
#define __BLAS_LAPACK__
typedef int lapack_int;
typedef int blas_int;
#ifdef __cplusplus
extern "C"
{
@ -41,6 +50,11 @@ extern "C"
#define dgeev dgeev_
#define dpotrf dpotrf_
#define dpotri dpotri_
#define dtrtri dtrtri_
#define dgetri dgetri_
#define dgeqp3 dgeqp3_
#define dormqr dormqr_
#define dgesv dgesv_
/*******************************************************************************/
@ -81,6 +95,11 @@ void dsyev(char*,char*,int*,double*,int*,double*,double*,int*,int*);
void dgeev(char*,char*,int*,double*,int*,double*,double*,double*,int*,double*,int*,double*,int*,int*);
void dpotrf(char*,int*,double*,int*,int*);
void dpotri(char*,int*,double*,int*,int*);
void dgeqp3(int*,int*,double*,int*,int*,double*,double*,int*,int*);
void dtrtri(char*,char*,int*,double*,int*,int*);
void dgetri(int*,double*,int*,int*,double*,int*,int*);
void dormqr(char*,char*,int*,int*,int*,double*,int*,double*,double*,int*,double*,int*,int*);
void dgesv(int*,int*,double*,int*,int*,double*,int*,int*);
/*******************************************************************************/
#ifdef __cplusplus
@ -88,3 +107,5 @@ void dpotri(char*,int*,double*,int*,int*);
#endif
#endif
#endif

View File

@ -207,11 +207,12 @@ int bSubtract(PRECISION *x, PRECISION *y, PRECISION *z, int n)
*/
int bLinearUpdateScalar(PRECISION *x, PRECISION *y, PRECISION a, int m)
{
int inc=1;
blas_int inc = 1;
blas_int m2 = m;
#if (PRECISION_SIZE == 4)
saxpy(&m,&a,y,&inc,x,&inc);
saxpy(&m2,&a,y,&inc,x,&inc);
#else
daxpy(&m,&a,y,&inc,x,&inc);
daxpy(&m2,&a,y,&inc,x,&inc);
#endif
return NO_ERR;
}
@ -392,33 +393,38 @@ int bMultiply(PRECISION *x, PRECISION *y, PRECISION s, int n)
int bMatrixMultiply(PRECISION *x, PRECISION *y, PRECISION *z, int m, int n, int p, int xt, int yt, int zt)
{
char transy, transz;
int dy, dz;
blas_int dy, dz;
PRECISION beta=0.0, alpha=1.0;
blas_int m2 = m;
blas_int n2 = n;
blas_int p2 = p;
#if PRECISION_SIZE == 4
if (xt)
{
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
sgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m);
sgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2);
}
else
{
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
sgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n);
sgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2);
}
#else
if (xt)
{
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
dgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m);
dgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2);
}
else
{
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
dgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n);
dgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2);
}
#endif
return NO_ERR;
@ -499,21 +505,40 @@ int bLU(int *p, PRECISION *x, int m, int n, int xt)
#endif
PRECISION *y;
int i, info;
int i;
lapack_int m2 = m;
lapack_int n2 = n;
lapack_int info;
lapack_int *p2;
int minmn = (m < n) ? m : n;
if(!(p2 = (lapack_int *)calloc(minmn, sizeof(lapack_int))))
return MEM_ERR;
for(i=0; i<minmn; i++)
p2[i] = p[i];
if (xt)
{
getrf(&m,&n,x,&m,p,&info);
getrf(&m2,&n2,x,&m2,p2,&info);
}
else
{
if (!( y=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
bTranspose(y,x,m,n,0);
getrf(&m,&n,y,&m,p,&info);
getrf(&m2,&n2,y,&m2,p2,&info);
bTranspose(x,y,m,n,1);
free(y);
}
for(i=0; i<minmn; i++)
p[i] = p2[i];
free(p2);
for (i=(m < n) ? m-1 : n-1; i >= 0; i--) p[i]--;
return (info < 0) ? SING_ERR : NO_ERR;
@ -930,9 +955,11 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
#endif
char jobu, jobv;
int qu, qv, k, info, err=NO_ERR;
int qu, qv, err=NO_ERR;
PRECISION *A_, *U_, *V_, *work, opt_size;
lapack_int k, m2, n2, qv2, info;
if (!(A_=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
if (at)
memcpy(A_,A,m*n*sizeof(PRECISION));
@ -998,7 +1025,10 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
// compute singular value decomposition
k=-1;
gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,&opt_size,&k,&info);
m2 = m;
n2 = n;
qv2 = qv;
gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,&opt_size,&k,&info);
if (info)
err=BLAS_LAPACK_ERR;
else
@ -1006,7 +1036,7 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
err=MEM_ERR;
else
{
gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,work,&k,&info);
gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,work,&k,&info);
free(work);
if (info)
err=BLAS_LAPACK_ERR;
@ -1231,8 +1261,11 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
#endif
char jobz='A';
int k, *iwork, info;
int *iwork;
PRECISION *X, *work, opt_size;
lapack_int m2, n2, k, info;
if (!(X=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
memcpy(X,A,m*n*sizeof(PRECISION));
if (!(iwork=(int*)malloc(8*((m < n) ? m : n)*sizeof(int))))
@ -1241,18 +1274,20 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
return MEM_ERR;
}
k=-1;
m2 = m;
n2 = n;
if (at)
{
memcpy(X,A,m*n*sizeof(PRECISION));
k=-1;
gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,&opt_size,&k,&info);
gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,&opt_size,&k,&info);
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
{
free(iwork);
free(X);
return info ? BLAS_LAPACK_ERR : MEM_ERR;
}
gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,work,&k,&info);
gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,work,&k,&info);
if (info)
{
free(work);
@ -1269,14 +1304,14 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
{
memcpy(X,A,m*n*sizeof(PRECISION));
k=-1;
gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,&opt_size,&k,&info);
gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,&opt_size,&k,&info);
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
{
free(iwork);
free(X);
return info ? BLAS_LAPACK_ERR : MEM_ERR;
}
gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,work,&k,&info);
gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,work,&k,&info);
if (info)
{
free(work);
@ -1443,14 +1478,18 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
#define orglq dorglq
#endif
int i, j, k, l, lwork, info, p=(m < n) ? m : n;
int i, j, k, l, p=(m < n) ? m : n;
PRECISION *tau, *work, *ptr, opt_size;
lapack_int m2, n2, p2, q2, lwork, info;
if (!(tau=(PRECISION*)malloc(p*sizeof(PRECISION)))) return MEM_ERR;
if (xt)
{
lwork=-1;
geqrf(&m,&n,X,&m,tau,&opt_size,&lwork,&info);
m2 = m;
n2 = n;
geqrf(&m2,&n2,X,&m2,tau,&opt_size,&lwork,&info);
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
{
@ -1458,7 +1497,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
return MEM_ERR;
}
geqrf(&m,&n,X,&m,tau,work,&lwork,&info);
geqrf(&m2,&n2,X,&m2,tau,work,&lwork,&info);
free(work);
if (info)
@ -1479,7 +1518,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
memcpy(ptr,X,m*p*sizeof(PRECISION));
lwork=-1;
orgqr(&m,&q,&p,ptr,&m,tau,&opt_size,&lwork,&info);
p2 = p;
q2 = q;
orgqr(&m2,&q2,&p2,ptr,&m2,tau,&opt_size,&lwork,&info);
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
{
@ -1488,7 +1529,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
return MEM_ERR;
}
orgqr(&m,&q,&p,ptr,&m,tau,work,&lwork,&info);
orgqr(&m2,&q2,&p2,ptr,&m2,tau,work,&lwork,&info);
free(work);
if (!qt)
@ -1523,8 +1564,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
else
{
lwork=-1;
gelqf(&n,&m,X,&n,tau,&opt_size,&lwork,&info);
m2 = m;
n2 = n;
gelqf(&n2,&m2,X,&n2,tau,&opt_size,&lwork,&info);
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
{
@ -1532,7 +1574,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
return MEM_ERR;
}
gelqf(&n,&m,X,&n,tau,work,&lwork,&info);
gelqf(&n2,&m2,X,&n2,tau,work,&lwork,&info);
free(work);
if (info)
@ -1563,7 +1605,10 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
ptr[--k]=X[--l];
lwork=-1;
orglq(&q,&m,&p,ptr,&q,tau,&opt_size,&lwork,&info);
m2 = m;
p2 = p;
q2 = q;
orglq(&q2,&m2,&p2,ptr,&q2,tau,&opt_size,&lwork,&info);
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
{
@ -1572,7 +1617,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
return MEM_ERR;
}
orglq(&q,&m,&p,ptr,&q,tau,work,&lwork,&info);
orglq(&q2,&m2,&p2,ptr,&q2,tau,work,&lwork,&info);
free(work);
if (qt)
@ -1666,9 +1711,11 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION *
#endif
char jobvsl, jobvsr, sort='N';
int lwork, simd, info, rtrn;
int rtrn;
PRECISION *work, size, *palpha_r, *palpha_i, *pbeta;
lapack_int n2, simd, lwork, info;
jobvsl=Q ? 'V' : 'N';
jobvsr=Z ? 'V' : 'N';
palpha_r=alpha_r ? alpha_r : (PRECISION*)malloc(n*sizeof(PRECISION));
@ -1694,13 +1741,14 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION *
if (!bt) bTransposeInPlace(B,n);
lwork=-1;
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,&size,&lwork,(void*)NULL,&info);
n2 = n;
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,&size,&lwork,(void*)NULL,&info);
if (!info)
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
rtrn=MEM_ERR;
else
{
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,work,&lwork,(void*)NULL,&info);
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,work,&lwork,(void*)NULL,&info);
if (!info)
{
if (Q && !qt) bTransposeInPlace(Q,n);
@ -1784,9 +1832,19 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
#define tgsen dtgsen
#endif
int ijob=0, wantq, wantz, lwork, liwork=1, m=n, info, rtrn, iwork;
int wantq, wantz, m=n, rtrn, i;
PRECISION size, *palpha_r, *palpha_i, *pbeta, *work;
lapack_int ijob, wantq2, wantz2, *select2, n2, m2, lwork, iwork, liwork, info;
ijob = 0;
liwork=1;
if(!(select2 = (lapack_int *)calloc(n, sizeof(lapack_int))))
return MEM_ERR;
for(i=0; i<n; i++)
select2[i] = select[i];
wantq=(QQ && Q) ? 1 : 0;
wantz=(ZZ && Z) ? 1 : 0;
@ -1831,15 +1889,21 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
if (!zt) bTransposeInPlace(Z,n);
lwork=-1;
tgsen(&ijob,&wantq,&wantz,select,&n,SS,&n,TT,&n,palpha_r,palpha_i,pbeta,QQ,&n,ZZ,&n,&m,
wantq2 = wantq;
wantz2 = wantz;
n2 = n;
m2 = m;
tgsen(&ijob,&wantq2,&wantz2,select2,&n2,SS,&n2,TT,&n2,palpha_r,palpha_i,pbeta,QQ,&n2,ZZ,&n2,&m2,
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,&size,&lwork,&iwork,&liwork,&info);
m = m2;
if (!info)
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
rtrn=MEM_ERR;
else
{
tgsen(&ijob,&wantq,&wantz,select,&n,SS,&n,TT,&n,palpha_r,palpha_i,pbeta,QQ,&n,ZZ,&n,&m,
tgsen(&ijob,&wantq2,&wantz2,select2,&n2,SS,&n2,TT,&n2,palpha_r,palpha_i,pbeta,QQ,&n2,ZZ,&n2,&m2,
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,work,&lwork,&iwork,&liwork,&info);
m = m2;
if (!info)
{
if (wantq && !qqt) bTransposeInPlace(QQ,n);
@ -1859,6 +1923,7 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
if (!alpha_r && palpha_r) free(palpha_r);
if (!alpha_i && palpha_i) free(palpha_i);
if (!beta && pbeta) free(pbeta);
free(select2);
return rtrn;
@ -1923,9 +1988,11 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
#define tgexc dtgexc
#endif
int wantq, wantz, lwork, info, rtrn, *pairs, i, j, ii, jj;
int wantq, wantz, rtrn, *pairs, i, j, ii, jj;
PRECISION size, *work, *gev, small, x1, x2;
lapack_int wantq2, wantz2, n2, i2, j2, ii2, jj2, lwork, info;
if (n == 1) return NO_ERR;
wantq=(QQ && Q) ? 1 : 0;
@ -1973,7 +2040,14 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
lwork=-1;
j=2; i=1;
tgexc(&wantq,&wantz,&n,SS,&n,TT,&n,QQ,&n,ZZ,&n,&j,&i,&size,&lwork,&info);
wantq2 = wantq;
wantz2 = wantz;
n2 = n;
j2 = j;
i2 = i;
tgexc(&wantq2,&wantz2,&n2,SS,&n2,TT,&n2,QQ,&n2,ZZ,&n2,&j2,&i2,&size,&lwork,&info);
i = i2;
j = j2;
if (!info)
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
rtrn=MEM_ERR;
@ -2008,7 +2082,14 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
{
ii=i+1;
jj=j+1;
tgexc(&wantq,&wantz,&n,SS,&n,TT,&n,QQ,&n,ZZ,&n,&jj,&ii,work,&lwork,&info);
wantq2 = wantq;
wantz2 = wantz;
n2 = n;
jj2 = jj;
ii2 = ii;
tgexc(&wantq2,&wantz2,&n2,SS,&n2,TT,&n2,QQ,&n2,ZZ,&n2,&jj2,&ii2,work,&lwork,&info);
ii = ii2;
jj = jj2;
if (!info)
if (pairs[j])
{

View File

@ -22,11 +22,13 @@ int lurgen(TSdmatrix *lu_dm, TSivector *pivot_dv, TSdmatrix *x_dm) {
//Inputs:
// x_dm: nrows-by-ncols general real matrix.
int nrows, ncols, mindim,
int nrows, ncols, mindim, i,
errflag=2; //errflag=0 implies a successful execution. But we start with 2 so as to let dgetrf_ export a correct flag.
int *pivot_p=NULL;
double *LU;
lapack_int nrows2, ncols2, errflag2;
lapack_int *pivot_p=NULL;
//=== Checking dimensions and memory allocation.
if ( !lu_dm || !x_dm ) fn_DisplayError(".../mathlib.c/lurgen(): The input arguments lu_dm and x_dm must be cretaed (memory-allocated)");
else if ( ( (nrows=x_dm->nrows) != lu_dm->nrows) || ( (ncols=x_dm->ncols) != lu_dm->ncols) )
@ -46,16 +48,21 @@ int lurgen(TSdmatrix *lu_dm, TSivector *pivot_dv, TSdmatrix *x_dm) {
lu_dm->flag = M_UT; //To make the lower part of lu_dm available, one must create another matrix and explicitly make it a unit lower matrix.
//=== Calling the MKL function.
if (!pivot_dv) {
pivot_p = tzMalloc(mindim, int);
dgetrf_(&nrows, &ncols, LU, &nrows, pivot_p, &errflag);
free(pivot_p); //Frees the memory belonging to this function.
}
nrows2 = nrows;
ncols2 = ncols;
errflag2 = errflag;
pivot_p = tzMalloc(mindim, lapack_int);
if (!pivot_dv)
dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2);
else {
if ( pivot_dv->n != mindim) fn_DisplayError("Make sure the dimension of the input vector pivot_dv is the minimum number of row number and column number of the input matrix x_dm");
dgetrf_(&nrows, &ncols, LU, &nrows, pivot_dv->v, &errflag);
}
dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2);
for(i=0; i<mindim; i++)
pivot_dv->v[i] = pivot_p[i];
}
free(pivot_p); //Frees the memory belonging to this function.
errflag = errflag2;
return( errflag ); //(1) If errflag = 0, success. (2) If errorflag = -i, the ith parameter has an illegal value.
//(3) If errflag = i, u_{ii}=0.0. The factorization is completed, but U is exactly singular. Dividing
@ -85,6 +92,8 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm)
double *tmpd0_m = NULL,
*work_p = NULL;
lapack_int n2, lwork2, errflag2;
if ( !S_dm || !(S_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/eigrsym(): input matrix (1) must be created (memory-alloacted) and (2) must be symmetric (either M_SU or M_SL)");
if ( !eval_dv ) fn_DisplayError(".../mathlib.c/eigrsym(): input eigenvalue vector must be created (memory-allocated)");
lwork = (n1=_n=S_dm->nrows)*BLOCKSIZE_FOR_INTEL_MKL;
@ -99,7 +108,12 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm)
// Obtains eigenvalues and, optionally, eigenvectors.
//---------------------------
memcpy(tmpd0_m, S_dm->M, square(_n)*sizeof(double));
dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n1, tmpd0_m, &n1, eval_dv->v, work_p, &lwork, &errflag);
n2 = n1;
lwork2 = lwork;
errflag2 = errflag;
dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n2, tmpd0_m, &n2, eval_dv->v, work_p, &lwork2, &errflag2);
errflag = errflag2;
if (work_p[0]>lwork) printf("Warning for /mathlib.c/eigrsym(): needs at least %d workspace for good performance "
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
eval_dv->flag = V_DEF;
@ -154,6 +168,8 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm,
*revecr_m=NULL, *reveci_m=NULL, //NULL means that by default we dont' compute eigenvectors.
*levecr_m=NULL, *leveci_m=NULL;
lapack_int n2, lwork2, errflag2;
//---------------------------
// Checking dimensions, etc.
//---------------------------
@ -197,8 +213,12 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm,
//---------------------------
// Obtains eigenvalues and, optionally, eigenvectors.
//---------------------------
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v,
levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag);
n2 = n1;
lwork2 = lwork;
errflag2 = errflag;
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v,
levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2);
errflag = errflag2;
vals_dzv->real->flag = V_DEF;
//---------------------------
@ -258,6 +278,7 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
int errflag=2, loc, nrows, _m, _i, _j; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
double *D, *S;
lapack_int _m2, errflag2;
if ( !D_dm || !S_dm ) fn_DisplayError(".../mathlib.c/chol(): L and R input square matricies must be created (memory allocated)");
else {
@ -293,7 +314,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
printf("\n ------- .../mathlib.c/chol(): R input square matrix must be symmetric!-------\n");
return (-6);
}
dpotrf_("U", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_("U", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
break;
case 'L': case 'l':
if (S_dm->flag & M_SL) {
@ -320,7 +344,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
return (-6);
}
//??????NOT tested yet.
dpotrf_("L", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_("L", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
break;
default:
fn_DisplayError(".../mathlib.c/chol(): Input ul must be either 'U' or 'L'");
@ -328,12 +355,18 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
}
else {
if ( (ul=='U' || ul=='u') && (D_dm->flag & M_SU) ) {
dpotrf_("U", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_("U", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
D_dm->flag = M_UT;
}
else if ( (ul=='L' || ul=='l') && (D_dm->flag & M_SL) ) {
//Tested. It works!
dpotrf_("L", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_("L", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
D_dm->flag = M_LT;
}
else {
@ -370,6 +403,7 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un)
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
double *X, *A;
lapack_int _n2, errflag2;
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrtri(): Both input matrices must be created (memory-allocated)");
else if ( !(A_dm->flag & (M_UT | M_LT)) ) fn_DisplayError(".../mathlib.c/invrtri(): (1) R input matrix A must be given legal values; (2) A must be a real triangular matrix, i.e., M_UT or M_LT");
@ -381,14 +415,17 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un)
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
fn_DisplayError(".../mathlib.c/invrtri(): both input and output matrices (1) must be square and (2) must have the same dimension");
_n2 = _n;
errflag2 = errflag;
if (X==A) {
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag);
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
if (errflag) return (errflag);
}
else {
memcpy(X, A, _n*_n*sizeof(double));
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag);
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
if (errflag) return (errflag);
else X_dm->flag = A_dm->flag;
}
@ -419,6 +456,7 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
double *X, *A;
lapack_int _n2, errflag2;
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invspd(): Both input matrices must be created (memory-allocated)");
else if ( !(A_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/invspd(): (1) R input matrix A must be given legal values; (2) A must be symmetric, positive-definite, i.e., M_SU or M_SL");
@ -433,10 +471,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
if ( (_n != A_dm->ncols) )
fn_DisplayError(".../mathlib.c/invspd(): input matrix (1) must be square and (2) must have the same dimension");
//=== Choleski decomposition.
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
_n2 = _n;
errflag2 = errflag;
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
if (errflag) return (errflag);
//=== Takes inverse.
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
A_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
return (errflag);
//---
@ -448,10 +490,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
fn_DisplayError(".../mathlib.c/invspd(): both input and output matrices (1) must be square and (2) must have the same dimension");
memcpy(X, A, _n*_n*sizeof(double));
//=== Choleski decomposition.
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
_n2 = _n;
errflag2 = errflag;
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
if (errflag) return (errflag);
//=== Takes inverse.
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
errflag = errflag2;
X_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
return (errflag);
//---
@ -478,17 +524,18 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
//Inputs:
// A: _n-by-_n real general matrix.
int _n, errflag=2, //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
lwork, *ipivot; //Used when calling LAPACK.
lwork; //Used when calling LAPACK.
double *X, *A,
*work; //Used when calling LAPACK.
lapack_int _n2, errflag2, *ipivot, lwork2;
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrgen(): Both input matrices must be created (memory-allocated)");
else if ( !(A_dm->flag & M_GE) ) fn_DisplayError(".../mathlib.c/invrgen(): (1) R input matrix A must be given legal values; (2) A must be a general matrix, i.e., M_GE");
else {
X = X_dm->M;
A = A_dm->M;
ipivot = tzMalloc((_n=A_dm->nrows), int);
ipivot = tzMalloc((_n=A_dm->nrows), lapack_int);
work = tzMalloc((lwork=_n*BLOCKSIZE_FOR_INTEL_MKL), double);
}
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
@ -496,14 +543,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
if (X==A) {
dgetrf_(&_n, &_n, A, &_n, ipivot, &errflag);
_n2 = _n;
errflag2 = errflag;
dgetrf_(&_n2, &_n2, A, &_n2, ipivot, &errflag2);
errflag = errflag2;
if (errflag) {
// A_dm->flag = M_UNDEF;
free(ipivot);
free(work);
return errflag;
}
dgetri_(&_n, A, &_n, ipivot, work, &lwork, &errflag);
lwork2 = lwork;
dgetri_(&_n2, A, &_n2, ipivot, work, &lwork2, &errflag2);
errflag = errflag2;
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
if (errflag) {
@ -514,14 +566,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
}
else {
memcpy(X, A, _n*_n*sizeof(double));
dgetrf_(&_n, &_n, X, &_n, ipivot, &errflag);
_n2 = _n;
errflag2 = errflag;
dgetrf_(&_n2, &_n2, X, &_n2, ipivot, &errflag2);
errflag = errflag2;
if (errflag) {
// X_dm->flag = M_UNDEF;
free(ipivot);
free(work);
return errflag;
}
dgetri_(&_n, X, &_n, ipivot, work, &lwork, &errflag);
lwork2 = lwork;
dgetri_(&_n2, X, &_n2, ipivot, work, &lwork2, &errflag2);
errflag = errflag2;
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
if (errflag) {
@ -565,7 +622,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
// _r-by-_m real rectangular (rrect) matrix if / (XA=B).
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
int _m, _n, _r, //mn_max, mn_min,
int _m, _n, _r, i, //mn_max, mn_min,
lwork, _i, info = -2,
*jpvt_p = NULL;
double *A, *B, *X,
@ -574,6 +631,9 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
*tau_p = NULL,
*work_p = NULL;
lapack_int _m2, _n2, _r2, lwork2, info2;
lapack_int *jpvt_p2 = NULL;
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag &M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rrect(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rrect(): output matrix X_dm must be created (allocated memory)");
if (lr=='/') {
@ -603,6 +663,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
// qrb_p = tzMalloc((mn_max = _m>_n?_m:_n)*_r, double); //DDDDebug: seems requiring _m>_n, but this may not be the case.
qrb_p = tzMalloc(_m*_r, double); //Note that _m>=_n.
jpvt_p = tzMalloc(_n, int);
jpvt_p2 = tzMalloc(_n, lapack_int);
tau_p = tzMalloc(_n, double);
// work_p = tzMalloc(lwork, double);
@ -622,7 +683,16 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
//=== Computes the QR factorization of a general m by n matrix with column pivoting using Level 3 BLAS.
work_p = tzMalloc(lwork=_n*BLOCKSIZE_FOR_INTEL_MKL, double);
dgeqp3_(&_m,&_n,qra_p,&_m,jpvt_p,tau_p,work_p,&lwork,&info);
_m2 = _m;
_n2 = _n;
lwork2 = lwork;
info2 = info;
for(i=0; i<_n; i++)
jpvt_p2[i] = jpvt_p[i];
dgeqp3_(&_m2,&_n2,qra_p,&_m2,jpvt_p2,tau_p,work_p,&lwork2,&info2);
info = info2;
for(i=0; i<_n; i++)
jpvt_p[i] = jpvt_p2[i];
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dgeqp3_(), we need at least %d workspace for good performance "
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
tzDestroy(work_p);
@ -630,7 +700,13 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
//=== Multiplies a real matrix by the orthogonal matrix Q of the QR factorization formed by dgeqp3_.
work_p = tzMalloc(lwork=_r*BLOCKSIZE_FOR_INTEL_MKL, double);
dormqr_("L","T",&_m,&_r,&_n,qra_p,&_m,tau_p,qrb_p,&_m,work_p,&lwork,&info);
_m2 = _m;
_n2 = _n;
_r2 = _r;
lwork2 = lwork;
info2 = info;
dormqr_("L","T",&_m2,&_r2,&_n2,qra_p,&_m2,tau_p,qrb_p,&_m2,work_p,&lwork2,&info2);
info = info2;
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dormqr_(), we need at least %d workspace for good performance "
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
//dormqr_("L","T",&_m,&_r,&mn_min,qra_p,&_m,tau_p,qrb_p,&mn_max,work_p,&lwork,&info);
@ -653,6 +729,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
tzDestroy(qra_p);
tzDestroy(qrb_p);
tzDestroy(jpvt_p);
tzDestroy(jpvt_p2);
tzDestroy(tau_p);
// tzDestroy(work_p);
@ -683,8 +760,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
int _m, _r, m2,
_i, info = -2,
*ipiv_p = NULL;
_i, info = -2;
double *A, *B, *X,
*Atran_p = NULL, //Transpose of A if right division / takes place.
*Btran_p = NULL, //Transpose of B if right division / takes place.
@ -692,6 +768,10 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
// *tau_p = NULL,
// *work_p = NULL;
lapack_int _mlap, _rlap, info2, *ipiv_p = NULL;
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag & M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rgens(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rgens(): output matrix X_dm must be created (allocated memory)");
if (lr=='/') {
@ -717,7 +797,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
if (lr=='/') {
//Right divistion /.
//=== Memory allocation for this function only.
ipiv_p = tzMalloc(_m, int);
ipiv_p = tzMalloc(_m, lapack_int);
Atran_p = tzMalloc(square(_m), double);
Btran_p = tzMalloc(_m*_r, double);
@ -725,7 +805,11 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
cblas_dcopy(_r, B+_i*_r, 1, Btran_p+_i, _m); //Copying the transpose of B (_r-by-_m) to Btran (_m-by-_r);
}
dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, Btran_p, &_m, &info);
_mlap = _m;
_rlap = _r;
info2 = info;
dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, Btran_p, &_mlap, &info2);
info = info2;
for (_i=0; _i<_r; _i++) cblas_dcopy(_m, Btran_p+_i*_m, 1, X+_i, _r); //Copying the transpose of Btran(_m-by-_r) to X (_r-by-_m);
X_dm->flag = M_GE;
@ -739,12 +823,16 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
else {
//Left division \.
//=== Memory allocation for this function only.
ipiv_p = tzMalloc(_m, int);
ipiv_p = tzMalloc(_m, lapack_int);
W = tzMalloc(m2=square(_m), double);
memcpy(X, B, _m*_r*sizeof(double));
memcpy(W, A, m2*sizeof(double));
dgesv_(&_m, &_r, W, &_m, ipiv_p, X, &_m, &info);
_mlap = _m;
_rlap = _r;
info2 = info;
dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, X, &_mlap, &info2);
info = info2;
X_dm->flag = M_GE;
//=== Destroyes memory allocated for this function only.
@ -778,12 +866,13 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
int _m, m2,
_r = 1,
_i, info = -2,
*ipiv_p = NULL;
_i, info = -2;
double *A, *b, *x,
*Atran_p = NULL, //Transpose of A if right division / takes place.
*W = NULL; //Duplicate copy of A when left division \ is used. This will be replaced by LU decomposition.
lapack_int _mlap, _rlap, info2, *ipiv_p = NULL;
if (!A_dm || !(A_dm->flag & M_GE) || !b_dv || !b_dv->flag) fn_DisplayError("mathlib.c/bdivA_rgens(): Both A_dm and b_dv must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
if (!x_dv) fn_DisplayError("mathlib.c/bdivA_rgens(): output vector x_dv must be created (allocated memory)");
if ( b_dv->n != x_dv->n ) fn_DisplayError("mathlib.c/bdivA_rgens(): The dim of b_dv and the dim of x_dv must exactly match");
@ -797,13 +886,17 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
if (lr=='/') {
//Right divistion /.
//=== Memory allocation for this function only.
ipiv_p = tzMalloc(_m, int);
ipiv_p = tzMalloc(_m, lapack_int);
Atran_p = tzMalloc(square(_m), double);
for (_i=0; _i<_m; _i++)
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, x, &_m, &info);
_mlap = _m;
_rlap = _r;
info2 = info;
dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, x, &_mlap, &info2);
info = info2;
x_dv->flag = V_DEF;
//=== Destroyes memory allocated for this function only.
@ -815,12 +908,16 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
else {
//Left division \.
//=== Memory allocation for this function only.
ipiv_p = tzMalloc(_m, int);
ipiv_p = tzMalloc(_m, lapack_int);
W = tzMalloc(m2=square(_m), double);
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
memcpy(W, A, m2*sizeof(double));
dgesv_(&_m, &_r, W, &_m, ipiv_p, x, &_m, &info);
_mlap = _m;
_rlap = _r;
info2 = info;
dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, x, &_mlap, &info2);
info = info2;
x_dv->flag = V_DEF;
//=== Destroyes memory allocated for this function only.
@ -859,6 +956,8 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
int errflag=2, nrows, nels; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
double *A, *W=NULL, *x, *b;
lapack_int nrows2, errflag2;
if ( !A_dm || !b_dv || !x_dv ) fn_DisplayError(".../mathlib.c/Aldivb_spd(): All input matrices or vectors must be created (memory allocated)");
nrows = A_dm->nrows;
nels = square(nrows);
@ -876,7 +975,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
if (A_dm->flag & M_SU) {
dpotrf_("U", &nrows, W, &nrows, &errflag); //Choleski. U'*U = W where W will be replaced by upper triangular U.
nrows2 = nrows;
errflag2 = errflag;
dpotrf_("U", &nrows2, W, &nrows2, &errflag2); //Choleski. U'*U = W where W will be replaced by upper triangular U.
errflag = errflag2;
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
if (x==b) {
//=== Solving for A*x=b.
@ -892,7 +994,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
if ( (an!='N') && (an!='n') ) A_dm->flag = M_UT;
}
else if (A_dm->flag & M_SL) { //?????????? Not tested yet.
dpotrf_("L", &nrows, W, &nrows, &errflag); //Choleski. L*L' = W where W will be replaced by lower triangular L.
nrows2 = nrows;
errflag2 = errflag;
dpotrf_("L", &nrows2, W, &nrows2, &errflag2); //Choleski. L*L' = W where W will be replaced by lower triangular L.
errflag = errflag2;
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
if (x==b) {
//=== Solving for A*x=b.
@ -1053,12 +1158,18 @@ int eigrsym_decomp(double *eval_v, double *evec_m, const double *s_m, const int
double *tmpd0_m=tzMalloc(square(_n), double),
*work_p=tzMalloc(lwork, double);
lapack_int n2, lwork2, errflag2;
//---------------------------
// Obtains eigenvalues and, optionally, eigenvectors.
//---------------------------
memcpy(tmpd0_m, s_m, square(_n)*sizeof(double));
dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n1, tmpd0_m, &n1, eval_v, work_p, &lwork, &errflag);
n2 = n1;
lwork2 = lwork;
errflag2 = errflag;
dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n2, tmpd0_m, &n2, eval_v, work_p, &lwork2, &errflag2);
errflag = errflag2;
if (evec_m) memcpy(evec_m, tmpd0_m, square(_n)*sizeof(double));
@ -1096,6 +1207,8 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r
double *tmpd0_m=tzMalloc(square(_n), double), //@@Must be freed in this function.@@
*work_p=tzMalloc(lwork, double); //@@Must be freed in this function.@@
lapack_int n2, lwork2, errflag2;
//---------------------------
// Starts with x_m -- the matrix to be decomposed.
//---------------------------
@ -1104,8 +1217,12 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r
//---------------------------
// Obtains eigenvalues and, optionally, eigenvectors.
//---------------------------
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v,
levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag);
n2 = n1;
lwork2 = lwork;
errflag2 = errflag;
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v,
levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2);
errflag = errflag2;
//---------------------------
// Frees the allocated memory.
@ -1164,6 +1281,8 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) {
#ifdef INTELCMATHLIBRARY //Intel MKL Lapack dependent code.
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
lapack_int _m2, errflag2;
//=== Fills the triangular part that is used for Choleski decomposition.
switch (ul) {
@ -1188,7 +1307,10 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) {
return (-1);
}
//=== Choleski decomposition.
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
//---
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
// else if (errflag>0) fn_DisplayError("The leadding minor of some order, hence the entire matrix, is not positive definite");
@ -1227,6 +1349,8 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) {
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
lapack_int _m2, errflag2;
//=== Fills the triangular part that is used for Choleski decomposition.
switch (ul) {
case 'u': case 'U':
@ -1251,10 +1375,14 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) {
return (-1);
}
//=== Choleski decomposition.
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
_m2 = _m;
errflag2 = errflag;
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
if (errflag) return (errflag);
//=== Takes inverse.
dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
errflag = errflag2;
return (errflag);
//---
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
@ -2891,7 +3019,7 @@ void ScalarTimesColofMatrix(TSdvector *y_dv, double _alpha, TSdmatrix *X_dm, int
y_dv->flag = V_DEF;
}
#else
Need to be tested for the following.
// Need to be tested for the following.
//
// M = X_dm->M + (_j+1)*(nrows=X_dm->nrows) - 1; //Points to the end of the jth column.
// if (!y_dv)
@ -3896,7 +4024,7 @@ void CopySubrowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSdmatrix
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
}
#else
Havent got time to code up the default using Linux BLAS.
// Havent got time to code up the default using Linux BLAS.
#endif
@ -3930,7 +4058,7 @@ void CopySubmatrix2rowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd
else fn_DisplayError(".../mathlib.c/CopySubmatrix2rowmatrix(): the submatrix of x2_dm must be within the range of x2_dm and its transpose must be within the range of x1_dm");
}
#else
Havent got time to code up the default using Linux BLAS.
// Havent got time to code up the default using Linux BLAS.
#endif
@ -3965,7 +4093,7 @@ void CopySubrowmatrix2matrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix2matrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
}
#else
Havent got time to code up the default using Linux BLAS.
// Havent got time to code up the default using Linux BLAS.
#endif
@ -4208,7 +4336,7 @@ void CopySubvector2rowmatrix(TSdmatrix *x1_dm, const int br, const int bc, const
else fn_DisplayError(".../mathlib.c/CopySubvector2rowmatrix(): Copying (copied) elements are outside the (row) dimension of the copying vector x2_dv (the copied matrix x1_dm)");
}
#else
Havent got time to code up the default using Linux BLAS.
// Havent got time to code up the default using Linux BLAS.
#endif

View File

@ -54,7 +54,9 @@ extern "C" {
typedef const char *BLCHAR;
typedef const blas_int *CONST_BLINT;
typedef const double *CONST_BLDOU;
typedef const float *CONST_BLFLT;
typedef double *BLDOU;
typedef float *BLFLT;
#define dgemm FORTRAN_WRAPPER(dgemm)
void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
@ -62,6 +64,12 @@ extern "C" {
CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
BLDOU c, CONST_BLINT ldc);
#define sgemm FORTRAN_WRAPPER(sgemm)
void sgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
CONST_BLINT k, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
CONST_BLFLT b, CONST_BLINT ldb, CONST_BLFLT beta,
BLFLT c, CONST_BLINT ldc);
#define dsymm FORTRAN_WRAPPER(dsymm)
void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n,
CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
@ -85,6 +93,10 @@ extern "C" {
void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
BLDOU y, CONST_BLINT incy);
#define saxpy FORTRAN_WRAPPER(saxpy)
void saxpy(CONST_BLINT n, CONST_BLFLT a, CONST_BLFLT x, CONST_BLINT incx,
BLFLT y, CONST_BLINT incy);
#define dcopy FORTRAN_WRAPPER(dcopy)
void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx,
BLDOU y, CONST_BLINT incy);
@ -96,6 +108,9 @@ extern "C" {
#define dscal FORTRAN_WRAPPER(dscal)
void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx);
#define sscal FORTRAN_WRAPPER(sscal)
void sscal(CONST_BLINT n, CONST_BLDOU a, BLFLT x, CONST_BLINT incx);
#define dtrsm FORTRAN_WRAPPER(dtrsm)
void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,

View File

@ -57,6 +57,10 @@ extern "C" {
typedef const double *CONST_LADOU;
typedef double *LADOU;
typedef lapack_int (*DGGESCRIT)(const double *, const double *, const double *);
typedef lapack_int (*SGGESCRIT)(const float *, const float *, const float *);
typedef float *LAFLT;
typedef const float *CONST_LAFLT;
typedef lapack_int *CONST_LALOG; //logical
#define dgetrs FORTRAN_WRAPPER(dgetrs)
void dgetrs(LACHAR trans, CONST_LAINT n, CONST_LAINT nrhs, CONST_LADOU a, CONST_LAINT lda, CONST_LAINT ipiv,
@ -66,6 +70,14 @@ extern "C" {
void dgetrf(CONST_LAINT m, CONST_LAINT n, LADOU a,
CONST_LAINT lda, LAINT ipiv, LAINT info);
#define dgetri FORTRAN_WRAPPER(dgetri)
void dgetri(CONST_LAINT n, LADOU a, CONST_LAINT lda, CONST_LAINT ipiv, LADOU work,
CONST_LAINT lwork, LAINT info);
#define sgetrf FORTRAN_WRAPPER(sgetrf)
void sgetrf(CONST_LAINT m, CONST_LAINT n, LAFLT a,
CONST_LAINT lda, LAINT ipiv, LAINT info);
#define dgees FORTRAN_WRAPPER(dgees)
void dgees(LACHAR jobvs, LACHAR sort, const void *select,
CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT sdim,
@ -92,6 +104,14 @@ extern "C" {
void dpotrf(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LAINT info);
#define dpotri FORTRAN_WRAPPER(dpotri)
void dpotri(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LAINT info);
#define dtrtri FORTRAN_WRAPPER(dtrtri)
void dtrtri(LACHAR uplo, LACHAR diag, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LAINT info);
#define dgges FORTRAN_WRAPPER(dgges)
void dgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, DGGESCRIT delztg,
CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb,
@ -99,6 +119,13 @@ extern "C" {
LADOU vsl, CONST_LAINT ldvsl, LADOU vsr, CONST_LAINT ldvsr,
LADOU work, CONST_LAINT lwork, LAINT bwork, LAINT info);
#define sgges FORTRAN_WRAPPER(sgges)
void sgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, SGGESCRIT delztg,
CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb,
LAINT sdim, LAFLT alphar, LAFLT alphai, LAFLT beta,
LAFLT vsl, CONST_LAINT ldvsl, LAFLT vsr, CONST_LAINT ldvsr,
LAFLT work, CONST_LAINT lwork, LAINT bwork, LAINT info);
#define dsyev FORTRAN_WRAPPER(dsyev)
void dsyev(LACHAR jobz, LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU w, LADOU work, CONST_LAINT lwork, LAINT info);
@ -114,11 +141,96 @@ extern "C" {
void dgeqrf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
#define sgeqrf FORTRAN_WRAPPER(sgeqrf)
void sgeqrf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
#define dormqr FORTRAN_WRAPPER(dormqr)
void dormqr(LACHAR side, LACHAR trans, CONST_LAINT m, CONST_LAINT n, CONST_LAINT k,
CONST_LADOU a, CONST_LAINT lda, CONST_LADOU tau, LADOU c, CONST_LAINT ldc,
LADOU work, CONST_LAINT lwork, LAINT info);
#define dorgqr FORTRAN_WRAPPER(dorgqr)
void dorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda,
CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
#define sorgqr FORTRAN_WRAPPER(sorgqr)
void sorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda,
CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
#define dgelqf FORTRAN_WRAPPER(dgelqf)
void dgelqf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU tau,
LADOU work, CONST_LAINT lwork, LAINT info);
#define sgelqf FORTRAN_WRAPPER(sgelqf)
void sgelqf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT tau,
LAFLT work, CONST_LAINT lwork, LAINT info);
#define dorglq FORTRAN_WRAPPER(dorglq)
void dorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda,
CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
#define sorglq FORTRAN_WRAPPER(sorglq)
void sorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda,
CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
#define dgesv FORTRAN_WRAPPER(dgesv)
void dgesv(CONST_LAINT n, CONST_LAINT nrhs, LADOU a, CONST_LAINT lda, LAINT ipiv,
LADOU b, CONST_LAINT ldb, LAINT info);
#define dgesvd FORTRAN_WRAPPER(dgesvd)
void dgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work,
CONST_LAINT lwork, LAINT info);
#define sgesvd FORTRAN_WRAPPER(sgesvd)
void sgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work,
CONST_LAINT lwork, LAINT info);
#define dtgsen FORTRAN_WRAPPER(dtgsen)
void dtgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select,
CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb,
LADOU alphar, LADOU alphai, LADOU beta, LADOU q, CONST_LAINT ldq,
LADOU z, CONST_LAINT ldz, LAINT m, LADOU pl, LADOU pr, LADOU dif, LADOU work,
CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info);
#define stgsen FORTRAN_WRAPPER(stgsen)
void stgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select,
CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb,
LAFLT alphar, LAFLT alphai, LAFLT beta, LAFLT q, CONST_LAINT ldq,
LAFLT z, CONST_LAINT ldz, LAINT m, LAFLT pl, LAFLT pr, LAFLT dif, LAFLT work,
CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info);
#define dtgexc FORTRAN_WRAPPER(dtgexc)
void dtgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU b, CONST_LAINT ldb, LADOU q, CONST_LAINT ldq, LADOU z, CONST_LAINT ldz,
LAINT ifst, LAINT ilst, LADOU work, CONST_LAINT lwork, LAINT info);
#define stgexc FORTRAN_WRAPPER(stgexc)
void stgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
LAFLT b, CONST_LAINT ldb, LAFLT q, CONST_LAINT ldq, LAFLT z, CONST_LAINT ldz,
LAINT ifst, LAINT ilst, LAFLT work, CONST_LAINT lwork, LAINT info);
#define dgesdd FORTRAN_WRAPPER(dgesdd)
void dgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work,
CONST_LAINT lwork, LAINT iwork, LAINT info);
#define sgesdd FORTRAN_WRAPPER(sgesdd)
void sgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work,
CONST_LAINT lwork, LAINT iwork, LAINT info);
#define dgeev FORTRAN_WRAPPER(dgeev)
void dgeev(LACHAR jobvl, LACHAR jobvr, CONST_LAINT n, LADOU a, CONST_LAINT lda,
LADOU wr, LADOU wi, LADOU vl, CONST_LAINT ldvl, LADOU vr, CONST_LAINT ldvr,
LADOU work, CONST_LAINT lwork, LAINT info);
#define dgeqp3 FORTRAN_WRAPPER(dgeqp3)
void dgeqp3(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT jpvt, LADOU tau,
LADOU work, CONST_LAINT lwork, LAINT info);
#ifdef __cplusplus
} /* extern "C" */
#endif