make blas and lapack functions work with matlab
parent
14a081c0f5
commit
1bc7bf7fbb
|
@ -1,7 +1,16 @@
|
||||||
|
|
||||||
|
#if defined(MATLAB_MEX_FILE) || defined(OCTAVE_MEX_FILE)
|
||||||
|
#include <dynmex.h>
|
||||||
|
#include <dynblas.h>
|
||||||
|
#include <dynlapack.h>
|
||||||
|
#else
|
||||||
|
|
||||||
#ifndef __BLAS_LAPACK__
|
#ifndef __BLAS_LAPACK__
|
||||||
#define __BLAS_LAPACK__
|
#define __BLAS_LAPACK__
|
||||||
|
|
||||||
|
typedef int lapack_int;
|
||||||
|
typedef int blas_int;
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C"
|
extern "C"
|
||||||
{
|
{
|
||||||
|
@ -41,6 +50,11 @@ extern "C"
|
||||||
#define dgeev dgeev_
|
#define dgeev dgeev_
|
||||||
#define dpotrf dpotrf_
|
#define dpotrf dpotrf_
|
||||||
#define dpotri dpotri_
|
#define dpotri dpotri_
|
||||||
|
#define dtrtri dtrtri_
|
||||||
|
#define dgetri dgetri_
|
||||||
|
#define dgeqp3 dgeqp3_
|
||||||
|
#define dormqr dormqr_
|
||||||
|
#define dgesv dgesv_
|
||||||
/*******************************************************************************/
|
/*******************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,6 +95,11 @@ void dsyev(char*,char*,int*,double*,int*,double*,double*,int*,int*);
|
||||||
void dgeev(char*,char*,int*,double*,int*,double*,double*,double*,int*,double*,int*,double*,int*,int*);
|
void dgeev(char*,char*,int*,double*,int*,double*,double*,double*,int*,double*,int*,double*,int*,int*);
|
||||||
void dpotrf(char*,int*,double*,int*,int*);
|
void dpotrf(char*,int*,double*,int*,int*);
|
||||||
void dpotri(char*,int*,double*,int*,int*);
|
void dpotri(char*,int*,double*,int*,int*);
|
||||||
|
void dgeqp3(int*,int*,double*,int*,int*,double*,double*,int*,int*);
|
||||||
|
void dtrtri(char*,char*,int*,double*,int*,int*);
|
||||||
|
void dgetri(int*,double*,int*,int*,double*,int*,int*);
|
||||||
|
void dormqr(char*,char*,int*,int*,int*,double*,int*,double*,double*,int*,double*,int*,int*);
|
||||||
|
void dgesv(int*,int*,double*,int*,int*,double*,int*,int*);
|
||||||
/*******************************************************************************/
|
/*******************************************************************************/
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -88,3 +107,5 @@ void dpotri(char*,int*,double*,int*,int*);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -207,11 +207,12 @@ int bSubtract(PRECISION *x, PRECISION *y, PRECISION *z, int n)
|
||||||
*/
|
*/
|
||||||
int bLinearUpdateScalar(PRECISION *x, PRECISION *y, PRECISION a, int m)
|
int bLinearUpdateScalar(PRECISION *x, PRECISION *y, PRECISION a, int m)
|
||||||
{
|
{
|
||||||
int inc=1;
|
blas_int inc = 1;
|
||||||
|
blas_int m2 = m;
|
||||||
#if (PRECISION_SIZE == 4)
|
#if (PRECISION_SIZE == 4)
|
||||||
saxpy(&m,&a,y,&inc,x,&inc);
|
saxpy(&m2,&a,y,&inc,x,&inc);
|
||||||
#else
|
#else
|
||||||
daxpy(&m,&a,y,&inc,x,&inc);
|
daxpy(&m2,&a,y,&inc,x,&inc);
|
||||||
#endif
|
#endif
|
||||||
return NO_ERR;
|
return NO_ERR;
|
||||||
}
|
}
|
||||||
|
@ -392,33 +393,38 @@ int bMultiply(PRECISION *x, PRECISION *y, PRECISION s, int n)
|
||||||
int bMatrixMultiply(PRECISION *x, PRECISION *y, PRECISION *z, int m, int n, int p, int xt, int yt, int zt)
|
int bMatrixMultiply(PRECISION *x, PRECISION *y, PRECISION *z, int m, int n, int p, int xt, int yt, int zt)
|
||||||
{
|
{
|
||||||
char transy, transz;
|
char transy, transz;
|
||||||
int dy, dz;
|
blas_int dy, dz;
|
||||||
PRECISION beta=0.0, alpha=1.0;
|
PRECISION beta=0.0, alpha=1.0;
|
||||||
|
|
||||||
|
blas_int m2 = m;
|
||||||
|
blas_int n2 = n;
|
||||||
|
blas_int p2 = p;
|
||||||
|
|
||||||
#if PRECISION_SIZE == 4
|
#if PRECISION_SIZE == 4
|
||||||
if (xt)
|
if (xt)
|
||||||
{
|
{
|
||||||
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
|
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
|
||||||
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
|
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
|
||||||
sgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m);
|
sgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
|
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
|
||||||
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
|
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
|
||||||
sgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n);
|
sgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (xt)
|
if (xt)
|
||||||
{
|
{
|
||||||
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
|
if (yt) {transy='N'; dy=m;} else {transy='T'; dy=p;}
|
||||||
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
|
if (zt) {transz='N'; dz=p;} else {transz='T'; dz=n;}
|
||||||
dgemm(&transy,&transz,&m,&n,&p,&alpha,y,&dy,z,&dz,&beta,x,&m);
|
dgemm(&transy,&transz,&m2,&n2,&p2,&alpha,y,&dy,z,&dz,&beta,x,&m2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
|
if (yt) {transy='T'; dy=m;} else {transy='N'; dy=p;}
|
||||||
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
|
if (zt) {transz='T'; dz=p;} else {transz='N'; dz=n;}
|
||||||
dgemm(&transz,&transy,&n,&m,&p,&alpha,z,&dz,y,&dy,&beta,x,&n);
|
dgemm(&transz,&transy,&n2,&m2,&p2,&alpha,z,&dz,y,&dy,&beta,x,&n2);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return NO_ERR;
|
return NO_ERR;
|
||||||
|
@ -499,21 +505,40 @@ int bLU(int *p, PRECISION *x, int m, int n, int xt)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PRECISION *y;
|
PRECISION *y;
|
||||||
int i, info;
|
int i;
|
||||||
|
|
||||||
|
lapack_int m2 = m;
|
||||||
|
lapack_int n2 = n;
|
||||||
|
lapack_int info;
|
||||||
|
lapack_int *p2;
|
||||||
|
|
||||||
|
int minmn = (m < n) ? m : n;
|
||||||
|
|
||||||
|
if(!(p2 = (lapack_int *)calloc(minmn, sizeof(lapack_int))))
|
||||||
|
return MEM_ERR;
|
||||||
|
|
||||||
|
for(i=0; i<minmn; i++)
|
||||||
|
p2[i] = p[i];
|
||||||
|
|
||||||
if (xt)
|
if (xt)
|
||||||
{
|
{
|
||||||
getrf(&m,&n,x,&m,p,&info);
|
getrf(&m2,&n2,x,&m2,p2,&info);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (!( y=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
if (!( y=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
||||||
bTranspose(y,x,m,n,0);
|
bTranspose(y,x,m,n,0);
|
||||||
|
|
||||||
getrf(&m,&n,y,&m,p,&info);
|
getrf(&m2,&n2,y,&m2,p2,&info);
|
||||||
|
|
||||||
bTranspose(x,y,m,n,1);
|
bTranspose(x,y,m,n,1);
|
||||||
free(y);
|
free(y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for(i=0; i<minmn; i++)
|
||||||
|
p[i] = p2[i];
|
||||||
|
free(p2);
|
||||||
|
|
||||||
for (i=(m < n) ? m-1 : n-1; i >= 0; i--) p[i]--;
|
for (i=(m < n) ? m-1 : n-1; i >= 0; i--) p[i]--;
|
||||||
return (info < 0) ? SING_ERR : NO_ERR;
|
return (info < 0) ? SING_ERR : NO_ERR;
|
||||||
|
|
||||||
|
@ -930,9 +955,11 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char jobu, jobv;
|
char jobu, jobv;
|
||||||
int qu, qv, k, info, err=NO_ERR;
|
int qu, qv, err=NO_ERR;
|
||||||
PRECISION *A_, *U_, *V_, *work, opt_size;
|
PRECISION *A_, *U_, *V_, *work, opt_size;
|
||||||
|
|
||||||
|
lapack_int k, m2, n2, qv2, info;
|
||||||
|
|
||||||
if (!(A_=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
if (!(A_=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
||||||
if (at)
|
if (at)
|
||||||
memcpy(A_,A,m*n*sizeof(PRECISION));
|
memcpy(A_,A,m*n*sizeof(PRECISION));
|
||||||
|
@ -998,7 +1025,10 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
|
||||||
|
|
||||||
// compute singular value decomposition
|
// compute singular value decomposition
|
||||||
k=-1;
|
k=-1;
|
||||||
gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,&opt_size,&k,&info);
|
m2 = m;
|
||||||
|
n2 = n;
|
||||||
|
qv2 = qv;
|
||||||
|
gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,&opt_size,&k,&info);
|
||||||
if (info)
|
if (info)
|
||||||
err=BLAS_LAPACK_ERR;
|
err=BLAS_LAPACK_ERR;
|
||||||
else
|
else
|
||||||
|
@ -1006,7 +1036,7 @@ int bSVD_new(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int
|
||||||
err=MEM_ERR;
|
err=MEM_ERR;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
gesvd(&jobu,&jobv,&m,&n,A_,&m,d,U_,&m,V_,&qv,work,&k,&info);
|
gesvd(&jobu,&jobv,&m2,&n2,A_,&m2,d,U_,&m2,V_,&qv2,work,&k,&info);
|
||||||
free(work);
|
free(work);
|
||||||
if (info)
|
if (info)
|
||||||
err=BLAS_LAPACK_ERR;
|
err=BLAS_LAPACK_ERR;
|
||||||
|
@ -1231,8 +1261,11 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char jobz='A';
|
char jobz='A';
|
||||||
int k, *iwork, info;
|
int *iwork;
|
||||||
PRECISION *X, *work, opt_size;
|
PRECISION *X, *work, opt_size;
|
||||||
|
|
||||||
|
lapack_int m2, n2, k, info;
|
||||||
|
|
||||||
if (!(X=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
if (!(X=(PRECISION*)malloc(m*n*sizeof(PRECISION)))) return MEM_ERR;
|
||||||
memcpy(X,A,m*n*sizeof(PRECISION));
|
memcpy(X,A,m*n*sizeof(PRECISION));
|
||||||
if (!(iwork=(int*)malloc(8*((m < n) ? m : n)*sizeof(int))))
|
if (!(iwork=(int*)malloc(8*((m < n) ? m : n)*sizeof(int))))
|
||||||
|
@ -1241,18 +1274,20 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
|
||||||
return MEM_ERR;
|
return MEM_ERR;
|
||||||
}
|
}
|
||||||
k=-1;
|
k=-1;
|
||||||
|
m2 = m;
|
||||||
|
n2 = n;
|
||||||
if (at)
|
if (at)
|
||||||
{
|
{
|
||||||
memcpy(X,A,m*n*sizeof(PRECISION));
|
memcpy(X,A,m*n*sizeof(PRECISION));
|
||||||
k=-1;
|
k=-1;
|
||||||
gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,&opt_size,&k,&info);
|
gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,&opt_size,&k,&info);
|
||||||
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
|
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
free(iwork);
|
free(iwork);
|
||||||
free(X);
|
free(X);
|
||||||
return info ? BLAS_LAPACK_ERR : MEM_ERR;
|
return info ? BLAS_LAPACK_ERR : MEM_ERR;
|
||||||
}
|
}
|
||||||
gesvd(&jobz,&jobz,&m,&n,X,&m,d,U,&m,V,&n,work,&k,&info);
|
gesvd(&jobz,&jobz,&m2,&n2,X,&m2,d,U,&m2,V,&n2,work,&k,&info);
|
||||||
if (info)
|
if (info)
|
||||||
{
|
{
|
||||||
free(work);
|
free(work);
|
||||||
|
@ -1269,14 +1304,14 @@ int bSVD(PRECISION *U, PRECISION *d, PRECISION *V, PRECISION *A, int m, int n, i
|
||||||
{
|
{
|
||||||
memcpy(X,A,m*n*sizeof(PRECISION));
|
memcpy(X,A,m*n*sizeof(PRECISION));
|
||||||
k=-1;
|
k=-1;
|
||||||
gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,&opt_size,&k,&info);
|
gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,&opt_size,&k,&info);
|
||||||
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
|
if (info || !(work=(PRECISION*)malloc((k=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
free(iwork);
|
free(iwork);
|
||||||
free(X);
|
free(X);
|
||||||
return info ? BLAS_LAPACK_ERR : MEM_ERR;
|
return info ? BLAS_LAPACK_ERR : MEM_ERR;
|
||||||
}
|
}
|
||||||
gesvd(&jobz,&jobz,&n,&m,X,&n,d,V,&n,U,&m,work,&k,&info);
|
gesvd(&jobz,&jobz,&n2,&m2,X,&n2,d,V,&n2,U,&m2,work,&k,&info);
|
||||||
if (info)
|
if (info)
|
||||||
{
|
{
|
||||||
free(work);
|
free(work);
|
||||||
|
@ -1443,14 +1478,18 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
#define orglq dorglq
|
#define orglq dorglq
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int i, j, k, l, lwork, info, p=(m < n) ? m : n;
|
int i, j, k, l, p=(m < n) ? m : n;
|
||||||
PRECISION *tau, *work, *ptr, opt_size;
|
PRECISION *tau, *work, *ptr, opt_size;
|
||||||
|
|
||||||
|
lapack_int m2, n2, p2, q2, lwork, info;
|
||||||
|
|
||||||
if (!(tau=(PRECISION*)malloc(p*sizeof(PRECISION)))) return MEM_ERR;
|
if (!(tau=(PRECISION*)malloc(p*sizeof(PRECISION)))) return MEM_ERR;
|
||||||
if (xt)
|
if (xt)
|
||||||
{
|
{
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
|
m2 = m;
|
||||||
geqrf(&m,&n,X,&m,tau,&opt_size,&lwork,&info);
|
n2 = n;
|
||||||
|
geqrf(&m2,&n2,X,&m2,tau,&opt_size,&lwork,&info);
|
||||||
|
|
||||||
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
|
@ -1458,7 +1497,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
return MEM_ERR;
|
return MEM_ERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
geqrf(&m,&n,X,&m,tau,work,&lwork,&info);
|
geqrf(&m2,&n2,X,&m2,tau,work,&lwork,&info);
|
||||||
|
|
||||||
free(work);
|
free(work);
|
||||||
if (info)
|
if (info)
|
||||||
|
@ -1479,7 +1518,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
memcpy(ptr,X,m*p*sizeof(PRECISION));
|
memcpy(ptr,X,m*p*sizeof(PRECISION));
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
|
|
||||||
orgqr(&m,&q,&p,ptr,&m,tau,&opt_size,&lwork,&info);
|
p2 = p;
|
||||||
|
q2 = q;
|
||||||
|
orgqr(&m2,&q2,&p2,ptr,&m2,tau,&opt_size,&lwork,&info);
|
||||||
|
|
||||||
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
|
@ -1488,7 +1529,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
return MEM_ERR;
|
return MEM_ERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
orgqr(&m,&q,&p,ptr,&m,tau,work,&lwork,&info);
|
orgqr(&m2,&q2,&p2,ptr,&m2,tau,work,&lwork,&info);
|
||||||
|
|
||||||
free(work);
|
free(work);
|
||||||
if (!qt)
|
if (!qt)
|
||||||
|
@ -1523,8 +1564,9 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
|
m2 = m;
|
||||||
gelqf(&n,&m,X,&n,tau,&opt_size,&lwork,&info);
|
n2 = n;
|
||||||
|
gelqf(&n2,&m2,X,&n2,tau,&opt_size,&lwork,&info);
|
||||||
|
|
||||||
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
|
@ -1532,7 +1574,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
return MEM_ERR;
|
return MEM_ERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
gelqf(&n,&m,X,&n,tau,work,&lwork,&info);
|
gelqf(&n2,&m2,X,&n2,tau,work,&lwork,&info);
|
||||||
|
|
||||||
free(work);
|
free(work);
|
||||||
if (info)
|
if (info)
|
||||||
|
@ -1563,7 +1605,10 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
ptr[--k]=X[--l];
|
ptr[--k]=X[--l];
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
|
|
||||||
orglq(&q,&m,&p,ptr,&q,tau,&opt_size,&lwork,&info);
|
m2 = m;
|
||||||
|
p2 = p;
|
||||||
|
q2 = q;
|
||||||
|
orglq(&q2,&m2,&p2,ptr,&q2,tau,&opt_size,&lwork,&info);
|
||||||
|
|
||||||
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
if (!(work=(PRECISION*)malloc((lwork=(int)opt_size)*sizeof(PRECISION))))
|
||||||
{
|
{
|
||||||
|
@ -1572,7 +1617,7 @@ int bQR(PRECISION *Q, PRECISION *R, PRECISION *X, int m, int n, int q, int qt, i
|
||||||
return MEM_ERR;
|
return MEM_ERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
orglq(&q,&m,&p,ptr,&q,tau,work,&lwork,&info);
|
orglq(&q2,&m2,&p2,ptr,&q2,tau,work,&lwork,&info);
|
||||||
|
|
||||||
free(work);
|
free(work);
|
||||||
if (qt)
|
if (qt)
|
||||||
|
@ -1666,9 +1711,11 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION *
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char jobvsl, jobvsr, sort='N';
|
char jobvsl, jobvsr, sort='N';
|
||||||
int lwork, simd, info, rtrn;
|
int rtrn;
|
||||||
PRECISION *work, size, *palpha_r, *palpha_i, *pbeta;
|
PRECISION *work, size, *palpha_r, *palpha_i, *pbeta;
|
||||||
|
|
||||||
|
lapack_int n2, simd, lwork, info;
|
||||||
|
|
||||||
jobvsl=Q ? 'V' : 'N';
|
jobvsl=Q ? 'V' : 'N';
|
||||||
jobvsr=Z ? 'V' : 'N';
|
jobvsr=Z ? 'V' : 'N';
|
||||||
palpha_r=alpha_r ? alpha_r : (PRECISION*)malloc(n*sizeof(PRECISION));
|
palpha_r=alpha_r ? alpha_r : (PRECISION*)malloc(n*sizeof(PRECISION));
|
||||||
|
@ -1694,13 +1741,14 @@ int bQZ_real(PRECISION *Q, PRECISION *Z, PRECISION *S, PRECISION *T, PRECISION *
|
||||||
if (!bt) bTransposeInPlace(B,n);
|
if (!bt) bTransposeInPlace(B,n);
|
||||||
|
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,&size,&lwork,(void*)NULL,&info);
|
n2 = n;
|
||||||
|
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,&size,&lwork,(void*)NULL,&info);
|
||||||
if (!info)
|
if (!info)
|
||||||
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
||||||
rtrn=MEM_ERR;
|
rtrn=MEM_ERR;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n,S,&n,T,&n,&simd,palpha_r,palpha_i,pbeta,Q,&n,Z,&n,work,&lwork,(void*)NULL,&info);
|
gges(&jobvsl,&jobvsr,&sort,(void*)NULL,&n2,S,&n2,T,&n2,&simd,palpha_r,palpha_i,pbeta,Q,&n2,Z,&n2,work,&lwork,(void*)NULL,&info);
|
||||||
if (!info)
|
if (!info)
|
||||||
{
|
{
|
||||||
if (Q && !qt) bTransposeInPlace(Q,n);
|
if (Q && !qt) bTransposeInPlace(Q,n);
|
||||||
|
@ -1784,9 +1832,19 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
|
||||||
#define tgsen dtgsen
|
#define tgsen dtgsen
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int ijob=0, wantq, wantz, lwork, liwork=1, m=n, info, rtrn, iwork;
|
int wantq, wantz, m=n, rtrn, i;
|
||||||
PRECISION size, *palpha_r, *palpha_i, *pbeta, *work;
|
PRECISION size, *palpha_r, *palpha_i, *pbeta, *work;
|
||||||
|
|
||||||
|
lapack_int ijob, wantq2, wantz2, *select2, n2, m2, lwork, iwork, liwork, info;
|
||||||
|
ijob = 0;
|
||||||
|
liwork=1;
|
||||||
|
|
||||||
|
if(!(select2 = (lapack_int *)calloc(n, sizeof(lapack_int))))
|
||||||
|
return MEM_ERR;
|
||||||
|
|
||||||
|
for(i=0; i<n; i++)
|
||||||
|
select2[i] = select[i];
|
||||||
|
|
||||||
wantq=(QQ && Q) ? 1 : 0;
|
wantq=(QQ && Q) ? 1 : 0;
|
||||||
wantz=(ZZ && Z) ? 1 : 0;
|
wantz=(ZZ && Z) ? 1 : 0;
|
||||||
|
|
||||||
|
@ -1831,15 +1889,21 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
|
||||||
if (!zt) bTransposeInPlace(Z,n);
|
if (!zt) bTransposeInPlace(Z,n);
|
||||||
|
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
tgsen(&ijob,&wantq,&wantz,select,&n,SS,&n,TT,&n,palpha_r,palpha_i,pbeta,QQ,&n,ZZ,&n,&m,
|
wantq2 = wantq;
|
||||||
|
wantz2 = wantz;
|
||||||
|
n2 = n;
|
||||||
|
m2 = m;
|
||||||
|
tgsen(&ijob,&wantq2,&wantz2,select2,&n2,SS,&n2,TT,&n2,palpha_r,palpha_i,pbeta,QQ,&n2,ZZ,&n2,&m2,
|
||||||
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,&size,&lwork,&iwork,&liwork,&info);
|
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,&size,&lwork,&iwork,&liwork,&info);
|
||||||
|
m = m2;
|
||||||
if (!info)
|
if (!info)
|
||||||
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
||||||
rtrn=MEM_ERR;
|
rtrn=MEM_ERR;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tgsen(&ijob,&wantq,&wantz,select,&n,SS,&n,TT,&n,palpha_r,palpha_i,pbeta,QQ,&n,ZZ,&n,&m,
|
tgsen(&ijob,&wantq2,&wantz2,select2,&n2,SS,&n2,TT,&n2,palpha_r,palpha_i,pbeta,QQ,&n2,ZZ,&n2,&m2,
|
||||||
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,work,&lwork,&iwork,&liwork,&info);
|
(PRECISION*)NULL,(PRECISION*)NULL,(PRECISION*)NULL,work,&lwork,&iwork,&liwork,&info);
|
||||||
|
m = m2;
|
||||||
if (!info)
|
if (!info)
|
||||||
{
|
{
|
||||||
if (wantq && !qqt) bTransposeInPlace(QQ,n);
|
if (wantq && !qqt) bTransposeInPlace(QQ,n);
|
||||||
|
@ -1859,6 +1923,7 @@ int bReorderQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PR
|
||||||
if (!alpha_r && palpha_r) free(palpha_r);
|
if (!alpha_r && palpha_r) free(palpha_r);
|
||||||
if (!alpha_i && palpha_i) free(palpha_i);
|
if (!alpha_i && palpha_i) free(palpha_i);
|
||||||
if (!beta && pbeta) free(pbeta);
|
if (!beta && pbeta) free(pbeta);
|
||||||
|
free(select2);
|
||||||
|
|
||||||
return rtrn;
|
return rtrn;
|
||||||
|
|
||||||
|
@ -1923,9 +1988,11 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
|
||||||
#define tgexc dtgexc
|
#define tgexc dtgexc
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int wantq, wantz, lwork, info, rtrn, *pairs, i, j, ii, jj;
|
int wantq, wantz, rtrn, *pairs, i, j, ii, jj;
|
||||||
PRECISION size, *work, *gev, small, x1, x2;
|
PRECISION size, *work, *gev, small, x1, x2;
|
||||||
|
|
||||||
|
lapack_int wantq2, wantz2, n2, i2, j2, ii2, jj2, lwork, info;
|
||||||
|
|
||||||
if (n == 1) return NO_ERR;
|
if (n == 1) return NO_ERR;
|
||||||
|
|
||||||
wantq=(QQ && Q) ? 1 : 0;
|
wantq=(QQ && Q) ? 1 : 0;
|
||||||
|
@ -1973,7 +2040,14 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
|
||||||
|
|
||||||
lwork=-1;
|
lwork=-1;
|
||||||
j=2; i=1;
|
j=2; i=1;
|
||||||
tgexc(&wantq,&wantz,&n,SS,&n,TT,&n,QQ,&n,ZZ,&n,&j,&i,&size,&lwork,&info);
|
wantq2 = wantq;
|
||||||
|
wantz2 = wantz;
|
||||||
|
n2 = n;
|
||||||
|
j2 = j;
|
||||||
|
i2 = i;
|
||||||
|
tgexc(&wantq2,&wantz2,&n2,SS,&n2,TT,&n2,QQ,&n2,ZZ,&n2,&j2,&i2,&size,&lwork,&info);
|
||||||
|
i = i2;
|
||||||
|
j = j2;
|
||||||
if (!info)
|
if (!info)
|
||||||
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
if (!(work=malloc((lwork=(int)size)*sizeof(PRECISION))))
|
||||||
rtrn=MEM_ERR;
|
rtrn=MEM_ERR;
|
||||||
|
@ -2008,7 +2082,14 @@ int bSortQZ_real(int *select, PRECISION *QQ, PRECISION *ZZ, PRECISION *SS, PRECI
|
||||||
{
|
{
|
||||||
ii=i+1;
|
ii=i+1;
|
||||||
jj=j+1;
|
jj=j+1;
|
||||||
tgexc(&wantq,&wantz,&n,SS,&n,TT,&n,QQ,&n,ZZ,&n,&jj,&ii,work,&lwork,&info);
|
wantq2 = wantq;
|
||||||
|
wantz2 = wantz;
|
||||||
|
n2 = n;
|
||||||
|
jj2 = jj;
|
||||||
|
ii2 = ii;
|
||||||
|
tgexc(&wantq2,&wantz2,&n2,SS,&n2,TT,&n2,QQ,&n2,ZZ,&n2,&jj2,&ii2,work,&lwork,&info);
|
||||||
|
ii = ii2;
|
||||||
|
jj = jj2;
|
||||||
if (!info)
|
if (!info)
|
||||||
if (pairs[j])
|
if (pairs[j])
|
||||||
{
|
{
|
||||||
|
|
|
@ -22,11 +22,13 @@ int lurgen(TSdmatrix *lu_dm, TSivector *pivot_dv, TSdmatrix *x_dm) {
|
||||||
//Inputs:
|
//Inputs:
|
||||||
// x_dm: nrows-by-ncols general real matrix.
|
// x_dm: nrows-by-ncols general real matrix.
|
||||||
|
|
||||||
int nrows, ncols, mindim,
|
int nrows, ncols, mindim, i,
|
||||||
errflag=2; //errflag=0 implies a successful execution. But we start with 2 so as to let dgetrf_ export a correct flag.
|
errflag=2; //errflag=0 implies a successful execution. But we start with 2 so as to let dgetrf_ export a correct flag.
|
||||||
int *pivot_p=NULL;
|
|
||||||
double *LU;
|
double *LU;
|
||||||
|
|
||||||
|
lapack_int nrows2, ncols2, errflag2;
|
||||||
|
lapack_int *pivot_p=NULL;
|
||||||
|
|
||||||
//=== Checking dimensions and memory allocation.
|
//=== Checking dimensions and memory allocation.
|
||||||
if ( !lu_dm || !x_dm ) fn_DisplayError(".../mathlib.c/lurgen(): The input arguments lu_dm and x_dm must be cretaed (memory-allocated)");
|
if ( !lu_dm || !x_dm ) fn_DisplayError(".../mathlib.c/lurgen(): The input arguments lu_dm and x_dm must be cretaed (memory-allocated)");
|
||||||
else if ( ( (nrows=x_dm->nrows) != lu_dm->nrows) || ( (ncols=x_dm->ncols) != lu_dm->ncols) )
|
else if ( ( (nrows=x_dm->nrows) != lu_dm->nrows) || ( (ncols=x_dm->ncols) != lu_dm->ncols) )
|
||||||
|
@ -46,16 +48,21 @@ int lurgen(TSdmatrix *lu_dm, TSivector *pivot_dv, TSdmatrix *x_dm) {
|
||||||
lu_dm->flag = M_UT; //To make the lower part of lu_dm available, one must create another matrix and explicitly make it a unit lower matrix.
|
lu_dm->flag = M_UT; //To make the lower part of lu_dm available, one must create another matrix and explicitly make it a unit lower matrix.
|
||||||
|
|
||||||
//=== Calling the MKL function.
|
//=== Calling the MKL function.
|
||||||
if (!pivot_dv) {
|
nrows2 = nrows;
|
||||||
pivot_p = tzMalloc(mindim, int);
|
ncols2 = ncols;
|
||||||
dgetrf_(&nrows, &ncols, LU, &nrows, pivot_p, &errflag);
|
errflag2 = errflag;
|
||||||
free(pivot_p); //Frees the memory belonging to this function.
|
pivot_p = tzMalloc(mindim, lapack_int);
|
||||||
}
|
if (!pivot_dv)
|
||||||
|
dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2);
|
||||||
else {
|
else {
|
||||||
if ( pivot_dv->n != mindim) fn_DisplayError("Make sure the dimension of the input vector pivot_dv is the minimum number of row number and column number of the input matrix x_dm");
|
if ( pivot_dv->n != mindim) fn_DisplayError("Make sure the dimension of the input vector pivot_dv is the minimum number of row number and column number of the input matrix x_dm");
|
||||||
dgetrf_(&nrows, &ncols, LU, &nrows, pivot_dv->v, &errflag);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
dgetrf_(&nrows2, &ncols2, LU, &nrows2, pivot_p, &errflag2);
|
||||||
|
for(i=0; i<mindim; i++)
|
||||||
|
pivot_dv->v[i] = pivot_p[i];
|
||||||
|
}
|
||||||
|
free(pivot_p); //Frees the memory belonging to this function.
|
||||||
|
errflag = errflag2;
|
||||||
|
|
||||||
return( errflag ); //(1) If errflag = 0, success. (2) If errorflag = -i, the ith parameter has an illegal value.
|
return( errflag ); //(1) If errflag = 0, success. (2) If errorflag = -i, the ith parameter has an illegal value.
|
||||||
//(3) If errflag = i, u_{ii}=0.0. The factorization is completed, but U is exactly singular. Dividing
|
//(3) If errflag = i, u_{ii}=0.0. The factorization is completed, but U is exactly singular. Dividing
|
||||||
|
@ -85,6 +92,8 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm)
|
||||||
double *tmpd0_m = NULL,
|
double *tmpd0_m = NULL,
|
||||||
*work_p = NULL;
|
*work_p = NULL;
|
||||||
|
|
||||||
|
lapack_int n2, lwork2, errflag2;
|
||||||
|
|
||||||
if ( !S_dm || !(S_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/eigrsym(): input matrix (1) must be created (memory-alloacted) and (2) must be symmetric (either M_SU or M_SL)");
|
if ( !S_dm || !(S_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/eigrsym(): input matrix (1) must be created (memory-alloacted) and (2) must be symmetric (either M_SU or M_SL)");
|
||||||
if ( !eval_dv ) fn_DisplayError(".../mathlib.c/eigrsym(): input eigenvalue vector must be created (memory-allocated)");
|
if ( !eval_dv ) fn_DisplayError(".../mathlib.c/eigrsym(): input eigenvalue vector must be created (memory-allocated)");
|
||||||
lwork = (n1=_n=S_dm->nrows)*BLOCKSIZE_FOR_INTEL_MKL;
|
lwork = (n1=_n=S_dm->nrows)*BLOCKSIZE_FOR_INTEL_MKL;
|
||||||
|
@ -99,7 +108,12 @@ int eigrsym(TSdvector *eval_dv, TSdmatrix *eVec_dm, const TSdmatrix *S_dm)
|
||||||
// Obtains eigenvalues and, optionally, eigenvectors.
|
// Obtains eigenvalues and, optionally, eigenvectors.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
memcpy(tmpd0_m, S_dm->M, square(_n)*sizeof(double));
|
memcpy(tmpd0_m, S_dm->M, square(_n)*sizeof(double));
|
||||||
dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n1, tmpd0_m, &n1, eval_dv->v, work_p, &lwork, &errflag);
|
|
||||||
|
n2 = n1;
|
||||||
|
lwork2 = lwork;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dsyev_( (eVec_dm) ? "V" : "N", (S_dm->flag & M_SU) ? "U" : "L", &n2, tmpd0_m, &n2, eval_dv->v, work_p, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (work_p[0]>lwork) printf("Warning for /mathlib.c/eigrsym(): needs at least %d workspace for good performance "
|
if (work_p[0]>lwork) printf("Warning for /mathlib.c/eigrsym(): needs at least %d workspace for good performance "
|
||||||
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
||||||
eval_dv->flag = V_DEF;
|
eval_dv->flag = V_DEF;
|
||||||
|
@ -154,6 +168,8 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm,
|
||||||
*revecr_m=NULL, *reveci_m=NULL, //NULL means that by default we dont' compute eigenvectors.
|
*revecr_m=NULL, *reveci_m=NULL, //NULL means that by default we dont' compute eigenvectors.
|
||||||
*levecr_m=NULL, *leveci_m=NULL;
|
*levecr_m=NULL, *leveci_m=NULL;
|
||||||
|
|
||||||
|
lapack_int n2, lwork2, errflag2;
|
||||||
|
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Checking dimensions, etc.
|
// Checking dimensions, etc.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
|
@ -197,8 +213,12 @@ int eigrgen(TSdzvector *vals_dzv, TSdzmatrix *rights_dzm, TSdzmatrix *lefts_dzm,
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Obtains eigenvalues and, optionally, eigenvectors.
|
// Obtains eigenvalues and, optionally, eigenvectors.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v,
|
n2 = n1;
|
||||||
levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag);
|
lwork2 = lwork;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v,
|
||||||
|
levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
vals_dzv->real->flag = V_DEF;
|
vals_dzv->real->flag = V_DEF;
|
||||||
|
|
||||||
//---------------------------
|
//---------------------------
|
||||||
|
@ -258,6 +278,7 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
|
||||||
int errflag=2, loc, nrows, _m, _i, _j; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
int errflag=2, loc, nrows, _m, _i, _j; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
||||||
double *D, *S;
|
double *D, *S;
|
||||||
|
|
||||||
|
lapack_int _m2, errflag2;
|
||||||
|
|
||||||
if ( !D_dm || !S_dm ) fn_DisplayError(".../mathlib.c/chol(): L and R input square matricies must be created (memory allocated)");
|
if ( !D_dm || !S_dm ) fn_DisplayError(".../mathlib.c/chol(): L and R input square matricies must be created (memory allocated)");
|
||||||
else {
|
else {
|
||||||
|
@ -293,7 +314,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
|
||||||
printf("\n ------- .../mathlib.c/chol(): R input square matrix must be symmetric!-------\n");
|
printf("\n ------- .../mathlib.c/chol(): R input square matrix must be symmetric!-------\n");
|
||||||
return (-6);
|
return (-6);
|
||||||
}
|
}
|
||||||
dpotrf_("U", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("U", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
break;
|
break;
|
||||||
case 'L': case 'l':
|
case 'L': case 'l':
|
||||||
if (S_dm->flag & M_SL) {
|
if (S_dm->flag & M_SL) {
|
||||||
|
@ -320,7 +344,10 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
|
||||||
return (-6);
|
return (-6);
|
||||||
}
|
}
|
||||||
//??????NOT tested yet.
|
//??????NOT tested yet.
|
||||||
dpotrf_("L", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("L", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fn_DisplayError(".../mathlib.c/chol(): Input ul must be either 'U' or 'L'");
|
fn_DisplayError(".../mathlib.c/chol(): Input ul must be either 'U' or 'L'");
|
||||||
|
@ -328,12 +355,18 @@ int chol(TSdmatrix *D_dm, TSdmatrix *S_dm, const char ul) {
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ( (ul=='U' || ul=='u') && (D_dm->flag & M_SU) ) {
|
if ( (ul=='U' || ul=='u') && (D_dm->flag & M_SU) ) {
|
||||||
dpotrf_("U", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("U", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
D_dm->flag = M_UT;
|
D_dm->flag = M_UT;
|
||||||
}
|
}
|
||||||
else if ( (ul=='L' || ul=='l') && (D_dm->flag & M_SL) ) {
|
else if ( (ul=='L' || ul=='l') && (D_dm->flag & M_SL) ) {
|
||||||
//Tested. It works!
|
//Tested. It works!
|
||||||
dpotrf_("L", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("L", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
D_dm->flag = M_LT;
|
D_dm->flag = M_LT;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -370,6 +403,7 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un)
|
||||||
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
||||||
double *X, *A;
|
double *X, *A;
|
||||||
|
|
||||||
|
lapack_int _n2, errflag2;
|
||||||
|
|
||||||
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrtri(): Both input matrices must be created (memory-allocated)");
|
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrtri(): Both input matrices must be created (memory-allocated)");
|
||||||
else if ( !(A_dm->flag & (M_UT | M_LT)) ) fn_DisplayError(".../mathlib.c/invrtri(): (1) R input matrix A must be given legal values; (2) A must be a real triangular matrix, i.e., M_UT or M_LT");
|
else if ( !(A_dm->flag & (M_UT | M_LT)) ) fn_DisplayError(".../mathlib.c/invrtri(): (1) R input matrix A must be given legal values; (2) A must be a real triangular matrix, i.e., M_UT or M_LT");
|
||||||
|
@ -381,14 +415,17 @@ int invrtri(TSdmatrix *X_dm, TSdmatrix *A_dm, const char un)
|
||||||
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
|
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
|
||||||
fn_DisplayError(".../mathlib.c/invrtri(): both input and output matrices (1) must be square and (2) must have the same dimension");
|
fn_DisplayError(".../mathlib.c/invrtri(): both input and output matrices (1) must be square and (2) must have the same dimension");
|
||||||
|
|
||||||
|
_n2 = _n;
|
||||||
|
errflag2 = errflag;
|
||||||
if (X==A) {
|
if (X==A) {
|
||||||
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag);
|
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) return (errflag);
|
if (errflag) return (errflag);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memcpy(X, A, _n*_n*sizeof(double));
|
memcpy(X, A, _n*_n*sizeof(double));
|
||||||
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n, X, &_n, &errflag);
|
dtrtri_((A_dm->flag & M_UT) ? "U" : "L", (un=='U' || un=='u') ? "U" : "N", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) return (errflag);
|
if (errflag) return (errflag);
|
||||||
else X_dm->flag = A_dm->flag;
|
else X_dm->flag = A_dm->flag;
|
||||||
}
|
}
|
||||||
|
@ -419,6 +456,7 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
|
||||||
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
int _n, errflag=2; //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
||||||
double *X, *A;
|
double *X, *A;
|
||||||
|
|
||||||
|
lapack_int _n2, errflag2;
|
||||||
|
|
||||||
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invspd(): Both input matrices must be created (memory-allocated)");
|
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invspd(): Both input matrices must be created (memory-allocated)");
|
||||||
else if ( !(A_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/invspd(): (1) R input matrix A must be given legal values; (2) A must be symmetric, positive-definite, i.e., M_SU or M_SL");
|
else if ( !(A_dm->flag & (M_SU | M_SL)) ) fn_DisplayError(".../mathlib.c/invspd(): (1) R input matrix A must be given legal values; (2) A must be symmetric, positive-definite, i.e., M_SU or M_SL");
|
||||||
|
@ -433,10 +471,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
|
||||||
if ( (_n != A_dm->ncols) )
|
if ( (_n != A_dm->ncols) )
|
||||||
fn_DisplayError(".../mathlib.c/invspd(): input matrix (1) must be square and (2) must have the same dimension");
|
fn_DisplayError(".../mathlib.c/invspd(): input matrix (1) must be square and (2) must have the same dimension");
|
||||||
//=== Choleski decomposition.
|
//=== Choleski decomposition.
|
||||||
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
|
_n2 = _n;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) return (errflag);
|
if (errflag) return (errflag);
|
||||||
//=== Takes inverse.
|
//=== Takes inverse.
|
||||||
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
|
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
A_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
|
A_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
|
||||||
return (errflag);
|
return (errflag);
|
||||||
//---
|
//---
|
||||||
|
@ -448,10 +490,14 @@ int invspd(TSdmatrix *X_dm, TSdmatrix *A_dm, const char ul)
|
||||||
fn_DisplayError(".../mathlib.c/invspd(): both input and output matrices (1) must be square and (2) must have the same dimension");
|
fn_DisplayError(".../mathlib.c/invspd(): both input and output matrices (1) must be square and (2) must have the same dimension");
|
||||||
memcpy(X, A, _n*_n*sizeof(double));
|
memcpy(X, A, _n*_n*sizeof(double));
|
||||||
//=== Choleski decomposition.
|
//=== Choleski decomposition.
|
||||||
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
|
_n2 = _n;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) return (errflag);
|
if (errflag) return (errflag);
|
||||||
//=== Takes inverse.
|
//=== Takes inverse.
|
||||||
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n, X, &_n, &errflag);
|
dpotri_(((ul=='U') || (ul=='u')) ? "U" : "L", &_n2, X, &_n2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
X_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
|
X_dm->flag = ((ul=='U') || (ul=='u')) ? M_SU : M_SL;
|
||||||
return (errflag);
|
return (errflag);
|
||||||
//---
|
//---
|
||||||
|
@ -478,17 +524,18 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
|
||||||
//Inputs:
|
//Inputs:
|
||||||
// A: _n-by-_n real general matrix.
|
// A: _n-by-_n real general matrix.
|
||||||
int _n, errflag=2, //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
int _n, errflag=2, //errflat=0 implies successful decomposition. But we start with 2 so as to let dgetri_ export a correct flag.
|
||||||
lwork, *ipivot; //Used when calling LAPACK.
|
lwork; //Used when calling LAPACK.
|
||||||
double *X, *A,
|
double *X, *A,
|
||||||
*work; //Used when calling LAPACK.
|
*work; //Used when calling LAPACK.
|
||||||
|
|
||||||
|
lapack_int _n2, errflag2, *ipivot, lwork2;
|
||||||
|
|
||||||
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrgen(): Both input matrices must be created (memory-allocated)");
|
if ( !X_dm || !A_dm ) fn_DisplayError(".../mathlib.c/invrgen(): Both input matrices must be created (memory-allocated)");
|
||||||
else if ( !(A_dm->flag & M_GE) ) fn_DisplayError(".../mathlib.c/invrgen(): (1) R input matrix A must be given legal values; (2) A must be a general matrix, i.e., M_GE");
|
else if ( !(A_dm->flag & M_GE) ) fn_DisplayError(".../mathlib.c/invrgen(): (1) R input matrix A must be given legal values; (2) A must be a general matrix, i.e., M_GE");
|
||||||
else {
|
else {
|
||||||
X = X_dm->M;
|
X = X_dm->M;
|
||||||
A = A_dm->M;
|
A = A_dm->M;
|
||||||
ipivot = tzMalloc((_n=A_dm->nrows), int);
|
ipivot = tzMalloc((_n=A_dm->nrows), lapack_int);
|
||||||
work = tzMalloc((lwork=_n*BLOCKSIZE_FOR_INTEL_MKL), double);
|
work = tzMalloc((lwork=_n*BLOCKSIZE_FOR_INTEL_MKL), double);
|
||||||
}
|
}
|
||||||
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
|
if ( (_n != A_dm->ncols) || (_n != X_dm->nrows) || (_n != X_dm->ncols) )
|
||||||
|
@ -496,14 +543,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
|
||||||
|
|
||||||
|
|
||||||
if (X==A) {
|
if (X==A) {
|
||||||
dgetrf_(&_n, &_n, A, &_n, ipivot, &errflag);
|
_n2 = _n;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dgetrf_(&_n2, &_n2, A, &_n2, ipivot, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) {
|
if (errflag) {
|
||||||
// A_dm->flag = M_UNDEF;
|
// A_dm->flag = M_UNDEF;
|
||||||
free(ipivot);
|
free(ipivot);
|
||||||
free(work);
|
free(work);
|
||||||
return errflag;
|
return errflag;
|
||||||
}
|
}
|
||||||
dgetri_(&_n, A, &_n, ipivot, work, &lwork, &errflag);
|
lwork2 = lwork;
|
||||||
|
dgetri_(&_n2, A, &_n2, ipivot, work, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
|
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
|
||||||
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
|
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
|
||||||
if (errflag) {
|
if (errflag) {
|
||||||
|
@ -514,14 +566,19 @@ int invrgen(TSdmatrix *X_dm, TSdmatrix *A_dm)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memcpy(X, A, _n*_n*sizeof(double));
|
memcpy(X, A, _n*_n*sizeof(double));
|
||||||
dgetrf_(&_n, &_n, X, &_n, ipivot, &errflag);
|
_n2 = _n;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dgetrf_(&_n2, &_n2, X, &_n2, ipivot, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) {
|
if (errflag) {
|
||||||
// X_dm->flag = M_UNDEF;
|
// X_dm->flag = M_UNDEF;
|
||||||
free(ipivot);
|
free(ipivot);
|
||||||
free(work);
|
free(work);
|
||||||
return errflag;
|
return errflag;
|
||||||
}
|
}
|
||||||
dgetri_(&_n, X, &_n, ipivot, work, &lwork, &errflag);
|
lwork2 = lwork;
|
||||||
|
dgetri_(&_n2, X, &_n2, ipivot, work, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
|
if (work[0]>lwork) printf("Warning for /mathlib.c/invrgen(); when calling MKL dgetri_(), we need at least %d workspace for good performance "
|
||||||
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
|
"but lwork is allocated with only %d space!\n", (int)work[0], lwork);
|
||||||
if (errflag) {
|
if (errflag) {
|
||||||
|
@ -565,7 +622,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
// _r-by-_m real rectangular (rrect) matrix if / (XA=B).
|
// _r-by-_m real rectangular (rrect) matrix if / (XA=B).
|
||||||
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
|
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
|
||||||
|
|
||||||
int _m, _n, _r, //mn_max, mn_min,
|
int _m, _n, _r, i, //mn_max, mn_min,
|
||||||
lwork, _i, info = -2,
|
lwork, _i, info = -2,
|
||||||
*jpvt_p = NULL;
|
*jpvt_p = NULL;
|
||||||
double *A, *B, *X,
|
double *A, *B, *X,
|
||||||
|
@ -574,6 +631,9 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
*tau_p = NULL,
|
*tau_p = NULL,
|
||||||
*work_p = NULL;
|
*work_p = NULL;
|
||||||
|
|
||||||
|
lapack_int _m2, _n2, _r2, lwork2, info2;
|
||||||
|
lapack_int *jpvt_p2 = NULL;
|
||||||
|
|
||||||
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag &M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rrect(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag &M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rrect(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
||||||
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rrect(): output matrix X_dm must be created (allocated memory)");
|
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rrect(): output matrix X_dm must be created (allocated memory)");
|
||||||
if (lr=='/') {
|
if (lr=='/') {
|
||||||
|
@ -603,6 +663,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
// qrb_p = tzMalloc((mn_max = _m>_n?_m:_n)*_r, double); //DDDDebug: seems requiring _m>_n, but this may not be the case.
|
// qrb_p = tzMalloc((mn_max = _m>_n?_m:_n)*_r, double); //DDDDebug: seems requiring _m>_n, but this may not be the case.
|
||||||
qrb_p = tzMalloc(_m*_r, double); //Note that _m>=_n.
|
qrb_p = tzMalloc(_m*_r, double); //Note that _m>=_n.
|
||||||
jpvt_p = tzMalloc(_n, int);
|
jpvt_p = tzMalloc(_n, int);
|
||||||
|
jpvt_p2 = tzMalloc(_n, lapack_int);
|
||||||
tau_p = tzMalloc(_n, double);
|
tau_p = tzMalloc(_n, double);
|
||||||
// work_p = tzMalloc(lwork, double);
|
// work_p = tzMalloc(lwork, double);
|
||||||
|
|
||||||
|
@ -622,7 +683,16 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
|
|
||||||
//=== Computes the QR factorization of a general m by n matrix with column pivoting using Level 3 BLAS.
|
//=== Computes the QR factorization of a general m by n matrix with column pivoting using Level 3 BLAS.
|
||||||
work_p = tzMalloc(lwork=_n*BLOCKSIZE_FOR_INTEL_MKL, double);
|
work_p = tzMalloc(lwork=_n*BLOCKSIZE_FOR_INTEL_MKL, double);
|
||||||
dgeqp3_(&_m,&_n,qra_p,&_m,jpvt_p,tau_p,work_p,&lwork,&info);
|
_m2 = _m;
|
||||||
|
_n2 = _n;
|
||||||
|
lwork2 = lwork;
|
||||||
|
info2 = info;
|
||||||
|
for(i=0; i<_n; i++)
|
||||||
|
jpvt_p2[i] = jpvt_p[i];
|
||||||
|
dgeqp3_(&_m2,&_n2,qra_p,&_m2,jpvt_p2,tau_p,work_p,&lwork2,&info2);
|
||||||
|
info = info2;
|
||||||
|
for(i=0; i<_n; i++)
|
||||||
|
jpvt_p[i] = jpvt_p2[i];
|
||||||
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dgeqp3_(), we need at least %d workspace for good performance "
|
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dgeqp3_(), we need at least %d workspace for good performance "
|
||||||
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
||||||
tzDestroy(work_p);
|
tzDestroy(work_p);
|
||||||
|
@ -630,7 +700,13 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
|
|
||||||
//=== Multiplies a real matrix by the orthogonal matrix Q of the QR factorization formed by dgeqp3_.
|
//=== Multiplies a real matrix by the orthogonal matrix Q of the QR factorization formed by dgeqp3_.
|
||||||
work_p = tzMalloc(lwork=_r*BLOCKSIZE_FOR_INTEL_MKL, double);
|
work_p = tzMalloc(lwork=_r*BLOCKSIZE_FOR_INTEL_MKL, double);
|
||||||
dormqr_("L","T",&_m,&_r,&_n,qra_p,&_m,tau_p,qrb_p,&_m,work_p,&lwork,&info);
|
_m2 = _m;
|
||||||
|
_n2 = _n;
|
||||||
|
_r2 = _r;
|
||||||
|
lwork2 = lwork;
|
||||||
|
info2 = info;
|
||||||
|
dormqr_("L","T",&_m2,&_r2,&_n2,qra_p,&_m2,tau_p,qrb_p,&_m2,work_p,&lwork2,&info2);
|
||||||
|
info = info2;
|
||||||
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dormqr_(), we need at least %d workspace for good performance "
|
if (work_p[0]>lwork) printf("Warning for /mathlib.c/BdivA_rrect(); when calling MKL dormqr_(), we need at least %d workspace for good performance "
|
||||||
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
"but lwork is allocated with only %d space!\n", (int)work_p[0], lwork);
|
||||||
//dormqr_("L","T",&_m,&_r,&mn_min,qra_p,&_m,tau_p,qrb_p,&mn_max,work_p,&lwork,&info);
|
//dormqr_("L","T",&_m,&_r,&mn_min,qra_p,&_m,tau_p,qrb_p,&mn_max,work_p,&lwork,&info);
|
||||||
|
@ -653,6 +729,7 @@ int BdivA_rrect(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
tzDestroy(qra_p);
|
tzDestroy(qra_p);
|
||||||
tzDestroy(qrb_p);
|
tzDestroy(qrb_p);
|
||||||
tzDestroy(jpvt_p);
|
tzDestroy(jpvt_p);
|
||||||
|
tzDestroy(jpvt_p2);
|
||||||
tzDestroy(tau_p);
|
tzDestroy(tau_p);
|
||||||
// tzDestroy(work_p);
|
// tzDestroy(work_p);
|
||||||
|
|
||||||
|
@ -683,8 +760,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
|
// lr: if lr='\\', left division \ is performed; if lr='/', right division / is performed.
|
||||||
|
|
||||||
int _m, _r, m2,
|
int _m, _r, m2,
|
||||||
_i, info = -2,
|
_i, info = -2;
|
||||||
*ipiv_p = NULL;
|
|
||||||
double *A, *B, *X,
|
double *A, *B, *X,
|
||||||
*Atran_p = NULL, //Transpose of A if right division / takes place.
|
*Atran_p = NULL, //Transpose of A if right division / takes place.
|
||||||
*Btran_p = NULL, //Transpose of B if right division / takes place.
|
*Btran_p = NULL, //Transpose of B if right division / takes place.
|
||||||
|
@ -692,6 +768,10 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
// *tau_p = NULL,
|
// *tau_p = NULL,
|
||||||
// *work_p = NULL;
|
// *work_p = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
lapack_int _mlap, _rlap, info2, *ipiv_p = NULL;
|
||||||
|
|
||||||
|
|
||||||
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag & M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rgens(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
if (!A_dm || !(A_dm->flag & M_GE) || !B_dm || !(B_dm->flag & M_GE)) fn_DisplayError(".../mathlib.c/BdivA_rgens(): both input matricies A_dm and B_dm must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
||||||
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rgens(): output matrix X_dm must be created (allocated memory)");
|
if (!X_dm) fn_DisplayError(".../mathlib.c/BdivA_rgens(): output matrix X_dm must be created (allocated memory)");
|
||||||
if (lr=='/') {
|
if (lr=='/') {
|
||||||
|
@ -717,7 +797,7 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
if (lr=='/') {
|
if (lr=='/') {
|
||||||
//Right divistion /.
|
//Right divistion /.
|
||||||
//=== Memory allocation for this function only.
|
//=== Memory allocation for this function only.
|
||||||
ipiv_p = tzMalloc(_m, int);
|
ipiv_p = tzMalloc(_m, lapack_int);
|
||||||
Atran_p = tzMalloc(square(_m), double);
|
Atran_p = tzMalloc(square(_m), double);
|
||||||
Btran_p = tzMalloc(_m*_r, double);
|
Btran_p = tzMalloc(_m*_r, double);
|
||||||
|
|
||||||
|
@ -725,7 +805,11 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
|
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
|
||||||
cblas_dcopy(_r, B+_i*_r, 1, Btran_p+_i, _m); //Copying the transpose of B (_r-by-_m) to Btran (_m-by-_r);
|
cblas_dcopy(_r, B+_i*_r, 1, Btran_p+_i, _m); //Copying the transpose of B (_r-by-_m) to Btran (_m-by-_r);
|
||||||
}
|
}
|
||||||
dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, Btran_p, &_m, &info);
|
_mlap = _m;
|
||||||
|
_rlap = _r;
|
||||||
|
info2 = info;
|
||||||
|
dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, Btran_p, &_mlap, &info2);
|
||||||
|
info = info2;
|
||||||
for (_i=0; _i<_r; _i++) cblas_dcopy(_m, Btran_p+_i*_m, 1, X+_i, _r); //Copying the transpose of Btran(_m-by-_r) to X (_r-by-_m);
|
for (_i=0; _i<_r; _i++) cblas_dcopy(_m, Btran_p+_i*_m, 1, X+_i, _r); //Copying the transpose of Btran(_m-by-_r) to X (_r-by-_m);
|
||||||
X_dm->flag = M_GE;
|
X_dm->flag = M_GE;
|
||||||
|
|
||||||
|
@ -739,12 +823,16 @@ int BdivA_rgens(TSdmatrix *X_dm, const TSdmatrix *B_dm, const char lr, const TSd
|
||||||
else {
|
else {
|
||||||
//Left division \.
|
//Left division \.
|
||||||
//=== Memory allocation for this function only.
|
//=== Memory allocation for this function only.
|
||||||
ipiv_p = tzMalloc(_m, int);
|
ipiv_p = tzMalloc(_m, lapack_int);
|
||||||
W = tzMalloc(m2=square(_m), double);
|
W = tzMalloc(m2=square(_m), double);
|
||||||
|
|
||||||
memcpy(X, B, _m*_r*sizeof(double));
|
memcpy(X, B, _m*_r*sizeof(double));
|
||||||
memcpy(W, A, m2*sizeof(double));
|
memcpy(W, A, m2*sizeof(double));
|
||||||
dgesv_(&_m, &_r, W, &_m, ipiv_p, X, &_m, &info);
|
_mlap = _m;
|
||||||
|
_rlap = _r;
|
||||||
|
info2 = info;
|
||||||
|
dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, X, &_mlap, &info2);
|
||||||
|
info = info2;
|
||||||
X_dm->flag = M_GE;
|
X_dm->flag = M_GE;
|
||||||
|
|
||||||
//=== Destroyes memory allocated for this function only.
|
//=== Destroyes memory allocated for this function only.
|
||||||
|
@ -778,12 +866,13 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
|
||||||
|
|
||||||
int _m, m2,
|
int _m, m2,
|
||||||
_r = 1,
|
_r = 1,
|
||||||
_i, info = -2,
|
_i, info = -2;
|
||||||
*ipiv_p = NULL;
|
|
||||||
double *A, *b, *x,
|
double *A, *b, *x,
|
||||||
*Atran_p = NULL, //Transpose of A if right division / takes place.
|
*Atran_p = NULL, //Transpose of A if right division / takes place.
|
||||||
*W = NULL; //Duplicate copy of A when left division \ is used. This will be replaced by LU decomposition.
|
*W = NULL; //Duplicate copy of A when left division \ is used. This will be replaced by LU decomposition.
|
||||||
|
|
||||||
|
lapack_int _mlap, _rlap, info2, *ipiv_p = NULL;
|
||||||
|
|
||||||
if (!A_dm || !(A_dm->flag & M_GE) || !b_dv || !b_dv->flag) fn_DisplayError("mathlib.c/bdivA_rgens(): Both A_dm and b_dv must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
if (!A_dm || !(A_dm->flag & M_GE) || !b_dv || !b_dv->flag) fn_DisplayError("mathlib.c/bdivA_rgens(): Both A_dm and b_dv must be (a) created (allocated memory) and (b) given legal values for all elements (in other words, the flag M_GE must exist)");
|
||||||
if (!x_dv) fn_DisplayError("mathlib.c/bdivA_rgens(): output vector x_dv must be created (allocated memory)");
|
if (!x_dv) fn_DisplayError("mathlib.c/bdivA_rgens(): output vector x_dv must be created (allocated memory)");
|
||||||
if ( b_dv->n != x_dv->n ) fn_DisplayError("mathlib.c/bdivA_rgens(): The dim of b_dv and the dim of x_dv must exactly match");
|
if ( b_dv->n != x_dv->n ) fn_DisplayError("mathlib.c/bdivA_rgens(): The dim of b_dv and the dim of x_dv must exactly match");
|
||||||
|
@ -797,13 +886,17 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
|
||||||
if (lr=='/') {
|
if (lr=='/') {
|
||||||
//Right divistion /.
|
//Right divistion /.
|
||||||
//=== Memory allocation for this function only.
|
//=== Memory allocation for this function only.
|
||||||
ipiv_p = tzMalloc(_m, int);
|
ipiv_p = tzMalloc(_m, lapack_int);
|
||||||
Atran_p = tzMalloc(square(_m), double);
|
Atran_p = tzMalloc(square(_m), double);
|
||||||
|
|
||||||
for (_i=0; _i<_m; _i++)
|
for (_i=0; _i<_m; _i++)
|
||||||
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
|
cblas_dcopy(_m, A+_i*_m, 1, Atran_p+_i, _m); //Copying the transpose of A to Atran.
|
||||||
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
|
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
|
||||||
dgesv_(&_m, &_r, Atran_p, &_m, ipiv_p, x, &_m, &info);
|
_mlap = _m;
|
||||||
|
_rlap = _r;
|
||||||
|
info2 = info;
|
||||||
|
dgesv_(&_mlap, &_rlap, Atran_p, &_mlap, ipiv_p, x, &_mlap, &info2);
|
||||||
|
info = info2;
|
||||||
x_dv->flag = V_DEF;
|
x_dv->flag = V_DEF;
|
||||||
|
|
||||||
//=== Destroyes memory allocated for this function only.
|
//=== Destroyes memory allocated for this function only.
|
||||||
|
@ -815,12 +908,16 @@ int bdivA_rgens(TSdvector *x_dv, const TSdvector *b_dv, const char lr, const TSd
|
||||||
else {
|
else {
|
||||||
//Left division \.
|
//Left division \.
|
||||||
//=== Memory allocation for this function only.
|
//=== Memory allocation for this function only.
|
||||||
ipiv_p = tzMalloc(_m, int);
|
ipiv_p = tzMalloc(_m, lapack_int);
|
||||||
W = tzMalloc(m2=square(_m), double);
|
W = tzMalloc(m2=square(_m), double);
|
||||||
|
|
||||||
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
|
if (x_dv != b_dv ) memcpy(x, b, _m*sizeof(double));
|
||||||
memcpy(W, A, m2*sizeof(double));
|
memcpy(W, A, m2*sizeof(double));
|
||||||
dgesv_(&_m, &_r, W, &_m, ipiv_p, x, &_m, &info);
|
_mlap = _m;
|
||||||
|
_rlap = _r;
|
||||||
|
info2 = info;
|
||||||
|
dgesv_(&_mlap, &_rlap, W, &_mlap, ipiv_p, x, &_mlap, &info2);
|
||||||
|
info = info2;
|
||||||
x_dv->flag = V_DEF;
|
x_dv->flag = V_DEF;
|
||||||
|
|
||||||
//=== Destroyes memory allocated for this function only.
|
//=== Destroyes memory allocated for this function only.
|
||||||
|
@ -859,6 +956,8 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
|
||||||
int errflag=2, nrows, nels; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
int errflag=2, nrows, nels; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
||||||
double *A, *W=NULL, *x, *b;
|
double *A, *W=NULL, *x, *b;
|
||||||
|
|
||||||
|
lapack_int nrows2, errflag2;
|
||||||
|
|
||||||
if ( !A_dm || !b_dv || !x_dv ) fn_DisplayError(".../mathlib.c/Aldivb_spd(): All input matrices or vectors must be created (memory allocated)");
|
if ( !A_dm || !b_dv || !x_dv ) fn_DisplayError(".../mathlib.c/Aldivb_spd(): All input matrices or vectors must be created (memory allocated)");
|
||||||
nrows = A_dm->nrows;
|
nrows = A_dm->nrows;
|
||||||
nels = square(nrows);
|
nels = square(nrows);
|
||||||
|
@ -876,7 +975,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
|
||||||
|
|
||||||
|
|
||||||
if (A_dm->flag & M_SU) {
|
if (A_dm->flag & M_SU) {
|
||||||
dpotrf_("U", &nrows, W, &nrows, &errflag); //Choleski. U'*U = W where W will be replaced by upper triangular U.
|
nrows2 = nrows;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("U", &nrows2, W, &nrows2, &errflag2); //Choleski. U'*U = W where W will be replaced by upper triangular U.
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
|
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
|
||||||
if (x==b) {
|
if (x==b) {
|
||||||
//=== Solving for A*x=b.
|
//=== Solving for A*x=b.
|
||||||
|
@ -892,7 +994,10 @@ void Aldivb_spd(TSdvector *x_dv, TSdmatrix *A_dm, TSdvector *b_dv, char an) {
|
||||||
if ( (an!='N') && (an!='n') ) A_dm->flag = M_UT;
|
if ( (an!='N') && (an!='n') ) A_dm->flag = M_UT;
|
||||||
}
|
}
|
||||||
else if (A_dm->flag & M_SL) { //?????????? Not tested yet.
|
else if (A_dm->flag & M_SL) { //?????????? Not tested yet.
|
||||||
dpotrf_("L", &nrows, W, &nrows, &errflag); //Choleski. L*L' = W where W will be replaced by lower triangular L.
|
nrows2 = nrows;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_("L", &nrows2, W, &nrows2, &errflag2); //Choleski. L*L' = W where W will be replaced by lower triangular L.
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
|
if (errflag) fn_DisplayError(".../mathlib.c/Aldivb_spd(): Error when calling Choleski dpotrf_(). Check if the L input matrix A_dm is positive definite or has legal values");
|
||||||
if (x==b) {
|
if (x==b) {
|
||||||
//=== Solving for A*x=b.
|
//=== Solving for A*x=b.
|
||||||
|
@ -1053,12 +1158,18 @@ int eigrsym_decomp(double *eval_v, double *evec_m, const double *s_m, const int
|
||||||
double *tmpd0_m=tzMalloc(square(_n), double),
|
double *tmpd0_m=tzMalloc(square(_n), double),
|
||||||
*work_p=tzMalloc(lwork, double);
|
*work_p=tzMalloc(lwork, double);
|
||||||
|
|
||||||
|
lapack_int n2, lwork2, errflag2;
|
||||||
|
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Obtains eigenvalues and, optionally, eigenvectors.
|
// Obtains eigenvalues and, optionally, eigenvectors.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
memcpy(tmpd0_m, s_m, square(_n)*sizeof(double));
|
memcpy(tmpd0_m, s_m, square(_n)*sizeof(double));
|
||||||
dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n1, tmpd0_m, &n1, eval_v, work_p, &lwork, &errflag);
|
|
||||||
|
n2 = n1;
|
||||||
|
lwork2 = lwork;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dsyev_( (evec_m) ? "V" : "N", ((ul=='u') || (ul=='U')) ? "U" : "L", &n2, tmpd0_m, &n2, eval_v, work_p, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (evec_m) memcpy(evec_m, tmpd0_m, square(_n)*sizeof(double));
|
if (evec_m) memcpy(evec_m, tmpd0_m, square(_n)*sizeof(double));
|
||||||
|
|
||||||
|
|
||||||
|
@ -1096,6 +1207,8 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r
|
||||||
double *tmpd0_m=tzMalloc(square(_n), double), //@@Must be freed in this function.@@
|
double *tmpd0_m=tzMalloc(square(_n), double), //@@Must be freed in this function.@@
|
||||||
*work_p=tzMalloc(lwork, double); //@@Must be freed in this function.@@
|
*work_p=tzMalloc(lwork, double); //@@Must be freed in this function.@@
|
||||||
|
|
||||||
|
lapack_int n2, lwork2, errflag2;
|
||||||
|
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Starts with x_m -- the matrix to be decomposed.
|
// Starts with x_m -- the matrix to be decomposed.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
|
@ -1104,8 +1217,12 @@ int eigrgen_decomp(double *evalr_v, double *evali_v, double *revecr_m, double *r
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Obtains eigenvalues and, optionally, eigenvectors.
|
// Obtains eigenvalues and, optionally, eigenvectors.
|
||||||
//---------------------------
|
//---------------------------
|
||||||
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n1, tmpd0_m, &n1, evalr_v, evali_v,
|
n2 = n1;
|
||||||
levecr_m, &n1, revecr_m, &n1, work_p, &lwork, &errflag);
|
lwork2 = lwork;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dgeev_( (levecr_m) ? "V" : "N", (revecr_m) ? "V" : "N", &n2, tmpd0_m, &n2, evalr_v, evali_v,
|
||||||
|
levecr_m, &n2, revecr_m, &n2, work_p, &lwork2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
|
|
||||||
//---------------------------
|
//---------------------------
|
||||||
// Frees the allocated memory.
|
// Frees the allocated memory.
|
||||||
|
@ -1164,6 +1281,8 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) {
|
||||||
#ifdef INTELCMATHLIBRARY //Intel MKL Lapack dependent code.
|
#ifdef INTELCMATHLIBRARY //Intel MKL Lapack dependent code.
|
||||||
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
||||||
|
|
||||||
|
lapack_int _m2, errflag2;
|
||||||
|
|
||||||
//=== Fills the triangular part that is used for Choleski decomposition.
|
//=== Fills the triangular part that is used for Choleski decomposition.
|
||||||
|
|
||||||
switch (ul) {
|
switch (ul) {
|
||||||
|
@ -1188,7 +1307,10 @@ int chol_decomp(double *D, const double *s_m, const int _n, const char ul) {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
//=== Choleski decomposition.
|
//=== Choleski decomposition.
|
||||||
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
//---
|
//---
|
||||||
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
|
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
|
||||||
// else if (errflag>0) fn_DisplayError("The leadding minor of some order, hence the entire matrix, is not positive definite");
|
// else if (errflag>0) fn_DisplayError("The leadding minor of some order, hence the entire matrix, is not positive definite");
|
||||||
|
@ -1227,6 +1349,8 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) {
|
||||||
|
|
||||||
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
int errflag=2, _m=_n, _i, _j, tmpi0; //errflat=0 implies successful decomposition. But we start with 2 so as to let dpotrf_ export a correct flag.
|
||||||
|
|
||||||
|
lapack_int _m2, errflag2;
|
||||||
|
|
||||||
//=== Fills the triangular part that is used for Choleski decomposition.
|
//=== Fills the triangular part that is used for Choleski decomposition.
|
||||||
switch (ul) {
|
switch (ul) {
|
||||||
case 'u': case 'U':
|
case 'u': case 'U':
|
||||||
|
@ -1251,10 +1375,14 @@ int inv_spd(double *D, const double *s_m, const int _n, const char ul) {
|
||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
//=== Choleski decomposition.
|
//=== Choleski decomposition.
|
||||||
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
|
_m2 = _m;
|
||||||
|
errflag2 = errflag;
|
||||||
|
dpotrf_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
if (errflag) return (errflag);
|
if (errflag) return (errflag);
|
||||||
//=== Takes inverse.
|
//=== Takes inverse.
|
||||||
dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m, D, &_m, &errflag);
|
dpotri_(((ul=='u') || (ul=='U')) ? "U" : "L", &_m2, D, &_m2, &errflag2);
|
||||||
|
errflag = errflag2;
|
||||||
return (errflag);
|
return (errflag);
|
||||||
//---
|
//---
|
||||||
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
|
// if (errflag<0) fn_DisplayError("Some element has an illegal value");
|
||||||
|
@ -2891,7 +3019,7 @@ void ScalarTimesColofMatrix(TSdvector *y_dv, double _alpha, TSdmatrix *X_dm, int
|
||||||
y_dv->flag = V_DEF;
|
y_dv->flag = V_DEF;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Need to be tested for the following.
|
// Need to be tested for the following.
|
||||||
//
|
//
|
||||||
// M = X_dm->M + (_j+1)*(nrows=X_dm->nrows) - 1; //Points to the end of the jth column.
|
// M = X_dm->M + (_j+1)*(nrows=X_dm->nrows) - 1; //Points to the end of the jth column.
|
||||||
// if (!y_dv)
|
// if (!y_dv)
|
||||||
|
@ -3896,7 +4024,7 @@ void CopySubrowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSdmatrix
|
||||||
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
|
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Havent got time to code up the default using Linux BLAS.
|
// Havent got time to code up the default using Linux BLAS.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -3930,7 +4058,7 @@ void CopySubmatrix2rowmatrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd
|
||||||
else fn_DisplayError(".../mathlib.c/CopySubmatrix2rowmatrix(): the submatrix of x2_dm must be within the range of x2_dm and its transpose must be within the range of x1_dm");
|
else fn_DisplayError(".../mathlib.c/CopySubmatrix2rowmatrix(): the submatrix of x2_dm must be within the range of x2_dm and its transpose must be within the range of x1_dm");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Havent got time to code up the default using Linux BLAS.
|
// Havent got time to code up the default using Linux BLAS.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -3965,7 +4093,7 @@ void CopySubrowmatrix2matrix(TSdmatrix *x1_dm, const int br1, const int bc1, TSd
|
||||||
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix2matrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
|
else fn_DisplayError(".../mathlib.c/CopySubrowmatrix2matrix(): the submatrix of x2_dm must be within the range of itself as well as x1_dm");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Havent got time to code up the default using Linux BLAS.
|
// Havent got time to code up the default using Linux BLAS.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -4208,7 +4336,7 @@ void CopySubvector2rowmatrix(TSdmatrix *x1_dm, const int br, const int bc, const
|
||||||
else fn_DisplayError(".../mathlib.c/CopySubvector2rowmatrix(): Copying (copied) elements are outside the (row) dimension of the copying vector x2_dv (the copied matrix x1_dm)");
|
else fn_DisplayError(".../mathlib.c/CopySubvector2rowmatrix(): Copying (copied) elements are outside the (row) dimension of the copying vector x2_dv (the copied matrix x1_dm)");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
Havent got time to code up the default using Linux BLAS.
|
// Havent got time to code up the default using Linux BLAS.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,9 @@ extern "C" {
|
||||||
typedef const char *BLCHAR;
|
typedef const char *BLCHAR;
|
||||||
typedef const blas_int *CONST_BLINT;
|
typedef const blas_int *CONST_BLINT;
|
||||||
typedef const double *CONST_BLDOU;
|
typedef const double *CONST_BLDOU;
|
||||||
|
typedef const float *CONST_BLFLT;
|
||||||
typedef double *BLDOU;
|
typedef double *BLDOU;
|
||||||
|
typedef float *BLFLT;
|
||||||
|
|
||||||
#define dgemm FORTRAN_WRAPPER(dgemm)
|
#define dgemm FORTRAN_WRAPPER(dgemm)
|
||||||
void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
|
void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
|
||||||
|
@ -62,6 +64,12 @@ extern "C" {
|
||||||
CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
|
CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
|
||||||
BLDOU c, CONST_BLINT ldc);
|
BLDOU c, CONST_BLINT ldc);
|
||||||
|
|
||||||
|
#define sgemm FORTRAN_WRAPPER(sgemm)
|
||||||
|
void sgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
|
||||||
|
CONST_BLINT k, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
|
||||||
|
CONST_BLFLT b, CONST_BLINT ldb, CONST_BLFLT beta,
|
||||||
|
BLFLT c, CONST_BLINT ldc);
|
||||||
|
|
||||||
#define dsymm FORTRAN_WRAPPER(dsymm)
|
#define dsymm FORTRAN_WRAPPER(dsymm)
|
||||||
void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n,
|
void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n,
|
||||||
CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
|
CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
|
||||||
|
@ -85,6 +93,10 @@ extern "C" {
|
||||||
void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
|
void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
|
||||||
BLDOU y, CONST_BLINT incy);
|
BLDOU y, CONST_BLINT incy);
|
||||||
|
|
||||||
|
#define saxpy FORTRAN_WRAPPER(saxpy)
|
||||||
|
void saxpy(CONST_BLINT n, CONST_BLFLT a, CONST_BLFLT x, CONST_BLINT incx,
|
||||||
|
BLFLT y, CONST_BLINT incy);
|
||||||
|
|
||||||
#define dcopy FORTRAN_WRAPPER(dcopy)
|
#define dcopy FORTRAN_WRAPPER(dcopy)
|
||||||
void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx,
|
void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx,
|
||||||
BLDOU y, CONST_BLINT incy);
|
BLDOU y, CONST_BLINT incy);
|
||||||
|
@ -96,6 +108,9 @@ extern "C" {
|
||||||
#define dscal FORTRAN_WRAPPER(dscal)
|
#define dscal FORTRAN_WRAPPER(dscal)
|
||||||
void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx);
|
void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx);
|
||||||
|
|
||||||
|
#define sscal FORTRAN_WRAPPER(sscal)
|
||||||
|
void sscal(CONST_BLINT n, CONST_BLDOU a, BLFLT x, CONST_BLINT incx);
|
||||||
|
|
||||||
#define dtrsm FORTRAN_WRAPPER(dtrsm)
|
#define dtrsm FORTRAN_WRAPPER(dtrsm)
|
||||||
void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
|
void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
|
||||||
CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
|
CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
|
||||||
|
|
|
@ -57,6 +57,10 @@ extern "C" {
|
||||||
typedef const double *CONST_LADOU;
|
typedef const double *CONST_LADOU;
|
||||||
typedef double *LADOU;
|
typedef double *LADOU;
|
||||||
typedef lapack_int (*DGGESCRIT)(const double *, const double *, const double *);
|
typedef lapack_int (*DGGESCRIT)(const double *, const double *, const double *);
|
||||||
|
typedef lapack_int (*SGGESCRIT)(const float *, const float *, const float *);
|
||||||
|
typedef float *LAFLT;
|
||||||
|
typedef const float *CONST_LAFLT;
|
||||||
|
typedef lapack_int *CONST_LALOG; //logical
|
||||||
|
|
||||||
#define dgetrs FORTRAN_WRAPPER(dgetrs)
|
#define dgetrs FORTRAN_WRAPPER(dgetrs)
|
||||||
void dgetrs(LACHAR trans, CONST_LAINT n, CONST_LAINT nrhs, CONST_LADOU a, CONST_LAINT lda, CONST_LAINT ipiv,
|
void dgetrs(LACHAR trans, CONST_LAINT n, CONST_LAINT nrhs, CONST_LADOU a, CONST_LAINT lda, CONST_LAINT ipiv,
|
||||||
|
@ -66,6 +70,14 @@ extern "C" {
|
||||||
void dgetrf(CONST_LAINT m, CONST_LAINT n, LADOU a,
|
void dgetrf(CONST_LAINT m, CONST_LAINT n, LADOU a,
|
||||||
CONST_LAINT lda, LAINT ipiv, LAINT info);
|
CONST_LAINT lda, LAINT ipiv, LAINT info);
|
||||||
|
|
||||||
|
#define dgetri FORTRAN_WRAPPER(dgetri)
|
||||||
|
void dgetri(CONST_LAINT n, LADOU a, CONST_LAINT lda, CONST_LAINT ipiv, LADOU work,
|
||||||
|
CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgetrf FORTRAN_WRAPPER(sgetrf)
|
||||||
|
void sgetrf(CONST_LAINT m, CONST_LAINT n, LAFLT a,
|
||||||
|
CONST_LAINT lda, LAINT ipiv, LAINT info);
|
||||||
|
|
||||||
#define dgees FORTRAN_WRAPPER(dgees)
|
#define dgees FORTRAN_WRAPPER(dgees)
|
||||||
void dgees(LACHAR jobvs, LACHAR sort, const void *select,
|
void dgees(LACHAR jobvs, LACHAR sort, const void *select,
|
||||||
CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT sdim,
|
CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT sdim,
|
||||||
|
@ -92,6 +104,14 @@ extern "C" {
|
||||||
void dpotrf(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
void dpotrf(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
LAINT info);
|
LAINT info);
|
||||||
|
|
||||||
|
#define dpotri FORTRAN_WRAPPER(dpotri)
|
||||||
|
void dpotri(LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LAINT info);
|
||||||
|
|
||||||
|
#define dtrtri FORTRAN_WRAPPER(dtrtri)
|
||||||
|
void dtrtri(LACHAR uplo, LACHAR diag, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LAINT info);
|
||||||
|
|
||||||
#define dgges FORTRAN_WRAPPER(dgges)
|
#define dgges FORTRAN_WRAPPER(dgges)
|
||||||
void dgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, DGGESCRIT delztg,
|
void dgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, DGGESCRIT delztg,
|
||||||
CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb,
|
CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb,
|
||||||
|
@ -99,6 +119,13 @@ extern "C" {
|
||||||
LADOU vsl, CONST_LAINT ldvsl, LADOU vsr, CONST_LAINT ldvsr,
|
LADOU vsl, CONST_LAINT ldvsl, LADOU vsr, CONST_LAINT ldvsr,
|
||||||
LADOU work, CONST_LAINT lwork, LAINT bwork, LAINT info);
|
LADOU work, CONST_LAINT lwork, LAINT bwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgges FORTRAN_WRAPPER(sgges)
|
||||||
|
void sgges(LACHAR jobvsl, LACHAR jobvsr, LACHAR sort, SGGESCRIT delztg,
|
||||||
|
CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb,
|
||||||
|
LAINT sdim, LAFLT alphar, LAFLT alphai, LAFLT beta,
|
||||||
|
LAFLT vsl, CONST_LAINT ldvsl, LAFLT vsr, CONST_LAINT ldvsr,
|
||||||
|
LAFLT work, CONST_LAINT lwork, LAINT bwork, LAINT info);
|
||||||
|
|
||||||
#define dsyev FORTRAN_WRAPPER(dsyev)
|
#define dsyev FORTRAN_WRAPPER(dsyev)
|
||||||
void dsyev(LACHAR jobz, LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
void dsyev(LACHAR jobz, LACHAR uplo, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
LADOU w, LADOU work, CONST_LAINT lwork, LAINT info);
|
LADOU w, LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
@ -114,11 +141,96 @@ extern "C" {
|
||||||
void dgeqrf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
void dgeqrf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
|
LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgeqrf FORTRAN_WRAPPER(sgeqrf)
|
||||||
|
void sgeqrf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
|
||||||
|
LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
#define dormqr FORTRAN_WRAPPER(dormqr)
|
#define dormqr FORTRAN_WRAPPER(dormqr)
|
||||||
void dormqr(LACHAR side, LACHAR trans, CONST_LAINT m, CONST_LAINT n, CONST_LAINT k,
|
void dormqr(LACHAR side, LACHAR trans, CONST_LAINT m, CONST_LAINT n, CONST_LAINT k,
|
||||||
CONST_LADOU a, CONST_LAINT lda, CONST_LADOU tau, LADOU c, CONST_LAINT ldc,
|
CONST_LADOU a, CONST_LAINT lda, CONST_LADOU tau, LADOU c, CONST_LAINT ldc,
|
||||||
LADOU work, CONST_LAINT lwork, LAINT info);
|
LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dorgqr FORTRAN_WRAPPER(dorgqr)
|
||||||
|
void dorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda,
|
||||||
|
CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sorgqr FORTRAN_WRAPPER(sorgqr)
|
||||||
|
void sorgqr(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda,
|
||||||
|
CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dgelqf FORTRAN_WRAPPER(dgelqf)
|
||||||
|
void dgelqf(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU tau,
|
||||||
|
LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgelqf FORTRAN_WRAPPER(sgelqf)
|
||||||
|
void sgelqf(CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT tau,
|
||||||
|
LAFLT work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dorglq FORTRAN_WRAPPER(dorglq)
|
||||||
|
void dorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LADOU a, CONST_LAINT lda,
|
||||||
|
CONST_LADOU tau, LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sorglq FORTRAN_WRAPPER(sorglq)
|
||||||
|
void sorglq(CONST_LAINT m, CONST_LAINT n, CONST_LAINT k, LAFLT a, CONST_LAINT lda,
|
||||||
|
CONST_LAFLT tau, LAFLT work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dgesv FORTRAN_WRAPPER(dgesv)
|
||||||
|
void dgesv(CONST_LAINT n, CONST_LAINT nrhs, LADOU a, CONST_LAINT lda, LAINT ipiv,
|
||||||
|
LADOU b, CONST_LAINT ldb, LAINT info);
|
||||||
|
|
||||||
|
#define dgesvd FORTRAN_WRAPPER(dgesvd)
|
||||||
|
void dgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work,
|
||||||
|
CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgesvd FORTRAN_WRAPPER(sgesvd)
|
||||||
|
void sgesvd(LACHAR jobu, LACHAR jobvt, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
|
||||||
|
LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work,
|
||||||
|
CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dtgsen FORTRAN_WRAPPER(dtgsen)
|
||||||
|
void dtgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select,
|
||||||
|
CONST_LAINT n, LADOU a, CONST_LAINT lda, LADOU b, CONST_LAINT ldb,
|
||||||
|
LADOU alphar, LADOU alphai, LADOU beta, LADOU q, CONST_LAINT ldq,
|
||||||
|
LADOU z, CONST_LAINT ldz, LAINT m, LADOU pl, LADOU pr, LADOU dif, LADOU work,
|
||||||
|
CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info);
|
||||||
|
|
||||||
|
#define stgsen FORTRAN_WRAPPER(stgsen)
|
||||||
|
void stgsen(CONST_LAINT ijob, CONST_LALOG wantq, CONST_LALOG wantz, CONST_LALOG select,
|
||||||
|
CONST_LAINT n, LAFLT a, CONST_LAINT lda, LAFLT b, CONST_LAINT ldb,
|
||||||
|
LAFLT alphar, LAFLT alphai, LAFLT beta, LAFLT q, CONST_LAINT ldq,
|
||||||
|
LAFLT z, CONST_LAINT ldz, LAINT m, LAFLT pl, LAFLT pr, LAFLT dif, LAFLT work,
|
||||||
|
CONST_LAINT lwork, LAINT iwork, CONST_LAINT liwork, LAINT info);
|
||||||
|
|
||||||
|
#define dtgexc FORTRAN_WRAPPER(dtgexc)
|
||||||
|
void dtgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LADOU b, CONST_LAINT ldb, LADOU q, CONST_LAINT ldq, LADOU z, CONST_LAINT ldz,
|
||||||
|
LAINT ifst, LAINT ilst, LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define stgexc FORTRAN_WRAPPER(stgexc)
|
||||||
|
void stgexc(CONST_LALOG wantq, CONST_LALOG wantz, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
|
||||||
|
LAFLT b, CONST_LAINT ldb, LAFLT q, CONST_LAINT ldq, LAFLT z, CONST_LAINT ldz,
|
||||||
|
LAINT ifst, LAINT ilst, LAFLT work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dgesdd FORTRAN_WRAPPER(dgesdd)
|
||||||
|
void dgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LADOU s, LADOU u, CONST_LAINT ldu, LADOU vt, CONST_LAINT ldvt, LADOU work,
|
||||||
|
CONST_LAINT lwork, LAINT iwork, LAINT info);
|
||||||
|
|
||||||
|
#define sgesdd FORTRAN_WRAPPER(sgesdd)
|
||||||
|
void sgesdd(LACHAR jobz, CONST_LAINT m, CONST_LAINT n, LAFLT a, CONST_LAINT lda,
|
||||||
|
LAFLT s, LAFLT u, CONST_LAINT ldu, LAFLT vt, CONST_LAINT ldvt, LAFLT work,
|
||||||
|
CONST_LAINT lwork, LAINT iwork, LAINT info);
|
||||||
|
|
||||||
|
#define dgeev FORTRAN_WRAPPER(dgeev)
|
||||||
|
void dgeev(LACHAR jobvl, LACHAR jobvr, CONST_LAINT n, LADOU a, CONST_LAINT lda,
|
||||||
|
LADOU wr, LADOU wi, LADOU vl, CONST_LAINT ldvl, LADOU vr, CONST_LAINT ldvr,
|
||||||
|
LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
|
#define dgeqp3 FORTRAN_WRAPPER(dgeqp3)
|
||||||
|
void dgeqp3(CONST_LAINT m, CONST_LAINT n, LADOU a, CONST_LAINT lda, LAINT jpvt, LADOU tau,
|
||||||
|
LADOU work, CONST_LAINT lwork, LAINT info);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue