673 lines
25 KiB
Fortran
673 lines
25 KiB
Fortran
SUBROUTINE SB04PD( DICO, FACTA, FACTB, TRANA, TRANB, ISGN, M, N,
|
|
$ A, LDA, U, LDU, B, LDB, V, LDV, C, LDC, SCALE,
|
|
$ DWORK, LDWORK, INFO )
|
|
C
|
|
C SLICOT RELEASE 5.0.
|
|
C
|
|
C Copyright (c) 2002-2009 NICONET e.V.
|
|
C
|
|
C This program is free software: you can redistribute it and/or
|
|
C modify it under the terms of the GNU General Public License as
|
|
C published by the Free Software Foundation, either version 2 of
|
|
C the License, or (at your option) any later version.
|
|
C
|
|
C This program is distributed in the hope that it will be useful,
|
|
C but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
C GNU General Public License for more details.
|
|
C
|
|
C You should have received a copy of the GNU General Public License
|
|
C along with this program. If not, see
|
|
C <http://www.gnu.org/licenses/>.
|
|
C
|
|
C PURPOSE
|
|
C
|
|
C To solve for X either the real continuous-time Sylvester equation
|
|
C
|
|
C op(A)*X + ISGN*X*op(B) = scale*C, (1)
|
|
C
|
|
C or the real discrete-time Sylvester equation
|
|
C
|
|
C op(A)*X*op(B) + ISGN*X = scale*C, (2)
|
|
C
|
|
C where op(M) = M or M**T, and ISGN = 1 or -1. A is M-by-M and
|
|
C B is N-by-N; the right hand side C and the solution X are M-by-N;
|
|
C and scale is an output scale factor, set less than or equal to 1
|
|
C to avoid overflow in X. The solution matrix X is overwritten
|
|
C onto C.
|
|
C
|
|
C If A and/or B are not (upper) quasi-triangular, that is, block
|
|
C upper triangular with 1-by-1 and 2-by-2 diagonal blocks, they are
|
|
C reduced to Schur canonical form, that is, quasi-triangular with
|
|
C each 2-by-2 diagonal block having its diagonal elements equal and
|
|
C its off-diagonal elements of opposite sign.
|
|
C
|
|
C ARGUMENTS
|
|
C
|
|
C Mode Parameters
|
|
C
|
|
C DICO CHARACTER*1
|
|
C Specifies the equation from which X is to be determined
|
|
C as follows:
|
|
C = 'C': Equation (1), continuous-time case;
|
|
C = 'D': Equation (2), discrete-time case.
|
|
C
|
|
C FACTA CHARACTER*1
|
|
C Specifies whether or not the real Schur factorization
|
|
C of the matrix A is supplied on entry, as follows:
|
|
C = 'F': On entry, A and U contain the factors from the
|
|
C real Schur factorization of the matrix A;
|
|
C = 'N': The Schur factorization of A will be computed
|
|
C and the factors will be stored in A and U;
|
|
C = 'S': The matrix A is quasi-triangular (or Schur).
|
|
C
|
|
C FACTB CHARACTER*1
|
|
C Specifies whether or not the real Schur factorization
|
|
C of the matrix B is supplied on entry, as follows:
|
|
C = 'F': On entry, B and V contain the factors from the
|
|
C real Schur factorization of the matrix B;
|
|
C = 'N': The Schur factorization of B will be computed
|
|
C and the factors will be stored in B and V;
|
|
C = 'S': The matrix B is quasi-triangular (or Schur).
|
|
C
|
|
C TRANA CHARACTER*1
|
|
C Specifies the form of op(A) to be used, as follows:
|
|
C = 'N': op(A) = A (No transpose);
|
|
C = 'T': op(A) = A**T (Transpose);
|
|
C = 'C': op(A) = A**T (Conjugate transpose = Transpose).
|
|
C
|
|
C TRANB CHARACTER*1
|
|
C Specifies the form of op(B) to be used, as follows:
|
|
C = 'N': op(B) = B (No transpose);
|
|
C = 'T': op(B) = B**T (Transpose);
|
|
C = 'C': op(B) = B**T (Conjugate transpose = Transpose).
|
|
C
|
|
C ISGN INTEGER
|
|
C Specifies the sign of the equation as described before.
|
|
C ISGN may only be 1 or -1.
|
|
C
|
|
C Input/Output Parameters
|
|
C
|
|
C M (input) INTEGER
|
|
C The order of the matrix A, and the number of rows in the
|
|
C matrices X and C. M >= 0.
|
|
C
|
|
C N (input) INTEGER
|
|
C The order of the matrix B, and the number of columns in
|
|
C the matrices X and C. N >= 0.
|
|
C
|
|
C A (input or input/output) DOUBLE PRECISION array,
|
|
C dimension (LDA,M)
|
|
C On entry, the leading M-by-M part of this array must
|
|
C contain the matrix A. If FACTA = 'S', then A contains
|
|
C a quasi-triangular matrix, and if FACTA = 'F', then A
|
|
C is in Schur canonical form; the elements below the upper
|
|
C Hessenberg part of the array A are not referenced.
|
|
C On exit, if FACTA = 'N', and INFO = 0 or INFO >= M+1, the
|
|
C leading M-by-M upper Hessenberg part of this array
|
|
C contains the upper quasi-triangular matrix in Schur
|
|
C canonical form from the Schur factorization of A. The
|
|
C contents of array A is not modified if FACTA = 'F' or 'S'.
|
|
C
|
|
C LDA INTEGER
|
|
C The leading dimension of array A. LDA >= MAX(1,M).
|
|
C
|
|
C U (input or output) DOUBLE PRECISION array, dimension
|
|
C (LDU,M)
|
|
C If FACTA = 'F', then U is an input argument and on entry
|
|
C the leading M-by-M part of this array must contain the
|
|
C orthogonal matrix U of the real Schur factorization of A.
|
|
C If FACTA = 'N', then U is an output argument and on exit,
|
|
C if INFO = 0 or INFO >= M+1, it contains the orthogonal
|
|
C M-by-M matrix from the real Schur factorization of A.
|
|
C If FACTA = 'S', the array U is not referenced.
|
|
C
|
|
C LDU INTEGER
|
|
C The leading dimension of array U.
|
|
C LDU >= MAX(1,M), if FACTA = 'F' or 'N';
|
|
C LDU >= 1, if FACTA = 'S'.
|
|
C
|
|
C B (input or input/output) DOUBLE PRECISION array,
|
|
C dimension (LDB,N)
|
|
C On entry, the leading N-by-N part of this array must
|
|
C contain the matrix B. If FACTB = 'S', then B contains
|
|
C a quasi-triangular matrix, and if FACTB = 'F', then B
|
|
C is in Schur canonical form; the elements below the upper
|
|
C Hessenberg part of the array B are not referenced.
|
|
C On exit, if FACTB = 'N', and INFO = 0 or INFO = M+N+1,
|
|
C the leading N-by-N upper Hessenberg part of this array
|
|
C contains the upper quasi-triangular matrix in Schur
|
|
C canonical form from the Schur factorization of B. The
|
|
C contents of array B is not modified if FACTB = 'F' or 'S'.
|
|
C
|
|
C LDB (input) INTEGER
|
|
C The leading dimension of the array B. LDB >= max(1,N).
|
|
C
|
|
C V (input or output) DOUBLE PRECISION array, dimension
|
|
C (LDV,N)
|
|
C If FACTB = 'F', then V is an input argument and on entry
|
|
C the leading N-by-N part of this array must contain the
|
|
C orthogonal matrix V of the real Schur factorization of B.
|
|
C If FACTB = 'N', then V is an output argument and on exit,
|
|
C if INFO = 0 or INFO = M+N+1, it contains the orthogonal
|
|
C N-by-N matrix from the real Schur factorization of B.
|
|
C If FACTB = 'S', the array V is not referenced.
|
|
C
|
|
C LDV INTEGER
|
|
C The leading dimension of array V.
|
|
C LDV >= MAX(1,N), if FACTB = 'F' or 'N';
|
|
C LDV >= 1, if FACTB = 'S'.
|
|
C
|
|
C C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
|
|
C On entry, the leading M-by-N part of this array must
|
|
C contain the right hand side matrix C.
|
|
C On exit, if INFO = 0 or INFO = M+N+1, the leading M-by-N
|
|
C part of this array contains the solution matrix X.
|
|
C
|
|
C LDC INTEGER
|
|
C The leading dimension of array C. LDC >= MAX(1,M).
|
|
C
|
|
C SCALE (output) DOUBLE PRECISION
|
|
C The scale factor, scale, set less than or equal to 1 to
|
|
C prevent the solution overflowing.
|
|
C
|
|
C Workspace
|
|
C
|
|
C DWORK DOUBLE PRECISION array, dimension (LDWORK)
|
|
C On exit, if INFO = 0 or M+N+1, then: DWORK(1) returns the
|
|
C optimal value of LDWORK; if FACTA = 'N', DWORK(1+i) and
|
|
C DWORK(1+M+i), i = 1,...,M, contain the real and imaginary
|
|
C parts, respectively, of the eigenvalues of A; and, if
|
|
C FACTB = 'N', DWORK(1+f+j) and DWORK(1+f+N+j), j = 1,...,N,
|
|
C with f = 2*M if FACTA = 'N', and f = 0, otherwise, contain
|
|
C the real and imaginary parts, respectively, of the
|
|
C eigenvalues of B.
|
|
C
|
|
C LDWORK INTEGER
|
|
C The length of the array DWORK.
|
|
C LDWORK >= MAX( 1, a+MAX( c, b+d, b+e ) ),
|
|
C where a = 1+2*M, if FACTA = 'N',
|
|
C a = 0, if FACTA <> 'N',
|
|
C b = 2*N, if FACTB = 'N', FACTA = 'N',
|
|
C b = 1+2*N, if FACTB = 'N', FACTA <> 'N',
|
|
C b = 0, if FACTB <> 'N',
|
|
C c = 3*M, if FACTA = 'N',
|
|
C c = M, if FACTA = 'F',
|
|
C c = 0, if FACTA = 'S',
|
|
C d = 3*N, if FACTB = 'N',
|
|
C d = N, if FACTB = 'F',
|
|
C d = 0, if FACTB = 'S',
|
|
C e = M, if DICO = 'C', FACTA <> 'S',
|
|
C e = 0, if DICO = 'C', FACTA = 'S',
|
|
C e = 2*M, if DICO = 'D'.
|
|
C An upper bound is
|
|
C LDWORK = 1+2*M+MAX( 3*M, 5*N, 2*N+2*M ).
|
|
C For good performance, LDWORK should be larger, e.g.,
|
|
C LDWORK = 1+2*M+MAX( 3*M, 5*N, 2*N+2*M, 2*N+M*N ).
|
|
C
|
|
C Error Indicator
|
|
C
|
|
C INFO INTEGER
|
|
C = 0: successful exit;
|
|
C < 0: if INFO = -i, the i-th argument had an illegal
|
|
C value;
|
|
C = i: if INFO = i, i = 1,...,M, the QR algorithm failed
|
|
C to compute all the eigenvalues of the matrix A
|
|
C (see LAPACK Library routine DGEES); the elements
|
|
C 2+i:1+M and 2+i+M:1+2*M of DWORK contain the real
|
|
C and imaginary parts, respectively, of the
|
|
C eigenvalues of A which have converged, and the
|
|
C array A contains the partially converged Schur form;
|
|
C = M+j: if INFO = M+j, j = 1,...,N, the QR algorithm
|
|
C failed to compute all the eigenvalues of the matrix
|
|
C B (see LAPACK Library routine DGEES); the elements
|
|
C 2+f+j:1+f+N and 2+f+j+N:1+f+2*N of DWORK contain the
|
|
C real and imaginary parts, respectively, of the
|
|
C eigenvalues of B which have converged, and the
|
|
C array B contains the partially converged Schur form;
|
|
C as defined for the parameter DWORK,
|
|
C f = 2*M, if FACTA = 'N',
|
|
C f = 0, if FACTA <> 'N';
|
|
C = M+N+1: if DICO = 'C', and the matrices A and -ISGN*B
|
|
C have common or very close eigenvalues, or
|
|
C if DICO = 'D', and the matrices A and -ISGN*B have
|
|
C almost reciprocal eigenvalues (that is, if lambda(i)
|
|
C and mu(j) are eigenvalues of A and -ISGN*B, then
|
|
C lambda(i) = 1/mu(j) for some i and j);
|
|
C perturbed values were used to solve the equation
|
|
C (but the matrices A and B are unchanged).
|
|
C
|
|
C METHOD
|
|
C
|
|
C An extension and refinement of the algorithms in [1,2] is used.
|
|
C If the matrices A and/or B are not quasi-triangular (see PURPOSE),
|
|
C they are reduced to Schur canonical form
|
|
C
|
|
C A = U*S*U', B = V*T*V',
|
|
C
|
|
C where U, V are orthogonal, and S, T are block upper triangular
|
|
C with 1-by-1 and 2-by-2 blocks on their diagonal. The right hand
|
|
C side matrix C is updated accordingly,
|
|
C
|
|
C C = U'*C*V;
|
|
C
|
|
C then, the solution matrix X of the "reduced" Sylvester equation
|
|
C (with A and B in (1) or (2) replaced by S and T, respectively),
|
|
C is computed column-wise via a back substitution scheme. A set of
|
|
C equivalent linear algebraic systems of equations of order at most
|
|
C four are formed and solved using Gaussian elimination with
|
|
C complete pivoting. Finally, the solution X of the original
|
|
C equation is obtained from the updating formula
|
|
C
|
|
C X = U*X*V'.
|
|
C
|
|
C If A and/or B are already quasi-triangular (or in Schur form), the
|
|
C initial factorizations and the corresponding updating steps are
|
|
C omitted.
|
|
C
|
|
C REFERENCES
|
|
C
|
|
C [1] Bartels, R.H. and Stewart, G.W. T
|
|
C Solution of the matrix equation A X + XB = C.
|
|
C Comm. A.C.M., 15, pp. 820-826, 1972.
|
|
C
|
|
C [2] Anderson, E., Bai, Z., Bischof, C., Demmel, J., Dongarra, J.,
|
|
C Du Croz, J., Greenbaum, A., Hammarling, S., McKenney, A.,
|
|
C Ostrouchov, S., and Sorensen, D.
|
|
C LAPACK Users' Guide: Second Edition.
|
|
C SIAM, Philadelphia, 1995.
|
|
C
|
|
C NUMERICAL ASPECTS
|
|
C
|
|
C The algorithm is stable and reliable, since orthogonal
|
|
C transformations and Gaussian elimination with complete pivoting
|
|
C are used. If INFO = M+N+1, the Sylvester equation is numerically
|
|
C singular.
|
|
C
|
|
C CONTRIBUTORS
|
|
C
|
|
C D. Sima, University of Bucharest, April 2000.
|
|
C V. Sima, Research Institute for Informatics, Bucharest, Apr. 2000.
|
|
C
|
|
C REVISIONS
|
|
C
|
|
C -
|
|
C
|
|
C KEYWORDS
|
|
C
|
|
C Matrix algebra, orthogonal transformation, real Schur form,
|
|
C Sylvester equation.
|
|
C
|
|
C ******************************************************************
|
|
C
|
|
C .. Parameters ..
|
|
DOUBLE PRECISION ZERO, ONE
|
|
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
|
|
C ..
|
|
C .. Scalar Arguments ..
|
|
CHARACTER DICO, FACTA, FACTB, TRANA, TRANB
|
|
INTEGER INFO, ISGN, LDA, LDB, LDC, LDU, LDV, LDWORK, M,
|
|
$ N
|
|
DOUBLE PRECISION SCALE
|
|
C ..
|
|
C .. Array Arguments ..
|
|
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ),
|
|
$ DWORK( * ), U( LDU, * ), V( LDV, * )
|
|
C ..
|
|
C .. Local Scalars ..
|
|
LOGICAL BLAS3A, BLAS3B, BLOCKA, BLOCKB, CONT, NOFACA,
|
|
$ NOFACB, NOTRNA, NOTRNB, SCHURA, SCHURB
|
|
INTEGER AVAILW, BL, CHUNKA, CHUNKB, I, IA, IB, IERR, J,
|
|
$ JWORK, MAXWRK, MINWRK, SDIM
|
|
C ..
|
|
C .. Local Arrays ..
|
|
LOGICAL BWORK( 1 )
|
|
C ..
|
|
C .. External Functions ..
|
|
LOGICAL LSAME, SELECT
|
|
EXTERNAL LSAME, SELECT
|
|
C ..
|
|
C .. External Subroutines ..
|
|
EXTERNAL DCOPY, DGEES, DGEMM, DGEMV, DLACPY, DTRSYL,
|
|
$ SB04PY, XERBLA
|
|
C ..
|
|
C .. Intrinsic Functions ..
|
|
INTRINSIC DBLE, INT, MAX, MIN
|
|
C ..
|
|
C .. Executable Statements ..
|
|
C
|
|
C Decode and Test input parameters
|
|
C
|
|
CONT = LSAME( DICO, 'C' )
|
|
NOFACA = LSAME( FACTA, 'N' )
|
|
NOFACB = LSAME( FACTB, 'N' )
|
|
SCHURA = LSAME( FACTA, 'S' )
|
|
SCHURB = LSAME( FACTB, 'S' )
|
|
NOTRNA = LSAME( TRANA, 'N' )
|
|
NOTRNB = LSAME( TRANB, 'N' )
|
|
C
|
|
INFO = 0
|
|
IF( .NOT.CONT .AND. .NOT.LSAME( DICO, 'D' ) ) THEN
|
|
INFO = -1
|
|
ELSE IF( .NOT.NOFACA .AND. .NOT.LSAME( FACTA, 'F' ) .AND.
|
|
$ .NOT.SCHURA ) THEN
|
|
INFO = -2
|
|
ELSE IF( .NOT.NOFACB .AND. .NOT.LSAME( FACTB, 'F' ) .AND.
|
|
$ .NOT.SCHURB ) THEN
|
|
INFO = -3
|
|
ELSE IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND.
|
|
$ .NOT.LSAME( TRANA, 'C' ) ) THEN
|
|
INFO = -4
|
|
ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND.
|
|
$ .NOT.LSAME( TRANB, 'C' ) ) THEN
|
|
INFO = -5
|
|
ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
|
|
INFO = -6
|
|
ELSE IF( M.LT.0 ) THEN
|
|
INFO = -7
|
|
ELSE IF( N.LT.0 ) THEN
|
|
INFO = -8
|
|
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
|
|
INFO = -10
|
|
ELSE IF( LDU.LT.1 .OR. ( .NOT.SCHURA .AND. LDU.LT.M ) ) THEN
|
|
INFO = -12
|
|
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
|
|
INFO = -14
|
|
ELSE IF( LDV.LT.1 .OR. ( .NOT.SCHURB .AND. LDV.LT.N ) ) THEN
|
|
INFO = -16
|
|
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
|
|
INFO = -18
|
|
ELSE
|
|
IF ( NOFACA ) THEN
|
|
IA = 1 + 2*M
|
|
MINWRK = 3*M
|
|
ELSE
|
|
IA = 0
|
|
END IF
|
|
IF ( SCHURA ) THEN
|
|
MINWRK = 0
|
|
ELSE IF ( .NOT.NOFACA ) THEN
|
|
MINWRK = M
|
|
END IF
|
|
IB = 0
|
|
IF ( NOFACB ) THEN
|
|
IB = 2*N
|
|
IF ( .NOT.NOFACA )
|
|
$ IB = IB + 1
|
|
MINWRK = MAX( MINWRK, IB + 3*N )
|
|
ELSE IF ( .NOT.SCHURB ) THEN
|
|
MINWRK = MAX( MINWRK, N )
|
|
END IF
|
|
IF ( CONT ) THEN
|
|
IF ( .NOT.SCHURA )
|
|
$ MINWRK = MAX( MINWRK, IB + M )
|
|
ELSE
|
|
MINWRK = MAX( MINWRK, IB + 2*M )
|
|
END IF
|
|
MINWRK = MAX( 1, IA + MINWRK )
|
|
IF( LDWORK.LT.MINWRK )
|
|
$ INFO = -21
|
|
END IF
|
|
C
|
|
IF( INFO.NE.0 ) THEN
|
|
CALL XERBLA( 'SB04PD', -INFO )
|
|
RETURN
|
|
END IF
|
|
C
|
|
C Quick return if possible.
|
|
C
|
|
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
|
|
SCALE = ONE
|
|
DWORK( 1 ) = ONE
|
|
RETURN
|
|
END IF
|
|
MAXWRK = MINWRK
|
|
C
|
|
IF( NOFACA ) THEN
|
|
C
|
|
C Compute the Schur factorization of A.
|
|
C Workspace: need 1+5*M;
|
|
C prefer larger.
|
|
C (Note: Comments in the code beginning "Workspace:" describe the
|
|
C minimal amount of real workspace needed at that point in the
|
|
C code, as well as the preferred amount for good performance.
|
|
C NB refers to the optimal block size for the immediately
|
|
C following subroutine, as returned by ILAENV.)
|
|
C
|
|
JWORK = 2*M + 2
|
|
IA = JWORK
|
|
AVAILW = LDWORK - JWORK + 1
|
|
CALL DGEES( 'Vectors', 'Not ordered', SELECT, M, A, LDA, SDIM,
|
|
$ DWORK( 2 ), DWORK( M+2 ), U, LDU, DWORK( JWORK ),
|
|
$ AVAILW, BWORK, IERR )
|
|
IF( IERR.GT.0 ) THEN
|
|
INFO = IERR
|
|
RETURN
|
|
END IF
|
|
MAXWRK = MAX( MAXWRK, INT( DWORK( JWORK ) ) + JWORK - 1 )
|
|
ELSE
|
|
JWORK = 1
|
|
IA = 2
|
|
AVAILW = LDWORK
|
|
END IF
|
|
C
|
|
IF( .NOT.SCHURA ) THEN
|
|
C
|
|
C Transform the right-hand side: C <-- U'*C.
|
|
C Workspace: need a+M,
|
|
C prefer a+M*N,
|
|
C where a = 1+2*M, if FACTA = 'N',
|
|
C a = 0, if FACTA <> 'N'.
|
|
C
|
|
CHUNKA = AVAILW / M
|
|
BLOCKA = MIN( CHUNKA, N ).GT.1
|
|
BLAS3A = CHUNKA.GE.N .AND. BLOCKA
|
|
C
|
|
IF ( BLAS3A ) THEN
|
|
C
|
|
C Enough workspace for a fast BLAS 3 algorithm.
|
|
C
|
|
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
|
|
CALL DGEMM( 'Transpose', 'NoTranspose', M, N, M, ONE,
|
|
$ U, LDU, DWORK( JWORK ), M, ZERO, C, LDC )
|
|
ELSE IF ( BLOCKA ) THEN
|
|
C
|
|
C Use as many columns of C as possible.
|
|
C
|
|
DO 10 J = 1, N, CHUNKA
|
|
BL = MIN( N-J+1, CHUNKA )
|
|
CALL DLACPY( 'Full', M, BL, C( 1, J ), LDC,
|
|
$ DWORK( JWORK ), M )
|
|
CALL DGEMM( 'Transpose', 'NoTranspose', M, BL, M, ONE,
|
|
$ U, LDU, DWORK( JWORK ), M, ZERO, C( 1, J ),
|
|
$ LDC )
|
|
10 CONTINUE
|
|
C
|
|
ELSE
|
|
C
|
|
C Use a BLAS 2 algorithm.
|
|
C
|
|
DO 20 J = 1, N
|
|
CALL DCOPY( M, C( 1, J ), 1, DWORK( JWORK ), 1 )
|
|
CALL DGEMV( 'Transpose', M, M, ONE, U, LDU,
|
|
$ DWORK( JWORK ), 1, ZERO, C( 1, J ), 1 )
|
|
20 CONTINUE
|
|
C
|
|
END IF
|
|
MAXWRK = MAX( MAXWRK, JWORK + M*N - 1 )
|
|
END IF
|
|
C
|
|
IF( NOFACB ) THEN
|
|
C
|
|
C Compute the Schur factorization of B.
|
|
C Workspace: need 1+MAX(a-1,0)+5*N,
|
|
C prefer larger.
|
|
C
|
|
JWORK = IA + 2*N
|
|
AVAILW = LDWORK - JWORK + 1
|
|
CALL DGEES( 'Vectors', 'Not ordered', SELECT, N, B, LDB, SDIM,
|
|
$ DWORK( IA ), DWORK( N+IA ), V, LDV, DWORK( JWORK ),
|
|
$ AVAILW, BWORK, IERR )
|
|
IF( IERR.GT.0 ) THEN
|
|
INFO = IERR + M
|
|
RETURN
|
|
END IF
|
|
MAXWRK = MAX( MAXWRK, INT( DWORK( JWORK ) ) + JWORK - 1 )
|
|
C
|
|
IF( .NOT.SCHURA ) THEN
|
|
C
|
|
C Recompute the blocking parameters.
|
|
C
|
|
CHUNKA = AVAILW / M
|
|
BLOCKA = MIN( CHUNKA, N ).GT.1
|
|
BLAS3A = CHUNKA.GE.N .AND. BLOCKA
|
|
END IF
|
|
END IF
|
|
C
|
|
IF( .NOT.SCHURB ) THEN
|
|
C
|
|
C Transform the right-hand side: C <-- C*V.
|
|
C Workspace: need a+b+N,
|
|
C prefer a+b+M*N,
|
|
C where b = 2*N, if FACTB = 'N', FACTA = 'N',
|
|
C b = 1+2*N, if FACTB = 'N', FACTA <> 'N',
|
|
C b = 0, if FACTB <> 'N'.
|
|
C
|
|
CHUNKB = AVAILW / N
|
|
BLOCKB = MIN( CHUNKB, M ).GT.1
|
|
BLAS3B = CHUNKB.GE.M .AND. BLOCKB
|
|
C
|
|
IF ( BLAS3B ) THEN
|
|
C
|
|
C Enough workspace for a fast BLAS 3 algorithm.
|
|
C
|
|
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
|
|
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, N, N, ONE,
|
|
$ DWORK( JWORK ), M, V, LDV, ZERO, C, LDC )
|
|
ELSE IF ( BLOCKB ) THEN
|
|
C
|
|
C Use as many rows of C as possible.
|
|
C
|
|
DO 30 I = 1, M, CHUNKB
|
|
BL = MIN( M-I+1, CHUNKB )
|
|
CALL DLACPY( 'Full', BL, N, C( I, 1 ), LDC,
|
|
$ DWORK( JWORK ), BL )
|
|
CALL DGEMM( 'NoTranspose', 'NoTranspose', BL, N, N, ONE,
|
|
$ DWORK( JWORK ), BL, V, LDV, ZERO, C( I, 1 ),
|
|
$ LDC )
|
|
30 CONTINUE
|
|
C
|
|
ELSE
|
|
C
|
|
C Use a BLAS 2 algorithm.
|
|
C
|
|
DO 40 I = 1, M
|
|
CALL DCOPY( N, C( I, 1 ), LDC, DWORK( JWORK ), 1 )
|
|
CALL DGEMV( 'Transpose', N, N, ONE, V, LDV,
|
|
$ DWORK( JWORK ), 1, ZERO, C( I, 1 ), LDC )
|
|
40 CONTINUE
|
|
C
|
|
END IF
|
|
MAXWRK = MAX( MAXWRK, JWORK + M*N - 1 )
|
|
END IF
|
|
C
|
|
C Solve the (transformed) equation.
|
|
C Workspace for DICO = 'D': a+b+2*M.
|
|
C
|
|
IF ( CONT ) THEN
|
|
CALL DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LDC,
|
|
$ SCALE, IERR )
|
|
ELSE
|
|
CALL SB04PY( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LDC,
|
|
$ SCALE, DWORK( JWORK ), IERR )
|
|
MAXWRK = MAX( MAXWRK, JWORK + 2*M - 1 )
|
|
END IF
|
|
IF( IERR.GT.0 )
|
|
$ INFO = M + N + 1
|
|
C
|
|
C Transform back the solution, if needed.
|
|
C
|
|
IF( .NOT.SCHURA ) THEN
|
|
C
|
|
C Transform the right-hand side: C <-- U*C.
|
|
C Workspace: need a+b+M;
|
|
C prefer a+b+M*N.
|
|
C
|
|
IF ( BLAS3A ) THEN
|
|
C
|
|
C Enough workspace for a fast BLAS 3 algorithm.
|
|
C
|
|
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
|
|
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, N, M, ONE,
|
|
$ U, LDU, DWORK( JWORK ), M, ZERO, C, LDC )
|
|
ELSE IF ( BLOCKA ) THEN
|
|
C
|
|
C Use as many columns of C as possible.
|
|
C
|
|
DO 50 J = 1, N, CHUNKA
|
|
BL = MIN( N-J+1, CHUNKA )
|
|
CALL DLACPY( 'Full', M, BL, C( 1, J ), LDC,
|
|
$ DWORK( JWORK ), M )
|
|
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, BL, M, ONE,
|
|
$ U, LDU, DWORK( JWORK ), M, ZERO, C( 1, J ),
|
|
$ LDC )
|
|
50 CONTINUE
|
|
C
|
|
ELSE
|
|
C
|
|
C Use a BLAS 2 algorithm.
|
|
C
|
|
DO 60 J = 1, N
|
|
CALL DCOPY( M, C( 1, J ), 1, DWORK( JWORK ), 1 )
|
|
CALL DGEMV( 'NoTranspose', M, M, ONE, U, LDU,
|
|
$ DWORK( JWORK ), 1, ZERO, C( 1, J ), 1 )
|
|
60 CONTINUE
|
|
C
|
|
END IF
|
|
END IF
|
|
C
|
|
IF( .NOT.SCHURB ) THEN
|
|
C
|
|
C Transform the right-hand side: C <-- C*V'.
|
|
C Workspace: need a+b+N;
|
|
C prefer a+b+M*N.
|
|
C
|
|
IF ( BLAS3B ) THEN
|
|
C
|
|
C Enough workspace for a fast BLAS 3 algorithm.
|
|
C
|
|
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
|
|
CALL DGEMM( 'NoTranspose', 'Transpose', M, N, N, ONE,
|
|
$ DWORK( JWORK ), M, V, LDV, ZERO, C, LDC )
|
|
ELSE IF ( BLOCKB ) THEN
|
|
C
|
|
C Use as many rows of C as possible.
|
|
C
|
|
DO 70 I = 1, M, CHUNKB
|
|
BL = MIN( M-I+1, CHUNKB )
|
|
CALL DLACPY( 'Full', BL, N, C( I, 1 ), LDC,
|
|
$ DWORK( JWORK ), BL )
|
|
CALL DGEMM( 'NoTranspose', 'Transpose', BL, N, N, ONE,
|
|
$ DWORK( JWORK ), BL, V, LDV, ZERO, C( I, 1 ),
|
|
$ LDC )
|
|
70 CONTINUE
|
|
C
|
|
ELSE
|
|
C
|
|
C Use a BLAS 2 algorithm.
|
|
C
|
|
DO 80 I = 1, M
|
|
CALL DCOPY( N, C( I, 1 ), LDC, DWORK( JWORK ), 1 )
|
|
CALL DGEMV( 'NoTranspose', N, N, ONE, V, LDV,
|
|
$ DWORK( JWORK ), 1, ZERO, C( I, 1 ), LDC )
|
|
80 CONTINUE
|
|
C
|
|
END IF
|
|
END IF
|
|
C
|
|
DWORK( 1 ) = DBLE( MAXWRK )
|
|
C
|
|
RETURN
|
|
C *** Last line of SB04PD ***
|
|
END
|