dynare/mex/sources/libslicot/SB04PD.f

673 lines
25 KiB
Fortran

SUBROUTINE SB04PD( DICO, FACTA, FACTB, TRANA, TRANB, ISGN, M, N,
$ A, LDA, U, LDU, B, LDB, V, LDV, C, LDC, SCALE,
$ DWORK, LDWORK, INFO )
C
C SLICOT RELEASE 5.0.
C
C Copyright (c) 2002-2009 NICONET e.V.
C
C This program is free software: you can redistribute it and/or
C modify it under the terms of the GNU General Public License as
C published by the Free Software Foundation, either version 2 of
C the License, or (at your option) any later version.
C
C This program is distributed in the hope that it will be useful,
C but WITHOUT ANY WARRANTY; without even the implied warranty of
C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
C GNU General Public License for more details.
C
C You should have received a copy of the GNU General Public License
C along with this program. If not, see
C <http://www.gnu.org/licenses/>.
C
C PURPOSE
C
C To solve for X either the real continuous-time Sylvester equation
C
C op(A)*X + ISGN*X*op(B) = scale*C, (1)
C
C or the real discrete-time Sylvester equation
C
C op(A)*X*op(B) + ISGN*X = scale*C, (2)
C
C where op(M) = M or M**T, and ISGN = 1 or -1. A is M-by-M and
C B is N-by-N; the right hand side C and the solution X are M-by-N;
C and scale is an output scale factor, set less than or equal to 1
C to avoid overflow in X. The solution matrix X is overwritten
C onto C.
C
C If A and/or B are not (upper) quasi-triangular, that is, block
C upper triangular with 1-by-1 and 2-by-2 diagonal blocks, they are
C reduced to Schur canonical form, that is, quasi-triangular with
C each 2-by-2 diagonal block having its diagonal elements equal and
C its off-diagonal elements of opposite sign.
C
C ARGUMENTS
C
C Mode Parameters
C
C DICO CHARACTER*1
C Specifies the equation from which X is to be determined
C as follows:
C = 'C': Equation (1), continuous-time case;
C = 'D': Equation (2), discrete-time case.
C
C FACTA CHARACTER*1
C Specifies whether or not the real Schur factorization
C of the matrix A is supplied on entry, as follows:
C = 'F': On entry, A and U contain the factors from the
C real Schur factorization of the matrix A;
C = 'N': The Schur factorization of A will be computed
C and the factors will be stored in A and U;
C = 'S': The matrix A is quasi-triangular (or Schur).
C
C FACTB CHARACTER*1
C Specifies whether or not the real Schur factorization
C of the matrix B is supplied on entry, as follows:
C = 'F': On entry, B and V contain the factors from the
C real Schur factorization of the matrix B;
C = 'N': The Schur factorization of B will be computed
C and the factors will be stored in B and V;
C = 'S': The matrix B is quasi-triangular (or Schur).
C
C TRANA CHARACTER*1
C Specifies the form of op(A) to be used, as follows:
C = 'N': op(A) = A (No transpose);
C = 'T': op(A) = A**T (Transpose);
C = 'C': op(A) = A**T (Conjugate transpose = Transpose).
C
C TRANB CHARACTER*1
C Specifies the form of op(B) to be used, as follows:
C = 'N': op(B) = B (No transpose);
C = 'T': op(B) = B**T (Transpose);
C = 'C': op(B) = B**T (Conjugate transpose = Transpose).
C
C ISGN INTEGER
C Specifies the sign of the equation as described before.
C ISGN may only be 1 or -1.
C
C Input/Output Parameters
C
C M (input) INTEGER
C The order of the matrix A, and the number of rows in the
C matrices X and C. M >= 0.
C
C N (input) INTEGER
C The order of the matrix B, and the number of columns in
C the matrices X and C. N >= 0.
C
C A (input or input/output) DOUBLE PRECISION array,
C dimension (LDA,M)
C On entry, the leading M-by-M part of this array must
C contain the matrix A. If FACTA = 'S', then A contains
C a quasi-triangular matrix, and if FACTA = 'F', then A
C is in Schur canonical form; the elements below the upper
C Hessenberg part of the array A are not referenced.
C On exit, if FACTA = 'N', and INFO = 0 or INFO >= M+1, the
C leading M-by-M upper Hessenberg part of this array
C contains the upper quasi-triangular matrix in Schur
C canonical form from the Schur factorization of A. The
C contents of array A is not modified if FACTA = 'F' or 'S'.
C
C LDA INTEGER
C The leading dimension of array A. LDA >= MAX(1,M).
C
C U (input or output) DOUBLE PRECISION array, dimension
C (LDU,M)
C If FACTA = 'F', then U is an input argument and on entry
C the leading M-by-M part of this array must contain the
C orthogonal matrix U of the real Schur factorization of A.
C If FACTA = 'N', then U is an output argument and on exit,
C if INFO = 0 or INFO >= M+1, it contains the orthogonal
C M-by-M matrix from the real Schur factorization of A.
C If FACTA = 'S', the array U is not referenced.
C
C LDU INTEGER
C The leading dimension of array U.
C LDU >= MAX(1,M), if FACTA = 'F' or 'N';
C LDU >= 1, if FACTA = 'S'.
C
C B (input or input/output) DOUBLE PRECISION array,
C dimension (LDB,N)
C On entry, the leading N-by-N part of this array must
C contain the matrix B. If FACTB = 'S', then B contains
C a quasi-triangular matrix, and if FACTB = 'F', then B
C is in Schur canonical form; the elements below the upper
C Hessenberg part of the array B are not referenced.
C On exit, if FACTB = 'N', and INFO = 0 or INFO = M+N+1,
C the leading N-by-N upper Hessenberg part of this array
C contains the upper quasi-triangular matrix in Schur
C canonical form from the Schur factorization of B. The
C contents of array B is not modified if FACTB = 'F' or 'S'.
C
C LDB (input) INTEGER
C The leading dimension of the array B. LDB >= max(1,N).
C
C V (input or output) DOUBLE PRECISION array, dimension
C (LDV,N)
C If FACTB = 'F', then V is an input argument and on entry
C the leading N-by-N part of this array must contain the
C orthogonal matrix V of the real Schur factorization of B.
C If FACTB = 'N', then V is an output argument and on exit,
C if INFO = 0 or INFO = M+N+1, it contains the orthogonal
C N-by-N matrix from the real Schur factorization of B.
C If FACTB = 'S', the array V is not referenced.
C
C LDV INTEGER
C The leading dimension of array V.
C LDV >= MAX(1,N), if FACTB = 'F' or 'N';
C LDV >= 1, if FACTB = 'S'.
C
C C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
C On entry, the leading M-by-N part of this array must
C contain the right hand side matrix C.
C On exit, if INFO = 0 or INFO = M+N+1, the leading M-by-N
C part of this array contains the solution matrix X.
C
C LDC INTEGER
C The leading dimension of array C. LDC >= MAX(1,M).
C
C SCALE (output) DOUBLE PRECISION
C The scale factor, scale, set less than or equal to 1 to
C prevent the solution overflowing.
C
C Workspace
C
C DWORK DOUBLE PRECISION array, dimension (LDWORK)
C On exit, if INFO = 0 or M+N+1, then: DWORK(1) returns the
C optimal value of LDWORK; if FACTA = 'N', DWORK(1+i) and
C DWORK(1+M+i), i = 1,...,M, contain the real and imaginary
C parts, respectively, of the eigenvalues of A; and, if
C FACTB = 'N', DWORK(1+f+j) and DWORK(1+f+N+j), j = 1,...,N,
C with f = 2*M if FACTA = 'N', and f = 0, otherwise, contain
C the real and imaginary parts, respectively, of the
C eigenvalues of B.
C
C LDWORK INTEGER
C The length of the array DWORK.
C LDWORK >= MAX( 1, a+MAX( c, b+d, b+e ) ),
C where a = 1+2*M, if FACTA = 'N',
C a = 0, if FACTA <> 'N',
C b = 2*N, if FACTB = 'N', FACTA = 'N',
C b = 1+2*N, if FACTB = 'N', FACTA <> 'N',
C b = 0, if FACTB <> 'N',
C c = 3*M, if FACTA = 'N',
C c = M, if FACTA = 'F',
C c = 0, if FACTA = 'S',
C d = 3*N, if FACTB = 'N',
C d = N, if FACTB = 'F',
C d = 0, if FACTB = 'S',
C e = M, if DICO = 'C', FACTA <> 'S',
C e = 0, if DICO = 'C', FACTA = 'S',
C e = 2*M, if DICO = 'D'.
C An upper bound is
C LDWORK = 1+2*M+MAX( 3*M, 5*N, 2*N+2*M ).
C For good performance, LDWORK should be larger, e.g.,
C LDWORK = 1+2*M+MAX( 3*M, 5*N, 2*N+2*M, 2*N+M*N ).
C
C Error Indicator
C
C INFO INTEGER
C = 0: successful exit;
C < 0: if INFO = -i, the i-th argument had an illegal
C value;
C = i: if INFO = i, i = 1,...,M, the QR algorithm failed
C to compute all the eigenvalues of the matrix A
C (see LAPACK Library routine DGEES); the elements
C 2+i:1+M and 2+i+M:1+2*M of DWORK contain the real
C and imaginary parts, respectively, of the
C eigenvalues of A which have converged, and the
C array A contains the partially converged Schur form;
C = M+j: if INFO = M+j, j = 1,...,N, the QR algorithm
C failed to compute all the eigenvalues of the matrix
C B (see LAPACK Library routine DGEES); the elements
C 2+f+j:1+f+N and 2+f+j+N:1+f+2*N of DWORK contain the
C real and imaginary parts, respectively, of the
C eigenvalues of B which have converged, and the
C array B contains the partially converged Schur form;
C as defined for the parameter DWORK,
C f = 2*M, if FACTA = 'N',
C f = 0, if FACTA <> 'N';
C = M+N+1: if DICO = 'C', and the matrices A and -ISGN*B
C have common or very close eigenvalues, or
C if DICO = 'D', and the matrices A and -ISGN*B have
C almost reciprocal eigenvalues (that is, if lambda(i)
C and mu(j) are eigenvalues of A and -ISGN*B, then
C lambda(i) = 1/mu(j) for some i and j);
C perturbed values were used to solve the equation
C (but the matrices A and B are unchanged).
C
C METHOD
C
C An extension and refinement of the algorithms in [1,2] is used.
C If the matrices A and/or B are not quasi-triangular (see PURPOSE),
C they are reduced to Schur canonical form
C
C A = U*S*U', B = V*T*V',
C
C where U, V are orthogonal, and S, T are block upper triangular
C with 1-by-1 and 2-by-2 blocks on their diagonal. The right hand
C side matrix C is updated accordingly,
C
C C = U'*C*V;
C
C then, the solution matrix X of the "reduced" Sylvester equation
C (with A and B in (1) or (2) replaced by S and T, respectively),
C is computed column-wise via a back substitution scheme. A set of
C equivalent linear algebraic systems of equations of order at most
C four are formed and solved using Gaussian elimination with
C complete pivoting. Finally, the solution X of the original
C equation is obtained from the updating formula
C
C X = U*X*V'.
C
C If A and/or B are already quasi-triangular (or in Schur form), the
C initial factorizations and the corresponding updating steps are
C omitted.
C
C REFERENCES
C
C [1] Bartels, R.H. and Stewart, G.W. T
C Solution of the matrix equation A X + XB = C.
C Comm. A.C.M., 15, pp. 820-826, 1972.
C
C [2] Anderson, E., Bai, Z., Bischof, C., Demmel, J., Dongarra, J.,
C Du Croz, J., Greenbaum, A., Hammarling, S., McKenney, A.,
C Ostrouchov, S., and Sorensen, D.
C LAPACK Users' Guide: Second Edition.
C SIAM, Philadelphia, 1995.
C
C NUMERICAL ASPECTS
C
C The algorithm is stable and reliable, since orthogonal
C transformations and Gaussian elimination with complete pivoting
C are used. If INFO = M+N+1, the Sylvester equation is numerically
C singular.
C
C CONTRIBUTORS
C
C D. Sima, University of Bucharest, April 2000.
C V. Sima, Research Institute for Informatics, Bucharest, Apr. 2000.
C
C REVISIONS
C
C -
C
C KEYWORDS
C
C Matrix algebra, orthogonal transformation, real Schur form,
C Sylvester equation.
C
C ******************************************************************
C
C .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
C ..
C .. Scalar Arguments ..
CHARACTER DICO, FACTA, FACTB, TRANA, TRANB
INTEGER INFO, ISGN, LDA, LDB, LDC, LDU, LDV, LDWORK, M,
$ N
DOUBLE PRECISION SCALE
C ..
C .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ),
$ DWORK( * ), U( LDU, * ), V( LDV, * )
C ..
C .. Local Scalars ..
LOGICAL BLAS3A, BLAS3B, BLOCKA, BLOCKB, CONT, NOFACA,
$ NOFACB, NOTRNA, NOTRNB, SCHURA, SCHURB
INTEGER AVAILW, BL, CHUNKA, CHUNKB, I, IA, IB, IERR, J,
$ JWORK, MAXWRK, MINWRK, SDIM
C ..
C .. Local Arrays ..
LOGICAL BWORK( 1 )
C ..
C .. External Functions ..
LOGICAL LSAME, SELECT
EXTERNAL LSAME, SELECT
C ..
C .. External Subroutines ..
EXTERNAL DCOPY, DGEES, DGEMM, DGEMV, DLACPY, DTRSYL,
$ SB04PY, XERBLA
C ..
C .. Intrinsic Functions ..
INTRINSIC DBLE, INT, MAX, MIN
C ..
C .. Executable Statements ..
C
C Decode and Test input parameters
C
CONT = LSAME( DICO, 'C' )
NOFACA = LSAME( FACTA, 'N' )
NOFACB = LSAME( FACTB, 'N' )
SCHURA = LSAME( FACTA, 'S' )
SCHURB = LSAME( FACTB, 'S' )
NOTRNA = LSAME( TRANA, 'N' )
NOTRNB = LSAME( TRANB, 'N' )
C
INFO = 0
IF( .NOT.CONT .AND. .NOT.LSAME( DICO, 'D' ) ) THEN
INFO = -1
ELSE IF( .NOT.NOFACA .AND. .NOT.LSAME( FACTA, 'F' ) .AND.
$ .NOT.SCHURA ) THEN
INFO = -2
ELSE IF( .NOT.NOFACB .AND. .NOT.LSAME( FACTB, 'F' ) .AND.
$ .NOT.SCHURB ) THEN
INFO = -3
ELSE IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND.
$ .NOT.LSAME( TRANA, 'C' ) ) THEN
INFO = -4
ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND.
$ .NOT.LSAME( TRANB, 'C' ) ) THEN
INFO = -5
ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
INFO = -6
ELSE IF( M.LT.0 ) THEN
INFO = -7
ELSE IF( N.LT.0 ) THEN
INFO = -8
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
INFO = -10
ELSE IF( LDU.LT.1 .OR. ( .NOT.SCHURA .AND. LDU.LT.M ) ) THEN
INFO = -12
ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
INFO = -14
ELSE IF( LDV.LT.1 .OR. ( .NOT.SCHURB .AND. LDV.LT.N ) ) THEN
INFO = -16
ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
INFO = -18
ELSE
IF ( NOFACA ) THEN
IA = 1 + 2*M
MINWRK = 3*M
ELSE
IA = 0
END IF
IF ( SCHURA ) THEN
MINWRK = 0
ELSE IF ( .NOT.NOFACA ) THEN
MINWRK = M
END IF
IB = 0
IF ( NOFACB ) THEN
IB = 2*N
IF ( .NOT.NOFACA )
$ IB = IB + 1
MINWRK = MAX( MINWRK, IB + 3*N )
ELSE IF ( .NOT.SCHURB ) THEN
MINWRK = MAX( MINWRK, N )
END IF
IF ( CONT ) THEN
IF ( .NOT.SCHURA )
$ MINWRK = MAX( MINWRK, IB + M )
ELSE
MINWRK = MAX( MINWRK, IB + 2*M )
END IF
MINWRK = MAX( 1, IA + MINWRK )
IF( LDWORK.LT.MINWRK )
$ INFO = -21
END IF
C
IF( INFO.NE.0 ) THEN
CALL XERBLA( 'SB04PD', -INFO )
RETURN
END IF
C
C Quick return if possible.
C
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
SCALE = ONE
DWORK( 1 ) = ONE
RETURN
END IF
MAXWRK = MINWRK
C
IF( NOFACA ) THEN
C
C Compute the Schur factorization of A.
C Workspace: need 1+5*M;
C prefer larger.
C (Note: Comments in the code beginning "Workspace:" describe the
C minimal amount of real workspace needed at that point in the
C code, as well as the preferred amount for good performance.
C NB refers to the optimal block size for the immediately
C following subroutine, as returned by ILAENV.)
C
JWORK = 2*M + 2
IA = JWORK
AVAILW = LDWORK - JWORK + 1
CALL DGEES( 'Vectors', 'Not ordered', SELECT, M, A, LDA, SDIM,
$ DWORK( 2 ), DWORK( M+2 ), U, LDU, DWORK( JWORK ),
$ AVAILW, BWORK, IERR )
IF( IERR.GT.0 ) THEN
INFO = IERR
RETURN
END IF
MAXWRK = MAX( MAXWRK, INT( DWORK( JWORK ) ) + JWORK - 1 )
ELSE
JWORK = 1
IA = 2
AVAILW = LDWORK
END IF
C
IF( .NOT.SCHURA ) THEN
C
C Transform the right-hand side: C <-- U'*C.
C Workspace: need a+M,
C prefer a+M*N,
C where a = 1+2*M, if FACTA = 'N',
C a = 0, if FACTA <> 'N'.
C
CHUNKA = AVAILW / M
BLOCKA = MIN( CHUNKA, N ).GT.1
BLAS3A = CHUNKA.GE.N .AND. BLOCKA
C
IF ( BLAS3A ) THEN
C
C Enough workspace for a fast BLAS 3 algorithm.
C
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
CALL DGEMM( 'Transpose', 'NoTranspose', M, N, M, ONE,
$ U, LDU, DWORK( JWORK ), M, ZERO, C, LDC )
ELSE IF ( BLOCKA ) THEN
C
C Use as many columns of C as possible.
C
DO 10 J = 1, N, CHUNKA
BL = MIN( N-J+1, CHUNKA )
CALL DLACPY( 'Full', M, BL, C( 1, J ), LDC,
$ DWORK( JWORK ), M )
CALL DGEMM( 'Transpose', 'NoTranspose', M, BL, M, ONE,
$ U, LDU, DWORK( JWORK ), M, ZERO, C( 1, J ),
$ LDC )
10 CONTINUE
C
ELSE
C
C Use a BLAS 2 algorithm.
C
DO 20 J = 1, N
CALL DCOPY( M, C( 1, J ), 1, DWORK( JWORK ), 1 )
CALL DGEMV( 'Transpose', M, M, ONE, U, LDU,
$ DWORK( JWORK ), 1, ZERO, C( 1, J ), 1 )
20 CONTINUE
C
END IF
MAXWRK = MAX( MAXWRK, JWORK + M*N - 1 )
END IF
C
IF( NOFACB ) THEN
C
C Compute the Schur factorization of B.
C Workspace: need 1+MAX(a-1,0)+5*N,
C prefer larger.
C
JWORK = IA + 2*N
AVAILW = LDWORK - JWORK + 1
CALL DGEES( 'Vectors', 'Not ordered', SELECT, N, B, LDB, SDIM,
$ DWORK( IA ), DWORK( N+IA ), V, LDV, DWORK( JWORK ),
$ AVAILW, BWORK, IERR )
IF( IERR.GT.0 ) THEN
INFO = IERR + M
RETURN
END IF
MAXWRK = MAX( MAXWRK, INT( DWORK( JWORK ) ) + JWORK - 1 )
C
IF( .NOT.SCHURA ) THEN
C
C Recompute the blocking parameters.
C
CHUNKA = AVAILW / M
BLOCKA = MIN( CHUNKA, N ).GT.1
BLAS3A = CHUNKA.GE.N .AND. BLOCKA
END IF
END IF
C
IF( .NOT.SCHURB ) THEN
C
C Transform the right-hand side: C <-- C*V.
C Workspace: need a+b+N,
C prefer a+b+M*N,
C where b = 2*N, if FACTB = 'N', FACTA = 'N',
C b = 1+2*N, if FACTB = 'N', FACTA <> 'N',
C b = 0, if FACTB <> 'N'.
C
CHUNKB = AVAILW / N
BLOCKB = MIN( CHUNKB, M ).GT.1
BLAS3B = CHUNKB.GE.M .AND. BLOCKB
C
IF ( BLAS3B ) THEN
C
C Enough workspace for a fast BLAS 3 algorithm.
C
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, N, N, ONE,
$ DWORK( JWORK ), M, V, LDV, ZERO, C, LDC )
ELSE IF ( BLOCKB ) THEN
C
C Use as many rows of C as possible.
C
DO 30 I = 1, M, CHUNKB
BL = MIN( M-I+1, CHUNKB )
CALL DLACPY( 'Full', BL, N, C( I, 1 ), LDC,
$ DWORK( JWORK ), BL )
CALL DGEMM( 'NoTranspose', 'NoTranspose', BL, N, N, ONE,
$ DWORK( JWORK ), BL, V, LDV, ZERO, C( I, 1 ),
$ LDC )
30 CONTINUE
C
ELSE
C
C Use a BLAS 2 algorithm.
C
DO 40 I = 1, M
CALL DCOPY( N, C( I, 1 ), LDC, DWORK( JWORK ), 1 )
CALL DGEMV( 'Transpose', N, N, ONE, V, LDV,
$ DWORK( JWORK ), 1, ZERO, C( I, 1 ), LDC )
40 CONTINUE
C
END IF
MAXWRK = MAX( MAXWRK, JWORK + M*N - 1 )
END IF
C
C Solve the (transformed) equation.
C Workspace for DICO = 'D': a+b+2*M.
C
IF ( CONT ) THEN
CALL DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LDC,
$ SCALE, IERR )
ELSE
CALL SB04PY( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, LDC,
$ SCALE, DWORK( JWORK ), IERR )
MAXWRK = MAX( MAXWRK, JWORK + 2*M - 1 )
END IF
IF( IERR.GT.0 )
$ INFO = M + N + 1
C
C Transform back the solution, if needed.
C
IF( .NOT.SCHURA ) THEN
C
C Transform the right-hand side: C <-- U*C.
C Workspace: need a+b+M;
C prefer a+b+M*N.
C
IF ( BLAS3A ) THEN
C
C Enough workspace for a fast BLAS 3 algorithm.
C
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, N, M, ONE,
$ U, LDU, DWORK( JWORK ), M, ZERO, C, LDC )
ELSE IF ( BLOCKA ) THEN
C
C Use as many columns of C as possible.
C
DO 50 J = 1, N, CHUNKA
BL = MIN( N-J+1, CHUNKA )
CALL DLACPY( 'Full', M, BL, C( 1, J ), LDC,
$ DWORK( JWORK ), M )
CALL DGEMM( 'NoTranspose', 'NoTranspose', M, BL, M, ONE,
$ U, LDU, DWORK( JWORK ), M, ZERO, C( 1, J ),
$ LDC )
50 CONTINUE
C
ELSE
C
C Use a BLAS 2 algorithm.
C
DO 60 J = 1, N
CALL DCOPY( M, C( 1, J ), 1, DWORK( JWORK ), 1 )
CALL DGEMV( 'NoTranspose', M, M, ONE, U, LDU,
$ DWORK( JWORK ), 1, ZERO, C( 1, J ), 1 )
60 CONTINUE
C
END IF
END IF
C
IF( .NOT.SCHURB ) THEN
C
C Transform the right-hand side: C <-- C*V'.
C Workspace: need a+b+N;
C prefer a+b+M*N.
C
IF ( BLAS3B ) THEN
C
C Enough workspace for a fast BLAS 3 algorithm.
C
CALL DLACPY( 'Full', M, N, C, LDC, DWORK( JWORK ), M )
CALL DGEMM( 'NoTranspose', 'Transpose', M, N, N, ONE,
$ DWORK( JWORK ), M, V, LDV, ZERO, C, LDC )
ELSE IF ( BLOCKB ) THEN
C
C Use as many rows of C as possible.
C
DO 70 I = 1, M, CHUNKB
BL = MIN( M-I+1, CHUNKB )
CALL DLACPY( 'Full', BL, N, C( I, 1 ), LDC,
$ DWORK( JWORK ), BL )
CALL DGEMM( 'NoTranspose', 'Transpose', BL, N, N, ONE,
$ DWORK( JWORK ), BL, V, LDV, ZERO, C( I, 1 ),
$ LDC )
70 CONTINUE
C
ELSE
C
C Use a BLAS 2 algorithm.
C
DO 80 I = 1, M
CALL DCOPY( N, C( I, 1 ), LDC, DWORK( JWORK ), 1 )
CALL DGEMV( 'NoTranspose', N, N, ONE, V, LDV,
$ DWORK( JWORK ), 1, ZERO, C( I, 1 ), LDC )
80 CONTINUE
C
END IF
END IF
C
DWORK( 1 ) = DBLE( MAXWRK )
C
RETURN
C *** Last line of SB04PD ***
END