dynare/mex/sources/libslicot/IB01MY.f

1095 lines
39 KiB
Fortran

SUBROUTINE IB01MY( METH, BATCH, CONCT, NOBR, M, L, NSMP, U, LDU,
$ Y, LDY, R, LDR, IWORK, DWORK, LDWORK, IWARN,
$ INFO )
C
C SLICOT RELEASE 5.0.
C
C Copyright (c) 2002-2009 NICONET e.V.
C
C This program is free software: you can redistribute it and/or
C modify it under the terms of the GNU General Public License as
C published by the Free Software Foundation, either version 2 of
C the License, or (at your option) any later version.
C
C This program is distributed in the hope that it will be useful,
C but WITHOUT ANY WARRANTY; without even the implied warranty of
C MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
C GNU General Public License for more details.
C
C You should have received a copy of the GNU General Public License
C along with this program. If not, see
C <http://www.gnu.org/licenses/>.
C
C PURPOSE
C
C To construct an upper triangular factor R of the concatenated
C block Hankel matrices using input-output data, via a fast QR
C algorithm based on displacement rank. The input-output data can,
C optionally, be processed sequentially.
C
C ARGUMENTS
C
C Mode Parameters
C
C METH CHARACTER*1
C Specifies the subspace identification method to be used,
C as follows:
C = 'M': MOESP algorithm with past inputs and outputs;
C = 'N': N4SID algorithm.
C
C BATCH CHARACTER*1
C Specifies whether or not sequential data processing is to
C be used, and, for sequential processing, whether or not
C the current data block is the first block, an intermediate
C block, or the last block, as follows:
C = 'F': the first block in sequential data processing;
C = 'I': an intermediate block in sequential data
C processing;
C = 'L': the last block in sequential data processing;
C = 'O': one block only (non-sequential data processing).
C NOTE that when 100 cycles of sequential data processing
C are completed for BATCH = 'I', a warning is
C issued, to prevent for an infinite loop.
C
C CONCT CHARACTER*1
C Specifies whether or not the successive data blocks in
C sequential data processing belong to a single experiment,
C as follows:
C = 'C': the current data block is a continuation of the
C previous data block and/or it will be continued
C by the next data block;
C = 'N': there is no connection between the current data
C block and the previous and/or the next ones.
C This parameter is not used if BATCH = 'O'.
C
C Input/Output Parameters
C
C NOBR (input) INTEGER
C The number of block rows, s, in the input and output
C block Hankel matrices to be processed. NOBR > 0.
C (In the MOESP theory, NOBR should be larger than n, the
C estimated dimension of state vector.)
C
C M (input) INTEGER
C The number of system inputs. M >= 0.
C When M = 0, no system inputs are processed.
C
C L (input) INTEGER
C The number of system outputs. L > 0.
C
C NSMP (input) INTEGER
C The number of rows of matrices U and Y (number of
C samples, t). (When sequential data processing is used,
C NSMP is the number of samples of the current data
C block.)
C NSMP >= 2*(M+L+1)*NOBR - 1, for non-sequential
C processing;
C NSMP >= 2*NOBR, for sequential processing.
C The total number of samples when calling the routine with
C BATCH = 'L' should be at least 2*(M+L+1)*NOBR - 1.
C The NSMP argument may vary from a cycle to another in
C sequential data processing, but NOBR, M, and L should
C be kept constant. For efficiency, it is advisable to use
C NSMP as large as possible.
C
C U (input) DOUBLE PRECISION array, dimension (LDU,M)
C The leading NSMP-by-M part of this array must contain the
C t-by-m input-data sequence matrix U,
C U = [u_1 u_2 ... u_m]. Column j of U contains the
C NSMP values of the j-th input component for consecutive
C time increments.
C If M = 0, this array is not referenced.
C
C LDU INTEGER
C The leading dimension of the array U.
C LDU >= NSMP, if M > 0;
C LDU >= 1, if M = 0.
C
C Y (input) DOUBLE PRECISION array, dimension (LDY,L)
C The leading NSMP-by-L part of this array must contain the
C t-by-l output-data sequence matrix Y,
C Y = [y_1 y_2 ... y_l]. Column j of Y contains the
C NSMP values of the j-th output component for consecutive
C time increments.
C
C LDY INTEGER
C The leading dimension of the array Y. LDY >= NSMP.
C
C R (output) DOUBLE PRECISION array, dimension
C ( LDR,2*(M+L)*NOBR )
C If INFO = 0 and BATCH = 'L' or 'O', the leading
C 2*(M+L)*NOBR-by-2*(M+L)*NOBR upper triangular part of this
C array contains the upper triangular factor R from the
C QR factorization of the concatenated block Hankel
C matrices.
C
C LDR INTEGER
C The leading dimension of the array R.
C LDR >= 2*(M+L)*NOBR.
C
C Workspace
C
C IWORK INTEGER array, dimension (M+L)
C
C DWORK DOUBLE PRECISION array, dimension (LDWORK)
C On exit, if INFO = 0, DWORK(1) returns the optimal
C value of LDWORK.
C On exit, if INFO = -16, DWORK(1) returns the minimum
C value of LDWORK.
C The first (M+L)*2*NOBR*(M+L+c) elements of DWORK should
C be preserved during successive calls of the routine
C with BATCH = 'F' or 'I', till the final call with
C BATCH = 'L', where
C c = 1, if the successive data blocks do not belong to a
C single experiment (CONCT = 'N');
C c = 2, if the successive data blocks belong to a single
C experiment (CONCT = 'C').
C
C LDWORK INTEGER
C The length of the array DWORK.
C LDWORK >= (M+L)*2*NOBR*(M+L+3),
C if BATCH <> 'O' and CONCT = 'C';
C LDWORK >= (M+L)*2*NOBR*(M+L+1),
C if BATCH = 'F' or 'I' and CONCT = 'N';
C LDWORK >= (M+L)*4*NOBR*(M+L+1)+(M+L)*2*NOBR,
C if BATCH = 'L' and CONCT = 'N',
C or BATCH = 'O'.
C
C Warning Indicator
C
C IWARN INTEGER
C = 0: no warning;
C = 1: the number of 100 cycles in sequential data
C processing has been exhausted without signaling
C that the last block of data was get.
C
C Error Indicator
C
C INFO INTEGER
C = 0: successful exit;
C < 0: if INFO = -i, the i-th argument had an illegal
C value;
C = 1: the fast QR factorization algorithm failed. The
C matrix H'*H is not (numerically) positive definite.
C
C METHOD
C
C Consider the t x 2(m+l)s matrix H of concatenated block Hankel
C matrices
C
C H = [ Uf' Up' Y' ], for METH = 'M',
C s+1,2s,t 1,s,t 1,2s,t
C
C H = [ U' Y' ], for METH = 'N',
C 1,2s,t 1,2s,t
C
C where Up , Uf , U , and Y are block
C 1,s,t s+1,2s,t 1,2s,t 1,2s,t
C Hankel matrices defined in terms of the input and output data [3].
C The fast QR algorithm uses a factorization of H'*H which exploits
C the block-Hankel structure, via a displacement rank technique [5].
C
C REFERENCES
C
C [1] Verhaegen M., and Dewilde, P.
C Subspace Model Identification. Part 1: The output-error
C state-space model identification class of algorithms.
C Int. J. Control, 56, pp. 1187-1210, 1992.
C
C [2] Verhaegen M.
C Subspace Model Identification. Part 3: Analysis of the
C ordinary output-error state-space model identification
C algorithm.
C Int. J. Control, 58, pp. 555-586, 1993.
C
C [3] Verhaegen M.
C Identification of the deterministic part of MIMO state space
C models given in innovations form from input-output data.
C Automatica, Vol.30, No.1, pp.61-74, 1994.
C
C [4] Van Overschee, P., and De Moor, B.
C N4SID: Subspace Algorithms for the Identification of
C Combined Deterministic-Stochastic Systems.
C Automatica, Vol.30, No.1, pp. 75-93, 1994.
C
C [5] Kressner, D., Mastronardi, N., Sima, V., Van Dooren, P., and
C Van Huffel, S.
C A Fast Algorithm for Subspace State-space System
C Identification via Exploitation of the Displacement Structure.
C J. Comput. Appl. Math., Vol.132, No.1, pp. 71-81, 2001.
C
C NUMERICAL ASPECTS
C
C The implemented method is reliable and efficient. Numerical
C difficulties are possible when the matrix H'*H is nearly rank
C defficient. The method cannot be used if the matrix H'*H is not
C numerically positive definite.
C 2 3 2
C The algorithm requires 0(2t(m+l) s)+0(4(m+l) s ) floating point
C operations.
C
C CONTRIBUTORS
C
C V. Sima, Katholieke Universiteit Leuven, June 2000.
C Partly based on Matlab codes developed by N. Mastronardi,
C Katholieke Universiteit Leuven, February 2000.
C
C REVISIONS
C
C V. Sima, July 2000, August 2000, Feb. 2004, May 2009.
C
C KEYWORDS
C
C Displacement rank, Hankel matrix, Householder transformation,
C identification methods, multivariable systems.
C
C ******************************************************************
C
C .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER MAXCYC
PARAMETER ( MAXCYC = 100 )
C .. Scalar Arguments ..
INTEGER INFO, IWARN, L, LDR, LDU, LDWORK, LDY, M, NOBR,
$ NSMP
CHARACTER BATCH, CONCT, METH
C .. Array Arguments ..
INTEGER IWORK(*)
DOUBLE PRECISION DWORK(*), R(LDR, *), U(LDU, *), Y(LDY, *)
C .. Local Scalars ..
DOUBLE PRECISION BETA, CS, SN, UPD, TAU
INTEGER I, ICJ, ICOL, ICONN, ICYCLE, IERR, IMAX, ING,
$ INGC, INGP, IPG, IPGC, IPY, IREV, ITAU, J, JD,
$ JDS, JWORK, K, LDRWRK, LLNOBR, LNOBR, LNRG,
$ MAXWRK, MINWRK, MMNOBR, MNOBR, MNRG, NOBR2,
$ NOBR21, NR, NRG, NS, NSM, NSMPSM
LOGICAL CONNEC, FIRST, INTERM, LAST, MOESP, N4SID,
$ ONEBCH
C .. Local Arrays ..
DOUBLE PRECISION DUM(1)
C .. External Functions ..
LOGICAL LSAME
INTEGER IDAMAX
EXTERNAL IDAMAX, LSAME
C .. External Subroutines ..
EXTERNAL DCOPY, DGEMM, DGEQRF, DLACPY, DLARF, DLARFG,
$ DLASET, DORMQR, DSCAL, DSWAP, DSYRK, MA02ED,
$ MA02FD, MB04ID, MB04OD, XERBLA
C .. Intrinsic Functions ..
INTRINSIC ABS, INT, MAX, SQRT
C .. Save Statement ..
C ICYCLE is used to count the cycles for BATCH = 'I'.
C MAXWRK is used to store the optimal workspace.
C NSMPSM is used to sum up the NSMP values for BATCH <> 'O'.
SAVE ICYCLE, MAXWRK, NSMPSM
C ..
C .. Executable Statements ..
C
C Decode the scalar input parameters.
C
MOESP = LSAME( METH, 'M' )
N4SID = LSAME( METH, 'N' )
ONEBCH = LSAME( BATCH, 'O' )
FIRST = LSAME( BATCH, 'F' ) .OR. ONEBCH
INTERM = LSAME( BATCH, 'I' )
LAST = LSAME( BATCH, 'L' ) .OR. ONEBCH
IF( .NOT.ONEBCH ) THEN
CONNEC = LSAME( CONCT, 'C' )
ELSE
CONNEC = .FALSE.
END IF
MNOBR = M*NOBR
LNOBR = L*NOBR
MMNOBR = MNOBR + MNOBR
LLNOBR = LNOBR + LNOBR
NOBR2 = 2*NOBR
NOBR21 = NOBR2 - 1
IWARN = 0
INFO = 0
IF( FIRST ) THEN
ICYCLE = 1
MAXWRK = 1
NSMPSM = 0
END IF
NSMPSM = NSMPSM + NSMP
NR = MMNOBR + LLNOBR
C
C Check the scalar input parameters.
C
IF( .NOT.( MOESP .OR. N4SID ) ) THEN
INFO = -1
ELSE IF( .NOT.( FIRST .OR. INTERM .OR. LAST ) ) THEN
INFO = -2
ELSE IF( .NOT. ONEBCH ) THEN
IF( .NOT.( CONNEC .OR. LSAME( CONCT, 'N' ) ) )
$ INFO = -3
END IF
IF( INFO.EQ.0 ) THEN
IF( NOBR.LE.0 ) THEN
INFO = -4
ELSE IF( M.LT.0 ) THEN
INFO = -5
ELSE IF( L.LE.0 ) THEN
INFO = -6
ELSE IF( NSMP.LT.NOBR2 .OR.
$ ( LAST .AND. NSMPSM.LT.NR+NOBR21 ) ) THEN
INFO = -7
ELSE IF( LDU.LT.1 .OR. ( M.GT.0 .AND. LDU.LT.NSMP ) ) THEN
INFO = -9
ELSE IF( LDY.LT.NSMP ) THEN
INFO = -11
ELSE IF( LDR.LT.NR ) THEN
INFO = -13
ELSE
C
C Compute workspace.
C NRG is the number of positive (or negative) generators.
C
NRG = M + L + 1
IF ( .NOT.ONEBCH .AND. CONNEC ) THEN
MINWRK = NR*( NRG + 2 )
ELSE IF ( FIRST .OR. INTERM ) THEN
MINWRK = NR*NRG
ELSE
MINWRK = 2*NR*NRG + NR
END IF
MAXWRK = MAX( MINWRK, MAXWRK )
C
IF( LDWORK.LT.MINWRK )
$ INFO = -16
END IF
END IF
C
C Return if there are illegal arguments.
C
IF( INFO.NE.0 ) THEN
NSMPSM = 0
IF ( INFO.EQ.-16 )
$ DWORK( 1 ) = MINWRK
CALL XERBLA( 'IB01MY', -INFO )
RETURN
END IF
C
C Compute the R factor from a fast QR factorization of the
C matrix H, a concatenation of two block Hankel matrices.
C Specifically, a displacement rank technique is applied to
C the block Toeplitz matrix, G = (P*H)'*(P*H), where P is a
C 2-by-2 block diagonal matrix, having as diagonal blocks identity
C matrices with columns taken in the reverse order.
C The technique builds and processes the generators of G. The
C matrices G and G1 = H'*H have the same R factor.
C
C Set the parameters for constructing the correlations of the
C current block.
C NSM is the number of processed samples in U and Y, t - 2s.
C IPG and ING are pointers to the "positive" and "negative"
C generators, stored row-wise in the workspace. All "positive"
C generators are stored before any "negative" generators.
C If BATCH <> 'O' and CONCT = 'C', the "connection" elements of
C two successive batches are stored in the same workspace as the
C "negative" generators (which will be computed later on).
C IPY is a pointer to the Y part of the "positive" generators.
C LDRWRK is used as a leading dimension for the workspace part used
C to store the "connection" elements.
C
NS = NSMP - NOBR21
NSM = NS - 1
MNRG = M*NRG
LNRG = L*NRG
C
LDRWRK = 2*NOBR2
IF( FIRST ) THEN
UPD = ZERO
ELSE
UPD = ONE
END IF
DUM(1) = ZERO
C
IPG = 1
IPY = IPG + M
ING = IPG + NRG*NR
ICONN = ING
C
IF( .NOT.FIRST .AND. CONNEC ) THEN
C
C Restore the saved (M+L)*2*NOBR "connection" elements of
C U and Y into their appropriate position in sequential
C processing. The process is performed column-wise, in
C reverse order, first for Y and then for U.
C ICONN is a pointer to the first saved "connection" element.
C Workspace: need (M+L)*2*NOBR*(M+L+3).
C
IREV = ICONN + NR
ICOL = ICONN + 2*NR
C
DO 10 I = 2, M + L
IREV = IREV - NOBR2
ICOL = ICOL - LDRWRK
CALL DCOPY( NOBR2, DWORK(IREV), 1, DWORK(ICOL), 1 )
10 CONTINUE
C
IF ( M.GT.0 )
$ CALL DLACPY( 'Full', NOBR2, M, U, LDU, DWORK(ICONN+NOBR2),
$ LDRWRK )
CALL DLACPY( 'Full', NOBR2, L, Y, LDY,
$ DWORK(ICONN+LDRWRK*M+NOBR2), LDRWRK )
END IF
C
IF ( M.GT.0 ) THEN
C
C Let Guu(i,j) = Guu0(i,j) + u_i*u_j' + u_(i+1)*u_(j+1)' +
C ... + u_(i+NSM-1)*u_(j+NSM-1)',
C where u_i' is the i-th row of U, j = 1 : 2s, i = 1 : j,
C NSM = NSMP - 2s, and Guu0(i,j) is a zero matrix for
C BATCH = 'O' or 'F', and it is the matrix Guu(i,j) computed
C till the current block for BATCH = 'I' or 'L'. The matrix
C Guu(i,j) is m-by-m, and Guu(j,j) is symmetric. The
C submatrices of the first block-row, Guu(1,j), are needed only.
C
C Compute/update Guu(1,1).
C
IF( .NOT.FIRST .AND. CONNEC )
$ CALL DSYRK( 'Upper', 'Transpose', M, NOBR2, ONE,
$ DWORK(ICONN), LDRWRK, UPD, DWORK(IPG), NRG )
CALL DSYRK( 'Upper', 'Transpose', M, NSM, ONE, U, LDU, UPD,
$ DWORK(IPG), NRG )
CALL MA02ED( 'Upper', M, DWORK(IPG), NRG )
C
JD = 1
C
IF( FIRST .OR. .NOT.CONNEC ) THEN
C
DO 20 J = 2, NOBR2
JD = JD + M
C
C Compute/update Guu(1,j).
C
CALL DGEMM( 'Transpose', 'NoTranspose', M, M, NSM, ONE,
$ U, LDU, U(J,1), LDU, UPD,
$ DWORK(IPG+(JD-1)*NRG), NRG )
20 CONTINUE
C
ELSE
C
DO 30 J = 2, NOBR2
JD = JD + M
C
C Compute/update Guu(1,j) for sequential processing
C with connected blocks.
C
CALL DGEMM( 'Transpose', 'NoTranspose', M, M, NOBR2,
$ ONE, DWORK(ICONN), LDRWRK, DWORK(ICONN+J-1),
$ LDRWRK, UPD, DWORK(IPG+(JD-1)*NRG), NRG )
CALL DGEMM( 'Transpose', 'NoTranspose', M, M, NSM, ONE,
$ U, LDU, U(J,1), LDU, ONE,
$ DWORK(IPG+(JD-1)*NRG), NRG )
30 CONTINUE
C
END IF
C
C Let Guy(i,j) = Guy0(i,j) + u_i*y_j' + u_(i+1)*y_(j+1)' +
C ... + u_(i+NSM-1)*y_(j+NSM-1)',
C where u_i' is the i-th row of U, y_j' is the j-th row
C of Y, j = 1 : 2s, i = 1 : 2s, NSM = NSMP - 2s, and
C Guy0(i,j) is a zero matrix for BATCH = 'O' or 'F', and it
C is the matrix Guy(i,j) computed till the current block for
C BATCH = 'I' or 'L'. Guy(i,j) is m-by-L. The submatrices
C of the first block-row, Guy(1,j), as well as the transposes
C of the submatrices of the first block-column, i.e., Gyu(1,j),
C are needed only.
C
JD = MMNOBR + 1
C
IF( FIRST .OR. .NOT.CONNEC ) THEN
C
DO 40 J = 1, NOBR2
C
C Compute/update Guy(1,j).
C
CALL DGEMM( 'Transpose', 'NoTranspose', M, L, NSM, ONE,
$ U, LDU, Y(J,1), LDY, UPD,
$ DWORK(IPG+(JD-1)*NRG), NRG )
JD = JD + L
40 CONTINUE
C
ELSE
C
DO 50 J = 1, NOBR2
C
C Compute/update Guy(1,j) for sequential processing
C with connected blocks.
C
CALL DGEMM( 'Transpose', 'NoTranspose', M, L, NOBR2,
$ ONE, DWORK(ICONN), LDRWRK,
$ DWORK(ICONN+LDRWRK*M+J-1), LDRWRK, UPD,
$ DWORK(IPG+(JD-1)*NRG), NRG )
CALL DGEMM( 'Transpose', 'NoTranspose', M, L, NSM, ONE,
$ U, LDU, Y(J,1), LDY, ONE,
$ DWORK(IPG+(JD-1)*NRG), NRG )
JD = JD + L
50 CONTINUE
C
END IF
C
C Now, the first M "positive" generators have been built.
C Transpose Guy(1,1) in the first block of the Y part of the
C "positive" generators.
C
DO 60 J = 1, L
CALL DCOPY( M, DWORK(IPG+(MMNOBR+J-1)*NRG), 1,
$ DWORK(IPY+J-1), NRG )
60 CONTINUE
C
JD = 1
C
IF( FIRST .OR. .NOT.CONNEC ) THEN
C
DO 70 J = 2, NOBR2
JD = JD + M
C
C Compute/update Gyu(1,j).
C
CALL DGEMM( 'Transpose', 'NoTranspose', L, M, NSM, ONE,
$ Y, LDY, U(J,1), LDU, UPD,
$ DWORK(IPY+(JD-1)*NRG), NRG )
70 CONTINUE
C
ELSE
C
DO 80 J = 2, NOBR2
JD = JD + M
C
C Compute/update Gyu(1,j) for sequential processing
C with connected blocks.
C
CALL DGEMM( 'Transpose', 'NoTranspose', L, M, NOBR2,
$ ONE, DWORK(ICONN+LDRWRK*M), LDRWRK,
$ DWORK(ICONN+J-1), LDRWRK, UPD,
$ DWORK(IPY+(JD-1)*NRG), NRG )
CALL DGEMM( 'Transpose', 'NoTranspose', L, M, NSM, ONE,
$ Y, LDY, U(J,1), LDU, ONE,
$ DWORK(IPY+(JD-1)*NRG), NRG )
80 CONTINUE
C
END IF
C
END IF
C
C Let Gyy(i,j) = Gyy0(i,j) + y_i*y_i' + y_(i+1)*y_(i+1)' + ... +
C y_(i+NSM-1)*y_(i+NSM-1)',
C where y_i' is the i-th row of Y, j = 1 : 2s, i = 1 : j,
C NSM = NSMP - 2s, and Gyy0(i,j) is a zero matrix for
C BATCH = 'O' or 'F', and it is the matrix Gyy(i,j) computed till
C the current block for BATCH = 'I' or 'L'. Gyy(i,j) is L-by-L,
C and Gyy(j,j) is symmetric. The submatrices of the first
C block-row, Gyy(1,j), are needed only.
C
JD = MMNOBR + 1
C
C Compute/update Gyy(1,1).
C
IF( .NOT.FIRST .AND. CONNEC )
$ CALL DSYRK( 'Upper', 'Transpose', L, NOBR2, ONE,
$ DWORK(ICONN+LDRWRK*M), LDRWRK, UPD,
$ DWORK(IPY+MMNOBR*NRG), NRG )
CALL DSYRK( 'Upper', 'Transpose', L, NSM, ONE, Y, LDY, UPD,
$ DWORK(IPY+MMNOBR*NRG), NRG )
CALL MA02ED( 'Upper', L, DWORK(IPY+MMNOBR*NRG), NRG )
C
IF( FIRST .OR. .NOT.CONNEC ) THEN
C
DO 90 J = 2, NOBR2
JD = JD + L
C
C Compute/update Gyy(1,j).
C
CALL DGEMM( 'Transpose', 'NoTranspose', L, L, NSM, ONE, Y,
$ LDY, Y(J,1), LDY, UPD, DWORK(IPY+(JD-1)*NRG),
$ NRG )
90 CONTINUE
C
ELSE
C
DO 100 J = 2, NOBR2
JD = JD + L
C
C Compute/update Gyy(1,j) for sequential processing with
C connected blocks.
C
CALL DGEMM( 'Transpose', 'NoTranspose', L, L, NOBR2, ONE,
$ DWORK(ICONN+LDRWRK*M), LDRWRK,
$ DWORK(ICONN+LDRWRK*M+J-1), LDRWRK, UPD,
$ DWORK(IPY+(JD-1)*NRG), NRG )
CALL DGEMM( 'Transpose', 'NoTranspose', L, L, NSM, ONE, Y,
$ LDY, Y(J,1), LDY, ONE, DWORK(IPY+(JD-1)*NRG),
$ NRG )
100 CONTINUE
C
END IF
C
IF ( .NOT.LAST ) THEN
IF ( FIRST ) THEN
C
C For sequential processing, save the first 2*NOBR-1 rows of
C the first block of U and Y in the appropriate
C (M+L)*(2*NOBR-1) locations of DWORK starting at (1+M)*NRG.
C These will be used to construct the last negative generator.
C
JD = NRG
IF ( M.GT.0 ) THEN
CALL DCOPY( M, DUM, 0, DWORK(JD), NRG )
C
DO 110 J = 1, NOBR21
JD = JD + MNRG
CALL DCOPY( M, U(J,1), LDU, DWORK(JD), NRG )
110 CONTINUE
C
JD = JD + MNRG
END IF
CALL DCOPY( L, DUM, 0, DWORK(JD), NRG )
C
DO 120 J = 1, NOBR21
JD = JD + LNRG
CALL DCOPY( L, Y(J,1), LDY, DWORK(JD), NRG )
120 CONTINUE
C
END IF
C
IF ( CONNEC ) THEN
C
C For sequential processing with connected data blocks,
C save the remaining ("connection") elements of U and Y
C in (M+L)*2*NOBR locations of DWORK starting at ICONN.
C
IF ( M.GT.0 )
$ CALL DLACPY( 'Full', NOBR2, M, U(NS,1), LDU,
$ DWORK(ICONN), NOBR2 )
CALL DLACPY( 'Full', NOBR2, L, Y(NS,1), LDY,
$ DWORK(ICONN+MMNOBR), NOBR2 )
END IF
C
C Return to get new data.
C
ICYCLE = ICYCLE + 1
IF ( ICYCLE.GT.MAXCYC )
$ IWARN = 1
RETURN
END IF
C
IF ( LAST ) THEN
C
C Try to compute the R factor.
C
C Scale the first M+L positive generators and set the first
C M+L negative generators.
C Workspace: need (M+L)*4*NOBR*(M+L+1)+M+L.
C
JWORK = NRG*2*NR + 1
CALL DCOPY( M, DWORK(IPG), NRG+1, DWORK(JWORK), 1 )
CALL DCOPY( L, DWORK(IPY+MMNOBR*NRG), NRG+1, DWORK(JWORK+M),
$ 1 )
C
DO 130 I = 1, M + L
IWORK(I) = IDAMAX( M+L, DWORK(JWORK), 1 )
DWORK(JWORK+IWORK(I)-1) = ZERO
130 CONTINUE
C
DO 150 I = 1, M + L
IMAX = IWORK(I)
IF ( IMAX.LE.M ) THEN
ICOL = IMAX
ELSE
ICOL = MMNOBR - M + IMAX
END IF
BETA = SQRT( ABS( DWORK(IPG+IMAX-1+(ICOL-1)*NRG) ) )
IF ( BETA.EQ.ZERO ) THEN
C
C Error exit.
C
INFO = 1
RETURN
END IF
CALL DSCAL( NR, ONE / BETA, DWORK(IPG+IMAX-1), NRG )
CALL DCOPY( NR, DWORK(IPG+IMAX-1), NRG, DWORK(ING+IMAX-1),
$ NRG )
DWORK(IPG+IMAX-1+(ICOL-1)*NRG) = BETA
DWORK(ING+IMAX-1+(ICOL-1)*NRG) = ZERO
C
DO 140 J = I + 1, M + L
DWORK(IPG+IWORK(J)-1+(ICOL-1)*NRG) = ZERO
140 CONTINUE
C
150 CONTINUE
C
C Compute the last two generators.
C
IF ( .NOT.FIRST ) THEN
C
C For sequential processing, move the stored last negative
C generator.
C
CALL DCOPY( NR, DWORK(NRG), NRG, DWORK(ING+NRG-1), NRG )
END IF
C
JD = NRG
IF ( M.GT.0 ) THEN
C
DO 160 J = NS, NSMP
CALL DCOPY( M, U(J,1), LDU, DWORK(JD), NRG )
JD = JD + MNRG
160 CONTINUE
C
END IF
C
DO 170 J = NS, NSMP
CALL DCOPY( L, Y(J,1), LDY, DWORK(JD), NRG )
JD = JD + LNRG
170 CONTINUE
C
IF ( FIRST ) THEN
IF ( M.GT.0 ) THEN
CALL DCOPY( M, DUM, 0, DWORK(JD), NRG )
C
DO 180 J = 1, NOBR21
JD = JD + MNRG
CALL DCOPY( M, U(J,1), LDU, DWORK(JD), NRG )
180 CONTINUE
C
JD = JD + MNRG
END IF
CALL DCOPY( L, DUM, 0, DWORK(JD), NRG )
C
DO 190 J = 1, NOBR21
JD = JD + LNRG
CALL DCOPY( L, Y(J,1), LDY, DWORK(JD), NRG )
190 CONTINUE
C
END IF
C
ITAU = JWORK
IPGC = IPG + MMNOBR*NRG
C
IF ( M.GT.0 ) THEN
C
C Process the input part of the generators.
C
JWORK = ITAU + M
C
C Reduce the first M columns of the matrix G1 of positive
C generators to an upper triangular form.
C Workspace: need (M+L)*4*NOBR*(M+L+1)+2*M;
C prefer (M+L)*4*NOBR*(M+L+1)+M+M*NB.
C
INGC = ING
CALL DGEQRF( NRG, M, DWORK(IPG), NRG, DWORK(ITAU),
$ DWORK(JWORK), LDWORK-JWORK+1, IERR )
MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) + JWORK - 1 )
C
C Workspace: need (M+L)*4*NOBR*(M+L+1)+(M+L)*2*NOBR;
C prefer (M+L)*4*NOBR*(M+L+1)+M+
C ((M+L)*2*NOBR-M)*NB.
C
CALL DORMQR( 'Left', 'Transpose', NRG, NR-M, M, DWORK(IPG),
$ NRG, DWORK(ITAU), DWORK(IPG+MNRG), NRG,
$ DWORK(JWORK), LDWORK-JWORK+1, IERR )
MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) + JWORK - 1 )
C
C Annihilate, column by column, the first M columns of the
C matrix G2 of negative generators, using Householder
C transformations and modified hyperbolic plane rotations.
C In the DLARF calls, ITAU is a pointer to the workspace
C array.
C
DO 210 J = 1, M
CALL DLARFG( NRG, DWORK(INGC), DWORK(INGC+1), 1, TAU )
BETA = DWORK(INGC)
DWORK(INGC) = ONE
INGP = INGC + NRG
CALL DLARF( 'Left', NRG, NR-J, DWORK(INGC), 1, TAU,
$ DWORK(INGP), NRG, DWORK(ITAU) )
DWORK(INGC) = BETA
C
C Compute the coefficients of the modified hyperbolic
C rotation.
C
CALL MA02FD( DWORK(IPG+(J-1)*(NRG+1)), DWORK(INGC), CS,
$ SN, IERR )
IF( IERR.NE.0 ) THEN
C
C Error return: the matrix H'*H is not (numerically)
C positive definite.
C
INFO = 1
RETURN
END IF
C
DO 200 I = J*NRG, ( NR - 1 )*NRG, NRG
DWORK(IPG+J-1+I) = ( DWORK(IPG+J-1+I) -
$ SN * DWORK(ING+I) ) / CS
DWORK(ING+I) = -SN * DWORK(IPG+J-1+I) +
$ CS * DWORK(ING+I)
200 CONTINUE
C
INGC = INGP
210 CONTINUE
C
C Save one block row of R, and shift the generators for the
C calculation of the following row.
C
CALL DLACPY( 'Upper', M, NR, DWORK(IPG), NRG, R, LDR )
C
DO 220 I = ( MMNOBR - M )*NRG, MNRG, -MNRG
CALL DLACPY( 'Full', M, M, DWORK(IPG+I-MNRG), NRG,
$ DWORK(IPG+I), NRG )
220 CONTINUE
C
DO 230 I = ( NR - L )*NRG, ( MMNOBR + L )*NRG, -LNRG
CALL DLACPY( 'Full', M, L, DWORK(IPG+I-LNRG), NRG,
$ DWORK(IPG+I), NRG )
230 CONTINUE
C
CALL DLASET( 'Full', M, L, ZERO, ZERO, DWORK(IPGC), NRG )
C
C Update the input part of generators using Schur algorithm.
C Workspace: need (M+L)*4*NOBR*(M+L+1)+2*NOBR*(M+L)-M.
C
JDS = MNRG
ICOL = M
C
DO 280 K = 2, NOBR2
CALL MB04OD( 'Full', M, NR-ICOL-M, L+1, DWORK(IPG+JDS),
$ NRG, DWORK(IPY+JDS), NRG,
$ DWORK(IPG+JDS+MNRG), NRG,
$ DWORK(IPY+JDS+MNRG), NRG, DWORK(ITAU),
$ DWORK(JWORK) )
C
DO 250 J = 1, M
ICJ = ICOL + J
CALL DLARFG( NRG, DWORK(INGC), DWORK(INGC+1), 1, TAU )
BETA = DWORK(INGC)
DWORK(INGC) = ONE
INGP = INGC + NRG
CALL DLARF( 'Left', NRG, NR-ICJ, DWORK(INGC), 1, TAU,
$ DWORK(INGP), NRG, DWORK(ITAU) )
DWORK(INGC) = BETA
C
C Compute the coefficients of the modified hyperbolic
C rotation.
C
CALL MA02FD( DWORK(IPG+J-1+(ICJ-1)*NRG), DWORK(INGC),
$ CS, SN, IERR )
IF( IERR.NE.0 ) THEN
C
C Error return: the matrix H'*H is not (numerically)
C positive definite.
C
INFO = 1
RETURN
END IF
C
DO 240 I = ICJ*NRG, ( NR - 1 )*NRG, NRG
DWORK(IPG+J-1+I) = ( DWORK(IPG+J-1+I) -
$ SN * DWORK(ING+I) ) / CS
DWORK(ING+I) = -SN * DWORK(IPG+J-1+I) +
$ CS * DWORK(ING+I)
240 CONTINUE
C
INGC = INGP
250 CONTINUE
C
C Save one block row of R, and shift the generators for the
C calculation of the following row.
C
CALL DLACPY( 'Upper', M, NR-ICOL, DWORK(IPG+JDS), NRG,
$ R(ICOL+1,ICOL+1), LDR )
ICOL = ICOL + M
C
DO 260 I = ( MMNOBR - M )*NRG, ICOL*NRG, -MNRG
CALL DLACPY( 'Full', M, M, DWORK(IPG+I-MNRG), NRG,
$ DWORK(IPG+I), NRG )
260 CONTINUE
C
DO 270 I = ( NR - L )*NRG, ( MMNOBR + L )*NRG, -LNRG
CALL DLACPY( 'Full', M, L, DWORK(IPG+I-LNRG), NRG,
$ DWORK(IPG+I), NRG )
270 CONTINUE
C
CALL DLASET( 'Full', M, L, ZERO, ZERO, DWORK(IPGC), NRG )
JDS = JDS + MNRG
280 CONTINUE
C
END IF
C
C Process the output part of the generators.
C
JWORK = ITAU + L
C
C Reduce the first L columns of the submatrix
C G1(1:M+L+1,2*M*NOBR+1:2*(M+L)*NOBR) to upper triangular form.
C Workspace: need (M+L)*4*NOBR*(M+L+1)+2*L;
C prefer (M+L)*4*NOBR*(M+L+1)+L+L*NB.
C
INGC = ING + MMNOBR*NRG
CALL DGEQRF( NRG, L, DWORK(IPGC), NRG, DWORK(ITAU),
$ DWORK(JWORK), LDWORK-JWORK+1, IERR )
MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) + JWORK - 1 )
C
C Workspace: need (M+L)*4*NOBR*(M+L+1)+L*2*NOBR;
C prefer (M+L)*4*NOBR*(M+L+1)+L+(L*2*NOBR-L)*NB.
C
CALL DORMQR( 'Left', 'Transpose', NRG, LLNOBR-L, L,
$ DWORK(IPGC), NRG, DWORK(ITAU), DWORK(IPGC+LNRG),
$ NRG, DWORK(JWORK), LDWORK-JWORK+1, IERR )
MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) + JWORK - 1 )
C
C Annihilate, column by column, the first L columns of the
C output part of the matrix G2 of negative generators, using
C Householder transformations and modified hyperbolic rotations.
C
DO 300 J = 1, L
CALL DLARFG( NRG, DWORK(INGC), DWORK(INGC+1), 1, TAU )
BETA = DWORK(INGC)
DWORK(INGC) = ONE
INGP = INGC + NRG
CALL DLARF( 'Left', NRG, LLNOBR-J, DWORK(INGC), 1, TAU,
$ DWORK(INGP), NRG, DWORK(ITAU) )
DWORK(INGC) = BETA
C
C Compute the coefficients of the modified hyperbolic
C rotation.
C
CALL MA02FD( DWORK(IPGC+(J-1)*(NRG+1)), DWORK(INGC), CS, SN,
$ IERR )
IF( IERR.NE.0 ) THEN
C
C Error return: the matrix H'*H is not (numerically)
C positive definite.
C
INFO = 1
RETURN
END IF
C
DO 290 I = ( J + MMNOBR )*NRG, ( NR - 1 )*NRG, NRG
DWORK(IPG+J-1+I) = ( DWORK(IPG+J-1+I) -
$ SN * DWORK(ING+I) ) / CS
DWORK(ING+I) = -SN * DWORK(IPG+J-1+I) +
$ CS * DWORK(ING+I)
290 CONTINUE
C
INGC = INGP
300 CONTINUE
C
C Save one block row of R, and shift the generators for the
C calculation of the following row.
C
CALL DLACPY( 'Upper', L, LLNOBR, DWORK(IPGC), NRG,
$ R(MMNOBR+1,MMNOBR+1), LDR )
C
DO 310 I = ( NR - L )*NRG, ( MMNOBR + L )*NRG, -LNRG
CALL DLACPY( 'Full', L, L, DWORK(IPG+I-LNRG), NRG,
$ DWORK(IPG+I), NRG )
310 CONTINUE
C
C Update the output part of generators using the Schur algorithm.
C Workspace: need (M+L)*4*NOBR*(M+L+1)+2*NOBR*L-L.
C
JDS = LNRG
ICOL = L
C
DO 350 K = 2, NOBR2
CALL MB04OD( 'Full', L, LLNOBR-ICOL-L, M+1, DWORK(IPGC+JDS),
$ NRG, DWORK(IPGC+L+JDS), NRG,
$ DWORK(IPGC+JDS+LNRG), NRG,
$ DWORK(IPGC+L+JDS+LNRG), NRG, DWORK(ITAU),
$ DWORK(JWORK) )
C
DO 330 J = 1, L
ICJ = ICOL + J
CALL DLARFG( NRG, DWORK(INGC), DWORK(INGC+1), 1, TAU )
BETA = DWORK(INGC)
DWORK(INGC) = ONE
INGP = INGC + NRG
CALL DLARF( 'Left', NRG, LLNOBR-ICJ, DWORK(INGC), 1,
$ TAU, DWORK(INGP), NRG, DWORK(ITAU) )
DWORK(INGC) = BETA
C
C Compute the coefficients of the modified hyperbolic
C rotation.
C
CALL MA02FD( DWORK(IPGC+J-1+(ICJ-1)*NRG), DWORK(INGC),
$ CS, SN, IERR )
IF( IERR.NE.0 ) THEN
C
C Error return: the matrix H'*H is not (numerically)
C positive definite.
C
INFO = 1
RETURN
END IF
C
DO 320 I = ( ICJ + MMNOBR )*NRG, ( NR - 1 )*NRG, NRG
DWORK(IPG+J-1+I) = ( DWORK(IPG+J-1+I) -
$ SN * DWORK(ING+I) ) / CS
DWORK(ING+I) = -SN * DWORK(IPG+J-1+I) +
$ CS * DWORK(ING+I)
320 CONTINUE
C
INGC = INGP
330 CONTINUE
C
C Save one block row of R, and shift the generators for the
C calculation of the following row.
C
CALL DLACPY( 'Upper', L, LLNOBR-ICOL, DWORK(IPGC+JDS), NRG,
$ R(MMNOBR+ICOL+1,MMNOBR+ICOL+1), LDR )
C
DO 340 I = ( NR - L )*NRG, ( MMNOBR + ICOL )*NRG, -LNRG
CALL DLACPY( 'Full', L, L, DWORK(IPG+I-LNRG), NRG,
$ DWORK(IPG+I), NRG )
340 CONTINUE
C
ICOL = ICOL + L
JDS = JDS + LNRG
350 CONTINUE
C
IF ( MOESP .AND. M.GT.0 ) THEN
C
C For the MOESP algorithm, interchange the past and future
C input parts of the R factor, and compute the new R factor
C using a specialized QR factorization. A tailored fast
C QR factorization for the MOESP algorithm could be slightly
C more efficient.
C
DO 360 J = 1, MNOBR
CALL DSWAP( J, R(1,J), 1, R(1,MNOBR+J), 1 )
CALL DCOPY( MNOBR, R(J+1,MNOBR+J), 1, R(J+1,J), 1 )
CALL DCOPY( MMNOBR-J, DUM, 0, R(J+1,MNOBR+J), 1 )
360 CONTINUE
C
C Triangularize the first two block columns (using structure),
C and apply the transformation to the corresponding part of
C the remaining block columns.
C Workspace: need 2*(M+L)*NOBR.
C
ITAU = 1
JWORK = ITAU + MMNOBR
CALL MB04ID( MMNOBR, MMNOBR, MNOBR-1, LLNOBR, R, LDR,
$ R(1,MMNOBR+1), LDR, DWORK(ITAU),
$ DWORK(JWORK), LDWORK-JWORK+1, IERR )
MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) + JWORK - 1 )
END IF
END IF
C
NSMPSM = 0
ICYCLE = 1
C
C Return optimal workspace in DWORK(1).
C
DWORK( 1 ) = MAXWRK
MAXWRK = 1
RETURN
C
C *** Last line of IB01MY ***
END