dynare/mex/sources/libslicot/IB01RD.f

      SUBROUTINE IB01RD( JOB, N, M, L, NSMP, A, LDA, B, LDB, C, LDC, D,
     $                   LDD, U, LDU, Y, LDY, X0, TOL, IWORK, DWORK,
     $                   LDWORK, IWARN, INFO )
C
C     SLICOT RELEASE 5.0.
C
C     Copyright (c) 2002-2009 NICONET e.V.
C
C     This program is free software: you can redistribute it and/or
C     modify it under the terms of the GNU General Public License as
C     published by the Free Software Foundation, either version 2 of
C     the License, or (at your option) any later version.
C
C     This program is distributed in the hope that it will be useful,
C     but WITHOUT ANY WARRANTY; without even the implied warranty of
C     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
C     GNU General Public License for more details.
C
C     You should have received a copy of the GNU General Public License
C     along with this program.  If not, see
C     <http://www.gnu.org/licenses/>.
C
C     PURPOSE
C
C     To estimate the initial state of a linear time-invariant (LTI)
C     discrete-time system, given the system matrices  (A,B,C,D)  and
C     the input and output trajectories of the system. The model
C     structure is :
C
C           x(k+1) = Ax(k) + Bu(k),   k >= 0,
C           y(k)   = Cx(k) + Du(k),
C
C     where  x(k)  is the  n-dimensional state vector (at time k),
C            u(k)  is the  m-dimensional input vector,
C            y(k)  is the  l-dimensional output vector,
C     and  A, B, C, and D  are real matrices of appropriate dimensions.
C     Matrix  A  is assumed to be in a real Schur form.
C
C     ARGUMENTS
C
C     Mode Parameters
C
C     JOB     CHARACTER*1
C             Specifies whether or not the matrix D is zero, as follows:
C             = 'Z':  the matrix  D  is zero;
C             = 'N':  the matrix  D  is not zero.
C
C     Input/Output Parameters
C
C     N       (input) INTEGER
C             The order of the system.  N >= 0.
C
C     M       (input) INTEGER
C             The number of system inputs.  M >= 0.
C
C     L       (input) INTEGER
C             The number of system outputs.  L > 0.
C
C     NSMP    (input) INTEGER
C             The number of rows of matrices  U  and  Y  (number of
C             samples used,  t).  NSMP >= N.
C
C     A       (input) DOUBLE PRECISION array, dimension (LDA,N)
C             The leading N-by-N part of this array must contain the
C             system state matrix  A  in a real Schur form.
C
C     LDA     INTEGER
C             The leading dimension of the array A.  LDA >= MAX(1,N).
C
C     B       (input) DOUBLE PRECISION array, dimension (LDB,M)
C             The leading N-by-M part of this array must contain the
C             system input matrix  B  (corresponding to the real Schur
C             form of  A).
C             If  N = 0  or  M = 0,  this array is not referenced.
C
C     LDB     INTEGER
C             The leading dimension of the array B.
C             LDB >= N,  if  N > 0  and  M > 0;
C             LDB >= 1,  if  N = 0  or   M = 0.
C
C     C       (input) DOUBLE PRECISION array, dimension (LDC,N)
C             The leading L-by-N part of this array must contain the
C             system output matrix  C  (corresponding to the real Schur
C             form of  A).
C
C     LDC     INTEGER
C             The leading dimension of the array C.  LDC >= L.
C
C     D       (input) DOUBLE PRECISION array, dimension (LDD,M)
C             The leading L-by-M part of this array must contain the
C             system input-output matrix.
C             If  M = 0  or  JOB = 'Z',  this array is not referenced.
C
C     LDD     INTEGER
C             The leading dimension of the array D.
C             LDD >= L,  if  M > 0  and  JOB = 'N';
C             LDD >= 1,  if  M = 0  or   JOB = 'Z'.
C
C     U       (input) DOUBLE PRECISION array, dimension (LDU,M)
C             If  M > 0,  the leading NSMP-by-M part of this array must
C             contain the t-by-m input-data sequence matrix  U,
C             U = [u_1 u_2 ... u_m].  Column  j  of  U  contains the
C             NSMP  values of the j-th input component for consecutive
C             time increments.
C             If M = 0, this array is not referenced.
C
C     LDU     INTEGER
C             The leading dimension of the array U.
C             LDU >= MAX(1,NSMP),  if M > 0;
C             LDU >= 1,            if M = 0.
C
C     Y       (input) DOUBLE PRECISION array, dimension (LDY,L)
C             The leading NSMP-by-L part of this array must contain the
C             t-by-l output-data sequence matrix  Y,
C             Y = [y_1 y_2 ... y_l].  Column  j  of  Y  contains the
C             NSMP  values of the j-th output component for consecutive
C             time increments.
C
C     LDY     INTEGER
C             The leading dimension of the array Y.  LDY >= MAX(1,NSMP).
C
C     X0      (output) DOUBLE PRECISION array, dimension (N)
C             The estimated initial state of the system,  x(0).
C
C     Tolerances
C
C     TOL     DOUBLE PRECISION
C             The tolerance to be used for estimating the rank of
C             matrices. If the user sets  TOL > 0,  then the given value
C             of  TOL  is used as a lower bound for the reciprocal
C             condition number;  a matrix whose estimated condition
C             number is less than  1/TOL  is considered to be of full
C             rank.  If the user sets  TOL <= 0,  then  EPS  is used
C             instead, where  EPS  is the relative machine precision
C             (see LAPACK Library routine DLAMCH).  TOL <= 1.
C
C     Workspace
C
C     IWORK   INTEGER array, dimension (N)
C
C     DWORK   DOUBLE PRECISION array, dimension (LDWORK)
C             On exit, if  INFO = 0,  DWORK(1) returns the optimal value
C             of LDWORK and  DWORK(2)  contains the reciprocal condition
C             number of the triangular factor of the QR factorization of
C             the matrix  Gamma  (see METHOD).
C             On exit, if  INFO = -22,  DWORK(1)  returns the minimum
C             value of LDWORK.
C
C     LDWORK  INTEGER
C             The length of the array DWORK.
C             LDWORK >= max( 2, min( LDW1, LDW2 ) ),  where
C             LDW1 = t*L*(N + 1) + 2*N + max( 2*N*N, 4*N ),
C             LDW2 =   N*(N + 1) + 2*N +
C                      max( q*(N + 1) + 2*N*N + L*N, 4*N ),
C                q = N*L.
C             For good performance,  LDWORK  should be larger.
C             If  LDWORK >= LDW1,  then standard QR factorization of
C             the matrix  Gamma  (see METHOD) is used. Otherwise, the
C             QR factorization is computed sequentially by performing
C             NCYCLE  cycles, each cycle (except possibly the last one)
C             processing  s  samples, where  s  is chosen by equating
C             LDWORK  to  LDW2,  for  q  replaced by  s*L.
C             The computational effort may increase and the accuracy may
C             decrease with the decrease of  s.  Recommended value is
C             LDRWRK = LDW1,  assuming a large enough cache size, to
C             also accommodate  A, B, C, D, U,  and  Y.
C
C     Warning Indicator
C
C     IWARN   INTEGER
C             = 0:  no warning;
C             = 4:  the least squares problem to be solved has a
C                   rank-deficient coefficient matrix.
C
C     Error Indicator
C
C     INFO    INTEGER
C             = 0:  successful exit;
C             < 0:  if INFO = -i, the i-th argument had an illegal
C                   value;
C             = 2:  the singular value decomposition (SVD) algorithm did
C                   not converge.
C
C     METHOD
C
C     An extension and refinement of the method in [1] is used.
C     Specifically, the output y0(k) of the system for zero initial
C     state is computed for k = 0, 1, ...,  t-1 using the given model.
C     Then the following least squares problem is solved for x(0)
C
C                         (     C     )            (   y(0) - y0(0)   )
C                         (    C*A    )            (   y(1) - y0(1)   )
C        Gamma * x(0)  =  (     :     ) * x(0)  =  (        :         ).
C                         (     :     )            (        :         )
C                         ( C*A^(t-1) )            ( y(t-1) - y0(t-1) )
C
C     The coefficient matrix  Gamma  is evaluated using powers of A with
C     exponents 2^k. The QR decomposition of this matrix is computed.
C     If its triangular factor  R  is too ill conditioned, then singular
C     value decomposition of  R  is used.
C
C     If the coefficient matrix cannot be stored in the workspace (i.e.,
C     LDWORK < LDW1),  the QR decomposition is computed sequentially.
C
C     REFERENCES
C
C     [1] Verhaegen M., and Varga, A.
C         Some Experience with the MOESP Class of Subspace Model
C         Identification Methods in Identifying the BO105 Helicopter.
C         Report TR R165-94, DLR Oberpfaffenhofen, 1994.
C
C     NUMERICAL ASPECTS
C
C     The implemented method is numerically stable.
C
C     CONTRIBUTOR
C
C     V. Sima, Research Institute for Informatics, Bucharest, Apr. 2000.
C
C     REVISIONS
C
C     V. Sima, Research Institute for Informatics, Bucharest, Feb. 2004.
C
C     KEYWORDS
C
C     Identification methods; least squares solutions; multivariable
C     systems; QR decomposition; singular value decomposition.
C
C     ******************************************************************
C
C     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE, TWO, THREE
      PARAMETER          ( ZERO  = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
     $                     THREE = 3.0D0 )
C     IBLOCK is a threshold value for switching to a block algorithm
C     for  U  (to avoid row by row passing through  U).
      INTEGER            IBLOCK
      PARAMETER          ( IBLOCK = 16384 )
C     .. Scalar Arguments ..
      DOUBLE PRECISION   TOL
      INTEGER            INFO, IWARN, L, LDA, LDB, LDC, LDD, LDU,
     $                   LDWORK, LDY, M, N, NSMP
      CHARACTER          JOB
C     .. Array Arguments ..
      DOUBLE PRECISION   A(LDA, *), B(LDB, *), C(LDC, *), D(LDD, *),
     $                   DWORK(*),  U(LDU, *), X0(*), Y(LDY, *)
      INTEGER            IWORK(*)
C     .. Local Scalars ..
      DOUBLE PRECISION   RCOND, TOLL
      INTEGER            I2, IA, IAS, IC, ICYCLE, IE, IERR, IEXPON,
     $                   IG, INIGAM, INIH, INIR, INIT, IQ, IREM, IRHS,
     $                   ISIZE, ISV, ITAU, IU, IUPNT, IUT, IUTRAN, IX,
     $                   IXINIT, IY, IYPNT, J, JWORK, K, LDDW, LDR,
     $                   LDW1, LDW2, MAXWRK, MINSMP, MINWLS, MINWRK, NC,
     $                   NCP1, NCYCLE, NN, NOBS, NRBL, NROW, NSMPL, RANK
      LOGICAL            BLOCK, FIRST, NCYC, POWER2, SWITCH, WITHD
C     .. Local Arrays ..
      DOUBLE PRECISION   DUM(1)
C     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      DOUBLE PRECISION   DLAMCH
      EXTERNAL           DLAMCH, ILAENV, LSAME
C     .. External Subroutines ..
      EXTERNAL           DAXPY, DCOPY, DGELSS, DGEMV, DGEQRF, DLACPY,
     $                   DLASET, DORMQR, DTRCON, DTRMM, DTRMV, DTRSV,
     $                   MA02AD, MB01TD, MB04OD, XERBLA
C     .. Intrinsic Functions ..
      INTRINSIC          DBLE, INT, LOG, MAX, MIN, MOD
C     .. Executable Statements ..
C
C     Check the input parameters.
C
      WITHD  = LSAME( JOB, 'N' )
      IWARN  = 0
      INFO   = 0
      NN     = N*N
      MINSMP = N
C
      IF( .NOT.( LSAME( JOB, 'Z' ) .OR. WITHD ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( M.LT.0 ) THEN
         INFO = -3
      ELSE IF( L.LE.0 ) THEN
         INFO = -4
      ELSE IF( NSMP.LT.MINSMP ) THEN
         INFO = -5
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -7
      ELSE IF( LDB.LT.1 .OR. ( LDB.LT.N .AND. M.GT.0 ) ) THEN
         INFO = -9
      ELSE IF( LDC.LT.L ) THEN
         INFO = -11
      ELSE IF( LDD.LT.1 .OR. ( WITHD .AND. LDD.LT.L .AND. M.GT.0 ) )
     $      THEN
         INFO = -13
      ELSE IF( LDU.LT.1 .OR. ( M.GT.0 .AND. LDU.LT.NSMP ) ) THEN
         INFO = -15
      ELSE IF( LDY.LT.MAX( 1, NSMP ) ) THEN
         INFO = -17
      ELSE IF( TOL.GT.ONE ) THEN
         INFO = -19
      END IF
C
C     Compute workspace.
C      (Note: Comments in the code beginning "Workspace:" describe the
C       minimal amount of workspace needed at that point in the code,
C       as well as the preferred amount for good performance.
C       NB refers to the optimal block size for the immediately
C       following subroutine, as returned by ILAENV.)
C
      NSMPL  = NSMP*L
      IQ     = MINSMP*L
      NCP1   = N + 1
      ISIZE  = NSMPL*NCP1
      IC     = 2*NN
      MINWLS = MINSMP*NCP1
      ITAU   = IC + L*N
      LDW1   = ISIZE  + 2*N + MAX( IC, 4*N )
      LDW2   = MINWLS + 2*N + MAX( IQ*NCP1 + ITAU, 4*N )
      MINWRK = MAX( MIN( LDW1, LDW2 ), 2 )
      IF ( INFO.EQ.0 .AND. LDWORK.GE.MINWRK ) THEN
         MAXWRK = ISIZE + 2*N + MAX( N*ILAENV( 1, 'DGEQRF', ' ', NSMPL,
     $                                         N, -1, -1 ),
     $                               ILAENV( 1, 'DORMQR', 'LT', NSMPL,
     $                                       1, N, -1 ) )
         MAXWRK = MAX( MAXWRK, MINWRK )
      END IF
C
      IF ( INFO.EQ.0 .AND. LDWORK.LT.MINWRK ) THEN
         INFO = -22
         DWORK(1) = MINWRK
      END IF
C
C     Return if there are illegal arguments.
C
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'IB01RD', -INFO )
         RETURN
      END IF
C
C     Quick return if possible.
C
      IF ( N.EQ.0 ) THEN
         DWORK(1) = TWO
         DWORK(2) = ONE
         RETURN
      END IF
C
C     Set up the least squares problem, either directly, if enough
C     workspace, or sequentially, otherwise.
C
      IYPNT = 1
      IUPNT = 1
      INIR  = 1
      IF ( LDWORK.GE.LDW1 ) THEN
C
C        Enough workspace for solving the problem directly.
C
         NCYCLE = 1
         NOBS   = NSMP
         LDDW   = NSMPL
         INIGAM = 1
      ELSE
C
C        NCYCLE > 1  cycles are needed for solving the problem
C        sequentially, taking  NOBS  samples in each cycle (or the
C        remaining samples in the last cycle).
C
         JWORK  = LDWORK - MINWLS - 2*N - ITAU
         LDDW   = JWORK/NCP1
         NOBS   = LDDW/L
         LDDW   = L*NOBS
         NCYCLE = NSMP/NOBS
         IF ( MOD( NSMP, NOBS ).NE.0 )
     $      NCYCLE = NCYCLE + 1
         INIH   = INIR + NN
         INIGAM = INIH + N
      END IF
C
      NCYC   = NCYCLE.GT.1
      IRHS   = INIGAM + LDDW*N
      IXINIT = IRHS   + LDDW
      IC     = IXINIT + N
      IF( NCYC ) THEN
         IA   = IC + L*N
         LDR  = N
         IE   = INIGAM
      ELSE
         INIH = IRHS
         IA   = IC
         LDR  = LDDW
         IE   = IXINIT
      END IF
      IUTRAN = IA
      IAS    = IA + NN
      ITAU   = IA
      DUM(1) = ZERO
C
C     Set block parameters for passing through the array  U.
C
      BLOCK = M.GT.1 .AND. NSMP*M.GE.IBLOCK
      IF ( BLOCK ) THEN
         NRBL = ( LDWORK - IUTRAN + 1 )/M
         NC   = NOBS/NRBL
         IF ( MOD( NOBS, NRBL ).NE.0 )
     $      NC = NC + 1
         INIT  = ( NC - 1 )*NRBL
         BLOCK = BLOCK .AND. NRBL.GT.1
      END IF
C
C     Perform direct of sequential compression of the matrix  Gamma.
C
      DO 150 ICYCLE = 1, NCYCLE
         FIRST = ICYCLE.EQ.1
         IF ( .NOT.FIRST ) THEN
            IF ( ICYCLE.EQ.NCYCLE ) THEN
               NOBS = NSMP - ( NCYCLE - 1 )*NOBS
               LDDW = L*NOBS
               IF ( BLOCK ) THEN
                  NC = NOBS/NRBL
                  IF ( MOD( NOBS, NRBL ).NE.0 )
     $               NC = NC + 1
                  INIT = ( NC - 1 )*NRBL
               END IF
            END IF
         END IF
C
C        Compute the extended observability matrix  Gamma.
C        Workspace: need   s*L*(N + 1) + 2*N*N + 2*N + a + w,
C                   where  s = NOBS,
C                          a = 0,   w = 0,          if NCYCLE = 1,
C                          a = L*N, w = N*(N + 1),  if NCYCLE > 1;
C                   prefer as above, with  s = t,  a = w = 0.
C
         JWORK  = IAS + NN
         IEXPON = INT( LOG( DBLE( NOBS ) )/LOG( TWO ) )
         IREM   = L*( NOBS - 2**IEXPON )
         POWER2 = IREM.EQ.0
         IF ( .NOT.POWER2 )
     $      IEXPON = IEXPON + 1
C
         IF ( FIRST ) THEN
            CALL DLACPY( 'Full', L, N, C, LDC, DWORK(INIGAM), LDDW )
         ELSE
            CALL DLACPY( 'Full', L, N, DWORK(IC), L, DWORK(INIGAM),
     $                   LDDW )
         END IF
C                                       p
C        Use powers of the matrix  A:  A ,  p = 2**(J-1).
C
         CALL DLACPY( 'Upper', N, N, A, LDA, DWORK(IA), N )
         IF ( N.GT.1 )
     $      CALL DCOPY( N-1, A(2,1), LDA+1, DWORK(IA+1), N+1 )
         I2   = L
         NROW = 0
C
         DO 20 J = 1, IEXPON
            IG = INIGAM
            IF ( J.LT.IEXPON .OR. POWER2 ) THEN
               NROW = I2
            ELSE
               NROW = IREM
            END IF
C
            CALL DLACPY( 'Full', NROW, N, DWORK(IG), LDDW, DWORK(IG+I2),
     $                   LDDW )
            CALL DTRMM(  'Right', 'Upper', 'No Transpose', 'Non Unit',
     $                   NROW, N, ONE, DWORK(IA), N, DWORK(IG+I2),
     $                   LDDW )
C                                                            p
C           Compute the contribution of the subdiagonal of  A   to the
C           product.
C
            DO 10 IX = 1, N - 1
               CALL DAXPY( NROW, DWORK(IA+(IX-1)*N+IX), DWORK(IG+LDDW),
     $                     1, DWORK(IG+I2), 1 )
               IG = IG + LDDW
   10       CONTINUE
C
            IF ( J.LT.IEXPON ) THEN
               CALL DLACPY( 'Upper', N, N, DWORK(IA), N, DWORK(IAS), N )
               CALL DCOPY(  N-1, DWORK(IA+1), N+1, DWORK(IAS+1), N+1 )
               CALL MB01TD( N, DWORK(IAS), N, DWORK(IA), N,
     $                      DWORK(JWORK), IERR )
               I2 = I2*2
            END IF
   20    CONTINUE
C
         IF ( NCYC ) THEN
            IG = INIGAM + I2 + NROW - L
            CALL DLACPY( 'Full', L, N, DWORK(IG), LDDW, DWORK(IC), L )
            CALL DTRMM( 'Right', 'Upper', 'No Transpose', 'Non Unit', L,
     $                  N, ONE, A, LDA, DWORK(IC), L )
C
C           Compute the contribution of the subdiagonal of  A  to the
C           product.
C
            DO 30 IX = 1, N - 1
               CALL DAXPY( L, A(IX+1,IX), DWORK(IG+LDDW), 1,
     $                     DWORK(IC+(IX-1)*L), 1 )
               IG = IG + LDDW
   30       CONTINUE
C
         END IF
C
C        Setup (part of) the right hand side of the least squares
C        problem starting from  DWORK(IRHS);  use the estimated output
C        trajectory for zero initial state, or for the saved final state
C        value of the previous cycle.
C        A specialization of SLICOT Library routine TF01ND is used.
C        For large input sets  (NSMP*M >= IBLOCK),  chunks of  U  are
C        transposed, to reduce the number of row-wise passes.
C        Workspace: need   s*L*(N + 1) + N + w;
C                   prefer as above, with  s = t,  w = 0.
C
         IF ( FIRST )
     $      CALL DCOPY( N, DUM, 0, DWORK(IXINIT), 1 )
         CALL DCOPY( N, DWORK(IXINIT), 1, X0, 1 )
         IY = IRHS
C
         DO 40 J = 1, L
            CALL DCOPY( NOBS, Y(IYPNT,J), 1, DWORK(IY), L )
            IY = IY + 1
   40    CONTINUE
C
         IY = IRHS
         IU = IUPNT
         IF ( M.GT.0 ) THEN
            IF ( WITHD ) THEN
C
               IF ( BLOCK ) THEN
                  SWITCH = .TRUE.
                  NROW   = NRBL
C
                  DO 60 K = 1, NOBS
                     IF ( MOD( K-1, NROW ).EQ.0 .AND. SWITCH ) THEN
                        IUT = IUTRAN
                        IF ( K.GT.INIT ) THEN
                           NROW = NOBS - INIT
                           SWITCH = .FALSE.
                        END IF
                        CALL MA02AD( 'Full', NROW, M, U(IU,1), LDU,
     $                               DWORK(IUT), M )
                        IU = IU + NROW
                     END IF
                     CALL DGEMV( 'No transpose', L, N, -ONE, C, LDC, X0,
     $                           1, ONE, DWORK(IY), 1 )
                     CALL DGEMV( 'No transpose', L, M, -ONE, D, LDD,
     $                           DWORK(IUT), 1, ONE, DWORK(IY), 1 )
                     CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', N,
     $                           A, LDA, X0, 1 )
C
                     DO 50 IX = 2, N
                        X0(IX) = X0(IX) + A(IX,IX-1)*DWORK(IXINIT+IX-2)
   50                CONTINUE
C
                     CALL DGEMV( 'No transpose', N, M, ONE, B, LDB,
     $                           DWORK(IUT), 1, ONE, X0, 1 )
                     CALL DCOPY( N, X0, 1, DWORK(IXINIT), 1 )
                     IY  = IY  + L
                     IUT = IUT + M
   60             CONTINUE
C
               ELSE
C
                  DO 80 K = 1, NOBS
                     CALL DGEMV( 'No transpose', L, N, -ONE, C, LDC, X0,
     $                           1, ONE, DWORK(IY), 1 )
                     CALL DGEMV( 'No transpose', L, M, -ONE, D, LDD,
     $                           U(IU,1), LDU, ONE, DWORK(IY), 1 )
                     CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', N,
     $                           A, LDA, X0, 1 )
C
                     DO 70 IX = 2, N
                        X0(IX) = X0(IX) + A(IX,IX-1)*DWORK(IXINIT+IX-2)
   70                CONTINUE
C
                     CALL DGEMV( 'No transpose', N, M, ONE, B, LDB,
     $                           U(IU,1), LDU, ONE, X0, 1 )
                     CALL DCOPY( N, X0, 1, DWORK(IXINIT), 1 )
                     IY = IY + L
                     IU = IU + 1
   80             CONTINUE
C
               END IF
C
            ELSE
C
               IF ( BLOCK ) THEN
                  SWITCH = .TRUE.
                  NROW   = NRBL
C
                  DO 100 K = 1, NOBS
                     IF ( MOD( K-1, NROW ).EQ.0 .AND. SWITCH ) THEN
                        IUT = IUTRAN
                        IF ( K.GT.INIT ) THEN
                           NROW = NOBS - INIT
                           SWITCH = .FALSE.
                        END IF
                        CALL MA02AD( 'Full', NROW, M, U(IU,1), LDU,
     $                               DWORK(IUT), M )
                        IU = IU + NROW
                     END IF
                     CALL DGEMV( 'No transpose', L, N, -ONE, C, LDC, X0,
     $                           1, ONE, DWORK(IY), 1 )
                     CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', N,
     $                           A, LDA, X0, 1 )
C
                     DO 90 IX = 2, N
                        X0(IX) = X0(IX) + A(IX,IX-1)*DWORK(IXINIT+IX-2)
   90                CONTINUE
C
                     CALL DGEMV( 'No transpose', N, M, ONE, B, LDB,
     $                           DWORK(IUT), 1, ONE, X0, 1 )
                     CALL DCOPY( N, X0, 1, DWORK(IXINIT), 1 )
                     IY  = IY  + L
                     IUT = IUT + M
  100             CONTINUE
C
               ELSE
C
                  DO 120 K = 1, NOBS
                     CALL DGEMV( 'No transpose', L, N, -ONE, C, LDC, X0,
     $                           1, ONE, DWORK(IY), 1 )
                     CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', N,
     $                           A, LDA, X0, 1 )
C
                     DO 110 IX = 2, N
                        X0(IX) = X0(IX) + A(IX,IX-1)*DWORK(IXINIT+IX-2)
  110                CONTINUE
C
                     CALL DGEMV( 'No transpose', N, M, ONE, B, LDB,
     $                           U(IU,1), LDU, ONE, X0, 1 )
                     CALL DCOPY( N, X0, 1, DWORK(IXINIT), 1 )
                     IY = IY + L
                     IU = IU + 1
  120             CONTINUE
C
               END IF
C
            END IF
C
         ELSE
C
            DO 140 K = 1, NOBS
               CALL DGEMV( 'No transpose', L, N, -ONE, C, LDC, X0, 1,
     $                     ONE, DWORK(IY), 1 )
               CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', N, A,
     $                     LDA, X0, 1 )
C
               DO 130 IX = 2, N
                  X0(IX) = X0(IX) + A(IX,IX-1)*DWORK(IXINIT+IX-2)
  130          CONTINUE
C
               CALL DCOPY( N, X0, 1, DWORK(IXINIT), 1 )
               IY = IY + L
  140       CONTINUE
C
         END IF
C
C        Compress the data using (sequential) QR factorization.
C        Workspace: need   v + 2*N;
C                   where  v = s*L*(N + 1) + N + a + w.
C
         JWORK = ITAU + N
         IF ( FIRST ) THEN
C
C           Compress the first data segment of  Gamma.
C           Workspace: need   v + 2*N,
C                      prefer v + N + N*NB.
C
            CALL DGEQRF( LDDW, N, DWORK(INIGAM), LDDW, DWORK(ITAU),
     $                   DWORK(JWORK), LDWORK-JWORK+1, IERR )
C
C           Apply the transformation to the right hand side part.
C           Workspace: need   v + N + 1,
C                      prefer v + N + NB.
C
            CALL DORMQR( 'Left', 'Transpose', LDDW, 1, N, DWORK(INIGAM),
     $                   LDDW, DWORK(ITAU), DWORK(IRHS), LDDW,
     $                   DWORK(JWORK), LDWORK-JWORK+1, IERR )
C
            IF ( NCYC ) THEN
C
C              Save the triangular factor of  Gamma  and the
C              corresponding right hand side.
C
               CALL DLACPY( 'Upper', N, NCP1, DWORK(INIGAM), LDDW,
     $                      DWORK(INIR), LDR )
            END IF
         ELSE
C
C           Compress the current (but not the first) data segment of
C           Gamma.
C           Workspace: need   v + N - 1.
C
            CALL MB04OD( 'Full', N, 1,  LDDW, DWORK(INIR), LDR,
     $                   DWORK(INIGAM), LDDW, DWORK(INIH), LDR,
     $                   DWORK(IRHS), LDDW, DWORK(ITAU), DWORK(JWORK) )
         END IF
C
         IUPNT = IUPNT + NOBS
         IYPNT = IYPNT + NOBS
  150 CONTINUE
C
C     Estimate the reciprocal condition number of the triangular factor
C     of the QR decomposition.
C     Workspace: need  u + 3*N, where
C                      u = t*L*(N + 1), if NCYCLE = 1;
C                      u = w,           if NCYCLE > 1.
C
      CALL DTRCON( '1-norm', 'Upper', 'No Transpose', N, DWORK(INIR),
     $             LDR, RCOND, DWORK(IE), IWORK, IERR )
C
      TOLL = TOL
      IF ( TOLL.LE.ZERO )
     $   TOLL = DLAMCH( 'Precision' )
      IF ( RCOND.LE.TOLL**( TWO/THREE ) ) THEN
         IWARN = 4
C
C        The least squares problem is ill-conditioned.
C        Use SVD to solve it.
C        Workspace: need   u + 6*N;
C                   prefer larger.
C
         CALL DLASET( 'Lower', N-1, N-1, ZERO, ZERO, DWORK(INIR+1),
     $                LDR )
         ISV   = IE
         JWORK = ISV + N
         CALL DGELSS( N, N, 1, DWORK(INIR), LDR, DWORK(INIH), LDR,
     $                DWORK(ISV), TOLL, RANK, DWORK(JWORK),
     $                LDWORK-JWORK+1, IERR )
         IF ( IERR.GT.0 ) THEN
C
C           Return if SVD algorithm did not converge.
C
            INFO = 2
            RETURN
         END IF
         MAXWRK = MAX( MAXWRK, INT( DWORK(JWORK) ) - JWORK + 1 )
      ELSE
C
C        Find the least squares solution using QR decomposition only.
C
         CALL DTRSV( 'Upper', 'No Transpose', 'Non Unit', N,
     $               DWORK(INIR), LDR, DWORK(INIH), 1 )
      END IF
C
C     Return the estimated initial state of the system  x0.
C
      CALL DCOPY( N, DWORK(INIH), 1, X0, 1 )
C
      DWORK(1) = MAXWRK
      DWORK(2) = RCOND
C
      RETURN
C
C *** End of IB01RD ***
      END