OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches

Functions

subroutine dgejsv (joba, jobu, jobv, jobr, jobt, jobp, m, n, a, lda, sva, u, ldu, v, ldv, work, lwork, iwork, info)
 DGEJSV
subroutine dgesdd (jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, iwork, info)
 DGESDD
subroutine dgesvd (jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
  DGESVD computes the singular value decomposition (SVD) for GE matrices
subroutine dgesvdq (joba, jobp, jobr, jobu, jobv, m, n, a, lda, s, u, ldu, v, ldv, numrank, iwork, liwork, work, lwork, rwork, lrwork, info)
  DGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices
subroutine dgesvdx (jobu, jobvt, range, m, n, a, lda, vl, vu, il, iu, ns, s, u, ldu, vt, ldvt, work, lwork, iwork, info)
  DGESVDX computes the singular value decomposition (SVD) for GE matrices
subroutine dggsvd3 (jobu, jobv, jobq, m, n, p, k, l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, work, lwork, iwork, info)
  DGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Detailed Description

This is the group of double singular value driver functions for GE matrices

Function Documentation

◆ dgejsv()

subroutine dgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
double precision, dimension( lda, * ) a,
integer lda,
double precision, dimension( n ) sva,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldv, * ) v,
integer ldv,
double precision, dimension( lwork ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

DGEJSV

Download DGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^t,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
!> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> DGEJSV can sometimes compute tiny singular values and their singular vectors much
!> more accurately than other SVD routines, see below under Further Details.
!> 
Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>        Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>             with well-conditioned B and arbitrary diagonal matrix D.
!>             The accuracy cannot be spoiled by COLUMN scaling. The
!>             accuracy of the computed output depends on the condition of
!>             B, and the procedure aims at the best theoretical accuracy.
!>             The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>             bounded by f(M,N)*epsilon* cond(B), independent of D.
!>             The input matrix is preprocessed with the QRF with column
!>             pivoting. This initial preprocessing and preconditioning by
!>             a rank revealing QR factorization is common for all values of
!>             JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>             condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>             D1, D2, and well-conditioned matrix C, this option gives
!>             higher accuracy than the 'C' option. If the structure of the
!>             input matrix is not known, and relative accuracy is
!>             desirable, then this option is advisable. The input matrix A
!>             is preprocessed with QR factorization with FULL (row and
!>             column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>             condition number of B, where A=D*B. If A has heavily weighted
!>             rows, then using this condition number gives too pessimistic
!>             error bound.
!>       = 'A': Small singular values are the noise and the matrix is treated
!>             as numerically rank deficient. The error in the computed
!>             singular values is bounded by f(m,n)*epsilon*||A||.
!>             The computed SVD A = U * S * V^t restores A up to
!>             f(m,n)*epsilon*||A||.
!>             This gives the procedure the licence to discard (set to zero)
!>             all singular values below N*epsilon*||A||.
!>       = 'R': Similar as in 'A'. Rank revealing property of the initial
!>             QR factorization is used do reveal (using triangular factor)
!>             a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>             numerical RANK is declared to be r. The SVD is computed with
!>             absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>        Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>             of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>        Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>             are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>             computed as the product of Jacobi rotations. This option is
!>             allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>             of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>        Specifies the RANGE for the singular values. Issues the licence to
!>        set to zero small positive singular values if they are outside
!>        specified range. If A .NE. 0 is scaled so that the largest singular
!>        value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
!>        the licence to kill columns of A whose norm in c*A is less than
!>        DSQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>        where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>             BLAS and QR factorizations and triangular solvers are
!>             implemented to work in that range. If the condition of A
!>             is greater than BIG, use DGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
!>             (roughly, as described above). This option is recommended.
!>                                            ~~~~~~~~~~~~~~~~~~~~~~~~~~~
!>        For computing the singular values in the FULL range [SFMIN,BIG]
!>        use DGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>        If the matrix is square then the procedure may determine to use
!>        transposed A if A^t seems to be better with respect to convergence.
!>        If the matrix is not square, JOBT is ignored. This is subject to
!>        changes in the future.
!>        The decision is based on two values of entropy over the adjoint
!>        orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>        convergence of Jacobi process if A^t is taken as input. If A is
!>        replaced with A^t, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>        This option can be used to compute only the singular values, or the
!>        full SVD (U, SIGMA and V). For only one set of singular vectors
!>        (U or V), the caller should provide both U and V, as one of the
!>        matrices is used as workspace if the matrix A is transposed.
!>        The implementer can easily remove this constraint and make the
!>        code more complicated. See the descriptions of U and V.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>        Issues the licence to introduce structured perturbations to drown
!>        denormalized numbers. This licence should be active if the
!>        denormals are poorly implemented, causing slow computation,
!>        especially in cases of fast convergence (!). For details see [1,2].
!>        For the sake of simplicity, this perturbations are included only
!>        when the full SVD or only the singular values are requested. The
!>        implementer/user can easily add the perturbation for the cases of
!>        computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is DOUBLE PRECISION array, dimension (N)
!>          On exit,
!>          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
!>            computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For WORK(1) .NE. WORK(2): The singular values of A are
!>            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros obtained by  because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension ( LDU, N )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^t. In that case, [V] is computed
!>                         in U as left singular vectors of A^t and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is DOUBLE PRECISION array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the pprocedure
!>                         replaces A with A^t. In that case, [U] is computed
!>                         in V as right singular vectors of A^t and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (LWORK)
!>          On exit, if N > 0 .AND. M > 0 (else not referenced),
!>          WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such
!>                    that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          WORK(2) = See the description of WORK(1).
!>          WORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
!>                    It is computed using DPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          WORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          WORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>
!>          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
!>                    of diag(A^t*A) / Trace(A^t*A) taken as point in the
!>                    probability simplex.
!>          WORK(7) = the entropy of A*A^t.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of WORK to confirm proper allocation of work space.
!>          LWORK depends on the job:
!>
!>          If only SIGMA is needed (JOBU = 'N', JOBV = 'N') and
!>            -> .. no scaled condition estimate required (JOBE = 'N'):
!>               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
!>               block size for DGEQP3 and DGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7).
!>            -> .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', 'G'). In this case, LWORK is the maximum
!>               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
!>                                                     N+N*N+LWORK(DPOCON),7).
!>
!>          If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
!>            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
!>               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF,
!>               DORMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON),
!>                       N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)).
!>
!>          If SIGMA and the left singular vectors are needed
!>            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
!>               if JOBU = 'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
!>               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
!>                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
!>               Here LWORK(DORMQR) equals N*NB (for JOBU = 'U') or
!>               M*NB (for JOBU = 'F').
!>
!>          If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
!>            -> if JOBV = 'V'
!>               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
!>            -> if JOBV = 'J' the minimal requirement is
!>               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
!>            -> For optimal performance, LWORK should be additionally
!>               larger than N+M*NB, where NB is the optimal block size
!>               for DORMQR.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (M+3*N).
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  DGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3,
!>  DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorizations provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by DGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (DGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (DGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: DGEQP3) should be
!>  implemented as in [3]. We have a new version of DGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in DGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of DGEJSV uses only the simplest, naive data movement.
!> 
Contributors:
Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 473 of file dgejsv.f.

476*
477* -- LAPACK computational routine --
478* -- LAPACK is a software package provided by Univ. of Tennessee, --
479* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
480*
481* .. Scalar Arguments ..
482 IMPLICIT NONE
483 INTEGER INFO, LDA, LDU, LDV, LWORK, M, N
484* ..
485* .. Array Arguments ..
486 DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
487 $ WORK( LWORK )
488 INTEGER IWORK( * )
489 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
490* ..
491*
492* ===========================================================================
493*
494* .. Local Parameters ..
495 DOUBLE PRECISION ZERO, ONE
496 parameter( zero = 0.0d0, one = 1.0d0 )
497* ..
498* .. Local Scalars ..
499 DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
500 $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
501 $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
502 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
503 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC,
504 $ L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
505 $ NOSCAL, ROWPIV, RSVEC, TRANSP
506* ..
507* .. Intrinsic Functions ..
508 INTRINSIC dabs, dlog, max, min, dble, idnint, dsign, dsqrt
509* ..
510* .. External Functions ..
511 DOUBLE PRECISION DLAMCH, DNRM2
512 INTEGER IDAMAX
513 LOGICAL LSAME
514 EXTERNAL idamax, lsame, dlamch, dnrm2
515* ..
516* .. External Subroutines ..
517 EXTERNAL dcopy, dgelqf, dgeqp3, dgeqrf, dlacpy, dlascl,
520*
521 EXTERNAL dgesvj
522* ..
523*
524* Test the input arguments
525*
526 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
527 jracc = lsame( jobv, 'J' )
528 rsvec = lsame( jobv, 'V' ) .OR. jracc
529 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
530 l2rank = lsame( joba, 'R' )
531 l2aber = lsame( joba, 'A' )
532 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
533 l2tran = lsame( jobt, 'T' )
534 l2kill = lsame( jobr, 'R' )
535 defr = lsame( jobr, 'N' )
536 l2pert = lsame( jobp, 'P' )
537*
538 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
539 $ errest .OR. lsame( joba, 'C' ) )) THEN
540 info = - 1
541 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
542 $ lsame( jobu, 'W' )) ) THEN
543 info = - 2
544 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
545 $ lsame( jobv, 'W' )) .OR. ( jracc .AND. (.NOT.lsvec) ) ) THEN
546 info = - 3
547 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
548 info = - 4
549 ELSE IF ( .NOT. ( l2tran .OR. lsame( jobt, 'N' ) ) ) THEN
550 info = - 5
551 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
552 info = - 6
553 ELSE IF ( m .LT. 0 ) THEN
554 info = - 7
555 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
556 info = - 8
557 ELSE IF ( lda .LT. m ) THEN
558 info = - 10
559 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
560 info = - 13
561 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
562 info = - 15
563 ELSE IF ( (.NOT.(lsvec .OR. rsvec .OR. errest).AND.
564 & (lwork .LT. max(7,4*n+1,2*m+n))) .OR.
565 & (.NOT.(lsvec .OR. rsvec) .AND. errest .AND.
566 & (lwork .LT. max(7,4*n+n*n,2*m+n))) .OR.
567 & (lsvec .AND. (.NOT.rsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
568 & .OR.
569 & (rsvec .AND. (.NOT.lsvec) .AND. (lwork .LT. max(7,2*m+n,4*n+1)))
570 & .OR.
571 & (lsvec .AND. rsvec .AND. (.NOT.jracc) .AND.
572 & (lwork.LT.max(2*m+n,6*n+2*n*n)))
573 & .OR. (lsvec .AND. rsvec .AND. jracc .AND.
574 & lwork.LT.max(2*m+n,4*n+n*n,2*n+n*n+6)))
575 & THEN
576 info = - 17
577 ELSE
578* #:)
579 info = 0
580 END IF
581*
582 IF ( info .NE. 0 ) THEN
583* #:(
584 CALL xerbla( 'DGEJSV', - info )
585 RETURN
586 END IF
587*
588* Quick return for void matrix (Y3K safe)
589* #:)
590 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
591 iwork(1:3) = 0
592 work(1:7) = 0
593 RETURN
594 ENDIF
595*
596* Determine whether the matrix U should be M x N or M x M
597*
598 IF ( lsvec ) THEN
599 n1 = n
600 IF ( lsame( jobu, 'F' ) ) n1 = m
601 END IF
602*
603* Set numerical parameters
604*
605*! NOTE: Make sure DLAMCH() does not fail on the target architecture.
606*
607 epsln = dlamch('Epsilon')
608 sfmin = dlamch('SafeMinimum')
609 small = sfmin / epsln
610 big = dlamch('O')
611* BIG = ONE / SFMIN
612*
613* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
614*
615*(!) If necessary, scale SVA() to protect the largest norm from
616* overflow. It is possible that this scaling pushes the smallest
617* column norm left from the underflow threshold (extreme case).
618*
619 scalem = one / dsqrt(dble(m)*dble(n))
620 noscal = .true.
621 goscal = .true.
622 DO 1874 p = 1, n
623 aapp = zero
624 aaqq = one
625 CALL dlassq( m, a(1,p), 1, aapp, aaqq )
626 IF ( aapp .GT. big ) THEN
627 info = - 9
628 CALL xerbla( 'DGEJSV', -info )
629 RETURN
630 END IF
631 aaqq = dsqrt(aaqq)
632 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
633 sva(p) = aapp * aaqq
634 ELSE
635 noscal = .false.
636 sva(p) = aapp * ( aaqq * scalem )
637 IF ( goscal ) THEN
638 goscal = .false.
639 CALL dscal( p-1, scalem, sva, 1 )
640 END IF
641 END IF
642 1874 CONTINUE
643*
644 IF ( noscal ) scalem = one
645*
646 aapp = zero
647 aaqq = big
648 DO 4781 p = 1, n
649 aapp = max( aapp, sva(p) )
650 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
651 4781 CONTINUE
652*
653* Quick return for zero M x N matrix
654* #:)
655 IF ( aapp .EQ. zero ) THEN
656 IF ( lsvec ) CALL dlaset( 'G', m, n1, zero, one, u, ldu )
657 IF ( rsvec ) CALL dlaset( 'G', n, n, zero, one, v, ldv )
658 work(1) = one
659 work(2) = one
660 IF ( errest ) work(3) = one
661 IF ( lsvec .AND. rsvec ) THEN
662 work(4) = one
663 work(5) = one
664 END IF
665 IF ( l2tran ) THEN
666 work(6) = zero
667 work(7) = zero
668 END IF
669 iwork(1) = 0
670 iwork(2) = 0
671 iwork(3) = 0
672 RETURN
673 END IF
674*
675* Issue warning if denormalized column norms detected. Override the
676* high relative accuracy request. Issue licence to kill columns
677* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
678* #:(
679 warning = 0
680 IF ( aaqq .LE. sfmin ) THEN
681 l2rank = .true.
682 l2kill = .true.
683 warning = 1
684 END IF
685*
686* Quick return for one-column matrix
687* #:)
688 IF ( n .EQ. 1 ) THEN
689*
690 IF ( lsvec ) THEN
691 CALL dlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
692 CALL dlacpy( 'A', m, 1, a, lda, u, ldu )
693* computing all M left singular vectors of the M x 1 matrix
694 IF ( n1 .NE. n ) THEN
695 CALL dgeqrf( m, n, u,ldu, work, work(n+1),lwork-n,ierr )
696 CALL dorgqr( m,n1,1, u,ldu,work,work(n+1),lwork-n,ierr )
697 CALL dcopy( m, a(1,1), 1, u(1,1), 1 )
698 END IF
699 END IF
700 IF ( rsvec ) THEN
701 v(1,1) = one
702 END IF
703 IF ( sva(1) .LT. (big*scalem) ) THEN
704 sva(1) = sva(1) / scalem
705 scalem = one
706 END IF
707 work(1) = one / scalem
708 work(2) = one
709 IF ( sva(1) .NE. zero ) THEN
710 iwork(1) = 1
711 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
712 iwork(2) = 1
713 ELSE
714 iwork(2) = 0
715 END IF
716 ELSE
717 iwork(1) = 0
718 iwork(2) = 0
719 END IF
720 iwork(3) = 0
721 IF ( errest ) work(3) = one
722 IF ( lsvec .AND. rsvec ) THEN
723 work(4) = one
724 work(5) = one
725 END IF
726 IF ( l2tran ) THEN
727 work(6) = zero
728 work(7) = zero
729 END IF
730 RETURN
731*
732 END IF
733*
734 transp = .false.
735 l2tran = l2tran .AND. ( m .EQ. n )
736*
737 aatmax = -one
738 aatmin = big
739 IF ( rowpiv .OR. l2tran ) THEN
740*
741* Compute the row norms, needed to determine row pivoting sequence
742* (in the case of heavily row weighted A, row pivoting is strongly
743* advised) and to collect information needed to compare the
744* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.).
745*
746 IF ( l2tran ) THEN
747 DO 1950 p = 1, m
748 xsc = zero
749 temp1 = one
750 CALL dlassq( n, a(p,1), lda, xsc, temp1 )
751* DLASSQ gets both the ell_2 and the ell_infinity norm
752* in one pass through the vector
753 work(m+n+p) = xsc * scalem
754 work(n+p) = xsc * (scalem*dsqrt(temp1))
755 aatmax = max( aatmax, work(n+p) )
756 IF (work(n+p) .NE. zero) aatmin = min(aatmin,work(n+p))
757 1950 CONTINUE
758 ELSE
759 DO 1904 p = 1, m
760 work(m+n+p) = scalem*dabs( a(p,idamax(n,a(p,1),lda)) )
761 aatmax = max( aatmax, work(m+n+p) )
762 aatmin = min( aatmin, work(m+n+p) )
763 1904 CONTINUE
764 END IF
765*
766 END IF
767*
768* For square matrix A try to determine whether A^t would be better
769* input for the preconditioned Jacobi SVD, with faster convergence.
770* The decision is based on an O(N) function of the vector of column
771* and row norms of A, based on the Shannon entropy. This should give
772* the right choice in most cases when the difference actually matters.
773* It may fail and pick the slower converging side.
774*
775 entra = zero
776 entrat = zero
777 IF ( l2tran ) THEN
778*
779 xsc = zero
780 temp1 = one
781 CALL dlassq( n, sva, 1, xsc, temp1 )
782 temp1 = one / temp1
783*
784 entra = zero
785 DO 1113 p = 1, n
786 big1 = ( ( sva(p) / xsc )**2 ) * temp1
787 IF ( big1 .NE. zero ) entra = entra + big1 * dlog(big1)
788 1113 CONTINUE
789 entra = - entra / dlog(dble(n))
790*
791* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex.
792* It is derived from the diagonal of A^t * A. Do the same with the
793* diagonal of A * A^t, compute the entropy of the corresponding
794* probability distribution. Note that A * A^t and A^t * A have the
795* same trace.
796*
797 entrat = zero
798 DO 1114 p = n+1, n+m
799 big1 = ( ( work(p) / xsc )**2 ) * temp1
800 IF ( big1 .NE. zero ) entrat = entrat + big1 * dlog(big1)
801 1114 CONTINUE
802 entrat = - entrat / dlog(dble(m))
803*
804* Analyze the entropies and decide A or A^t. Smaller entropy
805* usually means better input for the algorithm.
806*
807 transp = ( entrat .LT. entra )
808*
809* If A^t is better than A, transpose A.
810*
811 IF ( transp ) THEN
812* In an optimal implementation, this trivial transpose
813* should be replaced with faster transpose.
814 DO 1115 p = 1, n - 1
815 DO 1116 q = p + 1, n
816 temp1 = a(q,p)
817 a(q,p) = a(p,q)
818 a(p,q) = temp1
819 1116 CONTINUE
820 1115 CONTINUE
821 DO 1117 p = 1, n
822 work(m+n+p) = sva(p)
823 sva(p) = work(n+p)
824 1117 CONTINUE
825 temp1 = aapp
826 aapp = aatmax
827 aatmax = temp1
828 temp1 = aaqq
829 aaqq = aatmin
830 aatmin = temp1
831 kill = lsvec
832 lsvec = rsvec
833 rsvec = kill
834 IF ( lsvec ) n1 = n
835*
836 rowpiv = .true.
837 END IF
838*
839 END IF
840* END IF L2TRAN
841*
842* Scale the matrix so that its maximal singular value remains less
843* than DSQRT(BIG) -- the matrix is scaled so that its maximal column
844* has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep
845* DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and
846* BLAS routines that, in some implementations, are not capable of
847* working in the full interval [SFMIN,BIG] and that they may provoke
848* overflows in the intermediate results. If the singular values spread
849* from SFMIN to BIG, then DGESVJ will compute them. So, in that case,
850* one should use DGESVJ instead of DGEJSV.
851*
852 big1 = dsqrt( big )
853 temp1 = dsqrt( big / dble(n) )
854*
855 CALL dlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
856 IF ( aaqq .GT. (aapp * sfmin) ) THEN
857 aaqq = ( aaqq / aapp ) * temp1
858 ELSE
859 aaqq = ( aaqq * temp1 ) / aapp
860 END IF
861 temp1 = temp1 * scalem
862 CALL dlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
863*
864* To undo scaling at the end of this procedure, multiply the
865* computed singular values with USCAL2 / USCAL1.
866*
867 uscal1 = temp1
868 uscal2 = aapp
869*
870 IF ( l2kill ) THEN
871* L2KILL enforces computation of nonzero singular values in
872* the restricted range of condition number of the initial A,
873* sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN).
874 xsc = dsqrt( sfmin )
875 ELSE
876 xsc = small
877*
878* Now, if the condition number of A is too big,
879* sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN,
880* as a precaution measure, the full SVD is computed using DGESVJ
881* with accumulated Jacobi rotations. This provides numerically
882* more robust computation, at the cost of slightly increased run
883* time. Depending on the concrete implementation of BLAS and LAPACK
884* (i.e. how they behave in presence of extreme ill-conditioning) the
885* implementor may decide to remove this switch.
886 IF ( ( aaqq.LT.dsqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
887 jracc = .true.
888 END IF
889*
890 END IF
891 IF ( aaqq .LT. xsc ) THEN
892 DO 700 p = 1, n
893 IF ( sva(p) .LT. xsc ) THEN
894 CALL dlaset( 'A', m, 1, zero, zero, a(1,p), lda )
895 sva(p) = zero
896 END IF
897 700 CONTINUE
898 END IF
899*
900* Preconditioning using QR factorization with pivoting
901*
902 IF ( rowpiv ) THEN
903* Optional row permutation (Bjoerck row pivoting):
904* A result by Cox and Higham shows that the Bjoerck's
905* row pivoting combined with standard column pivoting
906* has similar effect as Powell-Reid complete pivoting.
907* The ell-infinity norms of A are made nonincreasing.
908 DO 1952 p = 1, m - 1
909 q = idamax( m-p+1, work(m+n+p), 1 ) + p - 1
910 iwork(2*n+p) = q
911 IF ( p .NE. q ) THEN
912 temp1 = work(m+n+p)
913 work(m+n+p) = work(m+n+q)
914 work(m+n+q) = temp1
915 END IF
916 1952 CONTINUE
917 CALL dlaswp( n, a, lda, 1, m-1, iwork(2*n+1), 1 )
918 END IF
919*
920* End of the preparation phase (scaling, optional sorting and
921* transposing, optional flushing of small columns).
922*
923* Preconditioning
924*
925* If the full SVD is needed, the right singular vectors are computed
926* from a matrix equation, and for that we need theoretical analysis
927* of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF.
928* In all other cases the first RR QRF can be chosen by other criteria
929* (eg speed by replacing global with restricted window pivoting, such
930* as in SGEQPX from TOMS # 782). Good results will be obtained using
931* SGEQPX with properly (!) chosen numerical parameters.
932* Any improvement of DGEQP3 improves overall performance of DGEJSV.
933*
934* A * P1 = Q1 * [ R1^t 0]^t:
935 DO 1963 p = 1, n
936* .. all columns are free columns
937 iwork(p) = 0
938 1963 CONTINUE
939 CALL dgeqp3( m,n,a,lda, iwork,work, work(n+1),lwork-n, ierr )
940*
941* The upper triangular matrix R1 from the first QRF is inspected for
942* rank deficiency and possibilities for deflation, or possible
943* ill-conditioning. Depending on the user specified flag L2RANK,
944* the procedure explores possibilities to reduce the numerical
945* rank by inspecting the computed upper triangular factor. If
946* L2RANK or L2ABER are up, then DGEJSV will compute the SVD of
947* A + dA, where ||dA|| <= f(M,N)*EPSLN.
948*
949 nr = 1
950 IF ( l2aber ) THEN
951* Standard absolute error bound suffices. All sigma_i with
952* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
953* aggressive enforcement of lower numerical rank by introducing a
954* backward error of the order of N*EPSLN*||A||.
955 temp1 = dsqrt(dble(n))*epsln
956 DO 3001 p = 2, n
957 IF ( dabs(a(p,p)) .GE. (temp1*dabs(a(1,1))) ) THEN
958 nr = nr + 1
959 ELSE
960 GO TO 3002
961 END IF
962 3001 CONTINUE
963 3002 CONTINUE
964 ELSE IF ( l2rank ) THEN
965* .. similarly as above, only slightly more gentle (less aggressive).
966* Sudden drop on the diagonal of R1 is used as the criterion for
967* close-to-rank-deficient.
968 temp1 = dsqrt(sfmin)
969 DO 3401 p = 2, n
970 IF ( ( dabs(a(p,p)) .LT. (epsln*dabs(a(p-1,p-1))) ) .OR.
971 $ ( dabs(a(p,p)) .LT. small ) .OR.
972 $ ( l2kill .AND. (dabs(a(p,p)) .LT. temp1) ) ) GO TO 3402
973 nr = nr + 1
974 3401 CONTINUE
975 3402 CONTINUE
976*
977 ELSE
978* The goal is high relative accuracy. However, if the matrix
979* has high scaled condition number the relative accuracy is in
980* general not feasible. Later on, a condition number estimator
981* will be deployed to estimate the scaled condition number.
982* Here we just remove the underflowed part of the triangular
983* factor. This prevents the situation in which the code is
984* working hard to get the accuracy not warranted by the data.
985 temp1 = dsqrt(sfmin)
986 DO 3301 p = 2, n
987 IF ( ( dabs(a(p,p)) .LT. small ) .OR.
988 $ ( l2kill .AND. (dabs(a(p,p)) .LT. temp1) ) ) GO TO 3302
989 nr = nr + 1
990 3301 CONTINUE
991 3302 CONTINUE
992*
993 END IF
994*
995 almort = .false.
996 IF ( nr .EQ. n ) THEN
997 maxprj = one
998 DO 3051 p = 2, n
999 temp1 = dabs(a(p,p)) / sva(iwork(p))
1000 maxprj = min( maxprj, temp1 )
1001 3051 CONTINUE
1002 IF ( maxprj**2 .GE. one - dble(n)*epsln ) almort = .true.
1003 END IF
1004*
1005*
1006 sconda = - one
1007 condr1 = - one
1008 condr2 = - one
1009*
1010 IF ( errest ) THEN
1011 IF ( n .EQ. nr ) THEN
1012 IF ( rsvec ) THEN
1013* .. V is available as workspace
1014 CALL dlacpy( 'U', n, n, a, lda, v, ldv )
1015 DO 3053 p = 1, n
1016 temp1 = sva(iwork(p))
1017 CALL dscal( p, one/temp1, v(1,p), 1 )
1018 3053 CONTINUE
1019 CALL dpocon( 'U', n, v, ldv, one, temp1,
1020 $ work(n+1), iwork(2*n+m+1), ierr )
1021 ELSE IF ( lsvec ) THEN
1022* .. U is available as workspace
1023 CALL dlacpy( 'U', n, n, a, lda, u, ldu )
1024 DO 3054 p = 1, n
1025 temp1 = sva(iwork(p))
1026 CALL dscal( p, one/temp1, u(1,p), 1 )
1027 3054 CONTINUE
1028 CALL dpocon( 'U', n, u, ldu, one, temp1,
1029 $ work(n+1), iwork(2*n+m+1), ierr )
1030 ELSE
1031 CALL dlacpy( 'U', n, n, a, lda, work(n+1), n )
1032 DO 3052 p = 1, n
1033 temp1 = sva(iwork(p))
1034 CALL dscal( p, one/temp1, work(n+(p-1)*n+1), 1 )
1035 3052 CONTINUE
1036* .. the columns of R are scaled to have unit Euclidean lengths.
1037 CALL dpocon( 'U', n, work(n+1), n, one, temp1,
1038 $ work(n+n*n+1), iwork(2*n+m+1), ierr )
1039 END IF
1040 sconda = one / dsqrt(temp1)
1041* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
1042* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1043 ELSE
1044 sconda = - one
1045 END IF
1046 END IF
1047*
1048 l2pert = l2pert .AND. ( dabs( a(1,1)/a(nr,nr) ) .GT. dsqrt(big1) )
1049* If there is no violent scaling, artificial perturbation is not needed.
1050*
1051* Phase 3:
1052*
1053 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1054*
1055* Singular Values only
1056*
1057* .. transpose A(1:NR,1:N)
1058 DO 1946 p = 1, min( n-1, nr )
1059 CALL dcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1060 1946 CONTINUE
1061*
1062* The following two DO-loops introduce small relative perturbation
1063* into the strict upper triangle of the lower triangular matrix.
1064* Small entries below the main diagonal are also changed.
1065* This modification is useful if the computing environment does not
1066* provide/allow FLUSH TO ZERO underflow, for it prevents many
1067* annoying denormalized numbers in case of strongly scaled matrices.
1068* The perturbation is structured so that it does not introduce any
1069* new perturbation of the singular values, and it does not destroy
1070* the job done by the preconditioner.
1071* The licence for this perturbation is in the variable L2PERT, which
1072* should be .FALSE. if FLUSH TO ZERO underflow is active.
1073*
1074 IF ( .NOT. almort ) THEN
1075*
1076 IF ( l2pert ) THEN
1077* XSC = DSQRT(SMALL)
1078 xsc = epsln / dble(n)
1079 DO 4947 q = 1, nr
1080 temp1 = xsc*dabs(a(q,q))
1081 DO 4949 p = 1, n
1082 IF ( ( (p.GT.q) .AND. (dabs(a(p,q)).LE.temp1) )
1083 $ .OR. ( p .LT. q ) )
1084 $ a(p,q) = dsign( temp1, a(p,q) )
1085 4949 CONTINUE
1086 4947 CONTINUE
1087 ELSE
1088 CALL dlaset( 'U', nr-1,nr-1, zero,zero, a(1,2),lda )
1089 END IF
1090*
1091* .. second preconditioning using the QR factorization
1092*
1093 CALL dgeqrf( n,nr, a,lda, work, work(n+1),lwork-n, ierr )
1094*
1095* .. and transpose upper to lower triangular
1096 DO 1948 p = 1, nr - 1
1097 CALL dcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1098 1948 CONTINUE
1099*
1100 END IF
1101*
1102* Row-cyclic Jacobi SVD algorithm with column pivoting
1103*
1104* .. again some perturbation (a "background noise") is added
1105* to drown denormals
1106 IF ( l2pert ) THEN
1107* XSC = DSQRT(SMALL)
1108 xsc = epsln / dble(n)
1109 DO 1947 q = 1, nr
1110 temp1 = xsc*dabs(a(q,q))
1111 DO 1949 p = 1, nr
1112 IF ( ( (p.GT.q) .AND. (dabs(a(p,q)).LE.temp1) )
1113 $ .OR. ( p .LT. q ) )
1114 $ a(p,q) = dsign( temp1, a(p,q) )
1115 1949 CONTINUE
1116 1947 CONTINUE
1117 ELSE
1118 CALL dlaset( 'U', nr-1, nr-1, zero, zero, a(1,2), lda )
1119 END IF
1120*
1121* .. and one-sided Jacobi rotations are started on a lower
1122* triangular matrix (plus perturbation which is ignored in
1123* the part which destroys triangular form (confusing?!))
1124*
1125 CALL dgesvj( 'L', 'NoU', 'NoV', nr, nr, a, lda, sva,
1126 $ n, v, ldv, work, lwork, info )
1127*
1128 scalem = work(1)
1129 numrank = idnint(work(2))
1130*
1131*
1132 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1133*
1134* -> Singular Values and Right Singular Vectors <-
1135*
1136 IF ( almort ) THEN
1137*
1138* .. in this case NR equals N
1139 DO 1998 p = 1, nr
1140 CALL dcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1141 1998 CONTINUE
1142 CALL dlaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1143*
1144 CALL dgesvj( 'L','U','N', n, nr, v,ldv, sva, nr, a,lda,
1145 $ work, lwork, info )
1146 scalem = work(1)
1147 numrank = idnint(work(2))
1148
1149 ELSE
1150*
1151* .. two more QR factorizations ( one QRF is not enough, two require
1152* accumulated product of Jacobi rotations, three are perfect )
1153*
1154 CALL dlaset( 'Lower', nr-1, nr-1, zero, zero, a(2,1), lda )
1155 CALL dgelqf( nr, n, a, lda, work, work(n+1), lwork-n, ierr)
1156 CALL dlacpy( 'Lower', nr, nr, a, lda, v, ldv )
1157 CALL dlaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1158 CALL dgeqrf( nr, nr, v, ldv, work(n+1), work(2*n+1),
1159 $ lwork-2*n, ierr )
1160 DO 8998 p = 1, nr
1161 CALL dcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1162 8998 CONTINUE
1163 CALL dlaset( 'Upper', nr-1, nr-1, zero, zero, v(1,2), ldv )
1164*
1165 CALL dgesvj( 'Lower', 'U','N', nr, nr, v,ldv, sva, nr, u,
1166 $ ldu, work(n+1), lwork, info )
1167 scalem = work(n+1)
1168 numrank = idnint(work(n+2))
1169 IF ( nr .LT. n ) THEN
1170 CALL dlaset( 'A',n-nr, nr, zero,zero, v(nr+1,1), ldv )
1171 CALL dlaset( 'A',nr, n-nr, zero,zero, v(1,nr+1), ldv )
1172 CALL dlaset( 'A',n-nr,n-nr,zero,one, v(nr+1,nr+1), ldv )
1173 END IF
1174*
1175 CALL dormlq( 'Left', 'Transpose', n, n, nr, a, lda, work,
1176 $ v, ldv, work(n+1), lwork-n, ierr )
1177*
1178 END IF
1179*
1180 DO 8991 p = 1, n
1181 CALL dcopy( n, v(p,1), ldv, a(iwork(p),1), lda )
1182 8991 CONTINUE
1183 CALL dlacpy( 'All', n, n, a, lda, v, ldv )
1184*
1185 IF ( transp ) THEN
1186 CALL dlacpy( 'All', n, n, v, ldv, u, ldu )
1187 END IF
1188*
1189 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1190*
1191* .. Singular Values and Left Singular Vectors ..
1192*
1193* .. second preconditioning step to avoid need to accumulate
1194* Jacobi rotations in the Jacobi iterations.
1195 DO 1965 p = 1, nr
1196 CALL dcopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1197 1965 CONTINUE
1198 CALL dlaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1199*
1200 CALL dgeqrf( n, nr, u, ldu, work(n+1), work(2*n+1),
1201 $ lwork-2*n, ierr )
1202*
1203 DO 1967 p = 1, nr - 1
1204 CALL dcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1205 1967 CONTINUE
1206 CALL dlaset( 'Upper', nr-1, nr-1, zero, zero, u(1,2), ldu )
1207*
1208 CALL dgesvj( 'Lower', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1209 $ lda, work(n+1), lwork-n, info )
1210 scalem = work(n+1)
1211 numrank = idnint(work(n+2))
1212*
1213 IF ( nr .LT. m ) THEN
1214 CALL dlaset( 'A', m-nr, nr,zero, zero, u(nr+1,1), ldu )
1215 IF ( nr .LT. n1 ) THEN
1216 CALL dlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1), ldu )
1217 CALL dlaset( 'A',m-nr,n1-nr,zero,one,u(nr+1,nr+1), ldu )
1218 END IF
1219 END IF
1220*
1221 CALL dormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1222 $ ldu, work(n+1), lwork-n, ierr )
1223*
1224 IF ( rowpiv )
1225 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1226*
1227 DO 1974 p = 1, n1
1228 xsc = one / dnrm2( m, u(1,p), 1 )
1229 CALL dscal( m, xsc, u(1,p), 1 )
1230 1974 CONTINUE
1231*
1232 IF ( transp ) THEN
1233 CALL dlacpy( 'All', n, n, u, ldu, v, ldv )
1234 END IF
1235*
1236 ELSE
1237*
1238* .. Full SVD ..
1239*
1240 IF ( .NOT. jracc ) THEN
1241*
1242 IF ( .NOT. almort ) THEN
1243*
1244* Second Preconditioning Step (QRF [with pivoting])
1245* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1246* equivalent to an LQF CALL. Since in many libraries the QRF
1247* seems to be better optimized than the LQF, we do explicit
1248* transpose and use the QRF. This is subject to changes in an
1249* optimized implementation of DGEJSV.
1250*
1251 DO 1968 p = 1, nr
1252 CALL dcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1253 1968 CONTINUE
1254*
1255* .. the following two loops perturb small entries to avoid
1256* denormals in the second QR factorization, where they are
1257* as good as zeros. This is done to avoid painfully slow
1258* computation with denormals. The relative size of the perturbation
1259* is a parameter that can be changed by the implementer.
1260* This perturbation device will be obsolete on machines with
1261* properly implemented arithmetic.
1262* To switch it off, set L2PERT=.FALSE. To remove it from the
1263* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1264* The following two loops should be blocked and fused with the
1265* transposed copy above.
1266*
1267 IF ( l2pert ) THEN
1268 xsc = dsqrt(small)
1269 DO 2969 q = 1, nr
1270 temp1 = xsc*dabs( v(q,q) )
1271 DO 2968 p = 1, n
1272 IF ( ( p .GT. q ) .AND. ( dabs(v(p,q)) .LE. temp1 )
1273 $ .OR. ( p .LT. q ) )
1274 $ v(p,q) = dsign( temp1, v(p,q) )
1275 IF ( p .LT. q ) v(p,q) = - v(p,q)
1276 2968 CONTINUE
1277 2969 CONTINUE
1278 ELSE
1279 CALL dlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1280 END IF
1281*
1282* Estimate the row scaled condition number of R1
1283* (If R1 is rectangular, N > NR, then the condition number
1284* of the leading NR x NR submatrix is estimated.)
1285*
1286 CALL dlacpy( 'L', nr, nr, v, ldv, work(2*n+1), nr )
1287 DO 3950 p = 1, nr
1288 temp1 = dnrm2(nr-p+1,work(2*n+(p-1)*nr+p),1)
1289 CALL dscal(nr-p+1,one/temp1,work(2*n+(p-1)*nr+p),1)
1290 3950 CONTINUE
1291 CALL dpocon('Lower',nr,work(2*n+1),nr,one,temp1,
1292 $ work(2*n+nr*nr+1),iwork(m+2*n+1),ierr)
1293 condr1 = one / dsqrt(temp1)
1294* .. here need a second opinion on the condition number
1295* .. then assume worst case scenario
1296* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
1297* more conservative <=> CONDR1 .LT. DSQRT(DBLE(N))
1298*
1299 cond_ok = dsqrt(dble(nr))
1300*[TP] COND_OK is a tuning parameter.
1301
1302 IF ( condr1 .LT. cond_ok ) THEN
1303* .. the second QRF without pivoting. Note: in an optimized
1304* implementation, this QRF should be implemented as the QRF
1305* of a lower triangular matrix.
1306* R1^t = Q2 * R2
1307 CALL dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1308 $ lwork-2*n, ierr )
1309*
1310 IF ( l2pert ) THEN
1311 xsc = dsqrt(small)/epsln
1312 DO 3959 p = 2, nr
1313 DO 3958 q = 1, p - 1
1314 temp1 = xsc * min(dabs(v(p,p)),dabs(v(q,q)))
1315 IF ( dabs(v(q,p)) .LE. temp1 )
1316 $ v(q,p) = dsign( temp1, v(q,p) )
1317 3958 CONTINUE
1318 3959 CONTINUE
1319 END IF
1320*
1321 IF ( nr .NE. n )
1322 $ CALL dlacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1323* .. save ...
1324*
1325* .. this transposed copy should be better than naive
1326 DO 1969 p = 1, nr - 1
1327 CALL dcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1328 1969 CONTINUE
1329*
1330 condr2 = condr1
1331*
1332 ELSE
1333*
1334* .. ill-conditioned case: second QRF with pivoting
1335* Note that windowed pivoting would be equally good
1336* numerically, and more run-time efficient. So, in
1337* an optimal implementation, the next call to DGEQP3
1338* should be replaced with eg. CALL SGEQPX (ACM TOMS #782)
1339* with properly (carefully) chosen parameters.
1340*
1341* R1^t * P2 = Q2 * R2
1342 DO 3003 p = 1, nr
1343 iwork(n+p) = 0
1344 3003 CONTINUE
1345 CALL dgeqp3( n, nr, v, ldv, iwork(n+1), work(n+1),
1346 $ work(2*n+1), lwork-2*n, ierr )
1347** CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
1348** $ LWORK-2*N, IERR )
1349 IF ( l2pert ) THEN
1350 xsc = dsqrt(small)
1351 DO 3969 p = 2, nr
1352 DO 3968 q = 1, p - 1
1353 temp1 = xsc * min(dabs(v(p,p)),dabs(v(q,q)))
1354 IF ( dabs(v(q,p)) .LE. temp1 )
1355 $ v(q,p) = dsign( temp1, v(q,p) )
1356 3968 CONTINUE
1357 3969 CONTINUE
1358 END IF
1359*
1360 CALL dlacpy( 'A', n, nr, v, ldv, work(2*n+1), n )
1361*
1362 IF ( l2pert ) THEN
1363 xsc = dsqrt(small)
1364 DO 8970 p = 2, nr
1365 DO 8971 q = 1, p - 1
1366 temp1 = xsc * min(dabs(v(p,p)),dabs(v(q,q)))
1367 v(p,q) = - dsign( temp1, v(q,p) )
1368 8971 CONTINUE
1369 8970 CONTINUE
1370 ELSE
1371 CALL dlaset( 'L',nr-1,nr-1,zero,zero,v(2,1),ldv )
1372 END IF
1373* Now, compute R2 = L3 * Q3, the LQ factorization.
1374 CALL dgelqf( nr, nr, v, ldv, work(2*n+n*nr+1),
1375 $ work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1376* .. and estimate the condition number
1377 CALL dlacpy( 'L',nr,nr,v,ldv,work(2*n+n*nr+nr+1),nr )
1378 DO 4950 p = 1, nr
1379 temp1 = dnrm2( p, work(2*n+n*nr+nr+p), nr )
1380 CALL dscal( p, one/temp1, work(2*n+n*nr+nr+p), nr )
1381 4950 CONTINUE
1382 CALL dpocon( 'L',nr,work(2*n+n*nr+nr+1),nr,one,temp1,
1383 $ work(2*n+n*nr+nr+nr*nr+1),iwork(m+2*n+1),ierr )
1384 condr2 = one / dsqrt(temp1)
1385*
1386 IF ( condr2 .GE. cond_ok ) THEN
1387* .. save the Householder vectors used for Q3
1388* (this overwrites the copy of R2, as it will not be
1389* needed in this branch, but it does not overwritte the
1390* Huseholder vectors of Q2.).
1391 CALL dlacpy( 'U', nr, nr, v, ldv, work(2*n+1), n )
1392* .. and the rest of the information on Q3 is in
1393* WORK(2*N+N*NR+1:2*N+N*NR+N)
1394 END IF
1395*
1396 END IF
1397*
1398 IF ( l2pert ) THEN
1399 xsc = dsqrt(small)
1400 DO 4968 q = 2, nr
1401 temp1 = xsc * v(q,q)
1402 DO 4969 p = 1, q - 1
1403* V(p,q) = - DSIGN( TEMP1, V(q,p) )
1404 v(p,q) = - dsign( temp1, v(p,q) )
1405 4969 CONTINUE
1406 4968 CONTINUE
1407 ELSE
1408 CALL dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1409 END IF
1410*
1411* Second preconditioning finished; continue with Jacobi SVD
1412* The input matrix is lower trinagular.
1413*
1414* Recover the right singular vectors as solution of a well
1415* conditioned triangular matrix equation.
1416*
1417 IF ( condr1 .LT. cond_ok ) THEN
1418*
1419 CALL dgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u,
1420 $ ldu,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,info )
1421 scalem = work(2*n+n*nr+nr+1)
1422 numrank = idnint(work(2*n+n*nr+nr+2))
1423 DO 3970 p = 1, nr
1424 CALL dcopy( nr, v(1,p), 1, u(1,p), 1 )
1425 CALL dscal( nr, sva(p), v(1,p), 1 )
1426 3970 CONTINUE
1427
1428* .. pick the right matrix equation and solve it
1429*
1430 IF ( nr .EQ. n ) THEN
1431* :)) .. best case, R1 is inverted. The solution of this matrix
1432* equation is Q2*V2 = the product of the Jacobi rotations
1433* used in DGESVJ, premultiplied with the orthogonal matrix
1434* from the second QR factorization.
1435 CALL dtrsm( 'L','U','N','N', nr,nr,one, a,lda, v,ldv )
1436 ELSE
1437* .. R1 is well conditioned, but non-square. Transpose(R2)
1438* is inverted to get the product of the Jacobi rotations
1439* used in DGESVJ. The Q-factor from the second QR
1440* factorization is then built in explicitly.
1441 CALL dtrsm('L','U','T','N',nr,nr,one,work(2*n+1),
1442 $ n,v,ldv)
1443 IF ( nr .LT. n ) THEN
1444 CALL dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1445 CALL dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1446 CALL dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1447 END IF
1448 CALL dormqr('L','N',n,n,nr,work(2*n+1),n,work(n+1),
1449 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1450 END IF
1451*
1452 ELSE IF ( condr2 .LT. cond_ok ) THEN
1453*
1454* :) .. the input matrix A is very likely a relative of
1455* the Kahan matrix :)
1456* The matrix R2 is inverted. The solution of the matrix equation
1457* is Q3^T*V3 = the product of the Jacobi rotations (appplied to
1458* the lower triangular L3 from the LQ factorization of
1459* R2=L3*Q3), pre-multiplied with the transposed Q3.
1460 CALL dgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr, u,
1461 $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1462 scalem = work(2*n+n*nr+nr+1)
1463 numrank = idnint(work(2*n+n*nr+nr+2))
1464 DO 3870 p = 1, nr
1465 CALL dcopy( nr, v(1,p), 1, u(1,p), 1 )
1466 CALL dscal( nr, sva(p), u(1,p), 1 )
1467 3870 CONTINUE
1468 CALL dtrsm('L','U','N','N',nr,nr,one,work(2*n+1),n,u,ldu)
1469* .. apply the permutation from the second QR factorization
1470 DO 873 q = 1, nr
1471 DO 872 p = 1, nr
1472 work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1473 872 CONTINUE
1474 DO 874 p = 1, nr
1475 u(p,q) = work(2*n+n*nr+nr+p)
1476 874 CONTINUE
1477 873 CONTINUE
1478 IF ( nr .LT. n ) THEN
1479 CALL dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1480 CALL dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1481 CALL dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1482 END IF
1483 CALL dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1484 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1485 ELSE
1486* Last line of defense.
1487* #:( This is a rather pathological case: no scaled condition
1488* improvement after two pivoted QR factorizations. Other
1489* possibility is that the rank revealing QR factorization
1490* or the condition estimator has failed, or the COND_OK
1491* is set very close to ONE (which is unnecessary). Normally,
1492* this branch should never be executed, but in rare cases of
1493* failure of the RRQR or condition estimator, the last line of
1494* defense ensures that DGEJSV completes the task.
1495* Compute the full SVD of L3 using DGESVJ with explicit
1496* accumulation of Jacobi rotations.
1497 CALL dgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr, u,
1498 $ ldu, work(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, info )
1499 scalem = work(2*n+n*nr+nr+1)
1500 numrank = idnint(work(2*n+n*nr+nr+2))
1501 IF ( nr .LT. n ) THEN
1502 CALL dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1503 CALL dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1504 CALL dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1505 END IF
1506 CALL dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1507 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1508*
1509 CALL dormlq( 'L', 'T', nr, nr, nr, work(2*n+1), n,
1510 $ work(2*n+n*nr+1), u, ldu, work(2*n+n*nr+nr+1),
1511 $ lwork-2*n-n*nr-nr, ierr )
1512 DO 773 q = 1, nr
1513 DO 772 p = 1, nr
1514 work(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1515 772 CONTINUE
1516 DO 774 p = 1, nr
1517 u(p,q) = work(2*n+n*nr+nr+p)
1518 774 CONTINUE
1519 773 CONTINUE
1520*
1521 END IF
1522*
1523* Permute the rows of V using the (column) permutation from the
1524* first QRF. Also, scale the columns to make them unit in
1525* Euclidean norm. This applies to all cases.
1526*
1527 temp1 = dsqrt(dble(n)) * epsln
1528 DO 1972 q = 1, n
1529 DO 972 p = 1, n
1530 work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1531 972 CONTINUE
1532 DO 973 p = 1, n
1533 v(p,q) = work(2*n+n*nr+nr+p)
1534 973 CONTINUE
1535 xsc = one / dnrm2( n, v(1,q), 1 )
1536 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1537 $ CALL dscal( n, xsc, v(1,q), 1 )
1538 1972 CONTINUE
1539* At this moment, V contains the right singular vectors of A.
1540* Next, assemble the left singular vector matrix U (M x N).
1541 IF ( nr .LT. m ) THEN
1542 CALL dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1543 IF ( nr .LT. n1 ) THEN
1544 CALL dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1545 CALL dlaset('A',m-nr,n1-nr,zero,one,u(nr+1,nr+1),ldu)
1546 END IF
1547 END IF
1548*
1549* The Q matrix from the first QRF is built into the left singular
1550* matrix U. This applies to all cases.
1551*
1552 CALL dormqr( 'Left', 'No_Tr', m, n1, n, a, lda, work, u,
1553 $ ldu, work(n+1), lwork-n, ierr )
1554
1555* The columns of U are normalized. The cost is O(M*N) flops.
1556 temp1 = dsqrt(dble(m)) * epsln
1557 DO 1973 p = 1, nr
1558 xsc = one / dnrm2( m, u(1,p), 1 )
1559 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1560 $ CALL dscal( m, xsc, u(1,p), 1 )
1561 1973 CONTINUE
1562*
1563* If the initial QRF is computed with row pivoting, the left
1564* singular vectors must be adjusted.
1565*
1566 IF ( rowpiv )
1567 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1568*
1569 ELSE
1570*
1571* .. the initial matrix A has almost orthogonal columns and
1572* the second QRF is not needed
1573*
1574 CALL dlacpy( 'Upper', n, n, a, lda, work(n+1), n )
1575 IF ( l2pert ) THEN
1576 xsc = dsqrt(small)
1577 DO 5970 p = 2, n
1578 temp1 = xsc * work( n + (p-1)*n + p )
1579 DO 5971 q = 1, p - 1
1580 work(n+(q-1)*n+p)=-dsign(temp1,work(n+(p-1)*n+q))
1581 5971 CONTINUE
1582 5970 CONTINUE
1583 ELSE
1584 CALL dlaset( 'Lower',n-1,n-1,zero,zero,work(n+2),n )
1585 END IF
1586*
1587 CALL dgesvj( 'Upper', 'U', 'N', n, n, work(n+1), n, sva,
1588 $ n, u, ldu, work(n+n*n+1), lwork-n-n*n, info )
1589*
1590 scalem = work(n+n*n+1)
1591 numrank = idnint(work(n+n*n+2))
1592 DO 6970 p = 1, n
1593 CALL dcopy( n, work(n+(p-1)*n+1), 1, u(1,p), 1 )
1594 CALL dscal( n, sva(p), work(n+(p-1)*n+1), 1 )
1595 6970 CONTINUE
1596*
1597 CALL dtrsm( 'Left', 'Upper', 'NoTrans', 'No UD', n, n,
1598 $ one, a, lda, work(n+1), n )
1599 DO 6972 p = 1, n
1600 CALL dcopy( n, work(n+p), n, v(iwork(p),1), ldv )
1601 6972 CONTINUE
1602 temp1 = dsqrt(dble(n))*epsln
1603 DO 6971 p = 1, n
1604 xsc = one / dnrm2( n, v(1,p), 1 )
1605 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1606 $ CALL dscal( n, xsc, v(1,p), 1 )
1607 6971 CONTINUE
1608*
1609* Assemble the left singular vector matrix U (M x N).
1610*
1611 IF ( n .LT. m ) THEN
1612 CALL dlaset( 'A', m-n, n, zero, zero, u(n+1,1), ldu )
1613 IF ( n .LT. n1 ) THEN
1614 CALL dlaset( 'A',n, n1-n, zero, zero, u(1,n+1),ldu )
1615 CALL dlaset( 'A',m-n,n1-n, zero, one,u(n+1,n+1),ldu )
1616 END IF
1617 END IF
1618 CALL dormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1619 $ ldu, work(n+1), lwork-n, ierr )
1620 temp1 = dsqrt(dble(m))*epsln
1621 DO 6973 p = 1, n1
1622 xsc = one / dnrm2( m, u(1,p), 1 )
1623 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1624 $ CALL dscal( m, xsc, u(1,p), 1 )
1625 6973 CONTINUE
1626*
1627 IF ( rowpiv )
1628 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1629*
1630 END IF
1631*
1632* end of the >> almost orthogonal case << in the full SVD
1633*
1634 ELSE
1635*
1636* This branch deploys a preconditioned Jacobi SVD with explicitly
1637* accumulated rotations. It is included as optional, mainly for
1638* experimental purposes. It does perform well, and can also be used.
1639* In this implementation, this branch will be automatically activated
1640* if the condition number sigma_max(A) / sigma_min(A) is predicted
1641* to be greater than the overflow threshold. This is because the
1642* a posteriori computation of the singular vectors assumes robust
1643* implementation of BLAS and some LAPACK procedures, capable of working
1644* in presence of extreme values. Since that is not always the case, ...
1645*
1646 DO 7968 p = 1, nr
1647 CALL dcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1648 7968 CONTINUE
1649*
1650 IF ( l2pert ) THEN
1651 xsc = dsqrt(small/epsln)
1652 DO 5969 q = 1, nr
1653 temp1 = xsc*dabs( v(q,q) )
1654 DO 5968 p = 1, n
1655 IF ( ( p .GT. q ) .AND. ( dabs(v(p,q)) .LE. temp1 )
1656 $ .OR. ( p .LT. q ) )
1657 $ v(p,q) = dsign( temp1, v(p,q) )
1658 IF ( p .LT. q ) v(p,q) = - v(p,q)
1659 5968 CONTINUE
1660 5969 CONTINUE
1661 ELSE
1662 CALL dlaset( 'U', nr-1, nr-1, zero, zero, v(1,2), ldv )
1663 END IF
1664
1665 CALL dgeqrf( n, nr, v, ldv, work(n+1), work(2*n+1),
1666 $ lwork-2*n, ierr )
1667 CALL dlacpy( 'L', n, nr, v, ldv, work(2*n+1), n )
1668*
1669 DO 7969 p = 1, nr
1670 CALL dcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
1671 7969 CONTINUE
1672
1673 IF ( l2pert ) THEN
1674 xsc = dsqrt(small/epsln)
1675 DO 9970 q = 2, nr
1676 DO 9971 p = 1, q - 1
1677 temp1 = xsc * min(dabs(u(p,p)),dabs(u(q,q)))
1678 u(p,q) = - dsign( temp1, u(q,p) )
1679 9971 CONTINUE
1680 9970 CONTINUE
1681 ELSE
1682 CALL dlaset('U', nr-1, nr-1, zero, zero, u(1,2), ldu )
1683 END IF
1684
1685 CALL dgesvj( 'G', 'U', 'V', nr, nr, u, ldu, sva,
1686 $ n, v, ldv, work(2*n+n*nr+1), lwork-2*n-n*nr, info )
1687 scalem = work(2*n+n*nr+1)
1688 numrank = idnint(work(2*n+n*nr+2))
1689
1690 IF ( nr .LT. n ) THEN
1691 CALL dlaset( 'A',n-nr,nr,zero,zero,v(nr+1,1),ldv )
1692 CALL dlaset( 'A',nr,n-nr,zero,zero,v(1,nr+1),ldv )
1693 CALL dlaset( 'A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv )
1694 END IF
1695
1696 CALL dormqr( 'L','N',n,n,nr,work(2*n+1),n,work(n+1),
1697 $ v,ldv,work(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1698*
1699* Permute the rows of V using the (column) permutation from the
1700* first QRF. Also, scale the columns to make them unit in
1701* Euclidean norm. This applies to all cases.
1702*
1703 temp1 = dsqrt(dble(n)) * epsln
1704 DO 7972 q = 1, n
1705 DO 8972 p = 1, n
1706 work(2*n+n*nr+nr+iwork(p)) = v(p,q)
1707 8972 CONTINUE
1708 DO 8973 p = 1, n
1709 v(p,q) = work(2*n+n*nr+nr+p)
1710 8973 CONTINUE
1711 xsc = one / dnrm2( n, v(1,q), 1 )
1712 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
1713 $ CALL dscal( n, xsc, v(1,q), 1 )
1714 7972 CONTINUE
1715*
1716* At this moment, V contains the right singular vectors of A.
1717* Next, assemble the left singular vector matrix U (M x N).
1718*
1719 IF ( nr .LT. m ) THEN
1720 CALL dlaset( 'A', m-nr, nr, zero, zero, u(nr+1,1), ldu )
1721 IF ( nr .LT. n1 ) THEN
1722 CALL dlaset( 'A',nr, n1-nr, zero, zero, u(1,nr+1),ldu )
1723 CALL dlaset( 'A',m-nr,n1-nr, zero, one,u(nr+1,nr+1),ldu )
1724 END IF
1725 END IF
1726*
1727 CALL dormqr( 'Left', 'No Tr', m, n1, n, a, lda, work, u,
1728 $ ldu, work(n+1), lwork-n, ierr )
1729*
1730 IF ( rowpiv )
1731 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(2*n+1), -1 )
1732*
1733*
1734 END IF
1735 IF ( transp ) THEN
1736* .. swap U and V because the procedure worked on A^t
1737 DO 6974 p = 1, n
1738 CALL dswap( n, u(1,p), 1, v(1,p), 1 )
1739 6974 CONTINUE
1740 END IF
1741*
1742 END IF
1743* end of the full SVD
1744*
1745* Undo scaling, if necessary (and possible)
1746*
1747 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
1748 CALL dlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n, ierr )
1749 uscal1 = one
1750 uscal2 = one
1751 END IF
1752*
1753 IF ( nr .LT. n ) THEN
1754 DO 3004 p = nr+1, n
1755 sva(p) = zero
1756 3004 CONTINUE
1757 END IF
1758*
1759 work(1) = uscal2 * scalem
1760 work(2) = uscal1
1761 IF ( errest ) work(3) = sconda
1762 IF ( lsvec .AND. rsvec ) THEN
1763 work(4) = condr1
1764 work(5) = condr2
1765 END IF
1766 IF ( l2tran ) THEN
1767 work(6) = entra
1768 work(7) = entrat
1769 END IF
1770*
1771 iwork(1) = nr
1772 iwork(2) = numrank
1773 iwork(3) = warning
1774*
1775 RETURN
1776* ..
1777* .. END OF DGEJSV
1778* ..
subroutine dlassq(n, x, incx, scl, sumsq)
DLASSQ updates a sum of squares represented in scaled form.
Definition dlassq.f90:137
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:143
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function idamax(n, dx, incx)
IDAMAX
Definition idamax.f:71
subroutine dgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, work, lwork, info)
DGESVJ
Definition dgesvj.f:337
subroutine dgelqf(m, n, a, lda, tau, work, lwork, info)
DGELQF
Definition dgelqf.f:143
subroutine dgeqrf(m, n, a, lda, tau, work, lwork, info)
DGEQRF
Definition dgeqrf.f:146
subroutine dgeqp3(m, n, a, lda, jpvt, tau, work, lwork, info)
DGEQP3
Definition dgeqp3.f:151
subroutine dlaswp(n, a, lda, k1, k2, ipiv, incx)
DLASWP performs a series of row interchanges on a general rectangular matrix.
Definition dlaswp.f:115
subroutine dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMQR
Definition dormqr.f:167
subroutine dorgqr(m, n, k, a, lda, tau, work, lwork, info)
DORGQR
Definition dorgqr.f:128
subroutine dormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMLQ
Definition dormlq.f:167
subroutine dpocon(uplo, n, a, lda, anorm, rcond, work, iwork, info)
DPOCON
Definition dpocon.f:121
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82
subroutine dtrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRSM
Definition dtrsm.f:181
real(wp) function dnrm2(n, x, incx)
DNRM2
Definition dnrm2.f90:89
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21

◆ dgesdd()

subroutine dgesdd ( character jobz,
integer m,
integer n,
double precision, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldvt, * ) vt,
integer ldvt,
double precision, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

DGESDD

Download DGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DGESDD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and right singular
!> vectors.  If singular vectors are desired, it uses a
!> divide-and-conquer algorithm.
!>
!> The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns VT = V**T, not V.
!>
!> The divide and conquer algorithm makes very mild assumptions about
!> floating point arithmetic. It will work on machines with a guard
!> digit in add/subtract, or on those binary machines without guard
!> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
!> Cray-2. It could conceivably fail on hexadecimal or decimal machines
!> without guard digits, but we know of none.
!> 
Parameters
[in]JOBZ
!>          JOBZ is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U and all N rows of V**T are
!>                  returned in the arrays U and VT;
!>          = 'S':  the first min(M,N) columns of U and the first
!>                  min(M,N) rows of V**T are returned in the arrays U
!>                  and VT;
!>          = 'O':  If M >= N, the first N columns of U are overwritten
!>                  on the array A and all rows of V**T are returned in
!>                  the array VT;
!>                  otherwise, all columns of U are returned in the
!>                  array U and the first M rows of V**T are overwritten
!>                  in the array A;
!>          = 'N':  no columns of U or rows of V**T are computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBZ = 'O',  A is overwritten with the first N columns
!>                          of U (the left singular vectors, stored
!>                          columnwise) if M >= N;
!>                          A is overwritten with the first M rows
!>                          of V**T (the right singular vectors, stored
!>                          rowwise) otherwise.
!>          if JOBZ .ne. 'O', the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
!>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
!>          UCOL = min(M,N) if JOBZ = 'S'.
!>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
!>          orthogonal matrix U;
!>          if JOBZ = 'S', U contains the first min(M,N) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
!> 
[out]VT
!>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
!>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
!>          N-by-N orthogonal matrix V**T;
!>          if JOBZ = 'S', VT contains the first min(M,N) rows of
!>          V**T (the right singular vectors, stored rowwise);
!>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1;
!>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
!>          if JOBZ = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. LWORK >= 1.
!>          If LWORK = -1, a workspace query is assumed.  The optimal
!>          size for the WORK array is calculated and stored in WORK(1),
!>          and no other work except argument checking is performed.
!>
!>          Let mx = max(M,N) and mn = min(M,N).
!>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
!>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
!>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
!>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
!>          These are not tight minimums in all cases; see comments inside code.
!>          For good performance, LWORK should generally be larger;
!>          a query is recommended.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (8*min(M,N))
!> 
[out]INFO
!>          INFO is INTEGER
!>          <  0:  if INFO = -i, the i-th argument had an illegal value.
!>          = -4:  if A had a NAN entry.
!>          >  0:  DBDSDC did not converge, updating process failed.
!>          =  0:  successful exit.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 217 of file dgesdd.f.

219 implicit none
220*
221* -- LAPACK driver routine --
222* -- LAPACK is a software package provided by Univ. of Tennessee, --
223* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
224*
225* .. Scalar Arguments ..
226 CHARACTER JOBZ
227 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
228* ..
229* .. Array Arguments ..
230 INTEGER IWORK( * )
231 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
232 $ VT( LDVT, * ), WORK( * )
233* ..
234*
235* =====================================================================
236*
237* .. Parameters ..
238 DOUBLE PRECISION ZERO, ONE
239 parameter( zero = 0.0d0, one = 1.0d0 )
240* ..
241* .. Local Scalars ..
242 LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
243 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
244 $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
245 $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
246 $ MNTHR, NWORK, WRKBL
247 INTEGER LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,
248 $ LWORK_DGEBRD_NN, LWORK_DGELQF_MN,
249 $ LWORK_DGEQRF_MN,
250 $ LWORK_DORGBR_P_MM, LWORK_DORGBR_Q_NN,
251 $ LWORK_DORGLQ_MN, LWORK_DORGLQ_NN,
252 $ LWORK_DORGQR_MM, LWORK_DORGQR_MN,
253 $ LWORK_DORMBR_PRT_MM, LWORK_DORMBR_QLN_MM,
254 $ LWORK_DORMBR_PRT_MN, LWORK_DORMBR_QLN_MN,
255 $ LWORK_DORMBR_PRT_NN, LWORK_DORMBR_QLN_NN
256 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
257* ..
258* .. Local Arrays ..
259 INTEGER IDUM( 1 )
260 DOUBLE PRECISION DUM( 1 )
261* ..
262* .. External Subroutines ..
263 EXTERNAL dbdsdc, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
265 $ xerbla
266* ..
267* .. External Functions ..
268 LOGICAL LSAME, DISNAN
269 DOUBLE PRECISION DLAMCH, DLANGE, DROUNDUP_LWORK
270 EXTERNAL dlamch, dlange, lsame, disnan,
272* ..
273* .. Intrinsic Functions ..
274 INTRINSIC int, max, min, sqrt
275* ..
276* .. Executable Statements ..
277*
278* Test the input arguments
279*
280 info = 0
281 minmn = min( m, n )
282 wntqa = lsame( jobz, 'A' )
283 wntqs = lsame( jobz, 'S' )
284 wntqas = wntqa .OR. wntqs
285 wntqo = lsame( jobz, 'O' )
286 wntqn = lsame( jobz, 'N' )
287 lquery = ( lwork.EQ.-1 )
288*
289 IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
290 info = -1
291 ELSE IF( m.LT.0 ) THEN
292 info = -2
293 ELSE IF( n.LT.0 ) THEN
294 info = -3
295 ELSE IF( lda.LT.max( 1, m ) ) THEN
296 info = -5
297 ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
298 $ ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
299 info = -8
300 ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
301 $ ( wntqs .AND. ldvt.LT.minmn ) .OR.
302 $ ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
303 info = -10
304 END IF
305*
306* Compute workspace
307* Note: Comments in the code beginning "Workspace:" describe the
308* minimal amount of workspace allocated at that point in the code,
309* as well as the preferred amount for good performance.
310* NB refers to the optimal block size for the immediately
311* following subroutine, as returned by ILAENV.
312*
313 IF( info.EQ.0 ) THEN
314 minwrk = 1
315 maxwrk = 1
316 bdspac = 0
317 mnthr = int( minmn*11.0d0 / 6.0d0 )
318 IF( m.GE.n .AND. minmn.GT.0 ) THEN
319*
320* Compute space needed for DBDSDC
321*
322 IF( wntqn ) THEN
323* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
324* keep 7*N for backwards compatibility.
325 bdspac = 7*n
326 ELSE
327 bdspac = 3*n*n + 4*n
328 END IF
329*
330* Compute space preferred for each routine
331 CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
332 $ dum(1), dum(1), -1, ierr )
333 lwork_dgebrd_mn = int( dum(1) )
334*
335 CALL dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
336 $ dum(1), dum(1), -1, ierr )
337 lwork_dgebrd_nn = int( dum(1) )
338*
339 CALL dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
340 lwork_dgeqrf_mn = int( dum(1) )
341*
342 CALL dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
343 $ ierr )
344 lwork_dorgbr_q_nn = int( dum(1) )
345*
346 CALL dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )
347 lwork_dorgqr_mm = int( dum(1) )
348*
349 CALL dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )
350 lwork_dorgqr_mn = int( dum(1) )
351*
352 CALL dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
353 $ dum(1), dum(1), n, dum(1), -1, ierr )
354 lwork_dormbr_prt_nn = int( dum(1) )
355*
356 CALL dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
357 $ dum(1), dum(1), n, dum(1), -1, ierr )
358 lwork_dormbr_qln_nn = int( dum(1) )
359*
360 CALL dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
361 $ dum(1), dum(1), m, dum(1), -1, ierr )
362 lwork_dormbr_qln_mn = int( dum(1) )
363*
364 CALL dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
365 $ dum(1), dum(1), m, dum(1), -1, ierr )
366 lwork_dormbr_qln_mm = int( dum(1) )
367*
368 IF( m.GE.mnthr ) THEN
369 IF( wntqn ) THEN
370*
371* Path 1 (M >> N, JOBZ='N')
372*
373 wrkbl = n + lwork_dgeqrf_mn
374 wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
375 maxwrk = max( wrkbl, bdspac + n )
376 minwrk = bdspac + n
377 ELSE IF( wntqo ) THEN
378*
379* Path 2 (M >> N, JOBZ='O')
380*
381 wrkbl = n + lwork_dgeqrf_mn
382 wrkbl = max( wrkbl, n + lwork_dorgqr_mn )
383 wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
384 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
385 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
386 wrkbl = max( wrkbl, 3*n + bdspac )
387 maxwrk = wrkbl + 2*n*n
388 minwrk = bdspac + 2*n*n + 3*n
389 ELSE IF( wntqs ) THEN
390*
391* Path 3 (M >> N, JOBZ='S')
392*
393 wrkbl = n + lwork_dgeqrf_mn
394 wrkbl = max( wrkbl, n + lwork_dorgqr_mn )
395 wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
396 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
397 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
398 wrkbl = max( wrkbl, 3*n + bdspac )
399 maxwrk = wrkbl + n*n
400 minwrk = bdspac + n*n + 3*n
401 ELSE IF( wntqa ) THEN
402*
403* Path 4 (M >> N, JOBZ='A')
404*
405 wrkbl = n + lwork_dgeqrf_mn
406 wrkbl = max( wrkbl, n + lwork_dorgqr_mm )
407 wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
408 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
409 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
410 wrkbl = max( wrkbl, 3*n + bdspac )
411 maxwrk = wrkbl + n*n
412 minwrk = n*n + max( 3*n + bdspac, n + m )
413 END IF
414 ELSE
415*
416* Path 5 (M >= N, but not much larger)
417*
418 wrkbl = 3*n + lwork_dgebrd_mn
419 IF( wntqn ) THEN
420* Path 5n (M >= N, jobz='N')
421 maxwrk = max( wrkbl, 3*n + bdspac )
422 minwrk = 3*n + max( m, bdspac )
423 ELSE IF( wntqo ) THEN
424* Path 5o (M >= N, jobz='O')
425 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
426 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
427 wrkbl = max( wrkbl, 3*n + bdspac )
428 maxwrk = wrkbl + m*n
429 minwrk = 3*n + max( m, n*n + bdspac )
430 ELSE IF( wntqs ) THEN
431* Path 5s (M >= N, jobz='S')
432 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
433 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
434 maxwrk = max( wrkbl, 3*n + bdspac )
435 minwrk = 3*n + max( m, bdspac )
436 ELSE IF( wntqa ) THEN
437* Path 5a (M >= N, jobz='A')
438 wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm )
439 wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
440 maxwrk = max( wrkbl, 3*n + bdspac )
441 minwrk = 3*n + max( m, bdspac )
442 END IF
443 END IF
444 ELSE IF( minmn.GT.0 ) THEN
445*
446* Compute space needed for DBDSDC
447*
448 IF( wntqn ) THEN
449* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
450* keep 7*N for backwards compatibility.
451 bdspac = 7*m
452 ELSE
453 bdspac = 3*m*m + 4*m
454 END IF
455*
456* Compute space preferred for each routine
457 CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
458 $ dum(1), dum(1), -1, ierr )
459 lwork_dgebrd_mn = int( dum(1) )
460*
461 CALL dgebrd( m, m, a, m, s, dum(1), dum(1),
462 $ dum(1), dum(1), -1, ierr )
463 lwork_dgebrd_mm = int( dum(1) )
464*
465 CALL dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
466 lwork_dgelqf_mn = int( dum(1) )
467*
468 CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
469 lwork_dorglq_nn = int( dum(1) )
470*
471 CALL dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
472 lwork_dorglq_mn = int( dum(1) )
473*
474 CALL dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )
475 lwork_dorgbr_p_mm = int( dum(1) )
476*
477 CALL dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
478 $ dum(1), dum(1), m, dum(1), -1, ierr )
479 lwork_dormbr_prt_mm = int( dum(1) )
480*
481 CALL dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
482 $ dum(1), dum(1), m, dum(1), -1, ierr )
483 lwork_dormbr_prt_mn = int( dum(1) )
484*
485 CALL dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
486 $ dum(1), dum(1), n, dum(1), -1, ierr )
487 lwork_dormbr_prt_nn = int( dum(1) )
488*
489 CALL dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
490 $ dum(1), dum(1), m, dum(1), -1, ierr )
491 lwork_dormbr_qln_mm = int( dum(1) )
492*
493 IF( n.GE.mnthr ) THEN
494 IF( wntqn ) THEN
495*
496* Path 1t (N >> M, JOBZ='N')
497*
498 wrkbl = m + lwork_dgelqf_mn
499 wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
500 maxwrk = max( wrkbl, bdspac + m )
501 minwrk = bdspac + m
502 ELSE IF( wntqo ) THEN
503*
504* Path 2t (N >> M, JOBZ='O')
505*
506 wrkbl = m + lwork_dgelqf_mn
507 wrkbl = max( wrkbl, m + lwork_dorglq_mn )
508 wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
509 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
510 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
511 wrkbl = max( wrkbl, 3*m + bdspac )
512 maxwrk = wrkbl + 2*m*m
513 minwrk = bdspac + 2*m*m + 3*m
514 ELSE IF( wntqs ) THEN
515*
516* Path 3t (N >> M, JOBZ='S')
517*
518 wrkbl = m + lwork_dgelqf_mn
519 wrkbl = max( wrkbl, m + lwork_dorglq_mn )
520 wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
521 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
522 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
523 wrkbl = max( wrkbl, 3*m + bdspac )
524 maxwrk = wrkbl + m*m
525 minwrk = bdspac + m*m + 3*m
526 ELSE IF( wntqa ) THEN
527*
528* Path 4t (N >> M, JOBZ='A')
529*
530 wrkbl = m + lwork_dgelqf_mn
531 wrkbl = max( wrkbl, m + lwork_dorglq_nn )
532 wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
533 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
534 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
535 wrkbl = max( wrkbl, 3*m + bdspac )
536 maxwrk = wrkbl + m*m
537 minwrk = m*m + max( 3*m + bdspac, m + n )
538 END IF
539 ELSE
540*
541* Path 5t (N > M, but not much larger)
542*
543 wrkbl = 3*m + lwork_dgebrd_mn
544 IF( wntqn ) THEN
545* Path 5tn (N > M, jobz='N')
546 maxwrk = max( wrkbl, 3*m + bdspac )
547 minwrk = 3*m + max( n, bdspac )
548 ELSE IF( wntqo ) THEN
549* Path 5to (N > M, jobz='O')
550 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
551 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
552 wrkbl = max( wrkbl, 3*m + bdspac )
553 maxwrk = wrkbl + m*n
554 minwrk = 3*m + max( n, m*m + bdspac )
555 ELSE IF( wntqs ) THEN
556* Path 5ts (N > M, jobz='S')
557 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
558 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
559 maxwrk = max( wrkbl, 3*m + bdspac )
560 minwrk = 3*m + max( n, bdspac )
561 ELSE IF( wntqa ) THEN
562* Path 5ta (N > M, jobz='A')
563 wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
564 wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn )
565 maxwrk = max( wrkbl, 3*m + bdspac )
566 minwrk = 3*m + max( n, bdspac )
567 END IF
568 END IF
569 END IF
570
571 maxwrk = max( maxwrk, minwrk )
572 work( 1 ) = droundup_lwork( maxwrk )
573*
574 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
575 info = -12
576 END IF
577 END IF
578*
579 IF( info.NE.0 ) THEN
580 CALL xerbla( 'DGESDD', -info )
581 RETURN
582 ELSE IF( lquery ) THEN
583 RETURN
584 END IF
585*
586* Quick return if possible
587*
588 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
589 RETURN
590 END IF
591*
592* Get machine constants
593*
594 eps = dlamch( 'P' )
595 smlnum = sqrt( dlamch( 'S' ) ) / eps
596 bignum = one / smlnum
597*
598* Scale A if max element outside range [SMLNUM,BIGNUM]
599*
600 anrm = dlange( 'M', m, n, a, lda, dum )
601 IF( disnan( anrm ) ) THEN
602 info = -4
603 RETURN
604 END IF
605 iscl = 0
606 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
607 iscl = 1
608 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
609 ELSE IF( anrm.GT.bignum ) THEN
610 iscl = 1
611 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
612 END IF
613*
614 IF( m.GE.n ) THEN
615*
616* A has at least as many rows as columns. If A has sufficiently
617* more rows than columns, first reduce using the QR
618* decomposition (if sufficient workspace available)
619*
620 IF( m.GE.mnthr ) THEN
621*
622 IF( wntqn ) THEN
623*
624* Path 1 (M >> N, JOBZ='N')
625* No singular vectors to be computed
626*
627 itau = 1
628 nwork = itau + n
629*
630* Compute A=Q*R
631* Workspace: need N [tau] + N [work]
632* Workspace: prefer N [tau] + N*NB [work]
633*
634 CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
635 $ lwork - nwork + 1, ierr )
636*
637* Zero out below R
638*
639 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
640 ie = 1
641 itauq = ie + n
642 itaup = itauq + n
643 nwork = itaup + n
644*
645* Bidiagonalize R in A
646* Workspace: need 3*N [e, tauq, taup] + N [work]
647* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
648*
649 CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
650 $ work( itaup ), work( nwork ), lwork-nwork+1,
651 $ ierr )
652 nwork = ie + n
653*
654* Perform bidiagonal SVD, computing singular values only
655* Workspace: need N [e] + BDSPAC
656*
657 CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
658 $ dum, idum, work( nwork ), iwork, info )
659*
660 ELSE IF( wntqo ) THEN
661*
662* Path 2 (M >> N, JOBZ = 'O')
663* N left singular vectors to be overwritten on A and
664* N right singular vectors to be computed in VT
665*
666 ir = 1
667*
668* WORK(IR) is LDWRKR by N
669*
670 IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
671 ldwrkr = lda
672 ELSE
673 ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
674 END IF
675 itau = ir + ldwrkr*n
676 nwork = itau + n
677*
678* Compute A=Q*R
679* Workspace: need N*N [R] + N [tau] + N [work]
680* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
681*
682 CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
683 $ lwork - nwork + 1, ierr )
684*
685* Copy R to WORK(IR), zeroing out below it
686*
687 CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
688 CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
689 $ ldwrkr )
690*
691* Generate Q in A
692* Workspace: need N*N [R] + N [tau] + N [work]
693* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
694*
695 CALL dorgqr( m, n, n, a, lda, work( itau ),
696 $ work( nwork ), lwork - nwork + 1, ierr )
697 ie = itau
698 itauq = ie + n
699 itaup = itauq + n
700 nwork = itaup + n
701*
702* Bidiagonalize R in WORK(IR)
703* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
704* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
705*
706 CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
707 $ work( itauq ), work( itaup ), work( nwork ),
708 $ lwork - nwork + 1, ierr )
709*
710* WORK(IU) is N by N
711*
712 iu = nwork
713 nwork = iu + n*n
714*
715* Perform bidiagonal SVD, computing left singular vectors
716* of bidiagonal matrix in WORK(IU) and computing right
717* singular vectors of bidiagonal matrix in VT
718* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
719*
720 CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
721 $ vt, ldvt, dum, idum, work( nwork ), iwork,
722 $ info )
723*
724* Overwrite WORK(IU) by left singular vectors of R
725* and VT by right singular vectors of R
726* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
727* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
728*
729 CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
730 $ work( itauq ), work( iu ), n, work( nwork ),
731 $ lwork - nwork + 1, ierr )
732 CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
733 $ work( itaup ), vt, ldvt, work( nwork ),
734 $ lwork - nwork + 1, ierr )
735*
736* Multiply Q in A by left singular vectors of R in
737* WORK(IU), storing result in WORK(IR) and copying to A
738* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
739* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
740*
741 DO 10 i = 1, m, ldwrkr
742 chunk = min( m - i + 1, ldwrkr )
743 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
744 $ lda, work( iu ), n, zero, work( ir ),
745 $ ldwrkr )
746 CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
747 $ a( i, 1 ), lda )
748 10 CONTINUE
749*
750 ELSE IF( wntqs ) THEN
751*
752* Path 3 (M >> N, JOBZ='S')
753* N left singular vectors to be computed in U and
754* N right singular vectors to be computed in VT
755*
756 ir = 1
757*
758* WORK(IR) is N by N
759*
760 ldwrkr = n
761 itau = ir + ldwrkr*n
762 nwork = itau + n
763*
764* Compute A=Q*R
765* Workspace: need N*N [R] + N [tau] + N [work]
766* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
767*
768 CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
769 $ lwork - nwork + 1, ierr )
770*
771* Copy R to WORK(IR), zeroing out below it
772*
773 CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),
775 $ ldwrkr )
776*
777* Generate Q in A
778* Workspace: need N*N [R] + N [tau] + N [work]
779* Workspace: prefer N*N [R] + N [tau] + N*NB [work]
780*
781 CALL dorgqr( m, n, n, a, lda, work( itau ),
782 $ work( nwork ), lwork - nwork + 1, ierr )
783 ie = itau
784 itauq = ie + n
785 itaup = itauq + n
786 nwork = itaup + n
787*
788* Bidiagonalize R in WORK(IR)
789* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
790* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
791*
792 CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
793 $ work( itauq ), work( itaup ), work( nwork ),
794 $ lwork - nwork + 1, ierr )
795*
796* Perform bidiagonal SVD, computing left singular vectors
797* of bidiagoal matrix in U and computing right singular
798* vectors of bidiagonal matrix in VT
799* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
800*
801 CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
802 $ ldvt, dum, idum, work( nwork ), iwork,
803 $ info )
804*
805* Overwrite U by left singular vectors of R and VT
806* by right singular vectors of R
807* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
808* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
809*
810 CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,
811 $ work( itauq ), u, ldu, work( nwork ),
812 $ lwork - nwork + 1, ierr )
813*
814 CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,
815 $ work( itaup ), vt, ldvt, work( nwork ),
816 $ lwork - nwork + 1, ierr )
817*
818* Multiply Q in A by left singular vectors of R in
819* WORK(IR), storing result in U
820* Workspace: need N*N [R]
821*
822 CALL dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
823 CALL dgemm( 'N', 'N', m, n, n, one, a, lda, work( ir ),
824 $ ldwrkr, zero, u, ldu )
825*
826 ELSE IF( wntqa ) THEN
827*
828* Path 4 (M >> N, JOBZ='A')
829* M left singular vectors to be computed in U and
830* N right singular vectors to be computed in VT
831*
832 iu = 1
833*
834* WORK(IU) is N by N
835*
836 ldwrku = n
837 itau = iu + ldwrku*n
838 nwork = itau + n
839*
840* Compute A=Q*R, copying result to U
841* Workspace: need N*N [U] + N [tau] + N [work]
842* Workspace: prefer N*N [U] + N [tau] + N*NB [work]
843*
844 CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),
845 $ lwork - nwork + 1, ierr )
846 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
847*
848* Generate Q in U
849* Workspace: need N*N [U] + N [tau] + M [work]
850* Workspace: prefer N*N [U] + N [tau] + M*NB [work]
851 CALL dorgqr( m, m, n, u, ldu, work( itau ),
852 $ work( nwork ), lwork - nwork + 1, ierr )
853*
854* Produce R in A, zeroing out other entries
855*
856 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
857 ie = itau
858 itauq = ie + n
859 itaup = itauq + n
860 nwork = itaup + n
861*
862* Bidiagonalize R in A
863* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
864* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
865*
866 CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
867 $ work( itaup ), work( nwork ), lwork-nwork+1,
868 $ ierr )
869*
870* Perform bidiagonal SVD, computing left singular vectors
871* of bidiagonal matrix in WORK(IU) and computing right
872* singular vectors of bidiagonal matrix in VT
873* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
874*
875 CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,
876 $ vt, ldvt, dum, idum, work( nwork ), iwork,
877 $ info )
878*
879* Overwrite WORK(IU) by left singular vectors of R and VT
880* by right singular vectors of R
881* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
882* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
883*
884 CALL dormbr( 'Q', 'L', 'N', n, n, n, a, lda,
885 $ work( itauq ), work( iu ), ldwrku,
886 $ work( nwork ), lwork - nwork + 1, ierr )
887 CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
888 $ work( itaup ), vt, ldvt, work( nwork ),
889 $ lwork - nwork + 1, ierr )
890*
891* Multiply Q in U by left singular vectors of R in
892* WORK(IU), storing result in A
893* Workspace: need N*N [U]
894*
895 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu, work( iu ),
896 $ ldwrku, zero, a, lda )
897*
898* Copy left singular vectors of A from A to U
899*
900 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
901*
902 END IF
903*
904 ELSE
905*
906* M .LT. MNTHR
907*
908* Path 5 (M >= N, but not much larger)
909* Reduce to bidiagonal form without QR decomposition
910*
911 ie = 1
912 itauq = ie + n
913 itaup = itauq + n
914 nwork = itaup + n
915*
916* Bidiagonalize A
917* Workspace: need 3*N [e, tauq, taup] + M [work]
918* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
919*
920 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
921 $ work( itaup ), work( nwork ), lwork-nwork+1,
922 $ ierr )
923 IF( wntqn ) THEN
924*
925* Path 5n (M >= N, JOBZ='N')
926* Perform bidiagonal SVD, only computing singular values
927* Workspace: need 3*N [e, tauq, taup] + BDSPAC
928*
929 CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,
930 $ dum, idum, work( nwork ), iwork, info )
931 ELSE IF( wntqo ) THEN
932* Path 5o (M >= N, JOBZ='O')
933 iu = nwork
934 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
935*
936* WORK( IU ) is M by N
937*
938 ldwrku = m
939 nwork = iu + ldwrku*n
940 CALL dlaset( 'F', m, n, zero, zero, work( iu ),
941 $ ldwrku )
942* IR is unused; silence compile warnings
943 ir = -1
944 ELSE
945*
946* WORK( IU ) is N by N
947*
948 ldwrku = n
949 nwork = iu + ldwrku*n
950*
951* WORK(IR) is LDWRKR by N
952*
953 ir = nwork
954 ldwrkr = ( lwork - n*n - 3*n ) / n
955 END IF
956 nwork = iu + ldwrku*n
957*
958* Perform bidiagonal SVD, computing left singular vectors
959* of bidiagonal matrix in WORK(IU) and computing right
960* singular vectors of bidiagonal matrix in VT
961* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
962*
963 CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
964 $ ldwrku, vt, ldvt, dum, idum, work( nwork ),
965 $ iwork, info )
966*
967* Overwrite VT by right singular vectors of A
968* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
969* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
970*
971 CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
972 $ work( itaup ), vt, ldvt, work( nwork ),
973 $ lwork - nwork + 1, ierr )
974*
975 IF( lwork .GE. m*n + 3*n + bdspac ) THEN
976*
977* Path 5o-fast
978* Overwrite WORK(IU) by left singular vectors of A
979* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
980* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
981*
982 CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
983 $ work( itauq ), work( iu ), ldwrku,
984 $ work( nwork ), lwork - nwork + 1, ierr )
985*
986* Copy left singular vectors of A from WORK(IU) to A
987*
988 CALL dlacpy( 'F', m, n, work( iu ), ldwrku, a, lda )
989 ELSE
990*
991* Path 5o-slow
992* Generate Q in A
993* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
994* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
995*
996 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
997 $ work( nwork ), lwork - nwork + 1, ierr )
998*
999* Multiply Q in A by left singular vectors of
1000* bidiagonal matrix in WORK(IU), storing result in
1001* WORK(IR) and copying to A
1002* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
1003* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
1004*
1005 DO 20 i = 1, m, ldwrkr
1006 chunk = min( m - i + 1, ldwrkr )
1007 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
1008 $ lda, work( iu ), ldwrku, zero,
1009 $ work( ir ), ldwrkr )
1010 CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
1011 $ a( i, 1 ), lda )
1012 20 CONTINUE
1013 END IF
1014*
1015 ELSE IF( wntqs ) THEN
1016*
1017* Path 5s (M >= N, JOBZ='S')
1018* Perform bidiagonal SVD, computing left singular vectors
1019* of bidiagonal matrix in U and computing right singular
1020* vectors of bidiagonal matrix in VT
1021* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1022*
1023 CALL dlaset( 'F', m, n, zero, zero, u, ldu )
1024 CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1025 $ ldvt, dum, idum, work( nwork ), iwork,
1026 $ info )
1027*
1028* Overwrite U by left singular vectors of A and VT
1029* by right singular vectors of A
1030* Workspace: need 3*N [e, tauq, taup] + N [work]
1031* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
1032*
1033 CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
1034 $ work( itauq ), u, ldu, work( nwork ),
1035 $ lwork - nwork + 1, ierr )
1036 CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
1037 $ work( itaup ), vt, ldvt, work( nwork ),
1038 $ lwork - nwork + 1, ierr )
1039 ELSE IF( wntqa ) THEN
1040*
1041* Path 5a (M >= N, JOBZ='A')
1042* Perform bidiagonal SVD, computing left singular vectors
1043* of bidiagonal matrix in U and computing right singular
1044* vectors of bidiagonal matrix in VT
1045* Workspace: need 3*N [e, tauq, taup] + BDSPAC
1046*
1047 CALL dlaset( 'F', m, m, zero, zero, u, ldu )
1048 CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
1049 $ ldvt, dum, idum, work( nwork ), iwork,
1050 $ info )
1051*
1052* Set the right corner of U to identity matrix
1053*
1054 IF( m.GT.n ) THEN
1055 CALL dlaset( 'F', m - n, m - n, zero, one, u(n+1,n+1),
1056 $ ldu )
1057 END IF
1058*
1059* Overwrite U by left singular vectors of A and VT
1060* by right singular vectors of A
1061* Workspace: need 3*N [e, tauq, taup] + M [work]
1062* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
1063*
1064 CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1065 $ work( itauq ), u, ldu, work( nwork ),
1066 $ lwork - nwork + 1, ierr )
1067 CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
1068 $ work( itaup ), vt, ldvt, work( nwork ),
1069 $ lwork - nwork + 1, ierr )
1070 END IF
1071*
1072 END IF
1073*
1074 ELSE
1075*
1076* A has more columns than rows. If A has sufficiently more
1077* columns than rows, first reduce using the LQ decomposition (if
1078* sufficient workspace available)
1079*
1080 IF( n.GE.mnthr ) THEN
1081*
1082 IF( wntqn ) THEN
1083*
1084* Path 1t (N >> M, JOBZ='N')
1085* No singular vectors to be computed
1086*
1087 itau = 1
1088 nwork = itau + m
1089*
1090* Compute A=L*Q
1091* Workspace: need M [tau] + M [work]
1092* Workspace: prefer M [tau] + M*NB [work]
1093*
1094 CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1095 $ lwork - nwork + 1, ierr )
1096*
1097* Zero out above L
1098*
1099 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1100 ie = 1
1101 itauq = ie + m
1102 itaup = itauq + m
1103 nwork = itaup + m
1104*
1105* Bidiagonalize L in A
1106* Workspace: need 3*M [e, tauq, taup] + M [work]
1107* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
1108*
1109 CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1110 $ work( itaup ), work( nwork ), lwork-nwork+1,
1111 $ ierr )
1112 nwork = ie + m
1113*
1114* Perform bidiagonal SVD, computing singular values only
1115* Workspace: need M [e] + BDSPAC
1116*
1117 CALL dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum, 1,
1118 $ dum, idum, work( nwork ), iwork, info )
1119*
1120 ELSE IF( wntqo ) THEN
1121*
1122* Path 2t (N >> M, JOBZ='O')
1123* M right singular vectors to be overwritten on A and
1124* M left singular vectors to be computed in U
1125*
1126 ivt = 1
1127*
1128* WORK(IVT) is M by M
1129* WORK(IL) is M by M; it is later resized to M by chunk for gemm
1130*
1131 il = ivt + m*m
1132 IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
1133 ldwrkl = m
1134 chunk = n
1135 ELSE
1136 ldwrkl = m
1137 chunk = ( lwork - m*m ) / m
1138 END IF
1139 itau = il + ldwrkl*m
1140 nwork = itau + m
1141*
1142* Compute A=L*Q
1143* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1144* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1145*
1146 CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1147 $ lwork - nwork + 1, ierr )
1148*
1149* Copy L to WORK(IL), zeroing about above it
1150*
1151 CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1152 CALL dlaset( 'U', m - 1, m - 1, zero, zero,
1153 $ work( il + ldwrkl ), ldwrkl )
1154*
1155* Generate Q in A
1156* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
1157* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
1158*
1159 CALL dorglq( m, n, m, a, lda, work( itau ),
1160 $ work( nwork ), lwork - nwork + 1, ierr )
1161 ie = itau
1162 itauq = ie + m
1163 itaup = itauq + m
1164 nwork = itaup + m
1165*
1166* Bidiagonalize L in WORK(IL)
1167* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1168* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1169*
1170 CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1171 $ work( itauq ), work( itaup ), work( nwork ),
1172 $ lwork - nwork + 1, ierr )
1173*
1174* Perform bidiagonal SVD, computing left singular vectors
1175* of bidiagonal matrix in U, and computing right singular
1176* vectors of bidiagonal matrix in WORK(IVT)
1177* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1178*
1179 CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1180 $ work( ivt ), m, dum, idum, work( nwork ),
1181 $ iwork, info )
1182*
1183* Overwrite U by left singular vectors of L and WORK(IVT)
1184* by right singular vectors of L
1185* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
1186* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1187*
1188 CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1189 $ work( itauq ), u, ldu, work( nwork ),
1190 $ lwork - nwork + 1, ierr )
1191 CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1192 $ work( itaup ), work( ivt ), m,
1193 $ work( nwork ), lwork - nwork + 1, ierr )
1194*
1195* Multiply right singular vectors of L in WORK(IVT) by Q
1196* in A, storing result in WORK(IL) and copying to A
1197* Workspace: need M*M [VT] + M*M [L]
1198* Workspace: prefer M*M [VT] + M*N [L]
1199* At this point, L is resized as M by chunk.
1200*
1201 DO 30 i = 1, n, chunk
1202 blk = min( n - i + 1, chunk )
1203 CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ), m,
1204 $ a( 1, i ), lda, zero, work( il ), ldwrkl )
1205 CALL dlacpy( 'F', m, blk, work( il ), ldwrkl,
1206 $ a( 1, i ), lda )
1207 30 CONTINUE
1208*
1209 ELSE IF( wntqs ) THEN
1210*
1211* Path 3t (N >> M, JOBZ='S')
1212* M right singular vectors to be computed in VT and
1213* M left singular vectors to be computed in U
1214*
1215 il = 1
1216*
1217* WORK(IL) is M by M
1218*
1219 ldwrkl = m
1220 itau = il + ldwrkl*m
1221 nwork = itau + m
1222*
1223* Compute A=L*Q
1224* Workspace: need M*M [L] + M [tau] + M [work]
1225* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1226*
1227 CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1228 $ lwork - nwork + 1, ierr )
1229*
1230* Copy L to WORK(IL), zeroing out above it
1231*
1232 CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
1233 CALL dlaset( 'U', m - 1, m - 1, zero, zero,
1234 $ work( il + ldwrkl ), ldwrkl )
1235*
1236* Generate Q in A
1237* Workspace: need M*M [L] + M [tau] + M [work]
1238* Workspace: prefer M*M [L] + M [tau] + M*NB [work]
1239*
1240 CALL dorglq( m, n, m, a, lda, work( itau ),
1241 $ work( nwork ), lwork - nwork + 1, ierr )
1242 ie = itau
1243 itauq = ie + m
1244 itaup = itauq + m
1245 nwork = itaup + m
1246*
1247* Bidiagonalize L in WORK(IU).
1248* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1249* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
1250*
1251 CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
1252 $ work( itauq ), work( itaup ), work( nwork ),
1253 $ lwork - nwork + 1, ierr )
1254*
1255* Perform bidiagonal SVD, computing left singular vectors
1256* of bidiagonal matrix in U and computing right singular
1257* vectors of bidiagonal matrix in VT
1258* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
1259*
1260 CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
1261 $ ldvt, dum, idum, work( nwork ), iwork,
1262 $ info )
1263*
1264* Overwrite U by left singular vectors of L and VT
1265* by right singular vectors of L
1266* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
1267* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
1268*
1269 CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ), ldwrkl,
1270 $ work( itauq ), u, ldu, work( nwork ),
1271 $ lwork - nwork + 1, ierr )
1272 CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ), ldwrkl,
1273 $ work( itaup ), vt, ldvt, work( nwork ),
1274 $ lwork - nwork + 1, ierr )
1275*
1276* Multiply right singular vectors of L in WORK(IL) by
1277* Q in A, storing result in VT
1278* Workspace: need M*M [L]
1279*
1280 CALL dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
1281 CALL dgemm( 'N', 'N', m, n, m, one, work( il ), ldwrkl,
1282 $ a, lda, zero, vt, ldvt )
1283*
1284 ELSE IF( wntqa ) THEN
1285*
1286* Path 4t (N >> M, JOBZ='A')
1287* N right singular vectors to be computed in VT and
1288* M left singular vectors to be computed in U
1289*
1290 ivt = 1
1291*
1292* WORK(IVT) is M by M
1293*
1294 ldwkvt = m
1295 itau = ivt + ldwkvt*m
1296 nwork = itau + m
1297*
1298* Compute A=L*Q, copying result to VT
1299* Workspace: need M*M [VT] + M [tau] + M [work]
1300* Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
1301*
1302 CALL dgelqf( m, n, a, lda, work( itau ), work( nwork ),
1303 $ lwork - nwork + 1, ierr )
1304 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
1305*
1306* Generate Q in VT
1307* Workspace: need M*M [VT] + M [tau] + N [work]
1308* Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
1309*
1310 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
1311 $ work( nwork ), lwork - nwork + 1, ierr )
1312*
1313* Produce L in A, zeroing out other entries
1314*
1315 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
1316 ie = itau
1317 itauq = ie + m
1318 itaup = itauq + m
1319 nwork = itaup + m
1320*
1321* Bidiagonalize L in A
1322* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
1323* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
1324*
1325 CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
1326 $ work( itaup ), work( nwork ), lwork-nwork+1,
1327 $ ierr )
1328*
1329* Perform bidiagonal SVD, computing left singular vectors
1330* of bidiagonal matrix in U and computing right singular
1331* vectors of bidiagonal matrix in WORK(IVT)
1332* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
1333*
1334 CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
1335 $ work( ivt ), ldwkvt, dum, idum,
1336 $ work( nwork ), iwork, info )
1337*
1338* Overwrite U by left singular vectors of L and WORK(IVT)
1339* by right singular vectors of L
1340* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
1341* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
1342*
1343 CALL dormbr( 'Q', 'L', 'N', m, m, m, a, lda,
1344 $ work( itauq ), u, ldu, work( nwork ),
1345 $ lwork - nwork + 1, ierr )
1346 CALL dormbr( 'P', 'R', 'T', m, m, m, a, lda,
1347 $ work( itaup ), work( ivt ), ldwkvt,
1348 $ work( nwork ), lwork - nwork + 1, ierr )
1349*
1350* Multiply right singular vectors of L in WORK(IVT) by
1351* Q in VT, storing result in A
1352* Workspace: need M*M [VT]
1353*
1354 CALL dgemm( 'N', 'N', m, n, m, one, work( ivt ), ldwkvt,
1355 $ vt, ldvt, zero, a, lda )
1356*
1357* Copy right singular vectors of A from A to VT
1358*
1359 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
1360*
1361 END IF
1362*
1363 ELSE
1364*
1365* N .LT. MNTHR
1366*
1367* Path 5t (N > M, but not much larger)
1368* Reduce to bidiagonal form without LQ decomposition
1369*
1370 ie = 1
1371 itauq = ie + m
1372 itaup = itauq + m
1373 nwork = itaup + m
1374*
1375* Bidiagonalize A
1376* Workspace: need 3*M [e, tauq, taup] + N [work]
1377* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
1378*
1379 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1380 $ work( itaup ), work( nwork ), lwork-nwork+1,
1381 $ ierr )
1382 IF( wntqn ) THEN
1383*
1384* Path 5tn (N > M, JOBZ='N')
1385* Perform bidiagonal SVD, only computing singular values
1386* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1387*
1388 CALL dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum, 1,
1389 $ dum, idum, work( nwork ), iwork, info )
1390 ELSE IF( wntqo ) THEN
1391* Path 5to (N > M, JOBZ='O')
1392 ldwkvt = m
1393 ivt = nwork
1394 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1395*
1396* WORK( IVT ) is M by N
1397*
1398 CALL dlaset( 'F', m, n, zero, zero, work( ivt ),
1399 $ ldwkvt )
1400 nwork = ivt + ldwkvt*n
1401* IL is unused; silence compile warnings
1402 il = -1
1403 ELSE
1404*
1405* WORK( IVT ) is M by M
1406*
1407 nwork = ivt + ldwkvt*m
1408 il = nwork
1409*
1410* WORK(IL) is M by CHUNK
1411*
1412 chunk = ( lwork - m*m - 3*m ) / m
1413 END IF
1414*
1415* Perform bidiagonal SVD, computing left singular vectors
1416* of bidiagonal matrix in U and computing right singular
1417* vectors of bidiagonal matrix in WORK(IVT)
1418* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
1419*
1420 CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
1421 $ work( ivt ), ldwkvt, dum, idum,
1422 $ work( nwork ), iwork, info )
1423*
1424* Overwrite U by left singular vectors of A
1425* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1426* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1427*
1428 CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1429 $ work( itauq ), u, ldu, work( nwork ),
1430 $ lwork - nwork + 1, ierr )
1431*
1432 IF( lwork .GE. m*n + 3*m + bdspac ) THEN
1433*
1434* Path 5to-fast
1435* Overwrite WORK(IVT) by left singular vectors of A
1436* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
1437* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
1438*
1439 CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
1440 $ work( itaup ), work( ivt ), ldwkvt,
1441 $ work( nwork ), lwork - nwork + 1, ierr )
1442*
1443* Copy right singular vectors of A from WORK(IVT) to A
1444*
1445 CALL dlacpy( 'F', m, n, work( ivt ), ldwkvt, a, lda )
1446 ELSE
1447*
1448* Path 5to-slow
1449* Generate P**T in A
1450* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
1451* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
1452*
1453 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
1454 $ work( nwork ), lwork - nwork + 1, ierr )
1455*
1456* Multiply Q in A by right singular vectors of
1457* bidiagonal matrix in WORK(IVT), storing result in
1458* WORK(IL) and copying to A
1459* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
1460* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
1461*
1462 DO 40 i = 1, n, chunk
1463 blk = min( n - i + 1, chunk )
1464 CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ),
1465 $ ldwkvt, a( 1, i ), lda, zero,
1466 $ work( il ), m )
1467 CALL dlacpy( 'F', m, blk, work( il ), m, a( 1, i ),
1468 $ lda )
1469 40 CONTINUE
1470 END IF
1471 ELSE IF( wntqs ) THEN
1472*
1473* Path 5ts (N > M, JOBZ='S')
1474* Perform bidiagonal SVD, computing left singular vectors
1475* of bidiagonal matrix in U and computing right singular
1476* vectors of bidiagonal matrix in VT
1477* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1478*
1479 CALL dlaset( 'F', m, n, zero, zero, vt, ldvt )
1480 CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1481 $ ldvt, dum, idum, work( nwork ), iwork,
1482 $ info )
1483*
1484* Overwrite U by left singular vectors of A and VT
1485* by right singular vectors of A
1486* Workspace: need 3*M [e, tauq, taup] + M [work]
1487* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
1488*
1489 CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1490 $ work( itauq ), u, ldu, work( nwork ),
1491 $ lwork - nwork + 1, ierr )
1492 CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
1493 $ work( itaup ), vt, ldvt, work( nwork ),
1494 $ lwork - nwork + 1, ierr )
1495 ELSE IF( wntqa ) THEN
1496*
1497* Path 5ta (N > M, JOBZ='A')
1498* Perform bidiagonal SVD, computing left singular vectors
1499* of bidiagonal matrix in U and computing right singular
1500* vectors of bidiagonal matrix in VT
1501* Workspace: need 3*M [e, tauq, taup] + BDSPAC
1502*
1503 CALL dlaset( 'F', n, n, zero, zero, vt, ldvt )
1504 CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
1505 $ ldvt, dum, idum, work( nwork ), iwork,
1506 $ info )
1507*
1508* Set the right corner of VT to identity matrix
1509*
1510 IF( n.GT.m ) THEN
1511 CALL dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
1512 $ ldvt )
1513 END IF
1514*
1515* Overwrite U by left singular vectors of A and VT
1516* by right singular vectors of A
1517* Workspace: need 3*M [e, tauq, taup] + N [work]
1518* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
1519*
1520 CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
1521 $ work( itauq ), u, ldu, work( nwork ),
1522 $ lwork - nwork + 1, ierr )
1523 CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
1524 $ work( itaup ), vt, ldvt, work( nwork ),
1525 $ lwork - nwork + 1, ierr )
1526 END IF
1527*
1528 END IF
1529*
1530 END IF
1531*
1532* Undo scaling if necessary
1533*
1534 IF( iscl.EQ.1 ) THEN
1535 IF( anrm.GT.bignum )
1536 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
1537 $ ierr )
1538 IF( anrm.LT.smlnum )
1539 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
1540 $ ierr )
1541 END IF
1542*
1543* Return optimal workspace in WORK(1)
1544*
1545 work( 1 ) = droundup_lwork( maxwrk )
1546*
1547 RETURN
1548*
1549* End of DGESDD
1550*
double precision function droundup_lwork(lwork)
DROUNDUP_LWORK
logical function disnan(din)
DISNAN tests input for NaN.
Definition disnan.f:59
subroutine dbdsdc(uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq, work, iwork, info)
DBDSDC
Definition dbdsdc.f:205
subroutine dorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
DORGBR
Definition dorgbr.f:157
double precision function dlange(norm, m, n, a, lda, work)
DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition dlange.f:114
subroutine dgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
DGEBRD
Definition dgebrd.f:205
subroutine dormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMBR
Definition dormbr.f:195
subroutine dorglq(m, n, k, a, lda, tau, work, lwork, info)
DORGLQ
Definition dorglq.f:127
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187

◆ dgesvd()

subroutine dgesvd ( character jobu,
character jobvt,
integer m,
integer n,
double precision, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldvt, * ) vt,
integer ldvt,
double precision, dimension( * ) work,
integer lwork,
integer info )

DGESVD computes the singular value decomposition (SVD) for GE matrices

Download DGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DGESVD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U are returned in array U:
!>          = 'S':  the first min(m,n) columns of U (the left singular
!>                  vectors) are returned in the array U;
!>          = 'O':  the first min(m,n) columns of U (the left singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>          Specifies options for computing all or part of the matrix
!>          V**T:
!>          = 'A':  all N rows of V**T are returned in the array VT;
!>          = 'S':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are returned in the array VT;
!>          = 'O':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no rows of V**T (no right singular vectors) are
!>                  computed.
!>
!>          JOBVT and JOBU cannot both be 'O'.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBU = 'O',  A is overwritten with the first min(m,n)
!>                          columns of U (the left singular vectors,
!>                          stored columnwise);
!>          if JOBVT = 'O', A is overwritten with the first min(m,n)
!>                          rows of V**T (the right singular vectors,
!>                          stored rowwise);
!>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
!>                          are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
!>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
!>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
!>          if JOBU = 'S', U contains the first min(m,n) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBU = 'N' or 'O', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'S' or 'A', LDU >= M.
!> 
[out]VT
!>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
!>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
!>          V**T;
!>          if JOBVT = 'S', VT contains the first min(m,n) rows of
!>          V**T (the right singular vectors, stored rowwise);
!>          if JOBVT = 'N' or 'O', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
!>          superdiagonal elements of an upper bidiagonal matrix B
!>          whose diagonal is in S (not necessarily sorted). B
!>          satisfies A = U * B * VT, so it has the same singular values
!>          as A, and singular vectors related by U and VT.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
!>             - PATH 1  (M much larger than N, JOBU='N')
!>             - PATH 1t (N much larger than M, JOBVT='N')
!>          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if DBDSQR did not converge, INFO specifies how many
!>                superdiagonals of an intermediate bidiagonal form B
!>                did not converge to zero. See the description of WORK
!>                above for details.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 209 of file dgesvd.f.

211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
222 $ VT( LDVT, * ), WORK( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 DOUBLE PRECISION ZERO, ONE
229 parameter( zero = 0.0d0, one = 1.0d0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237 $ NRVT, WRKBL
238 INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
239 $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
240 $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
241 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 DOUBLE PRECISION DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 DOUBLE PRECISION DLAMCH, DLANGE
255 EXTERNAL lsame, ilaenv, dlamch, dlange
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for DBDSQR
309*
310 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for DGEQRF
313 CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_dgeqrf = int( dum(1) )
315* Compute space needed for DORGQR
316 CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_dorgqr_n = int( dum(1) )
318 CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_dorgqr_m = int( dum(1) )
320* Compute space needed for DGEBRD
321 CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_dgebrd = int( dum(1) )
324* Compute space needed for DORGBR P
325 CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_dorgbr_p = int( dum(1) )
328* Compute space needed for DORGBR Q
329 CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_dorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_dgeqrf
339 maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_dgeqrf
349 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
350 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
351 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
354 minwrk = max( 3*n + m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_dgeqrf
361 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
362 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
363 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
364 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
367 minwrk = max( 3*n + m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_dgeqrf
373 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
374 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
375 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n + m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_dgeqrf
384 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
385 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
386 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
387 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n + m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_dgeqrf
397 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
398 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
399 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
400 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n + m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_dgeqrf
409 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
410 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
411 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n + m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_dgeqrf
420 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
421 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
422 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
423 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n + m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_dgeqrf
433 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
434 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
435 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
436 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n + m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_dgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_dgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_dorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_dorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n + m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for DBDSQR
470*
471 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for DGELQF
474 CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_dgelqf = int( dum(1) )
476* Compute space needed for DORGLQ
477 CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_dorglq_n = int( dum(1) )
479 CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_dorglq_m = int( dum(1) )
481* Compute space needed for DGEBRD
482 CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_dgebrd = int( dum(1) )
485* Compute space needed for DORGBR P
486 CALL dorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_dorgbr_p = int( dum(1) )
489* Compute space needed for DORGBR Q
490 CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_dorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_dgelqf
499 maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_dgelqf
509 wrkbl = max( wrkbl, m + lwork_dorglq_m )
510 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
511 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
514 minwrk = max( 3*m + n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_dgelqf
521 wrkbl = max( wrkbl, m + lwork_dorglq_m )
522 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
523 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
524 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
527 minwrk = max( 3*m + n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_dgelqf
533 wrkbl = max( wrkbl, m + lwork_dorglq_m )
534 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
535 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m + n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_dgelqf
544 wrkbl = max( wrkbl, m + lwork_dorglq_m )
545 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
546 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
547 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m + n, bdspac )
551 ELSE IF( wntvs .AND. wntuas ) THEN
552*
553* Path 6t(N much larger than M, JOBU='S' or 'A',
554* JOBVT='S')
555*
556 wrkbl = m + lwork_dgelqf
557 wrkbl = max( wrkbl, m + lwork_dorglq_m )
558 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
559 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
560 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
561 wrkbl = max( wrkbl, bdspac )
562 maxwrk = m*m + wrkbl
563 minwrk = max( 3*m + n, bdspac )
564 ELSE IF( wntva .AND. wntun ) THEN
565*
566* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567*
568 wrkbl = m + lwork_dgelqf
569 wrkbl = max( wrkbl, m + lwork_dorglq_n )
570 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
571 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
572 wrkbl = max( wrkbl, bdspac )
573 maxwrk = m*m + wrkbl
574 minwrk = max( 3*m + n, bdspac )
575 ELSE IF( wntva .AND. wntuo ) THEN
576*
577* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578*
579 wrkbl = m + lwork_dgelqf
580 wrkbl = max( wrkbl, m + lwork_dorglq_n )
581 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
582 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
583 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
584 wrkbl = max( wrkbl, bdspac )
585 maxwrk = 2*m*m + wrkbl
586 minwrk = max( 3*m + n, bdspac )
587 ELSE IF( wntva .AND. wntuas ) THEN
588*
589* Path 9t(N much larger than M, JOBU='S' or 'A',
590* JOBVT='A')
591*
592 wrkbl = m + lwork_dgelqf
593 wrkbl = max( wrkbl, m + lwork_dorglq_n )
594 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
595 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
596 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
597 wrkbl = max( wrkbl, bdspac )
598 maxwrk = m*m + wrkbl
599 minwrk = max( 3*m + n, bdspac )
600 END IF
601 ELSE
602*
603* Path 10t(N greater than M, but not much larger)
604*
605 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
606 $ dum(1), dum(1), -1, ierr )
607 lwork_dgebrd = int( dum(1) )
608 maxwrk = 3*m + lwork_dgebrd
609 IF( wntvs .OR. wntvo ) THEN
610* Compute space needed for DORGBR P
611 CALL dorgbr( 'P', m, n, m, a, n, dum(1),
612 $ dum(1), -1, ierr )
613 lwork_dorgbr_p = int( dum(1) )
614 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
615 END IF
616 IF( wntva ) THEN
617 CALL dorgbr( 'P', n, n, m, a, n, dum(1),
618 $ dum(1), -1, ierr )
619 lwork_dorgbr_p = int( dum(1) )
620 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
621 END IF
622 IF( .NOT.wntun ) THEN
623 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
624 END IF
625 maxwrk = max( maxwrk, bdspac )
626 minwrk = max( 3*m + n, bdspac )
627 END IF
628 END IF
629 maxwrk = max( maxwrk, minwrk )
630 work( 1 ) = maxwrk
631*
632 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
633 info = -13
634 END IF
635 END IF
636*
637 IF( info.NE.0 ) THEN
638 CALL xerbla( 'DGESVD', -info )
639 RETURN
640 ELSE IF( lquery ) THEN
641 RETURN
642 END IF
643*
644* Quick return if possible
645*
646 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
647 RETURN
648 END IF
649*
650* Get machine constants
651*
652 eps = dlamch( 'P' )
653 smlnum = sqrt( dlamch( 'S' ) ) / eps
654 bignum = one / smlnum
655*
656* Scale A if max element outside range [SMLNUM,BIGNUM]
657*
658 anrm = dlange( 'M', m, n, a, lda, dum )
659 iscl = 0
660 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
661 iscl = 1
662 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
663 ELSE IF( anrm.GT.bignum ) THEN
664 iscl = 1
665 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
666 END IF
667*
668 IF( m.GE.n ) THEN
669*
670* A has at least as many rows as columns. If A has sufficiently
671* more rows than columns, first reduce using the QR
672* decomposition (if sufficient workspace available)
673*
674 IF( m.GE.mnthr ) THEN
675*
676 IF( wntun ) THEN
677*
678* Path 1 (M much larger than N, JOBU='N')
679* No left singular vectors to be computed
680*
681 itau = 1
682 iwork = itau + n
683*
684* Compute A=Q*R
685* (Workspace: need 2*N, prefer N + N*NB)
686*
687 CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
688 $ lwork-iwork+1, ierr )
689*
690* Zero out below R
691*
692 IF( n .GT. 1 ) THEN
693 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
694 $ lda )
695 END IF
696 ie = 1
697 itauq = ie + n
698 itaup = itauq + n
699 iwork = itaup + n
700*
701* Bidiagonalize R in A
702* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
703*
704 CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
705 $ work( itaup ), work( iwork ), lwork-iwork+1,
706 $ ierr )
707 ncvt = 0
708 IF( wntvo .OR. wntvas ) THEN
709*
710* If right singular vectors desired, generate P'.
711* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
712*
713 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
714 $ work( iwork ), lwork-iwork+1, ierr )
715 ncvt = n
716 END IF
717 iwork = ie + n
718*
719* Perform bidiagonal QR iteration, computing right
720* singular vectors of A in A if desired
721* (Workspace: need BDSPAC)
722*
723 CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
724 $ dum, 1, dum, 1, work( iwork ), info )
725*
726* If right singular vectors desired in VT, copy them there
727*
728 IF( wntvas )
729 $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
730*
731 ELSE IF( wntuo .AND. wntvn ) THEN
732*
733* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734* N left singular vectors to be overwritten on A and
735* no right singular vectors to be computed
736*
737 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
738*
739* Sufficient workspace for a fast algorithm
740*
741 ir = 1
742 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
743*
744* WORK(IU) is LDA by N, WORK(IR) is LDA by N
745*
746 ldwrku = lda
747 ldwrkr = lda
748 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
749*
750* WORK(IU) is LDA by N, WORK(IR) is N by N
751*
752 ldwrku = lda
753 ldwrkr = n
754 ELSE
755*
756* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757*
758 ldwrku = ( lwork-n*n-n ) / n
759 ldwrkr = n
760 END IF
761 itau = ir + ldwrkr*n
762 iwork = itau + n
763*
764* Compute A=Q*R
765* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
766*
767 CALL dgeqrf( m, n, a, lda, work( itau ),
768 $ work( iwork ), lwork-iwork+1, ierr )
769*
770* Copy R to WORK(IR) and zero out below it
771*
772 CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773 CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
774 $ ldwrkr )
775*
776* Generate Q in A
777* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
778*
779 CALL dorgqr( m, n, n, a, lda, work( itau ),
780 $ work( iwork ), lwork-iwork+1, ierr )
781 ie = itau
782 itauq = ie + n
783 itaup = itauq + n
784 iwork = itaup + n
785*
786* Bidiagonalize R in WORK(IR)
787* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
788*
789 CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790 $ work( itauq ), work( itaup ),
791 $ work( iwork ), lwork-iwork+1, ierr )
792*
793* Generate left vectors bidiagonalizing R
794* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
795*
796 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
797 $ work( itauq ), work( iwork ),
798 $ lwork-iwork+1, ierr )
799 iwork = ie + n
800*
801* Perform bidiagonal QR iteration, computing left
802* singular vectors of R in WORK(IR)
803* (Workspace: need N*N + BDSPAC)
804*
805 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
806 $ work( ir ), ldwrkr, dum, 1,
807 $ work( iwork ), info )
808 iu = ie + n
809*
810* Multiply Q in A by left singular vectors of R in
811* WORK(IR), storing result in WORK(IU) and copying to A
812* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
813*
814 DO 10 i = 1, m, ldwrku
815 chunk = min( m-i+1, ldwrku )
816 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
817 $ lda, work( ir ), ldwrkr, zero,
818 $ work( iu ), ldwrku )
819 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
820 $ a( i, 1 ), lda )
821 10 CONTINUE
822*
823 ELSE
824*
825* Insufficient workspace for a fast algorithm
826*
827 ie = 1
828 itauq = ie + n
829 itaup = itauq + n
830 iwork = itaup + n
831*
832* Bidiagonalize A
833* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
834*
835 CALL dgebrd( m, n, a, lda, s, work( ie ),
836 $ work( itauq ), work( itaup ),
837 $ work( iwork ), lwork-iwork+1, ierr )
838*
839* Generate left vectors bidiagonalizing A
840* (Workspace: need 4*N, prefer 3*N + N*NB)
841*
842 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
843 $ work( iwork ), lwork-iwork+1, ierr )
844 iwork = ie + n
845*
846* Perform bidiagonal QR iteration, computing left
847* singular vectors of A in A
848* (Workspace: need BDSPAC)
849*
850 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
851 $ a, lda, dum, 1, work( iwork ), info )
852*
853 END IF
854*
855 ELSE IF( wntuo .AND. wntvas ) THEN
856*
857* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858* N left singular vectors to be overwritten on A and
859* N right singular vectors to be computed in VT
860*
861 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
862*
863* Sufficient workspace for a fast algorithm
864*
865 ir = 1
866 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
867*
868* WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870 ldwrku = lda
871 ldwrkr = lda
872 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
873*
874* WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876 ldwrku = lda
877 ldwrkr = n
878 ELSE
879*
880* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882 ldwrku = ( lwork-n*n-n ) / n
883 ldwrkr = n
884 END IF
885 itau = ir + ldwrkr*n
886 iwork = itau + n
887*
888* Compute A=Q*R
889* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
890*
891 CALL dgeqrf( m, n, a, lda, work( itau ),
892 $ work( iwork ), lwork-iwork+1, ierr )
893*
894* Copy R to VT, zeroing out below it
895*
896 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
897 IF( n.GT.1 )
898 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
899 $ vt( 2, 1 ), ldvt )
900*
901* Generate Q in A
902* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
903*
904 CALL dorgqr( m, n, n, a, lda, work( itau ),
905 $ work( iwork ), lwork-iwork+1, ierr )
906 ie = itau
907 itauq = ie + n
908 itaup = itauq + n
909 iwork = itaup + n
910*
911* Bidiagonalize R in VT, copying result to WORK(IR)
912* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
913*
914 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
915 $ work( itauq ), work( itaup ),
916 $ work( iwork ), lwork-iwork+1, ierr )
917 CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
918*
919* Generate left vectors bidiagonalizing R in WORK(IR)
920* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
921*
922 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
923 $ work( itauq ), work( iwork ),
924 $ lwork-iwork+1, ierr )
925*
926* Generate right vectors bidiagonalizing R in VT
927* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
928*
929 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
930 $ work( iwork ), lwork-iwork+1, ierr )
931 iwork = ie + n
932*
933* Perform bidiagonal QR iteration, computing left
934* singular vectors of R in WORK(IR) and computing right
935* singular vectors of R in VT
936* (Workspace: need N*N + BDSPAC)
937*
938 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
939 $ work( ir ), ldwrkr, dum, 1,
940 $ work( iwork ), info )
941 iu = ie + n
942*
943* Multiply Q in A by left singular vectors of R in
944* WORK(IR), storing result in WORK(IU) and copying to A
945* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
946*
947 DO 20 i = 1, m, ldwrku
948 chunk = min( m-i+1, ldwrku )
949 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
950 $ lda, work( ir ), ldwrkr, zero,
951 $ work( iu ), ldwrku )
952 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
953 $ a( i, 1 ), lda )
954 20 CONTINUE
955*
956 ELSE
957*
958* Insufficient workspace for a fast algorithm
959*
960 itau = 1
961 iwork = itau + n
962*
963* Compute A=Q*R
964* (Workspace: need 2*N, prefer N + N*NB)
965*
966 CALL dgeqrf( m, n, a, lda, work( itau ),
967 $ work( iwork ), lwork-iwork+1, ierr )
968*
969* Copy R to VT, zeroing out below it
970*
971 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
972 IF( n.GT.1 )
973 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
974 $ vt( 2, 1 ), ldvt )
975*
976* Generate Q in A
977* (Workspace: need 2*N, prefer N + N*NB)
978*
979 CALL dorgqr( m, n, n, a, lda, work( itau ),
980 $ work( iwork ), lwork-iwork+1, ierr )
981 ie = itau
982 itauq = ie + n
983 itaup = itauq + n
984 iwork = itaup + n
985*
986* Bidiagonalize R in VT
987* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
988*
989 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
990 $ work( itauq ), work( itaup ),
991 $ work( iwork ), lwork-iwork+1, ierr )
992*
993* Multiply Q in A by left vectors bidiagonalizing R
994* (Workspace: need 3*N + M, prefer 3*N + M*NB)
995*
996 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
997 $ work( itauq ), a, lda, work( iwork ),
998 $ lwork-iwork+1, ierr )
999*
1000* Generate right vectors bidiagonalizing R in VT
1001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1002*
1003 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1004 $ work( iwork ), lwork-iwork+1, ierr )
1005 iwork = ie + n
1006*
1007* Perform bidiagonal QR iteration, computing left
1008* singular vectors of A in A and computing right
1009* singular vectors of A in VT
1010* (Workspace: need BDSPAC)
1011*
1012 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1013 $ a, lda, dum, 1, work( iwork ), info )
1014*
1015 END IF
1016*
1017 ELSE IF( wntus ) THEN
1018*
1019 IF( wntvn ) THEN
1020*
1021* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022* N left singular vectors to be computed in U and
1023* no right singular vectors to be computed
1024*
1025 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1026*
1027* Sufficient workspace for a fast algorithm
1028*
1029 ir = 1
1030 IF( lwork.GE.wrkbl+lda*n ) THEN
1031*
1032* WORK(IR) is LDA by N
1033*
1034 ldwrkr = lda
1035 ELSE
1036*
1037* WORK(IR) is N by N
1038*
1039 ldwrkr = n
1040 END IF
1041 itau = ir + ldwrkr*n
1042 iwork = itau + n
1043*
1044* Compute A=Q*R
1045* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1046*
1047 CALL dgeqrf( m, n, a, lda, work( itau ),
1048 $ work( iwork ), lwork-iwork+1, ierr )
1049*
1050* Copy R to WORK(IR), zeroing out below it
1051*
1052 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1053 $ ldwrkr )
1054 CALL dlaset( 'L', n-1, n-1, zero, zero,
1055 $ work( ir+1 ), ldwrkr )
1056*
1057* Generate Q in A
1058* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059*
1060 CALL dorgqr( m, n, n, a, lda, work( itau ),
1061 $ work( iwork ), lwork-iwork+1, ierr )
1062 ie = itau
1063 itauq = ie + n
1064 itaup = itauq + n
1065 iwork = itaup + n
1066*
1067* Bidiagonalize R in WORK(IR)
1068* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1069*
1070 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1071 $ work( ie ), work( itauq ),
1072 $ work( itaup ), work( iwork ),
1073 $ lwork-iwork+1, ierr )
1074*
1075* Generate left vectors bidiagonalizing R in WORK(IR)
1076* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1077*
1078 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1079 $ work( itauq ), work( iwork ),
1080 $ lwork-iwork+1, ierr )
1081 iwork = ie + n
1082*
1083* Perform bidiagonal QR iteration, computing left
1084* singular vectors of R in WORK(IR)
1085* (Workspace: need N*N + BDSPAC)
1086*
1087 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1088 $ 1, work( ir ), ldwrkr, dum, 1,
1089 $ work( iwork ), info )
1090*
1091* Multiply Q in A by left singular vectors of R in
1092* WORK(IR), storing result in U
1093* (Workspace: need N*N)
1094*
1095 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1096 $ work( ir ), ldwrkr, zero, u, ldu )
1097*
1098 ELSE
1099*
1100* Insufficient workspace for a fast algorithm
1101*
1102 itau = 1
1103 iwork = itau + n
1104*
1105* Compute A=Q*R, copying result to U
1106* (Workspace: need 2*N, prefer N + N*NB)
1107*
1108 CALL dgeqrf( m, n, a, lda, work( itau ),
1109 $ work( iwork ), lwork-iwork+1, ierr )
1110 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1111*
1112* Generate Q in U
1113* (Workspace: need 2*N, prefer N + N*NB)
1114*
1115 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1116 $ work( iwork ), lwork-iwork+1, ierr )
1117 ie = itau
1118 itauq = ie + n
1119 itaup = itauq + n
1120 iwork = itaup + n
1121*
1122* Zero out below R in A
1123*
1124 IF( n .GT. 1 ) THEN
1125 CALL dlaset( 'L', n-1, n-1, zero, zero,
1126 $ a( 2, 1 ), lda )
1127 END IF
1128*
1129* Bidiagonalize R in A
1130* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1131*
1132 CALL dgebrd( n, n, a, lda, s, work( ie ),
1133 $ work( itauq ), work( itaup ),
1134 $ work( iwork ), lwork-iwork+1, ierr )
1135*
1136* Multiply Q in U by left vectors bidiagonalizing R
1137* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1138*
1139 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1140 $ work( itauq ), u, ldu, work( iwork ),
1141 $ lwork-iwork+1, ierr )
1142 iwork = ie + n
1143*
1144* Perform bidiagonal QR iteration, computing left
1145* singular vectors of A in U
1146* (Workspace: need BDSPAC)
1147*
1148 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1149 $ 1, u, ldu, dum, 1, work( iwork ),
1150 $ info )
1151*
1152 END IF
1153*
1154 ELSE IF( wntvo ) THEN
1155*
1156* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1157* N left singular vectors to be computed in U and
1158* N right singular vectors to be overwritten on A
1159*
1160 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1161*
1162* Sufficient workspace for a fast algorithm
1163*
1164 iu = 1
1165 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1166*
1167* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1168*
1169 ldwrku = lda
1170 ir = iu + ldwrku*n
1171 ldwrkr = lda
1172 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1173*
1174* WORK(IU) is LDA by N and WORK(IR) is N by N
1175*
1176 ldwrku = lda
1177 ir = iu + ldwrku*n
1178 ldwrkr = n
1179 ELSE
1180*
1181* WORK(IU) is N by N and WORK(IR) is N by N
1182*
1183 ldwrku = n
1184 ir = iu + ldwrku*n
1185 ldwrkr = n
1186 END IF
1187 itau = ir + ldwrkr*n
1188 iwork = itau + n
1189*
1190* Compute A=Q*R
1191* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1192*
1193 CALL dgeqrf( m, n, a, lda, work( itau ),
1194 $ work( iwork ), lwork-iwork+1, ierr )
1195*
1196* Copy R to WORK(IU), zeroing out below it
1197*
1198 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1199 $ ldwrku )
1200 CALL dlaset( 'L', n-1, n-1, zero, zero,
1201 $ work( iu+1 ), ldwrku )
1202*
1203* Generate Q in A
1204* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1205*
1206 CALL dorgqr( m, n, n, a, lda, work( itau ),
1207 $ work( iwork ), lwork-iwork+1, ierr )
1208 ie = itau
1209 itauq = ie + n
1210 itaup = itauq + n
1211 iwork = itaup + n
1212*
1213* Bidiagonalize R in WORK(IU), copying result to
1214* WORK(IR)
1215* (Workspace: need 2*N*N + 4*N,
1216* prefer 2*N*N+3*N+2*N*NB)
1217*
1218 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1219 $ work( ie ), work( itauq ),
1220 $ work( itaup ), work( iwork ),
1221 $ lwork-iwork+1, ierr )
1222 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1223 $ work( ir ), ldwrkr )
1224*
1225* Generate left bidiagonalizing vectors in WORK(IU)
1226* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1227*
1228 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1229 $ work( itauq ), work( iwork ),
1230 $ lwork-iwork+1, ierr )
1231*
1232* Generate right bidiagonalizing vectors in WORK(IR)
1233* (Workspace: need 2*N*N + 4*N-1,
1234* prefer 2*N*N+3*N+(N-1)*NB)
1235*
1236 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1237 $ work( itaup ), work( iwork ),
1238 $ lwork-iwork+1, ierr )
1239 iwork = ie + n
1240*
1241* Perform bidiagonal QR iteration, computing left
1242* singular vectors of R in WORK(IU) and computing
1243* right singular vectors of R in WORK(IR)
1244* (Workspace: need 2*N*N + BDSPAC)
1245*
1246 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1247 $ work( ir ), ldwrkr, work( iu ),
1248 $ ldwrku, dum, 1, work( iwork ), info )
1249*
1250* Multiply Q in A by left singular vectors of R in
1251* WORK(IU), storing result in U
1252* (Workspace: need N*N)
1253*
1254 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1255 $ work( iu ), ldwrku, zero, u, ldu )
1256*
1257* Copy right singular vectors of R to A
1258* (Workspace: need N*N)
1259*
1260 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1261 $ lda )
1262*
1263 ELSE
1264*
1265* Insufficient workspace for a fast algorithm
1266*
1267 itau = 1
1268 iwork = itau + n
1269*
1270* Compute A=Q*R, copying result to U
1271* (Workspace: need 2*N, prefer N + N*NB)
1272*
1273 CALL dgeqrf( m, n, a, lda, work( itau ),
1274 $ work( iwork ), lwork-iwork+1, ierr )
1275 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1276*
1277* Generate Q in U
1278* (Workspace: need 2*N, prefer N + N*NB)
1279*
1280 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1281 $ work( iwork ), lwork-iwork+1, ierr )
1282 ie = itau
1283 itauq = ie + n
1284 itaup = itauq + n
1285 iwork = itaup + n
1286*
1287* Zero out below R in A
1288*
1289 IF( n .GT. 1 ) THEN
1290 CALL dlaset( 'L', n-1, n-1, zero, zero,
1291 $ a( 2, 1 ), lda )
1292 END IF
1293*
1294* Bidiagonalize R in A
1295* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1296*
1297 CALL dgebrd( n, n, a, lda, s, work( ie ),
1298 $ work( itauq ), work( itaup ),
1299 $ work( iwork ), lwork-iwork+1, ierr )
1300*
1301* Multiply Q in U by left vectors bidiagonalizing R
1302* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1303*
1304 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1305 $ work( itauq ), u, ldu, work( iwork ),
1306 $ lwork-iwork+1, ierr )
1307*
1308* Generate right vectors bidiagonalizing R in A
1309* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1310*
1311 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1312 $ work( iwork ), lwork-iwork+1, ierr )
1313 iwork = ie + n
1314*
1315* Perform bidiagonal QR iteration, computing left
1316* singular vectors of A in U and computing right
1317* singular vectors of A in A
1318* (Workspace: need BDSPAC)
1319*
1320 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1321 $ lda, u, ldu, dum, 1, work( iwork ),
1322 $ info )
1323*
1324 END IF
1325*
1326 ELSE IF( wntvas ) THEN
1327*
1328* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1329* or 'A')
1330* N left singular vectors to be computed in U and
1331* N right singular vectors to be computed in VT
1332*
1333 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1334*
1335* Sufficient workspace for a fast algorithm
1336*
1337 iu = 1
1338 IF( lwork.GE.wrkbl+lda*n ) THEN
1339*
1340* WORK(IU) is LDA by N
1341*
1342 ldwrku = lda
1343 ELSE
1344*
1345* WORK(IU) is N by N
1346*
1347 ldwrku = n
1348 END IF
1349 itau = iu + ldwrku*n
1350 iwork = itau + n
1351*
1352* Compute A=Q*R
1353* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1354*
1355 CALL dgeqrf( m, n, a, lda, work( itau ),
1356 $ work( iwork ), lwork-iwork+1, ierr )
1357*
1358* Copy R to WORK(IU), zeroing out below it
1359*
1360 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1361 $ ldwrku )
1362 CALL dlaset( 'L', n-1, n-1, zero, zero,
1363 $ work( iu+1 ), ldwrku )
1364*
1365* Generate Q in A
1366* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1367*
1368 CALL dorgqr( m, n, n, a, lda, work( itau ),
1369 $ work( iwork ), lwork-iwork+1, ierr )
1370 ie = itau
1371 itauq = ie + n
1372 itaup = itauq + n
1373 iwork = itaup + n
1374*
1375* Bidiagonalize R in WORK(IU), copying result to VT
1376* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1377*
1378 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1379 $ work( ie ), work( itauq ),
1380 $ work( itaup ), work( iwork ),
1381 $ lwork-iwork+1, ierr )
1382 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1383 $ ldvt )
1384*
1385* Generate left bidiagonalizing vectors in WORK(IU)
1386* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1387*
1388 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1389 $ work( itauq ), work( iwork ),
1390 $ lwork-iwork+1, ierr )
1391*
1392* Generate right bidiagonalizing vectors in VT
1393* (Workspace: need N*N + 4*N-1,
1394* prefer N*N+3*N+(N-1)*NB)
1395*
1396 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1397 $ work( iwork ), lwork-iwork+1, ierr )
1398 iwork = ie + n
1399*
1400* Perform bidiagonal QR iteration, computing left
1401* singular vectors of R in WORK(IU) and computing
1402* right singular vectors of R in VT
1403* (Workspace: need N*N + BDSPAC)
1404*
1405 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1406 $ ldvt, work( iu ), ldwrku, dum, 1,
1407 $ work( iwork ), info )
1408*
1409* Multiply Q in A by left singular vectors of R in
1410* WORK(IU), storing result in U
1411* (Workspace: need N*N)
1412*
1413 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1414 $ work( iu ), ldwrku, zero, u, ldu )
1415*
1416 ELSE
1417*
1418* Insufficient workspace for a fast algorithm
1419*
1420 itau = 1
1421 iwork = itau + n
1422*
1423* Compute A=Q*R, copying result to U
1424* (Workspace: need 2*N, prefer N + N*NB)
1425*
1426 CALL dgeqrf( m, n, a, lda, work( itau ),
1427 $ work( iwork ), lwork-iwork+1, ierr )
1428 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1429*
1430* Generate Q in U
1431* (Workspace: need 2*N, prefer N + N*NB)
1432*
1433 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1434 $ work( iwork ), lwork-iwork+1, ierr )
1435*
1436* Copy R to VT, zeroing out below it
1437*
1438 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1439 IF( n.GT.1 )
1440 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1441 $ vt( 2, 1 ), ldvt )
1442 ie = itau
1443 itauq = ie + n
1444 itaup = itauq + n
1445 iwork = itaup + n
1446*
1447* Bidiagonalize R in VT
1448* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1449*
1450 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1451 $ work( itauq ), work( itaup ),
1452 $ work( iwork ), lwork-iwork+1, ierr )
1453*
1454* Multiply Q in U by left bidiagonalizing vectors
1455* in VT
1456* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1457*
1458 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1459 $ work( itauq ), u, ldu, work( iwork ),
1460 $ lwork-iwork+1, ierr )
1461*
1462* Generate right bidiagonalizing vectors in VT
1463* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1464*
1465 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1466 $ work( iwork ), lwork-iwork+1, ierr )
1467 iwork = ie + n
1468*
1469* Perform bidiagonal QR iteration, computing left
1470* singular vectors of A in U and computing right
1471* singular vectors of A in VT
1472* (Workspace: need BDSPAC)
1473*
1474 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1475 $ ldvt, u, ldu, dum, 1, work( iwork ),
1476 $ info )
1477*
1478 END IF
1479*
1480 END IF
1481*
1482 ELSE IF( wntua ) THEN
1483*
1484 IF( wntvn ) THEN
1485*
1486* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1487* M left singular vectors to be computed in U and
1488* no right singular vectors to be computed
1489*
1490 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1491*
1492* Sufficient workspace for a fast algorithm
1493*
1494 ir = 1
1495 IF( lwork.GE.wrkbl+lda*n ) THEN
1496*
1497* WORK(IR) is LDA by N
1498*
1499 ldwrkr = lda
1500 ELSE
1501*
1502* WORK(IR) is N by N
1503*
1504 ldwrkr = n
1505 END IF
1506 itau = ir + ldwrkr*n
1507 iwork = itau + n
1508*
1509* Compute A=Q*R, copying result to U
1510* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1511*
1512 CALL dgeqrf( m, n, a, lda, work( itau ),
1513 $ work( iwork ), lwork-iwork+1, ierr )
1514 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1515*
1516* Copy R to WORK(IR), zeroing out below it
1517*
1518 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1519 $ ldwrkr )
1520 CALL dlaset( 'L', n-1, n-1, zero, zero,
1521 $ work( ir+1 ), ldwrkr )
1522*
1523* Generate Q in U
1524* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1525*
1526 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1527 $ work( iwork ), lwork-iwork+1, ierr )
1528 ie = itau
1529 itauq = ie + n
1530 itaup = itauq + n
1531 iwork = itaup + n
1532*
1533* Bidiagonalize R in WORK(IR)
1534* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1535*
1536 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1537 $ work( ie ), work( itauq ),
1538 $ work( itaup ), work( iwork ),
1539 $ lwork-iwork+1, ierr )
1540*
1541* Generate left bidiagonalizing vectors in WORK(IR)
1542* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1543*
1544 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1545 $ work( itauq ), work( iwork ),
1546 $ lwork-iwork+1, ierr )
1547 iwork = ie + n
1548*
1549* Perform bidiagonal QR iteration, computing left
1550* singular vectors of R in WORK(IR)
1551* (Workspace: need N*N + BDSPAC)
1552*
1553 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1554 $ 1, work( ir ), ldwrkr, dum, 1,
1555 $ work( iwork ), info )
1556*
1557* Multiply Q in U by left singular vectors of R in
1558* WORK(IR), storing result in A
1559* (Workspace: need N*N)
1560*
1561 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1562 $ work( ir ), ldwrkr, zero, a, lda )
1563*
1564* Copy left singular vectors of A from A to U
1565*
1566 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1567*
1568 ELSE
1569*
1570* Insufficient workspace for a fast algorithm
1571*
1572 itau = 1
1573 iwork = itau + n
1574*
1575* Compute A=Q*R, copying result to U
1576* (Workspace: need 2*N, prefer N + N*NB)
1577*
1578 CALL dgeqrf( m, n, a, lda, work( itau ),
1579 $ work( iwork ), lwork-iwork+1, ierr )
1580 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1581*
1582* Generate Q in U
1583* (Workspace: need N + M, prefer N + M*NB)
1584*
1585 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1586 $ work( iwork ), lwork-iwork+1, ierr )
1587 ie = itau
1588 itauq = ie + n
1589 itaup = itauq + n
1590 iwork = itaup + n
1591*
1592* Zero out below R in A
1593*
1594 IF( n .GT. 1 ) THEN
1595 CALL dlaset( 'L', n-1, n-1, zero, zero,
1596 $ a( 2, 1 ), lda )
1597 END IF
1598*
1599* Bidiagonalize R in A
1600* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1601*
1602 CALL dgebrd( n, n, a, lda, s, work( ie ),
1603 $ work( itauq ), work( itaup ),
1604 $ work( iwork ), lwork-iwork+1, ierr )
1605*
1606* Multiply Q in U by left bidiagonalizing vectors
1607* in A
1608* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1609*
1610 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1611 $ work( itauq ), u, ldu, work( iwork ),
1612 $ lwork-iwork+1, ierr )
1613 iwork = ie + n
1614*
1615* Perform bidiagonal QR iteration, computing left
1616* singular vectors of A in U
1617* (Workspace: need BDSPAC)
1618*
1619 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1620 $ 1, u, ldu, dum, 1, work( iwork ),
1621 $ info )
1622*
1623 END IF
1624*
1625 ELSE IF( wntvo ) THEN
1626*
1627* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1628* M left singular vectors to be computed in U and
1629* N right singular vectors to be overwritten on A
1630*
1631 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1632*
1633* Sufficient workspace for a fast algorithm
1634*
1635 iu = 1
1636 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1637*
1638* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1639*
1640 ldwrku = lda
1641 ir = iu + ldwrku*n
1642 ldwrkr = lda
1643 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1644*
1645* WORK(IU) is LDA by N and WORK(IR) is N by N
1646*
1647 ldwrku = lda
1648 ir = iu + ldwrku*n
1649 ldwrkr = n
1650 ELSE
1651*
1652* WORK(IU) is N by N and WORK(IR) is N by N
1653*
1654 ldwrku = n
1655 ir = iu + ldwrku*n
1656 ldwrkr = n
1657 END IF
1658 itau = ir + ldwrkr*n
1659 iwork = itau + n
1660*
1661* Compute A=Q*R, copying result to U
1662* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1663*
1664 CALL dgeqrf( m, n, a, lda, work( itau ),
1665 $ work( iwork ), lwork-iwork+1, ierr )
1666 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1667*
1668* Generate Q in U
1669* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1670*
1671 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1672 $ work( iwork ), lwork-iwork+1, ierr )
1673*
1674* Copy R to WORK(IU), zeroing out below it
1675*
1676 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1677 $ ldwrku )
1678 CALL dlaset( 'L', n-1, n-1, zero, zero,
1679 $ work( iu+1 ), ldwrku )
1680 ie = itau
1681 itauq = ie + n
1682 itaup = itauq + n
1683 iwork = itaup + n
1684*
1685* Bidiagonalize R in WORK(IU), copying result to
1686* WORK(IR)
1687* (Workspace: need 2*N*N + 4*N,
1688* prefer 2*N*N+3*N+2*N*NB)
1689*
1690 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1691 $ work( ie ), work( itauq ),
1692 $ work( itaup ), work( iwork ),
1693 $ lwork-iwork+1, ierr )
1694 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1695 $ work( ir ), ldwrkr )
1696*
1697* Generate left bidiagonalizing vectors in WORK(IU)
1698* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1699*
1700 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1701 $ work( itauq ), work( iwork ),
1702 $ lwork-iwork+1, ierr )
1703*
1704* Generate right bidiagonalizing vectors in WORK(IR)
1705* (Workspace: need 2*N*N + 4*N-1,
1706* prefer 2*N*N+3*N+(N-1)*NB)
1707*
1708 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1709 $ work( itaup ), work( iwork ),
1710 $ lwork-iwork+1, ierr )
1711 iwork = ie + n
1712*
1713* Perform bidiagonal QR iteration, computing left
1714* singular vectors of R in WORK(IU) and computing
1715* right singular vectors of R in WORK(IR)
1716* (Workspace: need 2*N*N + BDSPAC)
1717*
1718 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1719 $ work( ir ), ldwrkr, work( iu ),
1720 $ ldwrku, dum, 1, work( iwork ), info )
1721*
1722* Multiply Q in U by left singular vectors of R in
1723* WORK(IU), storing result in A
1724* (Workspace: need N*N)
1725*
1726 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1727 $ work( iu ), ldwrku, zero, a, lda )
1728*
1729* Copy left singular vectors of A from A to U
1730*
1731 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1732*
1733* Copy right singular vectors of R from WORK(IR) to A
1734*
1735 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1736 $ lda )
1737*
1738 ELSE
1739*
1740* Insufficient workspace for a fast algorithm
1741*
1742 itau = 1
1743 iwork = itau + n
1744*
1745* Compute A=Q*R, copying result to U
1746* (Workspace: need 2*N, prefer N + N*NB)
1747*
1748 CALL dgeqrf( m, n, a, lda, work( itau ),
1749 $ work( iwork ), lwork-iwork+1, ierr )
1750 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1751*
1752* Generate Q in U
1753* (Workspace: need N + M, prefer N + M*NB)
1754*
1755 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1756 $ work( iwork ), lwork-iwork+1, ierr )
1757 ie = itau
1758 itauq = ie + n
1759 itaup = itauq + n
1760 iwork = itaup + n
1761*
1762* Zero out below R in A
1763*
1764 IF( n .GT. 1 ) THEN
1765 CALL dlaset( 'L', n-1, n-1, zero, zero,
1766 $ a( 2, 1 ), lda )
1767 END IF
1768*
1769* Bidiagonalize R in A
1770* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1771*
1772 CALL dgebrd( n, n, a, lda, s, work( ie ),
1773 $ work( itauq ), work( itaup ),
1774 $ work( iwork ), lwork-iwork+1, ierr )
1775*
1776* Multiply Q in U by left bidiagonalizing vectors
1777* in A
1778* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1779*
1780 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1781 $ work( itauq ), u, ldu, work( iwork ),
1782 $ lwork-iwork+1, ierr )
1783*
1784* Generate right bidiagonalizing vectors in A
1785* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1786*
1787 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1788 $ work( iwork ), lwork-iwork+1, ierr )
1789 iwork = ie + n
1790*
1791* Perform bidiagonal QR iteration, computing left
1792* singular vectors of A in U and computing right
1793* singular vectors of A in A
1794* (Workspace: need BDSPAC)
1795*
1796 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1797 $ lda, u, ldu, dum, 1, work( iwork ),
1798 $ info )
1799*
1800 END IF
1801*
1802 ELSE IF( wntvas ) THEN
1803*
1804* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1805* or 'A')
1806* M left singular vectors to be computed in U and
1807* N right singular vectors to be computed in VT
1808*
1809 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1810*
1811* Sufficient workspace for a fast algorithm
1812*
1813 iu = 1
1814 IF( lwork.GE.wrkbl+lda*n ) THEN
1815*
1816* WORK(IU) is LDA by N
1817*
1818 ldwrku = lda
1819 ELSE
1820*
1821* WORK(IU) is N by N
1822*
1823 ldwrku = n
1824 END IF
1825 itau = iu + ldwrku*n
1826 iwork = itau + n
1827*
1828* Compute A=Q*R, copying result to U
1829* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1830*
1831 CALL dgeqrf( m, n, a, lda, work( itau ),
1832 $ work( iwork ), lwork-iwork+1, ierr )
1833 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1834*
1835* Generate Q in U
1836* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1837*
1838 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1839 $ work( iwork ), lwork-iwork+1, ierr )
1840*
1841* Copy R to WORK(IU), zeroing out below it
1842*
1843 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1844 $ ldwrku )
1845 CALL dlaset( 'L', n-1, n-1, zero, zero,
1846 $ work( iu+1 ), ldwrku )
1847 ie = itau
1848 itauq = ie + n
1849 itaup = itauq + n
1850 iwork = itaup + n
1851*
1852* Bidiagonalize R in WORK(IU), copying result to VT
1853* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1854*
1855 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1856 $ work( ie ), work( itauq ),
1857 $ work( itaup ), work( iwork ),
1858 $ lwork-iwork+1, ierr )
1859 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1860 $ ldvt )
1861*
1862* Generate left bidiagonalizing vectors in WORK(IU)
1863* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1864*
1865 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1866 $ work( itauq ), work( iwork ),
1867 $ lwork-iwork+1, ierr )
1868*
1869* Generate right bidiagonalizing vectors in VT
1870* (Workspace: need N*N + 4*N-1,
1871* prefer N*N+3*N+(N-1)*NB)
1872*
1873 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1874 $ work( iwork ), lwork-iwork+1, ierr )
1875 iwork = ie + n
1876*
1877* Perform bidiagonal QR iteration, computing left
1878* singular vectors of R in WORK(IU) and computing
1879* right singular vectors of R in VT
1880* (Workspace: need N*N + BDSPAC)
1881*
1882 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1883 $ ldvt, work( iu ), ldwrku, dum, 1,
1884 $ work( iwork ), info )
1885*
1886* Multiply Q in U by left singular vectors of R in
1887* WORK(IU), storing result in A
1888* (Workspace: need N*N)
1889*
1890 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1891 $ work( iu ), ldwrku, zero, a, lda )
1892*
1893* Copy left singular vectors of A from A to U
1894*
1895 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1896*
1897 ELSE
1898*
1899* Insufficient workspace for a fast algorithm
1900*
1901 itau = 1
1902 iwork = itau + n
1903*
1904* Compute A=Q*R, copying result to U
1905* (Workspace: need 2*N, prefer N + N*NB)
1906*
1907 CALL dgeqrf( m, n, a, lda, work( itau ),
1908 $ work( iwork ), lwork-iwork+1, ierr )
1909 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1910*
1911* Generate Q in U
1912* (Workspace: need N + M, prefer N + M*NB)
1913*
1914 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1915 $ work( iwork ), lwork-iwork+1, ierr )
1916*
1917* Copy R from A to VT, zeroing out below it
1918*
1919 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1920 IF( n.GT.1 )
1921 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1922 $ vt( 2, 1 ), ldvt )
1923 ie = itau
1924 itauq = ie + n
1925 itaup = itauq + n
1926 iwork = itaup + n
1927*
1928* Bidiagonalize R in VT
1929* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1930*
1931 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1932 $ work( itauq ), work( itaup ),
1933 $ work( iwork ), lwork-iwork+1, ierr )
1934*
1935* Multiply Q in U by left bidiagonalizing vectors
1936* in VT
1937* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1938*
1939 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1940 $ work( itauq ), u, ldu, work( iwork ),
1941 $ lwork-iwork+1, ierr )
1942*
1943* Generate right bidiagonalizing vectors in VT
1944* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1945*
1946 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1947 $ work( iwork ), lwork-iwork+1, ierr )
1948 iwork = ie + n
1949*
1950* Perform bidiagonal QR iteration, computing left
1951* singular vectors of A in U and computing right
1952* singular vectors of A in VT
1953* (Workspace: need BDSPAC)
1954*
1955 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1956 $ ldvt, u, ldu, dum, 1, work( iwork ),
1957 $ info )
1958*
1959 END IF
1960*
1961 END IF
1962*
1963 END IF
1964*
1965 ELSE
1966*
1967* M .LT. MNTHR
1968*
1969* Path 10 (M at least N, but not much larger)
1970* Reduce to bidiagonal form without QR decomposition
1971*
1972 ie = 1
1973 itauq = ie + n
1974 itaup = itauq + n
1975 iwork = itaup + n
1976*
1977* Bidiagonalize A
1978* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1979*
1980 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1981 $ work( itaup ), work( iwork ), lwork-iwork+1,
1982 $ ierr )
1983 IF( wntuas ) THEN
1984*
1985* If left singular vectors desired in U, copy result to U
1986* and generate left bidiagonalizing vectors in U
1987* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1988*
1989 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1990 IF( wntus )
1991 $ ncu = n
1992 IF( wntua )
1993 $ ncu = m
1994 CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1995 $ work( iwork ), lwork-iwork+1, ierr )
1996 END IF
1997 IF( wntvas ) THEN
1998*
1999* If right singular vectors desired in VT, copy result to
2000* VT and generate right bidiagonalizing vectors in VT
2001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2002*
2003 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2004 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2005 $ work( iwork ), lwork-iwork+1, ierr )
2006 END IF
2007 IF( wntuo ) THEN
2008*
2009* If left singular vectors desired in A, generate left
2010* bidiagonalizing vectors in A
2011* (Workspace: need 4*N, prefer 3*N + N*NB)
2012*
2013 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2014 $ work( iwork ), lwork-iwork+1, ierr )
2015 END IF
2016 IF( wntvo ) THEN
2017*
2018* If right singular vectors desired in A, generate right
2019* bidiagonalizing vectors in A
2020* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2021*
2022 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2023 $ work( iwork ), lwork-iwork+1, ierr )
2024 END IF
2025 iwork = ie + n
2026 IF( wntuas .OR. wntuo )
2027 $ nru = m
2028 IF( wntun )
2029 $ nru = 0
2030 IF( wntvas .OR. wntvo )
2031 $ ncvt = n
2032 IF( wntvn )
2033 $ ncvt = 0
2034 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2035*
2036* Perform bidiagonal QR iteration, if desired, computing
2037* left singular vectors in U and computing right singular
2038* vectors in VT
2039* (Workspace: need BDSPAC)
2040*
2041 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2042 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2043 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2044*
2045* Perform bidiagonal QR iteration, if desired, computing
2046* left singular vectors in U and computing right singular
2047* vectors in A
2048* (Workspace: need BDSPAC)
2049*
2050 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2051 $ u, ldu, dum, 1, work( iwork ), info )
2052 ELSE
2053*
2054* Perform bidiagonal QR iteration, if desired, computing
2055* left singular vectors in A and computing right singular
2056* vectors in VT
2057* (Workspace: need BDSPAC)
2058*
2059 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2060 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2061 END IF
2062*
2063 END IF
2064*
2065 ELSE
2066*
2067* A has more columns than rows. If A has sufficiently more
2068* columns than rows, first reduce using the LQ decomposition (if
2069* sufficient workspace available)
2070*
2071 IF( n.GE.mnthr ) THEN
2072*
2073 IF( wntvn ) THEN
2074*
2075* Path 1t(N much larger than M, JOBVT='N')
2076* No right singular vectors to be computed
2077*
2078 itau = 1
2079 iwork = itau + m
2080*
2081* Compute A=L*Q
2082* (Workspace: need 2*M, prefer M + M*NB)
2083*
2084 CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2085 $ lwork-iwork+1, ierr )
2086*
2087* Zero out above L
2088*
2089 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2090 ie = 1
2091 itauq = ie + m
2092 itaup = itauq + m
2093 iwork = itaup + m
2094*
2095* Bidiagonalize L in A
2096* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2097*
2098 CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2099 $ work( itaup ), work( iwork ), lwork-iwork+1,
2100 $ ierr )
2101 IF( wntuo .OR. wntuas ) THEN
2102*
2103* If left singular vectors desired, generate Q
2104* (Workspace: need 4*M, prefer 3*M + M*NB)
2105*
2106 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2107 $ work( iwork ), lwork-iwork+1, ierr )
2108 END IF
2109 iwork = ie + m
2110 nru = 0
2111 IF( wntuo .OR. wntuas )
2112 $ nru = m
2113*
2114* Perform bidiagonal QR iteration, computing left singular
2115* vectors of A in A if desired
2116* (Workspace: need BDSPAC)
2117*
2118 CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2119 $ lda, dum, 1, work( iwork ), info )
2120*
2121* If left singular vectors desired in U, copy them there
2122*
2123 IF( wntuas )
2124 $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2125*
2126 ELSE IF( wntvo .AND. wntun ) THEN
2127*
2128* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2129* M right singular vectors to be overwritten on A and
2130* no left singular vectors to be computed
2131*
2132 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2133*
2134* Sufficient workspace for a fast algorithm
2135*
2136 ir = 1
2137 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2138*
2139* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2140*
2141 ldwrku = lda
2142 chunk = n
2143 ldwrkr = lda
2144 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2145*
2146* WORK(IU) is LDA by N and WORK(IR) is M by M
2147*
2148 ldwrku = lda
2149 chunk = n
2150 ldwrkr = m
2151 ELSE
2152*
2153* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2154*
2155 ldwrku = m
2156 chunk = ( lwork-m*m-m ) / m
2157 ldwrkr = m
2158 END IF
2159 itau = ir + ldwrkr*m
2160 iwork = itau + m
2161*
2162* Compute A=L*Q
2163* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2164*
2165 CALL dgelqf( m, n, a, lda, work( itau ),
2166 $ work( iwork ), lwork-iwork+1, ierr )
2167*
2168* Copy L to WORK(IR) and zero out above it
2169*
2170 CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2171 CALL dlaset( 'U', m-1, m-1, zero, zero,
2172 $ work( ir+ldwrkr ), ldwrkr )
2173*
2174* Generate Q in A
2175* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2176*
2177 CALL dorglq( m, n, m, a, lda, work( itau ),
2178 $ work( iwork ), lwork-iwork+1, ierr )
2179 ie = itau
2180 itauq = ie + m
2181 itaup = itauq + m
2182 iwork = itaup + m
2183*
2184* Bidiagonalize L in WORK(IR)
2185* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2186*
2187 CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2188 $ work( itauq ), work( itaup ),
2189 $ work( iwork ), lwork-iwork+1, ierr )
2190*
2191* Generate right vectors bidiagonalizing L
2192* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2193*
2194 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2195 $ work( itaup ), work( iwork ),
2196 $ lwork-iwork+1, ierr )
2197 iwork = ie + m
2198*
2199* Perform bidiagonal QR iteration, computing right
2200* singular vectors of L in WORK(IR)
2201* (Workspace: need M*M + BDSPAC)
2202*
2203 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2204 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2205 $ work( iwork ), info )
2206 iu = ie + m
2207*
2208* Multiply right singular vectors of L in WORK(IR) by Q
2209* in A, storing result in WORK(IU) and copying to A
2210* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2211*
2212 DO 30 i = 1, n, chunk
2213 blk = min( n-i+1, chunk )
2214 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2215 $ ldwrkr, a( 1, i ), lda, zero,
2216 $ work( iu ), ldwrku )
2217 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2218 $ a( 1, i ), lda )
2219 30 CONTINUE
2220*
2221 ELSE
2222*
2223* Insufficient workspace for a fast algorithm
2224*
2225 ie = 1
2226 itauq = ie + m
2227 itaup = itauq + m
2228 iwork = itaup + m
2229*
2230* Bidiagonalize A
2231* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2232*
2233 CALL dgebrd( m, n, a, lda, s, work( ie ),
2234 $ work( itauq ), work( itaup ),
2235 $ work( iwork ), lwork-iwork+1, ierr )
2236*
2237* Generate right vectors bidiagonalizing A
2238* (Workspace: need 4*M, prefer 3*M + M*NB)
2239*
2240 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2241 $ work( iwork ), lwork-iwork+1, ierr )
2242 iwork = ie + m
2243*
2244* Perform bidiagonal QR iteration, computing right
2245* singular vectors of A in A
2246* (Workspace: need BDSPAC)
2247*
2248 CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2249 $ dum, 1, dum, 1, work( iwork ), info )
2250*
2251 END IF
2252*
2253 ELSE IF( wntvo .AND. wntuas ) THEN
2254*
2255* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2256* M right singular vectors to be overwritten on A and
2257* M left singular vectors to be computed in U
2258*
2259 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2260*
2261* Sufficient workspace for a fast algorithm
2262*
2263 ir = 1
2264 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2265*
2266* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2267*
2268 ldwrku = lda
2269 chunk = n
2270 ldwrkr = lda
2271 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2272*
2273* WORK(IU) is LDA by N and WORK(IR) is M by M
2274*
2275 ldwrku = lda
2276 chunk = n
2277 ldwrkr = m
2278 ELSE
2279*
2280* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2281*
2282 ldwrku = m
2283 chunk = ( lwork-m*m-m ) / m
2284 ldwrkr = m
2285 END IF
2286 itau = ir + ldwrkr*m
2287 iwork = itau + m
2288*
2289* Compute A=L*Q
2290* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2291*
2292 CALL dgelqf( m, n, a, lda, work( itau ),
2293 $ work( iwork ), lwork-iwork+1, ierr )
2294*
2295* Copy L to U, zeroing about above it
2296*
2297 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2298 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2299 $ ldu )
2300*
2301* Generate Q in A
2302* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2303*
2304 CALL dorglq( m, n, m, a, lda, work( itau ),
2305 $ work( iwork ), lwork-iwork+1, ierr )
2306 ie = itau
2307 itauq = ie + m
2308 itaup = itauq + m
2309 iwork = itaup + m
2310*
2311* Bidiagonalize L in U, copying result to WORK(IR)
2312* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2313*
2314 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2315 $ work( itauq ), work( itaup ),
2316 $ work( iwork ), lwork-iwork+1, ierr )
2317 CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2318*
2319* Generate right vectors bidiagonalizing L in WORK(IR)
2320* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2321*
2322 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2323 $ work( itaup ), work( iwork ),
2324 $ lwork-iwork+1, ierr )
2325*
2326* Generate left vectors bidiagonalizing L in U
2327* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2328*
2329 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2330 $ work( iwork ), lwork-iwork+1, ierr )
2331 iwork = ie + m
2332*
2333* Perform bidiagonal QR iteration, computing left
2334* singular vectors of L in U, and computing right
2335* singular vectors of L in WORK(IR)
2336* (Workspace: need M*M + BDSPAC)
2337*
2338 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2339 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2340 $ work( iwork ), info )
2341 iu = ie + m
2342*
2343* Multiply right singular vectors of L in WORK(IR) by Q
2344* in A, storing result in WORK(IU) and copying to A
2345* (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2346*
2347 DO 40 i = 1, n, chunk
2348 blk = min( n-i+1, chunk )
2349 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2350 $ ldwrkr, a( 1, i ), lda, zero,
2351 $ work( iu ), ldwrku )
2352 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2353 $ a( 1, i ), lda )
2354 40 CONTINUE
2355*
2356 ELSE
2357*
2358* Insufficient workspace for a fast algorithm
2359*
2360 itau = 1
2361 iwork = itau + m
2362*
2363* Compute A=L*Q
2364* (Workspace: need 2*M, prefer M + M*NB)
2365*
2366 CALL dgelqf( m, n, a, lda, work( itau ),
2367 $ work( iwork ), lwork-iwork+1, ierr )
2368*
2369* Copy L to U, zeroing out above it
2370*
2371 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2372 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2373 $ ldu )
2374*
2375* Generate Q in A
2376* (Workspace: need 2*M, prefer M + M*NB)
2377*
2378 CALL dorglq( m, n, m, a, lda, work( itau ),
2379 $ work( iwork ), lwork-iwork+1, ierr )
2380 ie = itau
2381 itauq = ie + m
2382 itaup = itauq + m
2383 iwork = itaup + m
2384*
2385* Bidiagonalize L in U
2386* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2387*
2388 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2389 $ work( itauq ), work( itaup ),
2390 $ work( iwork ), lwork-iwork+1, ierr )
2391*
2392* Multiply right vectors bidiagonalizing L by Q in A
2393* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2394*
2395 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2396 $ work( itaup ), a, lda, work( iwork ),
2397 $ lwork-iwork+1, ierr )
2398*
2399* Generate left vectors bidiagonalizing L in U
2400* (Workspace: need 4*M, prefer 3*M + M*NB)
2401*
2402 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2403 $ work( iwork ), lwork-iwork+1, ierr )
2404 iwork = ie + m
2405*
2406* Perform bidiagonal QR iteration, computing left
2407* singular vectors of A in U and computing right
2408* singular vectors of A in A
2409* (Workspace: need BDSPAC)
2410*
2411 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2412 $ u, ldu, dum, 1, work( iwork ), info )
2413*
2414 END IF
2415*
2416 ELSE IF( wntvs ) THEN
2417*
2418 IF( wntun ) THEN
2419*
2420* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2421* M right singular vectors to be computed in VT and
2422* no left singular vectors to be computed
2423*
2424 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2425*
2426* Sufficient workspace for a fast algorithm
2427*
2428 ir = 1
2429 IF( lwork.GE.wrkbl+lda*m ) THEN
2430*
2431* WORK(IR) is LDA by M
2432*
2433 ldwrkr = lda
2434 ELSE
2435*
2436* WORK(IR) is M by M
2437*
2438 ldwrkr = m
2439 END IF
2440 itau = ir + ldwrkr*m
2441 iwork = itau + m
2442*
2443* Compute A=L*Q
2444* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2445*
2446 CALL dgelqf( m, n, a, lda, work( itau ),
2447 $ work( iwork ), lwork-iwork+1, ierr )
2448*
2449* Copy L to WORK(IR), zeroing out above it
2450*
2451 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2452 $ ldwrkr )
2453 CALL dlaset( 'U', m-1, m-1, zero, zero,
2454 $ work( ir+ldwrkr ), ldwrkr )
2455*
2456* Generate Q in A
2457* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2458*
2459 CALL dorglq( m, n, m, a, lda, work( itau ),
2460 $ work( iwork ), lwork-iwork+1, ierr )
2461 ie = itau
2462 itauq = ie + m
2463 itaup = itauq + m
2464 iwork = itaup + m
2465*
2466* Bidiagonalize L in WORK(IR)
2467* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2468*
2469 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2470 $ work( ie ), work( itauq ),
2471 $ work( itaup ), work( iwork ),
2472 $ lwork-iwork+1, ierr )
2473*
2474* Generate right vectors bidiagonalizing L in
2475* WORK(IR)
2476* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2477*
2478 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2479 $ work( itaup ), work( iwork ),
2480 $ lwork-iwork+1, ierr )
2481 iwork = ie + m
2482*
2483* Perform bidiagonal QR iteration, computing right
2484* singular vectors of L in WORK(IR)
2485* (Workspace: need M*M + BDSPAC)
2486*
2487 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2488 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2489 $ work( iwork ), info )
2490*
2491* Multiply right singular vectors of L in WORK(IR) by
2492* Q in A, storing result in VT
2493* (Workspace: need M*M)
2494*
2495 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2496 $ ldwrkr, a, lda, zero, vt, ldvt )
2497*
2498 ELSE
2499*
2500* Insufficient workspace for a fast algorithm
2501*
2502 itau = 1
2503 iwork = itau + m
2504*
2505* Compute A=L*Q
2506* (Workspace: need 2*M, prefer M + M*NB)
2507*
2508 CALL dgelqf( m, n, a, lda, work( itau ),
2509 $ work( iwork ), lwork-iwork+1, ierr )
2510*
2511* Copy result to VT
2512*
2513 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2514*
2515* Generate Q in VT
2516* (Workspace: need 2*M, prefer M + M*NB)
2517*
2518 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2519 $ work( iwork ), lwork-iwork+1, ierr )
2520 ie = itau
2521 itauq = ie + m
2522 itaup = itauq + m
2523 iwork = itaup + m
2524*
2525* Zero out above L in A
2526*
2527 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2528 $ lda )
2529*
2530* Bidiagonalize L in A
2531* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2532*
2533 CALL dgebrd( m, m, a, lda, s, work( ie ),
2534 $ work( itauq ), work( itaup ),
2535 $ work( iwork ), lwork-iwork+1, ierr )
2536*
2537* Multiply right vectors bidiagonalizing L by Q in VT
2538* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2539*
2540 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2541 $ work( itaup ), vt, ldvt,
2542 $ work( iwork ), lwork-iwork+1, ierr )
2543 iwork = ie + m
2544*
2545* Perform bidiagonal QR iteration, computing right
2546* singular vectors of A in VT
2547* (Workspace: need BDSPAC)
2548*
2549 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2550 $ ldvt, dum, 1, dum, 1, work( iwork ),
2551 $ info )
2552*
2553 END IF
2554*
2555 ELSE IF( wntuo ) THEN
2556*
2557* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2558* M right singular vectors to be computed in VT and
2559* M left singular vectors to be overwritten on A
2560*
2561 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2562*
2563* Sufficient workspace for a fast algorithm
2564*
2565 iu = 1
2566 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2567*
2568* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2569*
2570 ldwrku = lda
2571 ir = iu + ldwrku*m
2572 ldwrkr = lda
2573 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2574*
2575* WORK(IU) is LDA by M and WORK(IR) is M by M
2576*
2577 ldwrku = lda
2578 ir = iu + ldwrku*m
2579 ldwrkr = m
2580 ELSE
2581*
2582* WORK(IU) is M by M and WORK(IR) is M by M
2583*
2584 ldwrku = m
2585 ir = iu + ldwrku*m
2586 ldwrkr = m
2587 END IF
2588 itau = ir + ldwrkr*m
2589 iwork = itau + m
2590*
2591* Compute A=L*Q
2592* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2593*
2594 CALL dgelqf( m, n, a, lda, work( itau ),
2595 $ work( iwork ), lwork-iwork+1, ierr )
2596*
2597* Copy L to WORK(IU), zeroing out below it
2598*
2599 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2600 $ ldwrku )
2601 CALL dlaset( 'U', m-1, m-1, zero, zero,
2602 $ work( iu+ldwrku ), ldwrku )
2603*
2604* Generate Q in A
2605* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2606*
2607 CALL dorglq( m, n, m, a, lda, work( itau ),
2608 $ work( iwork ), lwork-iwork+1, ierr )
2609 ie = itau
2610 itauq = ie + m
2611 itaup = itauq + m
2612 iwork = itaup + m
2613*
2614* Bidiagonalize L in WORK(IU), copying result to
2615* WORK(IR)
2616* (Workspace: need 2*M*M + 4*M,
2617* prefer 2*M*M+3*M+2*M*NB)
2618*
2619 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2620 $ work( ie ), work( itauq ),
2621 $ work( itaup ), work( iwork ),
2622 $ lwork-iwork+1, ierr )
2623 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2624 $ work( ir ), ldwrkr )
2625*
2626* Generate right bidiagonalizing vectors in WORK(IU)
2627* (Workspace: need 2*M*M + 4*M-1,
2628* prefer 2*M*M+3*M+(M-1)*NB)
2629*
2630 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2631 $ work( itaup ), work( iwork ),
2632 $ lwork-iwork+1, ierr )
2633*
2634* Generate left bidiagonalizing vectors in WORK(IR)
2635* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2636*
2637 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2638 $ work( itauq ), work( iwork ),
2639 $ lwork-iwork+1, ierr )
2640 iwork = ie + m
2641*
2642* Perform bidiagonal QR iteration, computing left
2643* singular vectors of L in WORK(IR) and computing
2644* right singular vectors of L in WORK(IU)
2645* (Workspace: need 2*M*M + BDSPAC)
2646*
2647 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2648 $ work( iu ), ldwrku, work( ir ),
2649 $ ldwrkr, dum, 1, work( iwork ), info )
2650*
2651* Multiply right singular vectors of L in WORK(IU) by
2652* Q in A, storing result in VT
2653* (Workspace: need M*M)
2654*
2655 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2656 $ ldwrku, a, lda, zero, vt, ldvt )
2657*
2658* Copy left singular vectors of L to A
2659* (Workspace: need M*M)
2660*
2661 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2662 $ lda )
2663*
2664 ELSE
2665*
2666* Insufficient workspace for a fast algorithm
2667*
2668 itau = 1
2669 iwork = itau + m
2670*
2671* Compute A=L*Q, copying result to VT
2672* (Workspace: need 2*M, prefer M + M*NB)
2673*
2674 CALL dgelqf( m, n, a, lda, work( itau ),
2675 $ work( iwork ), lwork-iwork+1, ierr )
2676 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2677*
2678* Generate Q in VT
2679* (Workspace: need 2*M, prefer M + M*NB)
2680*
2681 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2682 $ work( iwork ), lwork-iwork+1, ierr )
2683 ie = itau
2684 itauq = ie + m
2685 itaup = itauq + m
2686 iwork = itaup + m
2687*
2688* Zero out above L in A
2689*
2690 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2691 $ lda )
2692*
2693* Bidiagonalize L in A
2694* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2695*
2696 CALL dgebrd( m, m, a, lda, s, work( ie ),
2697 $ work( itauq ), work( itaup ),
2698 $ work( iwork ), lwork-iwork+1, ierr )
2699*
2700* Multiply right vectors bidiagonalizing L by Q in VT
2701* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2702*
2703 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2704 $ work( itaup ), vt, ldvt,
2705 $ work( iwork ), lwork-iwork+1, ierr )
2706*
2707* Generate left bidiagonalizing vectors of L in A
2708* (Workspace: need 4*M, prefer 3*M + M*NB)
2709*
2710 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2711 $ work( iwork ), lwork-iwork+1, ierr )
2712 iwork = ie + m
2713*
2714* Perform bidiagonal QR iteration, compute left
2715* singular vectors of A in A and compute right
2716* singular vectors of A in VT
2717* (Workspace: need BDSPAC)
2718*
2719 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2720 $ ldvt, a, lda, dum, 1, work( iwork ),
2721 $ info )
2722*
2723 END IF
2724*
2725 ELSE IF( wntuas ) THEN
2726*
2727* Path 6t(N much larger than M, JOBU='S' or 'A',
2728* JOBVT='S')
2729* M right singular vectors to be computed in VT and
2730* M left singular vectors to be computed in U
2731*
2732 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2733*
2734* Sufficient workspace for a fast algorithm
2735*
2736 iu = 1
2737 IF( lwork.GE.wrkbl+lda*m ) THEN
2738*
2739* WORK(IU) is LDA by N
2740*
2741 ldwrku = lda
2742 ELSE
2743*
2744* WORK(IU) is LDA by M
2745*
2746 ldwrku = m
2747 END IF
2748 itau = iu + ldwrku*m
2749 iwork = itau + m
2750*
2751* Compute A=L*Q
2752* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2753*
2754 CALL dgelqf( m, n, a, lda, work( itau ),
2755 $ work( iwork ), lwork-iwork+1, ierr )
2756*
2757* Copy L to WORK(IU), zeroing out above it
2758*
2759 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2760 $ ldwrku )
2761 CALL dlaset( 'U', m-1, m-1, zero, zero,
2762 $ work( iu+ldwrku ), ldwrku )
2763*
2764* Generate Q in A
2765* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2766*
2767 CALL dorglq( m, n, m, a, lda, work( itau ),
2768 $ work( iwork ), lwork-iwork+1, ierr )
2769 ie = itau
2770 itauq = ie + m
2771 itaup = itauq + m
2772 iwork = itaup + m
2773*
2774* Bidiagonalize L in WORK(IU), copying result to U
2775* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2776*
2777 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2778 $ work( ie ), work( itauq ),
2779 $ work( itaup ), work( iwork ),
2780 $ lwork-iwork+1, ierr )
2781 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2782 $ ldu )
2783*
2784* Generate right bidiagonalizing vectors in WORK(IU)
2785* (Workspace: need M*M + 4*M-1,
2786* prefer M*M+3*M+(M-1)*NB)
2787*
2788 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2789 $ work( itaup ), work( iwork ),
2790 $ lwork-iwork+1, ierr )
2791*
2792* Generate left bidiagonalizing vectors in U
2793* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2794*
2795 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2796 $ work( iwork ), lwork-iwork+1, ierr )
2797 iwork = ie + m
2798*
2799* Perform bidiagonal QR iteration, computing left
2800* singular vectors of L in U and computing right
2801* singular vectors of L in WORK(IU)
2802* (Workspace: need M*M + BDSPAC)
2803*
2804 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2805 $ work( iu ), ldwrku, u, ldu, dum, 1,
2806 $ work( iwork ), info )
2807*
2808* Multiply right singular vectors of L in WORK(IU) by
2809* Q in A, storing result in VT
2810* (Workspace: need M*M)
2811*
2812 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2813 $ ldwrku, a, lda, zero, vt, ldvt )
2814*
2815 ELSE
2816*
2817* Insufficient workspace for a fast algorithm
2818*
2819 itau = 1
2820 iwork = itau + m
2821*
2822* Compute A=L*Q, copying result to VT
2823* (Workspace: need 2*M, prefer M + M*NB)
2824*
2825 CALL dgelqf( m, n, a, lda, work( itau ),
2826 $ work( iwork ), lwork-iwork+1, ierr )
2827 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2828*
2829* Generate Q in VT
2830* (Workspace: need 2*M, prefer M + M*NB)
2831*
2832 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2833 $ work( iwork ), lwork-iwork+1, ierr )
2834*
2835* Copy L to U, zeroing out above it
2836*
2837 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2838 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2839 $ ldu )
2840 ie = itau
2841 itauq = ie + m
2842 itaup = itauq + m
2843 iwork = itaup + m
2844*
2845* Bidiagonalize L in U
2846* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2847*
2848 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2849 $ work( itauq ), work( itaup ),
2850 $ work( iwork ), lwork-iwork+1, ierr )
2851*
2852* Multiply right bidiagonalizing vectors in U by Q
2853* in VT
2854* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2855*
2856 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2857 $ work( itaup ), vt, ldvt,
2858 $ work( iwork ), lwork-iwork+1, ierr )
2859*
2860* Generate left bidiagonalizing vectors in U
2861* (Workspace: need 4*M, prefer 3*M + M*NB)
2862*
2863 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2864 $ work( iwork ), lwork-iwork+1, ierr )
2865 iwork = ie + m
2866*
2867* Perform bidiagonal QR iteration, computing left
2868* singular vectors of A in U and computing right
2869* singular vectors of A in VT
2870* (Workspace: need BDSPAC)
2871*
2872 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2873 $ ldvt, u, ldu, dum, 1, work( iwork ),
2874 $ info )
2875*
2876 END IF
2877*
2878 END IF
2879*
2880 ELSE IF( wntva ) THEN
2881*
2882 IF( wntun ) THEN
2883*
2884* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2885* N right singular vectors to be computed in VT and
2886* no left singular vectors to be computed
2887*
2888 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2889*
2890* Sufficient workspace for a fast algorithm
2891*
2892 ir = 1
2893 IF( lwork.GE.wrkbl+lda*m ) THEN
2894*
2895* WORK(IR) is LDA by M
2896*
2897 ldwrkr = lda
2898 ELSE
2899*
2900* WORK(IR) is M by M
2901*
2902 ldwrkr = m
2903 END IF
2904 itau = ir + ldwrkr*m
2905 iwork = itau + m
2906*
2907* Compute A=L*Q, copying result to VT
2908* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2909*
2910 CALL dgelqf( m, n, a, lda, work( itau ),
2911 $ work( iwork ), lwork-iwork+1, ierr )
2912 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2913*
2914* Copy L to WORK(IR), zeroing out above it
2915*
2916 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2917 $ ldwrkr )
2918 CALL dlaset( 'U', m-1, m-1, zero, zero,
2919 $ work( ir+ldwrkr ), ldwrkr )
2920*
2921* Generate Q in VT
2922* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2923*
2924 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2925 $ work( iwork ), lwork-iwork+1, ierr )
2926 ie = itau
2927 itauq = ie + m
2928 itaup = itauq + m
2929 iwork = itaup + m
2930*
2931* Bidiagonalize L in WORK(IR)
2932* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2933*
2934 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2935 $ work( ie ), work( itauq ),
2936 $ work( itaup ), work( iwork ),
2937 $ lwork-iwork+1, ierr )
2938*
2939* Generate right bidiagonalizing vectors in WORK(IR)
2940* (Workspace: need M*M + 4*M-1,
2941* prefer M*M+3*M+(M-1)*NB)
2942*
2943 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2944 $ work( itaup ), work( iwork ),
2945 $ lwork-iwork+1, ierr )
2946 iwork = ie + m
2947*
2948* Perform bidiagonal QR iteration, computing right
2949* singular vectors of L in WORK(IR)
2950* (Workspace: need M*M + BDSPAC)
2951*
2952 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2953 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2954 $ work( iwork ), info )
2955*
2956* Multiply right singular vectors of L in WORK(IR) by
2957* Q in VT, storing result in A
2958* (Workspace: need M*M)
2959*
2960 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2961 $ ldwrkr, vt, ldvt, zero, a, lda )
2962*
2963* Copy right singular vectors of A from A to VT
2964*
2965 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2966*
2967 ELSE
2968*
2969* Insufficient workspace for a fast algorithm
2970*
2971 itau = 1
2972 iwork = itau + m
2973*
2974* Compute A=L*Q, copying result to VT
2975* (Workspace: need 2*M, prefer M + M*NB)
2976*
2977 CALL dgelqf( m, n, a, lda, work( itau ),
2978 $ work( iwork ), lwork-iwork+1, ierr )
2979 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2980*
2981* Generate Q in VT
2982* (Workspace: need M + N, prefer M + N*NB)
2983*
2984 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2985 $ work( iwork ), lwork-iwork+1, ierr )
2986 ie = itau
2987 itauq = ie + m
2988 itaup = itauq + m
2989 iwork = itaup + m
2990*
2991* Zero out above L in A
2992*
2993 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2994 $ lda )
2995*
2996* Bidiagonalize L in A
2997* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2998*
2999 CALL dgebrd( m, m, a, lda, s, work( ie ),
3000 $ work( itauq ), work( itaup ),
3001 $ work( iwork ), lwork-iwork+1, ierr )
3002*
3003* Multiply right bidiagonalizing vectors in A by Q
3004* in VT
3005* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3006*
3007 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3008 $ work( itaup ), vt, ldvt,
3009 $ work( iwork ), lwork-iwork+1, ierr )
3010 iwork = ie + m
3011*
3012* Perform bidiagonal QR iteration, computing right
3013* singular vectors of A in VT
3014* (Workspace: need BDSPAC)
3015*
3016 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3017 $ ldvt, dum, 1, dum, 1, work( iwork ),
3018 $ info )
3019*
3020 END IF
3021*
3022 ELSE IF( wntuo ) THEN
3023*
3024* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3025* N right singular vectors to be computed in VT and
3026* M left singular vectors to be overwritten on A
3027*
3028 IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3029*
3030* Sufficient workspace for a fast algorithm
3031*
3032 iu = 1
3033 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3034*
3035* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3036*
3037 ldwrku = lda
3038 ir = iu + ldwrku*m
3039 ldwrkr = lda
3040 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3041*
3042* WORK(IU) is LDA by M and WORK(IR) is M by M
3043*
3044 ldwrku = lda
3045 ir = iu + ldwrku*m
3046 ldwrkr = m
3047 ELSE
3048*
3049* WORK(IU) is M by M and WORK(IR) is M by M
3050*
3051 ldwrku = m
3052 ir = iu + ldwrku*m
3053 ldwrkr = m
3054 END IF
3055 itau = ir + ldwrkr*m
3056 iwork = itau + m
3057*
3058* Compute A=L*Q, copying result to VT
3059* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3060*
3061 CALL dgelqf( m, n, a, lda, work( itau ),
3062 $ work( iwork ), lwork-iwork+1, ierr )
3063 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3064*
3065* Generate Q in VT
3066* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3067*
3068 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3069 $ work( iwork ), lwork-iwork+1, ierr )
3070*
3071* Copy L to WORK(IU), zeroing out above it
3072*
3073 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3074 $ ldwrku )
3075 CALL dlaset( 'U', m-1, m-1, zero, zero,
3076 $ work( iu+ldwrku ), ldwrku )
3077 ie = itau
3078 itauq = ie + m
3079 itaup = itauq + m
3080 iwork = itaup + m
3081*
3082* Bidiagonalize L in WORK(IU), copying result to
3083* WORK(IR)
3084* (Workspace: need 2*M*M + 4*M,
3085* prefer 2*M*M+3*M+2*M*NB)
3086*
3087 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3088 $ work( ie ), work( itauq ),
3089 $ work( itaup ), work( iwork ),
3090 $ lwork-iwork+1, ierr )
3091 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3092 $ work( ir ), ldwrkr )
3093*
3094* Generate right bidiagonalizing vectors in WORK(IU)
3095* (Workspace: need 2*M*M + 4*M-1,
3096* prefer 2*M*M+3*M+(M-1)*NB)
3097*
3098 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3099 $ work( itaup ), work( iwork ),
3100 $ lwork-iwork+1, ierr )
3101*
3102* Generate left bidiagonalizing vectors in WORK(IR)
3103* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3104*
3105 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3106 $ work( itauq ), work( iwork ),
3107 $ lwork-iwork+1, ierr )
3108 iwork = ie + m
3109*
3110* Perform bidiagonal QR iteration, computing left
3111* singular vectors of L in WORK(IR) and computing
3112* right singular vectors of L in WORK(IU)
3113* (Workspace: need 2*M*M + BDSPAC)
3114*
3115 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3116 $ work( iu ), ldwrku, work( ir ),
3117 $ ldwrkr, dum, 1, work( iwork ), info )
3118*
3119* Multiply right singular vectors of L in WORK(IU) by
3120* Q in VT, storing result in A
3121* (Workspace: need M*M)
3122*
3123 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3124 $ ldwrku, vt, ldvt, zero, a, lda )
3125*
3126* Copy right singular vectors of A from A to VT
3127*
3128 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3129*
3130* Copy left singular vectors of A from WORK(IR) to A
3131*
3132 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3133 $ lda )
3134*
3135 ELSE
3136*
3137* Insufficient workspace for a fast algorithm
3138*
3139 itau = 1
3140 iwork = itau + m
3141*
3142* Compute A=L*Q, copying result to VT
3143* (Workspace: need 2*M, prefer M + M*NB)
3144*
3145 CALL dgelqf( m, n, a, lda, work( itau ),
3146 $ work( iwork ), lwork-iwork+1, ierr )
3147 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3148*
3149* Generate Q in VT
3150* (Workspace: need M + N, prefer M + N*NB)
3151*
3152 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3153 $ work( iwork ), lwork-iwork+1, ierr )
3154 ie = itau
3155 itauq = ie + m
3156 itaup = itauq + m
3157 iwork = itaup + m
3158*
3159* Zero out above L in A
3160*
3161 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3162 $ lda )
3163*
3164* Bidiagonalize L in A
3165* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3166*
3167 CALL dgebrd( m, m, a, lda, s, work( ie ),
3168 $ work( itauq ), work( itaup ),
3169 $ work( iwork ), lwork-iwork+1, ierr )
3170*
3171* Multiply right bidiagonalizing vectors in A by Q
3172* in VT
3173* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3174*
3175 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3176 $ work( itaup ), vt, ldvt,
3177 $ work( iwork ), lwork-iwork+1, ierr )
3178*
3179* Generate left bidiagonalizing vectors in A
3180* (Workspace: need 4*M, prefer 3*M + M*NB)
3181*
3182 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3183 $ work( iwork ), lwork-iwork+1, ierr )
3184 iwork = ie + m
3185*
3186* Perform bidiagonal QR iteration, computing left
3187* singular vectors of A in A and computing right
3188* singular vectors of A in VT
3189* (Workspace: need BDSPAC)
3190*
3191 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3192 $ ldvt, a, lda, dum, 1, work( iwork ),
3193 $ info )
3194*
3195 END IF
3196*
3197 ELSE IF( wntuas ) THEN
3198*
3199* Path 9t(N much larger than M, JOBU='S' or 'A',
3200* JOBVT='A')
3201* N right singular vectors to be computed in VT and
3202* M left singular vectors to be computed in U
3203*
3204 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3205*
3206* Sufficient workspace for a fast algorithm
3207*
3208 iu = 1
3209 IF( lwork.GE.wrkbl+lda*m ) THEN
3210*
3211* WORK(IU) is LDA by M
3212*
3213 ldwrku = lda
3214 ELSE
3215*
3216* WORK(IU) is M by M
3217*
3218 ldwrku = m
3219 END IF
3220 itau = iu + ldwrku*m
3221 iwork = itau + m
3222*
3223* Compute A=L*Q, copying result to VT
3224* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3225*
3226 CALL dgelqf( m, n, a, lda, work( itau ),
3227 $ work( iwork ), lwork-iwork+1, ierr )
3228 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3229*
3230* Generate Q in VT
3231* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3232*
3233 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3234 $ work( iwork ), lwork-iwork+1, ierr )
3235*
3236* Copy L to WORK(IU), zeroing out above it
3237*
3238 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3239 $ ldwrku )
3240 CALL dlaset( 'U', m-1, m-1, zero, zero,
3241 $ work( iu+ldwrku ), ldwrku )
3242 ie = itau
3243 itauq = ie + m
3244 itaup = itauq + m
3245 iwork = itaup + m
3246*
3247* Bidiagonalize L in WORK(IU), copying result to U
3248* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3249*
3250 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3251 $ work( ie ), work( itauq ),
3252 $ work( itaup ), work( iwork ),
3253 $ lwork-iwork+1, ierr )
3254 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3255 $ ldu )
3256*
3257* Generate right bidiagonalizing vectors in WORK(IU)
3258* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3259*
3260 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3261 $ work( itaup ), work( iwork ),
3262 $ lwork-iwork+1, ierr )
3263*
3264* Generate left bidiagonalizing vectors in U
3265* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3266*
3267 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3268 $ work( iwork ), lwork-iwork+1, ierr )
3269 iwork = ie + m
3270*
3271* Perform bidiagonal QR iteration, computing left
3272* singular vectors of L in U and computing right
3273* singular vectors of L in WORK(IU)
3274* (Workspace: need M*M + BDSPAC)
3275*
3276 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3277 $ work( iu ), ldwrku, u, ldu, dum, 1,
3278 $ work( iwork ), info )
3279*
3280* Multiply right singular vectors of L in WORK(IU) by
3281* Q in VT, storing result in A
3282* (Workspace: need M*M)
3283*
3284 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3285 $ ldwrku, vt, ldvt, zero, a, lda )
3286*
3287* Copy right singular vectors of A from A to VT
3288*
3289 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3290*
3291 ELSE
3292*
3293* Insufficient workspace for a fast algorithm
3294*
3295 itau = 1
3296 iwork = itau + m
3297*
3298* Compute A=L*Q, copying result to VT
3299* (Workspace: need 2*M, prefer M + M*NB)
3300*
3301 CALL dgelqf( m, n, a, lda, work( itau ),
3302 $ work( iwork ), lwork-iwork+1, ierr )
3303 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3304*
3305* Generate Q in VT
3306* (Workspace: need M + N, prefer M + N*NB)
3307*
3308 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3309 $ work( iwork ), lwork-iwork+1, ierr )
3310*
3311* Copy L to U, zeroing out above it
3312*
3313 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3314 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3315 $ ldu )
3316 ie = itau
3317 itauq = ie + m
3318 itaup = itauq + m
3319 iwork = itaup + m
3320*
3321* Bidiagonalize L in U
3322* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3323*
3324 CALL dgebrd( m, m, u, ldu, s, work( ie ),
3325 $ work( itauq ), work( itaup ),
3326 $ work( iwork ), lwork-iwork+1, ierr )
3327*
3328* Multiply right bidiagonalizing vectors in U by Q
3329* in VT
3330* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3331*
3332 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3333 $ work( itaup ), vt, ldvt,
3334 $ work( iwork ), lwork-iwork+1, ierr )
3335*
3336* Generate left bidiagonalizing vectors in U
3337* (Workspace: need 4*M, prefer 3*M + M*NB)
3338*
3339 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3340 $ work( iwork ), lwork-iwork+1, ierr )
3341 iwork = ie + m
3342*
3343* Perform bidiagonal QR iteration, computing left
3344* singular vectors of A in U and computing right
3345* singular vectors of A in VT
3346* (Workspace: need BDSPAC)
3347*
3348 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3349 $ ldvt, u, ldu, dum, 1, work( iwork ),
3350 $ info )
3351*
3352 END IF
3353*
3354 END IF
3355*
3356 END IF
3357*
3358 ELSE
3359*
3360* N .LT. MNTHR
3361*
3362* Path 10t(N greater than M, but not much larger)
3363* Reduce to bidiagonal form without LQ decomposition
3364*
3365 ie = 1
3366 itauq = ie + m
3367 itaup = itauq + m
3368 iwork = itaup + m
3369*
3370* Bidiagonalize A
3371* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3372*
3373 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3374 $ work( itaup ), work( iwork ), lwork-iwork+1,
3375 $ ierr )
3376 IF( wntuas ) THEN
3377*
3378* If left singular vectors desired in U, copy result to U
3379* and generate left bidiagonalizing vectors in U
3380* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3381*
3382 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3383 CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3384 $ work( iwork ), lwork-iwork+1, ierr )
3385 END IF
3386 IF( wntvas ) THEN
3387*
3388* If right singular vectors desired in VT, copy result to
3389* VT and generate right bidiagonalizing vectors in VT
3390* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3391*
3392 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3393 IF( wntva )
3394 $ nrvt = n
3395 IF( wntvs )
3396 $ nrvt = m
3397 CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3398 $ work( iwork ), lwork-iwork+1, ierr )
3399 END IF
3400 IF( wntuo ) THEN
3401*
3402* If left singular vectors desired in A, generate left
3403* bidiagonalizing vectors in A
3404* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3405*
3406 CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3407 $ work( iwork ), lwork-iwork+1, ierr )
3408 END IF
3409 IF( wntvo ) THEN
3410*
3411* If right singular vectors desired in A, generate right
3412* bidiagonalizing vectors in A
3413* (Workspace: need 4*M, prefer 3*M + M*NB)
3414*
3415 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3416 $ work( iwork ), lwork-iwork+1, ierr )
3417 END IF
3418 iwork = ie + m
3419 IF( wntuas .OR. wntuo )
3420 $ nru = m
3421 IF( wntun )
3422 $ nru = 0
3423 IF( wntvas .OR. wntvo )
3424 $ ncvt = n
3425 IF( wntvn )
3426 $ ncvt = 0
3427 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3428*
3429* Perform bidiagonal QR iteration, if desired, computing
3430* left singular vectors in U and computing right singular
3431* vectors in VT
3432* (Workspace: need BDSPAC)
3433*
3434 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3435 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3436 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3437*
3438* Perform bidiagonal QR iteration, if desired, computing
3439* left singular vectors in U and computing right singular
3440* vectors in A
3441* (Workspace: need BDSPAC)
3442*
3443 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3444 $ u, ldu, dum, 1, work( iwork ), info )
3445 ELSE
3446*
3447* Perform bidiagonal QR iteration, if desired, computing
3448* left singular vectors in A and computing right singular
3449* vectors in VT
3450* (Workspace: need BDSPAC)
3451*
3452 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3453 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3454 END IF
3455*
3456 END IF
3457*
3458 END IF
3459*
3460* If DBDSQR failed to converge, copy unconverged superdiagonals
3461* to WORK( 2:MINMN )
3462*
3463 IF( info.NE.0 ) THEN
3464 IF( ie.GT.2 ) THEN
3465 DO 50 i = 1, minmn - 1
3466 work( i+1 ) = work( i+ie-1 )
3467 50 CONTINUE
3468 END IF
3469 IF( ie.LT.2 ) THEN
3470 DO 60 i = minmn - 1, 1, -1
3471 work( i+1 ) = work( i+ie-1 )
3472 60 CONTINUE
3473 END IF
3474 END IF
3475*
3476* Undo scaling if necessary
3477*
3478 IF( iscl.EQ.1 ) THEN
3479 IF( anrm.GT.bignum )
3480 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3481 $ ierr )
3482 IF( info.NE.0 .AND. anrm.GT.bignum )
3483 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3484 $ minmn, ierr )
3485 IF( anrm.LT.smlnum )
3486 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3487 $ ierr )
3488 IF( info.NE.0 .AND. anrm.LT.smlnum )
3489 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3490 $ minmn, ierr )
3491 END IF
3492*
3493* Return optimal workspace in WORK(1)
3494*
3495 work( 1 ) = maxwrk
3496*
3497 RETURN
3498*
3499* End of DGESVD
3500*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
DBDSQR
Definition dbdsqr.f:241

◆ dgesvdq()

subroutine dgesvdq ( character joba,
character jobp,
character jobr,
character jobu,
character jobv,
integer m,
integer n,
double precision, dimension( lda, * ) a,
integer lda,
double precision, dimension( * ) s,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldv, * ) v,
integer ldv,
integer numrank,
integer, dimension( * ) iwork,
integer liwork,
double precision, dimension( * ) work,
integer lwork,
double precision, dimension( * ) rwork,
integer lrwork,
integer info )

DGESVDQ computes the singular value decomposition (SVD) with a QR-Preconditioned QR SVD Method for GE matrices

Download DGESVDQ + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DGESVDQ computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, where M >= N. The SVD of A is written as
!>                                    [++]   [xx]   [x0]   [xx]
!>              A = U * SIGMA * V^*,  [++] = [xx] * [ox] * [xx]
!>                                    [++]   [xx]
!> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
!> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements
!> of SIGMA are the singular values of A. The columns of U and V are the
!> left and the right singular vectors of A, respectively.
!> 
Parameters
[in]JOBA
!>  JOBA is CHARACTER*1
!>  Specifies the level of accuracy in the computed SVD
!>  = 'A' The requested accuracy corresponds to having the backward
!>        error bounded by || delta A ||_F <= f(m,n) * EPS * || A ||_F,
!>        where EPS = DLAMCH('Epsilon'). This authorises DGESVDQ to
!>        truncate the computed triangular factor in a rank revealing
!>        QR factorization whenever the truncated part is below the
!>        threshold of the order of EPS * ||A||_F. This is aggressive
!>        truncation level.
!>  = 'M' Similarly as with 'A', but the truncation is more gentle: it
!>        is allowed only when there is a drop on the diagonal of the
!>        triangular factor in the QR factorization. This is medium
!>        truncation level.
!>  = 'H' High accuracy requested. No numerical rank determination based
!>        on the rank revealing QR factorization is attempted.
!>  = 'E' Same as 'H', and in addition the condition number of column
!>        scaled A is estimated and returned in  RWORK(1).
!>        N^(-1/4)*RWORK(1) <= ||pinv(A_scaled)||_2 <= N^(1/4)*RWORK(1)
!> 
[in]JOBP
!>  JOBP is CHARACTER*1
!>  = 'P' The rows of A are ordered in decreasing order with respect to
!>        ||A(i,:)||_\infty. This enhances numerical accuracy at the cost
!>        of extra data movement. Recommended for numerical robustness.
!>  = 'N' No row pivoting.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>          = 'T' After the initial pivoted QR factorization, DGESVD is applied to
!>          the transposed R**T of the computed triangular factor R. This involves
!>          some extra data movement (matrix transpositions). Useful for
!>          experiments, research and development.
!>          = 'N' The triangular factor R is given as input to DGESVD. This may be
!>          preferred as it involves less data movement.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'A' All M left singular vectors are computed and returned in the
!>          matrix U. See the description of U.
!>          = 'S' or 'U' N = min(M,N) left singular vectors are computed and returned
!>          in the matrix U. See the description of U.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK left singular
!>          vectors are computed and returned in the matrix U.
!>          = 'F' The N left singular vectors are returned in factored form as the
!>          product of the Q factor from the initial QR factorization and the
!>          N left singular vectors of (R**T , 0)**T. If row pivoting is used,
!>          then the necessary information on the row pivoting is stored in
!>          IWORK(N+1:N+M-1).
!>          = 'N' The left singular vectors are not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'A', 'V' All N right singular vectors are computed and returned in
!>          the matrix V.
!>          = 'R' Numerical rank NUMRANK is determined and only NUMRANK right singular
!>          vectors are computed and returned in the matrix V. This option is
!>          allowed only if JOBU = 'R' or JOBU = 'N'; otherwise it is illegal.
!>          = 'N' The right singular vectors are not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  M >= N >= 0.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array of dimensions LDA x N
!>          On entry, the input matrix A.
!>          On exit, if JOBU .NE. 'N' or JOBV .NE. 'N', the lower triangle of A contains
!>          the Householder vectors as stored by DGEQP3. If JOBU = 'F', these Householder
!>          vectors together with WORK(1:N) can be used to restore the Q factors from
!>          the initial pivoted QR factorization of A. See the description of U.
!> 
[in]LDA
!>          LDA is INTEGER.
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is DOUBLE PRECISION array of dimension N.
!>          The singular values of A, ordered so that S(i) >= S(i+1).
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension
!>          LDU x M if JOBU = 'A'; see the description of LDU. In this case,
!>          on exit, U contains the M left singular vectors.
!>          LDU x N if JOBU = 'S', 'U', 'R' ; see the description of LDU. In this
!>          case, U contains the leading N or the leading NUMRANK left singular vectors.
!>          LDU x N if JOBU = 'F' ; see the description of LDU. In this case U
!>          contains N x N orthogonal matrix that can be used to form the left
!>          singular vectors.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER.
!>          The leading dimension of the array U.
!>          If JOBU = 'A', 'S', 'U', 'R',  LDU >= max(1,M).
!>          If JOBU = 'F',                 LDU >= max(1,N).
!>          Otherwise,                     LDU >= 1.
!> 
[out]V
!>          V is DOUBLE PRECISION array, dimension
!>          LDV x N if JOBV = 'A', 'V', 'R' or if JOBA = 'E' .
!>          If JOBV = 'A', or 'V',  V contains the N-by-N orthogonal matrix  V**T;
!>          If JOBV = 'R', V contains the first NUMRANK rows of V**T (the right
!>          singular vectors, stored rowwise, of the NUMRANK largest singular values).
!>          If JOBV = 'N' and JOBA = 'E', V is used as a workspace.
!>          If JOBV = 'N', and JOBA.NE.'E', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V.
!>          If JOBV = 'A', 'V', 'R',  or JOBA = 'E', LDV >= max(1,N).
!>          Otherwise,                               LDV >= 1.
!> 
[out]NUMRANK
!>          NUMRANK is INTEGER
!>          NUMRANK is the numerical rank first determined after the rank
!>          revealing QR factorization, following the strategy specified by the
!>          value of JOBA. If JOBV = 'R' and JOBU = 'R', only NUMRANK
!>          leading singular values and vectors are then requested in the call
!>          of DGESVD. The final value of NUMRANK might be further reduced if
!>          some singular values are computed as zeros.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (max(1, LIWORK)).
!>          On exit, IWORK(1:N) contains column pivoting permutation of the
!>          rank revealing QR factorization.
!>          If JOBP = 'P', IWORK(N+1:N+M-1) contains the indices of the sequence
!>          of row swaps used in row pivoting. These can be used to restore the
!>          left singular vectors in the case JOBU = 'F'.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          IWORK(1) returns the minimal LIWORK.
!> 
[in]LIWORK
!>          LIWORK is INTEGER
!>          The dimension of the array IWORK.
!>          LIWORK >= N + M - 1,     if JOBP = 'P' and JOBA .NE. 'E';
!>          LIWORK >= N              if JOBP = 'N' and JOBA .NE. 'E';
!>          LIWORK >= N + M - 1 + N, if JOBP = 'P' and JOBA = 'E';
!>          LIWORK >= N + N          if JOBP = 'N' and JOBA = 'E'.
!>
!>          If LIWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (max(2, LWORK)), used as a workspace.
!>          On exit, if, on entry, LWORK.NE.-1, WORK(1:N) contains parameters
!>          needed to recover the Q factor from the QR factorization computed by
!>          DGEQP3.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          WORK(1) returns the optimal LWORK, and
!>          WORK(2) returns the minimal LWORK.
!> 
[in,out]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. It is determined as follows:
!>          Let  LWQP3 = 3*N+1,  LWCON = 3*N, and let
!>          LWORQ = { MAX( N, 1 ),  if JOBU = 'R', 'S', or 'U'
!>                  { MAX( M, 1 ),  if JOBU = 'A'
!>          LWSVD = MAX( 5*N, 1 )
!>          LWLQF = MAX( N/2, 1 ), LWSVD2 = MAX( 5*(N/2), 1 ), LWORLQ = MAX( N, 1 ),
!>          LWQRF = MAX( N/2, 1 ), LWORQ2 = MAX( N, 1 )
!>          Then the minimal value of LWORK is:
!>          = MAX( N + LWQP3, LWSVD )        if only the singular values are needed;
!>          = MAX( N + LWQP3, LWCON, LWSVD ) if only the singular values are needed,
!>                                   and a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the singular values and the left
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the singular values and the left
!>                                   singular vectors are requested, and also
!>                                   a scaled condition estimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD )        if the singular values and the right
!>                                   singular vectors are requested;
!>          = N + MAX( LWQP3, LWCON, LWSVD ) if the singular values and the right
!>                                   singular vectors are requested, and also
!>                                   a scaled condition etimate requested;
!>
!>          = N + MAX( LWQP3, LWSVD, LWORQ ) if the full SVD is requested with JOBV = 'R';
!>                                   independent of JOBR;
!>          = N + MAX( LWQP3, LWCON, LWSVD, LWORQ ) if the full SVD is requested,
!>                                   JOBV = 'R' and, also a scaled condition
!>                                   estimate requested; independent of JOBR;
!>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ) ) if the
!>                         full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWLQF, N/2+LWSVD2, N/2+LWORLQ, LWORQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='N', and also a scaled condition number estimate
!>                         requested.
!>          = MAX( N + MAX( LWQP3, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) ) if the
!>                         full SVD is requested with JOBV = 'A', 'V', and JOBR ='T'
!>          = MAX( N + MAX( LWQP3, LWCON, LWSVD, LWORQ ),
!>         N + MAX( LWQP3, LWCON, N/2+LWQRF, N/2+LWSVD2, N/2+LWORQ2, LWORQ ) )
!>                         if the full SVD is requested with JOBV = 'A' or 'V', and
!>                         JOBR ='T', and also a scaled condition number estimate
!>                         requested.
!>          Finally, LWORK must be at least two: LWORK = MAX( 2, LWORK ).
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (max(1, LRWORK)).
!>          On exit,
!>          1. If JOBA = 'E', RWORK(1) contains an estimate of the condition
!>          number of column scaled A. If A = C * D where D is diagonal and C
!>          has unit columns in the Euclidean norm, then, assuming full column rank,
!>          N^(-1/4) * RWORK(1) <= ||pinv(C)||_2 <= N^(1/4) * RWORK(1).
!>          Otherwise, RWORK(1) = -1.
!>          2. RWORK(2) contains the number of singular values computed as
!>          exact zeros in DGESVD applied to the upper triangular or trapezoidal
!>          R (from the initial QR factorization). In case of early exit (no call to
!>          DGESVD, such as in the case of zero matrix) RWORK(2) = -1.
!>
!>          If LIWORK, LWORK, or LRWORK = -1, then on exit, if INFO = 0,
!>          RWORK(1) returns the minimal LRWORK.
!> 
[in]LRWORK
!>          LRWORK is INTEGER.
!>          The dimension of the array RWORK.
!>          If JOBP ='P', then LRWORK >= MAX(2, M).
!>          Otherwise, LRWORK >= 2
!>
!>          If LRWORK = -1, then a workspace query is assumed; the routine
!>          only calculates and returns the optimal and minimal sizes
!>          for the WORK, IWORK, and RWORK arrays, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if DBDSQR did not converge, INFO specifies how many superdiagonals
!>          of an intermediate bidiagonal form B (computed in DGESVD) did not
!>          converge to zero.
!> 
Further Details:
!>
!>   1. The data movement (matrix transpose) is coded using simple nested
!>   DO-loops because BLAS and LAPACK do not provide corresponding subroutines.
!>   Those DO-loops are easily identified in this source code - by the CONTINUE
!>   statements labeled with 11**. In an optimized version of this code, the
!>   nested DO loops should be replaced with calls to an optimized subroutine.
!>   2. This code scales A by 1/SQRT(M) if the largest ABS(A(i,j)) could cause
!>   column norm overflow. This is the minial precaution and it is left to the
!>   SVD routine (CGESVD) to do its own preemptive scaling if potential over-
!>   or underflows are detected. To avoid repeated scanning of the array A,
!>   an optimal implementation would do all necessary scaling before calling
!>   CGESVD and the scaling in CGESVD can be switched off.
!>   3. Other comments related to code optimization are given in comments in the
!>   code, enlosed in [[double brackets]].
!> 
Bugs, examples and comments
!>  Please report all bugs and send interesting examples and/or comments to
!>  drmac@math.hr. Thank you.
!> 
References
!>  [1] Zlatko Drmac, Algorithm 977: A QR-Preconditioned QR SVD Method for
!>      Computing the SVD with High Accuracy. ACM Trans. Math. Softw.
!>      44(1): 11:1-11:30 (2017)
!>
!>  SIGMA library, xGESVDQ section updated February 2016.
!>  Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Contributors:
!> Developed and coded by Zlatko Drmac, Department of Mathematics
!>  University of Zagreb, Croatia, drmac@math.hr
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 412 of file dgesvdq.f.

415* .. Scalar Arguments ..
416 IMPLICIT NONE
417 CHARACTER JOBA, JOBP, JOBR, JOBU, JOBV
418 INTEGER M, N, LDA, LDU, LDV, NUMRANK, LIWORK, LWORK, LRWORK,
419 $ INFO
420* ..
421* .. Array Arguments ..
422 DOUBLE PRECISION A( LDA, * ), U( LDU, * ), V( LDV, * ), WORK( * )
423 DOUBLE PRECISION S( * ), RWORK( * )
424 INTEGER IWORK( * )
425*
426* =====================================================================
427*
428* .. Parameters ..
429 DOUBLE PRECISION ZERO, ONE
430 parameter( zero = 0.0d0, one = 1.0d0 )
431* .. Local Scalars ..
432 INTEGER IERR, IWOFF, NR, N1, OPTRATIO, p, q
433 INTEGER LWCON, LWQP3, LWRK_DGELQF, LWRK_DGESVD, LWRK_DGESVD2,
434 $ LWRK_DGEQP3, LWRK_DGEQRF, LWRK_DORMLQ, LWRK_DORMQR,
435 $ LWRK_DORMQR2, LWLQF, LWQRF, LWSVD, LWSVD2, LWORQ,
436 $ LWORQ2, LWORLQ, MINWRK, MINWRK2, OPTWRK, OPTWRK2,
437 $ IMINWRK, RMINWRK
438 LOGICAL ACCLA, ACCLM, ACCLH, ASCALED, CONDA, DNTWU, DNTWV,
439 $ LQUERY, LSVC0, LSVEC, ROWPRM, RSVEC, RTRANS, WNTUA,
440 $ WNTUF, WNTUR, WNTUS, WNTVA, WNTVR
441 DOUBLE PRECISION BIG, EPSLN, RTMP, SCONDA, SFMIN
442* .. Local Arrays
443 DOUBLE PRECISION RDUMMY(1)
444* ..
445* .. External Subroutines (BLAS, LAPACK)
446 EXTERNAL dgelqf, dgeqp3, dgeqrf, dgesvd, dlacpy, dlapmt,
448 $ dormqr, xerbla
449* ..
450* .. External Functions (BLAS, LAPACK)
451 LOGICAL LSAME
452 INTEGER IDAMAX
453 DOUBLE PRECISION DLANGE, DNRM2, DLAMCH
454 EXTERNAL dlange, lsame, idamax, dnrm2, dlamch
455* ..
456* .. Intrinsic Functions ..
457*
458 INTRINSIC abs, max, min, dble, sqrt
459*
460* Test the input arguments
461*
462 wntus = lsame( jobu, 'S' ) .OR. lsame( jobu, 'U' )
463 wntur = lsame( jobu, 'R' )
464 wntua = lsame( jobu, 'A' )
465 wntuf = lsame( jobu, 'F' )
466 lsvc0 = wntus .OR. wntur .OR. wntua
467 lsvec = lsvc0 .OR. wntuf
468 dntwu = lsame( jobu, 'N' )
469*
470 wntvr = lsame( jobv, 'R' )
471 wntva = lsame( jobv, 'A' ) .OR. lsame( jobv, 'V' )
472 rsvec = wntvr .OR. wntva
473 dntwv = lsame( jobv, 'N' )
474*
475 accla = lsame( joba, 'A' )
476 acclm = lsame( joba, 'M' )
477 conda = lsame( joba, 'E' )
478 acclh = lsame( joba, 'H' ) .OR. conda
479*
480 rowprm = lsame( jobp, 'P' )
481 rtrans = lsame( jobr, 'T' )
482*
483 IF ( rowprm ) THEN
484 IF ( conda ) THEN
485 iminwrk = max( 1, n + m - 1 + n )
486 ELSE
487 iminwrk = max( 1, n + m - 1 )
488 END IF
489 rminwrk = max( 2, m )
490 ELSE
491 IF ( conda ) THEN
492 iminwrk = max( 1, n + n )
493 ELSE
494 iminwrk = max( 1, n )
495 END IF
496 rminwrk = 2
497 END IF
498 lquery = (liwork .EQ. -1 .OR. lwork .EQ. -1 .OR. lrwork .EQ. -1)
499 info = 0
500 IF ( .NOT. ( accla .OR. acclm .OR. acclh ) ) THEN
501 info = -1
502 ELSE IF ( .NOT.( rowprm .OR. lsame( jobp, 'N' ) ) ) THEN
503 info = -2
504 ELSE IF ( .NOT.( rtrans .OR. lsame( jobr, 'N' ) ) ) THEN
505 info = -3
506 ELSE IF ( .NOT.( lsvec .OR. dntwu ) ) THEN
507 info = -4
508 ELSE IF ( wntur .AND. wntva ) THEN
509 info = -5
510 ELSE IF ( .NOT.( rsvec .OR. dntwv )) THEN
511 info = -5
512 ELSE IF ( m.LT.0 ) THEN
513 info = -6
514 ELSE IF ( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
515 info = -7
516 ELSE IF ( lda.LT.max( 1, m ) ) THEN
517 info = -9
518 ELSE IF ( ldu.LT.1 .OR. ( lsvc0 .AND. ldu.LT.m ) .OR.
519 $ ( wntuf .AND. ldu.LT.n ) ) THEN
520 info = -12
521 ELSE IF ( ldv.LT.1 .OR. ( rsvec .AND. ldv.LT.n ) .OR.
522 $ ( conda .AND. ldv.LT.n ) ) THEN
523 info = -14
524 ELSE IF ( liwork .LT. iminwrk .AND. .NOT. lquery ) THEN
525 info = -17
526 END IF
527*
528*
529 IF ( info .EQ. 0 ) THEN
530* .. compute the minimal and the optimal workspace lengths
531* [[The expressions for computing the minimal and the optimal
532* values of LWORK are written with a lot of redundancy and
533* can be simplified. However, this detailed form is easier for
534* maintenance and modifications of the code.]]
535*
536* .. minimal workspace length for DGEQP3 of an M x N matrix
537 lwqp3 = 3 * n + 1
538* .. minimal workspace length for DORMQR to build left singular vectors
539 IF ( wntus .OR. wntur ) THEN
540 lworq = max( n , 1 )
541 ELSE IF ( wntua ) THEN
542 lworq = max( m , 1 )
543 END IF
544* .. minimal workspace length for DPOCON of an N x N matrix
545 lwcon = 3 * n
546* .. DGESVD of an N x N matrix
547 lwsvd = max( 5 * n, 1 )
548 IF ( lquery ) THEN
549 CALL dgeqp3( m, n, a, lda, iwork, rdummy, rdummy, -1,
550 $ ierr )
551 lwrk_dgeqp3 = int( rdummy(1) )
552 IF ( wntus .OR. wntur ) THEN
553 CALL dormqr( 'L', 'N', m, n, n, a, lda, rdummy, u,
554 $ ldu, rdummy, -1, ierr )
555 lwrk_dormqr = int( rdummy(1) )
556 ELSE IF ( wntua ) THEN
557 CALL dormqr( 'L', 'N', m, m, n, a, lda, rdummy, u,
558 $ ldu, rdummy, -1, ierr )
559 lwrk_dormqr = int( rdummy(1) )
560 ELSE
561 lwrk_dormqr = 0
562 END IF
563 END IF
564 minwrk = 2
565 optwrk = 2
566 IF ( .NOT. (lsvec .OR. rsvec )) THEN
567* .. minimal and optimal sizes of the workspace if
568* only the singular values are requested
569 IF ( conda ) THEN
570 minwrk = max( n+lwqp3, lwcon, lwsvd )
571 ELSE
572 minwrk = max( n+lwqp3, lwsvd )
573 END IF
574 IF ( lquery ) THEN
575 CALL dgesvd( 'N', 'N', n, n, a, lda, s, u, ldu,
576 $ v, ldv, rdummy, -1, ierr )
577 lwrk_dgesvd = int( rdummy(1) )
578 IF ( conda ) THEN
579 optwrk = max( n+lwrk_dgeqp3, n+lwcon, lwrk_dgesvd )
580 ELSE
581 optwrk = max( n+lwrk_dgeqp3, lwrk_dgesvd )
582 END IF
583 END IF
584 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
585* .. minimal and optimal sizes of the workspace if the
586* singular values and the left singular vectors are requested
587 IF ( conda ) THEN
588 minwrk = n + max( lwqp3, lwcon, lwsvd, lworq )
589 ELSE
590 minwrk = n + max( lwqp3, lwsvd, lworq )
591 END IF
592 IF ( lquery ) THEN
593 IF ( rtrans ) THEN
594 CALL dgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
595 $ v, ldv, rdummy, -1, ierr )
596 ELSE
597 CALL dgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
598 $ v, ldv, rdummy, -1, ierr )
599 END IF
600 lwrk_dgesvd = int( rdummy(1) )
601 IF ( conda ) THEN
602 optwrk = n + max( lwrk_dgeqp3, lwcon, lwrk_dgesvd,
603 $ lwrk_dormqr )
604 ELSE
605 optwrk = n + max( lwrk_dgeqp3, lwrk_dgesvd,
606 $ lwrk_dormqr )
607 END IF
608 END IF
609 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
610* .. minimal and optimal sizes of the workspace if the
611* singular values and the right singular vectors are requested
612 IF ( conda ) THEN
613 minwrk = n + max( lwqp3, lwcon, lwsvd )
614 ELSE
615 minwrk = n + max( lwqp3, lwsvd )
616 END IF
617 IF ( lquery ) THEN
618 IF ( rtrans ) THEN
619 CALL dgesvd( 'O', 'N', n, n, a, lda, s, u, ldu,
620 $ v, ldv, rdummy, -1, ierr )
621 ELSE
622 CALL dgesvd( 'N', 'O', n, n, a, lda, s, u, ldu,
623 $ v, ldv, rdummy, -1, ierr )
624 END IF
625 lwrk_dgesvd = int( rdummy(1) )
626 IF ( conda ) THEN
627 optwrk = n + max( lwrk_dgeqp3, lwcon, lwrk_dgesvd )
628 ELSE
629 optwrk = n + max( lwrk_dgeqp3, lwrk_dgesvd )
630 END IF
631 END IF
632 ELSE
633* .. minimal and optimal sizes of the workspace if the
634* full SVD is requested
635 IF ( rtrans ) THEN
636 minwrk = max( lwqp3, lwsvd, lworq )
637 IF ( conda ) minwrk = max( minwrk, lwcon )
638 minwrk = minwrk + n
639 IF ( wntva ) THEN
640* .. minimal workspace length for N x N/2 DGEQRF
641 lwqrf = max( n/2, 1 )
642* .. minimal workspace length for N/2 x N/2 DGESVD
643 lwsvd2 = max( 5 * (n/2), 1 )
644 lworq2 = max( n, 1 )
645 minwrk2 = max( lwqp3, n/2+lwqrf, n/2+lwsvd2,
646 $ n/2+lworq2, lworq )
647 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
648 minwrk2 = n + minwrk2
649 minwrk = max( minwrk, minwrk2 )
650 END IF
651 ELSE
652 minwrk = max( lwqp3, lwsvd, lworq )
653 IF ( conda ) minwrk = max( minwrk, lwcon )
654 minwrk = minwrk + n
655 IF ( wntva ) THEN
656* .. minimal workspace length for N/2 x N DGELQF
657 lwlqf = max( n/2, 1 )
658 lwsvd2 = max( 5 * (n/2), 1 )
659 lworlq = max( n , 1 )
660 minwrk2 = max( lwqp3, n/2+lwlqf, n/2+lwsvd2,
661 $ n/2+lworlq, lworq )
662 IF ( conda ) minwrk2 = max( minwrk2, lwcon )
663 minwrk2 = n + minwrk2
664 minwrk = max( minwrk, minwrk2 )
665 END IF
666 END IF
667 IF ( lquery ) THEN
668 IF ( rtrans ) THEN
669 CALL dgesvd( 'O', 'A', n, n, a, lda, s, u, ldu,
670 $ v, ldv, rdummy, -1, ierr )
671 lwrk_dgesvd = int( rdummy(1) )
672 optwrk = max(lwrk_dgeqp3,lwrk_dgesvd,lwrk_dormqr)
673 IF ( conda ) optwrk = max( optwrk, lwcon )
674 optwrk = n + optwrk
675 IF ( wntva ) THEN
676 CALL dgeqrf(n,n/2,u,ldu,rdummy,rdummy,-1,ierr)
677 lwrk_dgeqrf = int( rdummy(1) )
678 CALL dgesvd( 'S', 'O', n/2,n/2, v,ldv, s, u,ldu,
679 $ v, ldv, rdummy, -1, ierr )
680 lwrk_dgesvd2 = int( rdummy(1) )
681 CALL dormqr( 'R', 'C', n, n, n/2, u, ldu, rdummy,
682 $ v, ldv, rdummy, -1, ierr )
683 lwrk_dormqr2 = int( rdummy(1) )
684 optwrk2 = max( lwrk_dgeqp3, n/2+lwrk_dgeqrf,
685 $ n/2+lwrk_dgesvd2, n/2+lwrk_dormqr2 )
686 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
687 optwrk2 = n + optwrk2
688 optwrk = max( optwrk, optwrk2 )
689 END IF
690 ELSE
691 CALL dgesvd( 'S', 'O', n, n, a, lda, s, u, ldu,
692 $ v, ldv, rdummy, -1, ierr )
693 lwrk_dgesvd = int( rdummy(1) )
694 optwrk = max(lwrk_dgeqp3,lwrk_dgesvd,lwrk_dormqr)
695 IF ( conda ) optwrk = max( optwrk, lwcon )
696 optwrk = n + optwrk
697 IF ( wntva ) THEN
698 CALL dgelqf(n/2,n,u,ldu,rdummy,rdummy,-1,ierr)
699 lwrk_dgelqf = int( rdummy(1) )
700 CALL dgesvd( 'S','O', n/2,n/2, v, ldv, s, u, ldu,
701 $ v, ldv, rdummy, -1, ierr )
702 lwrk_dgesvd2 = int( rdummy(1) )
703 CALL dormlq( 'R', 'N', n, n, n/2, u, ldu, rdummy,
704 $ v, ldv, rdummy,-1,ierr )
705 lwrk_dormlq = int( rdummy(1) )
706 optwrk2 = max( lwrk_dgeqp3, n/2+lwrk_dgelqf,
707 $ n/2+lwrk_dgesvd2, n/2+lwrk_dormlq )
708 IF ( conda ) optwrk2 = max( optwrk2, lwcon )
709 optwrk2 = n + optwrk2
710 optwrk = max( optwrk, optwrk2 )
711 END IF
712 END IF
713 END IF
714 END IF
715*
716 minwrk = max( 2, minwrk )
717 optwrk = max( 2, optwrk )
718 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = -19
719*
720 END IF
721*
722 IF (info .EQ. 0 .AND. lrwork .LT. rminwrk .AND. .NOT. lquery) THEN
723 info = -21
724 END IF
725 IF( info.NE.0 ) THEN
726 CALL xerbla( 'DGESVDQ', -info )
727 RETURN
728 ELSE IF ( lquery ) THEN
729*
730* Return optimal workspace
731*
732 iwork(1) = iminwrk
733 work(1) = optwrk
734 work(2) = minwrk
735 rwork(1) = rminwrk
736 RETURN
737 END IF
738*
739* Quick return if the matrix is void.
740*
741 IF( ( m.EQ.0 ) .OR. ( n.EQ.0 ) ) THEN
742* .. all output is void.
743 RETURN
744 END IF
745*
746 big = dlamch('O')
747 ascaled = .false.
748 iwoff = 1
749 IF ( rowprm ) THEN
750 iwoff = m
751* .. reordering the rows in decreasing sequence in the
752* ell-infinity norm - this enhances numerical robustness in
753* the case of differently scaled rows.
754 DO 1904 p = 1, m
755* RWORK(p) = ABS( A(p,ICAMAX(N,A(p,1),LDA)) )
756* [[DLANGE will return NaN if an entry of the p-th row is Nan]]
757 rwork(p) = dlange( 'M', 1, n, a(p,1), lda, rdummy )
758* .. check for NaN's and Inf's
759 IF ( ( rwork(p) .NE. rwork(p) ) .OR.
760 $ ( (rwork(p)*zero) .NE. zero ) ) THEN
761 info = -8
762 CALL xerbla( 'DGESVDQ', -info )
763 RETURN
764 END IF
765 1904 CONTINUE
766 DO 1952 p = 1, m - 1
767 q = idamax( m-p+1, rwork(p), 1 ) + p - 1
768 iwork(n+p) = q
769 IF ( p .NE. q ) THEN
770 rtmp = rwork(p)
771 rwork(p) = rwork(q)
772 rwork(q) = rtmp
773 END IF
774 1952 CONTINUE
775*
776 IF ( rwork(1) .EQ. zero ) THEN
777* Quick return: A is the M x N zero matrix.
778 numrank = 0
779 CALL dlaset( 'G', n, 1, zero, zero, s, n )
780 IF ( wntus ) CALL dlaset('G', m, n, zero, one, u, ldu)
781 IF ( wntua ) CALL dlaset('G', m, m, zero, one, u, ldu)
782 IF ( wntva ) CALL dlaset('G', n, n, zero, one, v, ldv)
783 IF ( wntuf ) THEN
784 CALL dlaset( 'G', n, 1, zero, zero, work, n )
785 CALL dlaset( 'G', m, n, zero, one, u, ldu )
786 END IF
787 DO 5001 p = 1, n
788 iwork(p) = p
789 5001 CONTINUE
790 IF ( rowprm ) THEN
791 DO 5002 p = n + 1, n + m - 1
792 iwork(p) = p - n
793 5002 CONTINUE
794 END IF
795 IF ( conda ) rwork(1) = -1
796 rwork(2) = -1
797 RETURN
798 END IF
799*
800 IF ( rwork(1) .GT. big / sqrt(dble(m)) ) THEN
801* .. to prevent overflow in the QR factorization, scale the
802* matrix by 1/sqrt(M) if too large entry detected
803 CALL dlascl('G',0,0,sqrt(dble(m)),one, m,n, a,lda, ierr)
804 ascaled = .true.
805 END IF
806 CALL dlaswp( n, a, lda, 1, m-1, iwork(n+1), 1 )
807 END IF
808*
809* .. At this stage, preemptive scaling is done only to avoid column
810* norms overflows during the QR factorization. The SVD procedure should
811* have its own scaling to save the singular values from overflows and
812* underflows. That depends on the SVD procedure.
813*
814 IF ( .NOT.rowprm ) THEN
815 rtmp = dlange( 'M', m, n, a, lda, rdummy )
816 IF ( ( rtmp .NE. rtmp ) .OR.
817 $ ( (rtmp*zero) .NE. zero ) ) THEN
818 info = -8
819 CALL xerbla( 'DGESVDQ', -info )
820 RETURN
821 END IF
822 IF ( rtmp .GT. big / sqrt(dble(m)) ) THEN
823* .. to prevent overflow in the QR factorization, scale the
824* matrix by 1/sqrt(M) if too large entry detected
825 CALL dlascl('G',0,0, sqrt(dble(m)),one, m,n, a,lda, ierr)
826 ascaled = .true.
827 END IF
828 END IF
829*
830* .. QR factorization with column pivoting
831*
832* A * P = Q * [ R ]
833* [ 0 ]
834*
835 DO 1963 p = 1, n
836* .. all columns are free columns
837 iwork(p) = 0
838 1963 CONTINUE
839 CALL dgeqp3( m, n, a, lda, iwork, work, work(n+1), lwork-n,
840 $ ierr )
841*
842* If the user requested accuracy level allows truncation in the
843* computed upper triangular factor, the matrix R is examined and,
844* if possible, replaced with its leading upper trapezoidal part.
845*
846 epsln = dlamch('E')
847 sfmin = dlamch('S')
848* SMALL = SFMIN / EPSLN
849 nr = n
850*
851 IF ( accla ) THEN
852*
853* Standard absolute error bound suffices. All sigma_i with
854* sigma_i < N*EPS*||A||_F are flushed to zero. This is an
855* aggressive enforcement of lower numerical rank by introducing a
856* backward error of the order of N*EPS*||A||_F.
857 nr = 1
858 rtmp = sqrt(dble(n))*epsln
859 DO 3001 p = 2, n
860 IF ( abs(a(p,p)) .LT. (rtmp*abs(a(1,1))) ) GO TO 3002
861 nr = nr + 1
862 3001 CONTINUE
863 3002 CONTINUE
864*
865 ELSEIF ( acclm ) THEN
866* .. similarly as above, only slightly more gentle (less aggressive).
867* Sudden drop on the diagonal of R is used as the criterion for being
868* close-to-rank-deficient. The threshold is set to EPSLN=DLAMCH('E').
869* [[This can be made more flexible by replacing this hard-coded value
870* with a user specified threshold.]] Also, the values that underflow
871* will be truncated.
872 nr = 1
873 DO 3401 p = 2, n
874 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
875 $ ( abs(a(p,p)) .LT. sfmin ) ) GO TO 3402
876 nr = nr + 1
877 3401 CONTINUE
878 3402 CONTINUE
879*
880 ELSE
881* .. RRQR not authorized to determine numerical rank except in the
882* obvious case of zero pivots.
883* .. inspect R for exact zeros on the diagonal;
884* R(i,i)=0 => R(i:N,i:N)=0.
885 nr = 1
886 DO 3501 p = 2, n
887 IF ( abs(a(p,p)) .EQ. zero ) GO TO 3502
888 nr = nr + 1
889 3501 CONTINUE
890 3502 CONTINUE
891*
892 IF ( conda ) THEN
893* Estimate the scaled condition number of A. Use the fact that it is
894* the same as the scaled condition number of R.
895* .. V is used as workspace
896 CALL dlacpy( 'U', n, n, a, lda, v, ldv )
897* Only the leading NR x NR submatrix of the triangular factor
898* is considered. Only if NR=N will this give a reliable error
899* bound. However, even for NR < N, this can be used on an
900* expert level and obtain useful information in the sense of
901* perturbation theory.
902 DO 3053 p = 1, nr
903 rtmp = dnrm2( p, v(1,p), 1 )
904 CALL dscal( p, one/rtmp, v(1,p), 1 )
905 3053 CONTINUE
906 IF ( .NOT. ( lsvec .OR. rsvec ) ) THEN
907 CALL dpocon( 'U', nr, v, ldv, one, rtmp,
908 $ work, iwork(n+iwoff), ierr )
909 ELSE
910 CALL dpocon( 'U', nr, v, ldv, one, rtmp,
911 $ work(n+1), iwork(n+iwoff), ierr )
912 END IF
913 sconda = one / sqrt(rtmp)
914* For NR=N, SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1),
915* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
916* See the reference [1] for more details.
917 END IF
918*
919 ENDIF
920*
921 IF ( wntur ) THEN
922 n1 = nr
923 ELSE IF ( wntus .OR. wntuf) THEN
924 n1 = n
925 ELSE IF ( wntua ) THEN
926 n1 = m
927 END IF
928*
929 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
930*.......................................................................
931* .. only the singular values are requested
932*.......................................................................
933 IF ( rtrans ) THEN
934*
935* .. compute the singular values of R**T = [A](1:NR,1:N)**T
936* .. set the lower triangle of [A] to [A](1:NR,1:N)**T and
937* the upper triangle of [A] to zero.
938 DO 1146 p = 1, min( n, nr )
939 DO 1147 q = p + 1, n
940 a(q,p) = a(p,q)
941 IF ( q .LE. nr ) a(p,q) = zero
942 1147 CONTINUE
943 1146 CONTINUE
944*
945 CALL dgesvd( 'N', 'N', n, nr, a, lda, s, u, ldu,
946 $ v, ldv, work, lwork, info )
947*
948 ELSE
949*
950* .. compute the singular values of R = [A](1:NR,1:N)
951*
952 IF ( nr .GT. 1 )
953 $ CALL dlaset( 'L', nr-1,nr-1, zero,zero, a(2,1), lda )
954 CALL dgesvd( 'N', 'N', nr, n, a, lda, s, u, ldu,
955 $ v, ldv, work, lwork, info )
956*
957 END IF
958*
959 ELSE IF ( lsvec .AND. ( .NOT. rsvec) ) THEN
960*.......................................................................
961* .. the singular values and the left singular vectors requested
962*.......................................................................""""""""
963 IF ( rtrans ) THEN
964* .. apply DGESVD to R**T
965* .. copy R**T into [U] and overwrite [U] with the right singular
966* vectors of R
967 DO 1192 p = 1, nr
968 DO 1193 q = p, n
969 u(q,p) = a(p,q)
970 1193 CONTINUE
971 1192 CONTINUE
972 IF ( nr .GT. 1 )
973 $ CALL dlaset( 'U', nr-1,nr-1, zero,zero, u(1,2), ldu )
974* .. the left singular vectors not computed, the NR right singular
975* vectors overwrite [U](1:NR,1:NR) as transposed. These
976* will be pre-multiplied by Q to build the left singular vectors of A.
977 CALL dgesvd( 'N', 'O', n, nr, u, ldu, s, u, ldu,
978 $ u, ldu, work(n+1), lwork-n, info )
979*
980 DO 1119 p = 1, nr
981 DO 1120 q = p + 1, nr
982 rtmp = u(q,p)
983 u(q,p) = u(p,q)
984 u(p,q) = rtmp
985 1120 CONTINUE
986 1119 CONTINUE
987*
988 ELSE
989* .. apply DGESVD to R
990* .. copy R into [U] and overwrite [U] with the left singular vectors
991 CALL dlacpy( 'U', nr, n, a, lda, u, ldu )
992 IF ( nr .GT. 1 )
993 $ CALL dlaset( 'L', nr-1, nr-1, zero, zero, u(2,1), ldu )
994* .. the right singular vectors not computed, the NR left singular
995* vectors overwrite [U](1:NR,1:NR)
996 CALL dgesvd( 'O', 'N', nr, n, u, ldu, s, u, ldu,
997 $ v, ldv, work(n+1), lwork-n, info )
998* .. now [U](1:NR,1:NR) contains the NR left singular vectors of
999* R. These will be pre-multiplied by Q to build the left singular
1000* vectors of A.
1001 END IF
1002*
1003* .. assemble the left singular vector matrix U of dimensions
1004* (M x NR) or (M x N) or (M x M).
1005 IF ( ( nr .LT. m ) .AND. ( .NOT.wntuf ) ) THEN
1006 CALL dlaset('A', m-nr, nr, zero, zero, u(nr+1,1), ldu)
1007 IF ( nr .LT. n1 ) THEN
1008 CALL dlaset( 'A',nr,n1-nr,zero,zero,u(1,nr+1), ldu )
1009 CALL dlaset( 'A',m-nr,n1-nr,zero,one,
1010 $ u(nr+1,nr+1), ldu )
1011 END IF
1012 END IF
1013*
1014* The Q matrix from the first QRF is built into the left singular
1015* vectors matrix U.
1016*
1017 IF ( .NOT.wntuf )
1018 $ CALL dormqr( 'L', 'N', m, n1, n, a, lda, work, u,
1019 $ ldu, work(n+1), lwork-n, ierr )
1020 IF ( rowprm .AND. .NOT.wntuf )
1021 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1022*
1023 ELSE IF ( rsvec .AND. ( .NOT. lsvec ) ) THEN
1024*.......................................................................
1025* .. the singular values and the right singular vectors requested
1026*.......................................................................
1027 IF ( rtrans ) THEN
1028* .. apply DGESVD to R**T
1029* .. copy R**T into V and overwrite V with the left singular vectors
1030 DO 1165 p = 1, nr
1031 DO 1166 q = p, n
1032 v(q,p) = (a(p,q))
1033 1166 CONTINUE
1034 1165 CONTINUE
1035 IF ( nr .GT. 1 )
1036 $ CALL dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1037* .. the left singular vectors of R**T overwrite V, the right singular
1038* vectors not computed
1039 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1040 CALL dgesvd( 'O', 'N', n, nr, v, ldv, s, u, ldu,
1041 $ u, ldu, work(n+1), lwork-n, info )
1042*
1043 DO 1121 p = 1, nr
1044 DO 1122 q = p + 1, nr
1045 rtmp = v(q,p)
1046 v(q,p) = v(p,q)
1047 v(p,q) = rtmp
1048 1122 CONTINUE
1049 1121 CONTINUE
1050*
1051 IF ( nr .LT. n ) THEN
1052 DO 1103 p = 1, nr
1053 DO 1104 q = nr + 1, n
1054 v(p,q) = v(q,p)
1055 1104 CONTINUE
1056 1103 CONTINUE
1057 END IF
1058 CALL dlapmt( .false., nr, n, v, ldv, iwork )
1059 ELSE
1060* .. need all N right singular vectors and NR < N
1061* [!] This is simple implementation that augments [V](1:N,1:NR)
1062* by padding a zero block. In the case NR << N, a more efficient
1063* way is to first use the QR factorization. For more details
1064* how to implement this, see the " FULL SVD " branch.
1065 CALL dlaset('G', n, n-nr, zero, zero, v(1,nr+1), ldv)
1066 CALL dgesvd( 'O', 'N', n, n, v, ldv, s, u, ldu,
1067 $ u, ldu, work(n+1), lwork-n, info )
1068*
1069 DO 1123 p = 1, n
1070 DO 1124 q = p + 1, n
1071 rtmp = v(q,p)
1072 v(q,p) = v(p,q)
1073 v(p,q) = rtmp
1074 1124 CONTINUE
1075 1123 CONTINUE
1076 CALL dlapmt( .false., n, n, v, ldv, iwork )
1077 END IF
1078*
1079 ELSE
1080* .. aply DGESVD to R
1081* .. copy R into V and overwrite V with the right singular vectors
1082 CALL dlacpy( 'U', nr, n, a, lda, v, ldv )
1083 IF ( nr .GT. 1 )
1084 $ CALL dlaset( 'L', nr-1, nr-1, zero, zero, v(2,1), ldv )
1085* .. the right singular vectors overwrite V, the NR left singular
1086* vectors stored in U(1:NR,1:NR)
1087 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1088 CALL dgesvd( 'N', 'O', nr, n, v, ldv, s, u, ldu,
1089 $ v, ldv, work(n+1), lwork-n, info )
1090 CALL dlapmt( .false., nr, n, v, ldv, iwork )
1091* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
1092 ELSE
1093* .. need all N right singular vectors and NR < N
1094* [!] This is simple implementation that augments [V](1:NR,1:N)
1095* by padding a zero block. In the case NR << N, a more efficient
1096* way is to first use the LQ factorization. For more details
1097* how to implement this, see the " FULL SVD " branch.
1098 CALL dlaset('G', n-nr, n, zero,zero, v(nr+1,1), ldv)
1099 CALL dgesvd( 'N', 'O', n, n, v, ldv, s, u, ldu,
1100 $ v, ldv, work(n+1), lwork-n, info )
1101 CALL dlapmt( .false., n, n, v, ldv, iwork )
1102 END IF
1103* .. now [V] contains the transposed matrix of the right singular
1104* vectors of A.
1105 END IF
1106*
1107 ELSE
1108*.......................................................................
1109* .. FULL SVD requested
1110*.......................................................................
1111 IF ( rtrans ) THEN
1112*
1113* .. apply DGESVD to R**T [[this option is left for R&D&T]]
1114*
1115 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1116* .. copy R**T into [V] and overwrite [V] with the left singular
1117* vectors of R**T
1118 DO 1168 p = 1, nr
1119 DO 1169 q = p, n
1120 v(q,p) = a(p,q)
1121 1169 CONTINUE
1122 1168 CONTINUE
1123 IF ( nr .GT. 1 )
1124 $ CALL dlaset( 'U', nr-1,nr-1, zero,zero, v(1,2), ldv )
1125*
1126* .. the left singular vectors of R**T overwrite [V], the NR right
1127* singular vectors of R**T stored in [U](1:NR,1:NR) as transposed
1128 CALL dgesvd( 'O', 'A', n, nr, v, ldv, s, v, ldv,
1129 $ u, ldu, work(n+1), lwork-n, info )
1130* .. assemble V
1131 DO 1115 p = 1, nr
1132 DO 1116 q = p + 1, nr
1133 rtmp = v(q,p)
1134 v(q,p) = v(p,q)
1135 v(p,q) = rtmp
1136 1116 CONTINUE
1137 1115 CONTINUE
1138 IF ( nr .LT. n ) THEN
1139 DO 1101 p = 1, nr
1140 DO 1102 q = nr+1, n
1141 v(p,q) = v(q,p)
1142 1102 CONTINUE
1143 1101 CONTINUE
1144 END IF
1145 CALL dlapmt( .false., nr, n, v, ldv, iwork )
1146*
1147 DO 1117 p = 1, nr
1148 DO 1118 q = p + 1, nr
1149 rtmp = u(q,p)
1150 u(q,p) = u(p,q)
1151 u(p,q) = rtmp
1152 1118 CONTINUE
1153 1117 CONTINUE
1154*
1155 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1156 CALL dlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu)
1157 IF ( nr .LT. n1 ) THEN
1158 CALL dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1159 CALL dlaset( 'A',m-nr,n1-nr,zero,one,
1160 $ u(nr+1,nr+1), ldu )
1161 END IF
1162 END IF
1163*
1164 ELSE
1165* .. need all N right singular vectors and NR < N
1166* .. copy R**T into [V] and overwrite [V] with the left singular
1167* vectors of R**T
1168* [[The optimal ratio N/NR for using QRF instead of padding
1169* with zeros. Here hard coded to 2; it must be at least
1170* two due to work space constraints.]]
1171* OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0)
1172* OPTRATIO = MAX( OPTRATIO, 2 )
1173 optratio = 2
1174 IF ( optratio*nr .GT. n ) THEN
1175 DO 1198 p = 1, nr
1176 DO 1199 q = p, n
1177 v(q,p) = a(p,q)
1178 1199 CONTINUE
1179 1198 CONTINUE
1180 IF ( nr .GT. 1 )
1181 $ CALL dlaset('U',nr-1,nr-1, zero,zero, v(1,2),ldv)
1182*
1183 CALL dlaset('A',n,n-nr,zero,zero,v(1,nr+1),ldv)
1184 CALL dgesvd( 'O', 'A', n, n, v, ldv, s, v, ldv,
1185 $ u, ldu, work(n+1), lwork-n, info )
1186*
1187 DO 1113 p = 1, n
1188 DO 1114 q = p + 1, n
1189 rtmp = v(q,p)
1190 v(q,p) = v(p,q)
1191 v(p,q) = rtmp
1192 1114 CONTINUE
1193 1113 CONTINUE
1194 CALL dlapmt( .false., n, n, v, ldv, iwork )
1195* .. assemble the left singular vector matrix U of dimensions
1196* (M x N1), i.e. (M x N) or (M x M).
1197*
1198 DO 1111 p = 1, n
1199 DO 1112 q = p + 1, n
1200 rtmp = u(q,p)
1201 u(q,p) = u(p,q)
1202 u(p,q) = rtmp
1203 1112 CONTINUE
1204 1111 CONTINUE
1205*
1206 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1207 CALL dlaset('A',m-n,n,zero,zero,u(n+1,1),ldu)
1208 IF ( n .LT. n1 ) THEN
1209 CALL dlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu)
1210 CALL dlaset('A',m-n,n1-n,zero,one,
1211 $ u(n+1,n+1), ldu )
1212 END IF
1213 END IF
1214 ELSE
1215* .. copy R**T into [U] and overwrite [U] with the right
1216* singular vectors of R
1217 DO 1196 p = 1, nr
1218 DO 1197 q = p, n
1219 u(q,nr+p) = a(p,q)
1220 1197 CONTINUE
1221 1196 CONTINUE
1222 IF ( nr .GT. 1 )
1223 $ CALL dlaset('U',nr-1,nr-1,zero,zero,u(1,nr+2),ldu)
1224 CALL dgeqrf( n, nr, u(1,nr+1), ldu, work(n+1),
1225 $ work(n+nr+1), lwork-n-nr, ierr )
1226 DO 1143 p = 1, nr
1227 DO 1144 q = 1, n
1228 v(q,p) = u(p,nr+q)
1229 1144 CONTINUE
1230 1143 CONTINUE
1231 CALL dlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv)
1232 CALL dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1233 $ v,ldv, work(n+nr+1),lwork-n-nr, info )
1234 CALL dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1235 CALL dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1236 CALL dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1237 CALL dormqr('R','C', n, n, nr, u(1,nr+1), ldu,
1238 $ work(n+1),v,ldv,work(n+nr+1),lwork-n-nr,ierr)
1239 CALL dlapmt( .false., n, n, v, ldv, iwork )
1240* .. assemble the left singular vector matrix U of dimensions
1241* (M x NR) or (M x N) or (M x M).
1242 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1243 CALL dlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu)
1244 IF ( nr .LT. n1 ) THEN
1245 CALL dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1246 CALL dlaset( 'A',m-nr,n1-nr,zero,one,
1247 $ u(nr+1,nr+1),ldu)
1248 END IF
1249 END IF
1250 END IF
1251 END IF
1252*
1253 ELSE
1254*
1255* .. apply DGESVD to R [[this is the recommended option]]
1256*
1257 IF ( wntvr .OR. ( nr .EQ. n ) ) THEN
1258* .. copy R into [V] and overwrite V with the right singular vectors
1259 CALL dlacpy( 'U', nr, n, a, lda, v, ldv )
1260 IF ( nr .GT. 1 )
1261 $ CALL dlaset( 'L', nr-1,nr-1, zero,zero, v(2,1), ldv )
1262* .. the right singular vectors of R overwrite [V], the NR left
1263* singular vectors of R stored in [U](1:NR,1:NR)
1264 CALL dgesvd( 'S', 'O', nr, n, v, ldv, s, u, ldu,
1265 $ v, ldv, work(n+1), lwork-n, info )
1266 CALL dlapmt( .false., nr, n, v, ldv, iwork )
1267* .. now [V](1:NR,1:N) contains V(1:N,1:NR)**T
1268* .. assemble the left singular vector matrix U of dimensions
1269* (M x NR) or (M x N) or (M x M).
1270 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1271 CALL dlaset('A', m-nr,nr, zero,zero, u(nr+1,1), ldu)
1272 IF ( nr .LT. n1 ) THEN
1273 CALL dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1274 CALL dlaset( 'A',m-nr,n1-nr,zero,one,
1275 $ u(nr+1,nr+1), ldu )
1276 END IF
1277 END IF
1278*
1279 ELSE
1280* .. need all N right singular vectors and NR < N
1281* .. the requested number of the left singular vectors
1282* is then N1 (N or M)
1283* [[The optimal ratio N/NR for using LQ instead of padding
1284* with zeros. Here hard coded to 2; it must be at least
1285* two due to work space constraints.]]
1286* OPTRATIO = ILAENV(6, 'DGESVD', 'S' // 'O', NR,N,0,0)
1287* OPTRATIO = MAX( OPTRATIO, 2 )
1288 optratio = 2
1289 IF ( optratio * nr .GT. n ) THEN
1290 CALL dlacpy( 'U', nr, n, a, lda, v, ldv )
1291 IF ( nr .GT. 1 )
1292 $ CALL dlaset('L', nr-1,nr-1, zero,zero, v(2,1),ldv)
1293* .. the right singular vectors of R overwrite [V], the NR left
1294* singular vectors of R stored in [U](1:NR,1:NR)
1295 CALL dlaset('A', n-nr,n, zero,zero, v(nr+1,1),ldv)
1296 CALL dgesvd( 'S', 'O', n, n, v, ldv, s, u, ldu,
1297 $ v, ldv, work(n+1), lwork-n, info )
1298 CALL dlapmt( .false., n, n, v, ldv, iwork )
1299* .. now [V] contains the transposed matrix of the right
1300* singular vectors of A. The leading N left singular vectors
1301* are in [U](1:N,1:N)
1302* .. assemble the left singular vector matrix U of dimensions
1303* (M x N1), i.e. (M x N) or (M x M).
1304 IF ( ( n .LT. m ) .AND. .NOT.(wntuf)) THEN
1305 CALL dlaset('A',m-n,n,zero,zero,u(n+1,1),ldu)
1306 IF ( n .LT. n1 ) THEN
1307 CALL dlaset('A',n,n1-n,zero,zero,u(1,n+1),ldu)
1308 CALL dlaset( 'A',m-n,n1-n,zero,one,
1309 $ u(n+1,n+1), ldu )
1310 END IF
1311 END IF
1312 ELSE
1313 CALL dlacpy( 'U', nr, n, a, lda, u(nr+1,1), ldu )
1314 IF ( nr .GT. 1 )
1315 $ CALL dlaset('L',nr-1,nr-1,zero,zero,u(nr+2,1),ldu)
1316 CALL dgelqf( nr, n, u(nr+1,1), ldu, work(n+1),
1317 $ work(n+nr+1), lwork-n-nr, ierr )
1318 CALL dlacpy('L',nr,nr,u(nr+1,1),ldu,v,ldv)
1319 IF ( nr .GT. 1 )
1320 $ CALL dlaset('U',nr-1,nr-1,zero,zero,v(1,2),ldv)
1321 CALL dgesvd( 'S', 'O', nr, nr, v, ldv, s, u, ldu,
1322 $ v, ldv, work(n+nr+1), lwork-n-nr, info )
1323 CALL dlaset('A',n-nr,nr,zero,zero,v(nr+1,1),ldv)
1324 CALL dlaset('A',nr,n-nr,zero,zero,v(1,nr+1),ldv)
1325 CALL dlaset('A',n-nr,n-nr,zero,one,v(nr+1,nr+1),ldv)
1326 CALL dormlq('R','N',n,n,nr,u(nr+1,1),ldu,work(n+1),
1327 $ v, ldv, work(n+nr+1),lwork-n-nr,ierr)
1328 CALL dlapmt( .false., n, n, v, ldv, iwork )
1329* .. assemble the left singular vector matrix U of dimensions
1330* (M x NR) or (M x N) or (M x M).
1331 IF ( ( nr .LT. m ) .AND. .NOT.(wntuf)) THEN
1332 CALL dlaset('A',m-nr,nr,zero,zero,u(nr+1,1),ldu)
1333 IF ( nr .LT. n1 ) THEN
1334 CALL dlaset('A',nr,n1-nr,zero,zero,u(1,nr+1),ldu)
1335 CALL dlaset( 'A',m-nr,n1-nr,zero,one,
1336 $ u(nr+1,nr+1), ldu )
1337 END IF
1338 END IF
1339 END IF
1340 END IF
1341* .. end of the "R**T or R" branch
1342 END IF
1343*
1344* The Q matrix from the first QRF is built into the left singular
1345* vectors matrix U.
1346*
1347 IF ( .NOT. wntuf )
1348 $ CALL dormqr( 'L', 'N', m, n1, n, a, lda, work, u,
1349 $ ldu, work(n+1), lwork-n, ierr )
1350 IF ( rowprm .AND. .NOT.wntuf )
1351 $ CALL dlaswp( n1, u, ldu, 1, m-1, iwork(n+1), -1 )
1352*
1353* ... end of the "full SVD" branch
1354 END IF
1355*
1356* Check whether some singular values are returned as zeros, e.g.
1357* due to underflow, and update the numerical rank.
1358 p = nr
1359 DO 4001 q = p, 1, -1
1360 IF ( s(q) .GT. zero ) GO TO 4002
1361 nr = nr - 1
1362 4001 CONTINUE
1363 4002 CONTINUE
1364*
1365* .. if numerical rank deficiency is detected, the truncated
1366* singular values are set to zero.
1367 IF ( nr .LT. n ) CALL dlaset( 'G', n-nr,1, zero,zero, s(nr+1), n )
1368* .. undo scaling; this may cause overflow in the largest singular
1369* values.
1370 IF ( ascaled )
1371 $ CALL dlascl( 'G',0,0, one,sqrt(dble(m)), nr,1, s, n, ierr )
1372 IF ( conda ) rwork(1) = sconda
1373 rwork(2) = p - nr
1374* .. p-NR is the number of singular values that are computed as
1375* exact zeros in DGESVD() applied to the (possibly truncated)
1376* full row rank triangular (trapezoidal) factor of A.
1377 numrank = nr
1378*
1379 RETURN
1380*
1381* End of DGESVDQ
1382*
subroutine dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
DGESVD computes the singular value decomposition (SVD) for GE matrices
Definition dgesvd.f:211
subroutine dlapmt(forwrd, m, n, x, ldx, k)
DLAPMT performs a forward or backward permutation of the columns of a matrix.
Definition dlapmt.f:104

◆ dgesvdx()

subroutine dgesvdx ( character jobu,
character jobvt,
character range,
integer m,
integer n,
double precision, dimension( lda, * ) a,
integer lda,
double precision vl,
double precision vu,
integer il,
integer iu,
integer ns,
double precision, dimension( * ) s,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldvt, * ) vt,
integer ldvt,
double precision, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

DGESVDX computes the singular value decomposition (SVD) for GE matrices

Download DGESVDX + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>  DGESVDX computes the singular value decomposition (SVD) of a real
!>  M-by-N matrix A, optionally computing the left and/or right singular
!>  vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!>  where SIGMA is an M-by-N matrix which is zero except for its
!>  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!>  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!>  are the singular values of A; they are real and non-negative, and
!>  are returned in descending order.  The first min(m,n) columns of
!>  U and V are the left and right singular vectors of A.
!>
!>  DGESVDX uses an eigenvalue problem for obtaining the SVD, which
!>  allows for the computation of a subset of singular values and
!>  vectors. See DBDSVDX for details.
!>
!>  Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'V':  the first min(m,n) columns of U (the left singular
!>                  vectors) or as specified by RANGE are returned in
!>                  the array U;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>           Specifies options for computing all or part of the matrix
!>           V**T:
!>           = 'V':  the first min(m,n) rows of V**T (the right singular
!>                   vectors) or as specified by RANGE are returned in
!>                   the array VT;
!>           = 'N':  no rows of V**T (no right singular vectors) are
!>                   computed.
!> 
[in]RANGE
!>          RANGE is CHARACTER*1
!>          = 'A': all singular values will be found.
!>          = 'V': all singular values in the half-open interval (VL,VU]
!>                 will be found.
!>          = 'I': the IL-th through IU-th singular values will be found.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, the contents of A are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[in]VL
!>          VL is DOUBLE PRECISION
!>          If RANGE='V', the lower bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]VU
!>          VU is DOUBLE PRECISION
!>          If RANGE='V', the upper bound of the interval to
!>          be searched for singular values. VU > VL.
!>          Not referenced if RANGE = 'A' or 'I'.
!> 
[in]IL
!>          IL is INTEGER
!>          If RANGE='I', the index of the
!>          smallest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[in]IU
!>          IU is INTEGER
!>          If RANGE='I', the index of the
!>          largest singular value to be returned.
!>          1 <= IL <= IU <= min(M,N), if min(M,N) > 0.
!>          Not referenced if RANGE = 'A' or 'V'.
!> 
[out]NS
!>          NS is INTEGER
!>          The total number of singular values found,
!>          0 <= NS <= min(M,N).
!>          If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1.
!> 
[out]S
!>          S is DOUBLE PRECISION array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
!>          If JOBU = 'V', U contains columns of U (the left singular
!>          vectors, stored columnwise) as specified by RANGE; if
!>          JOBU = 'N', U is not referenced.
!>          Note: The user must ensure that UCOL >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'V', LDU >= M.
!> 
[out]VT
!>          VT is DOUBLE PRECISION array, dimension (LDVT,N)
!>          If JOBVT = 'V', VT contains the rows of V**T (the right singular
!>          vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N',
!>          VT is not referenced.
!>          Note: The user must ensure that LDVT >= NS; if RANGE = 'V',
!>          the exact value of NS is not known in advance and an upper
!>          bound must be used.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'V', LDVT >= NS (see above).
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see
!>          comments inside the code):
!>             - PATH 1  (M much larger than N)
!>             - PATH 1t (N much larger than M)
!>          LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths.
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (12*MIN(M,N))
!>          If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0,
!>          then IWORK contains the indices of the eigenvectors that failed
!>          to converge in DBDSVDX/DSTEVX.
!> 
[out]INFO
!>     INFO is INTEGER
!>           = 0:  successful exit
!>           < 0:  if INFO = -i, the i-th argument had an illegal value
!>           > 0:  if INFO = i, then i eigenvectors failed to converge
!>                 in DBDSVDX/DSTEVX.
!>                 if INFO = N*2 + 1, an internal error occurred in
!>                 DBDSVDX
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 260 of file dgesvdx.f.

263*
264* -- LAPACK driver routine --
265* -- LAPACK is a software package provided by Univ. of Tennessee, --
266* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
267*
268* .. Scalar Arguments ..
269 CHARACTER JOBU, JOBVT, RANGE
270 INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS
271 DOUBLE PRECISION VL, VU
272* ..
273* .. Array Arguments ..
274 INTEGER IWORK( * )
275 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
276 $ VT( LDVT, * ), WORK( * )
277* ..
278*
279* =====================================================================
280*
281* .. Parameters ..
282 DOUBLE PRECISION ZERO, ONE
283 parameter( zero = 0.0d0, one = 1.0d0 )
284* ..
285* .. Local Scalars ..
286 CHARACTER JOBZ, RNGTGK
287 LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT
288 INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL,
289 $ ITAU, ITAUP, ITAUQ, ITEMP, ITGKZ, IUTGK,
290 $ J, MAXWRK, MINMN, MINWRK, MNTHR
291 DOUBLE PRECISION ABSTOL, ANRM, BIGNUM, EPS, SMLNUM
292* ..
293* .. Local Arrays ..
294 DOUBLE PRECISION DUM( 1 )
295* ..
296* .. External Subroutines ..
297 EXTERNAL dbdsvdx, dgebrd, dgelqf, dgeqrf, dlacpy,
299 $ dcopy, xerbla
300* ..
301* .. External Functions ..
302 LOGICAL LSAME
303 INTEGER ILAENV
304 DOUBLE PRECISION DLAMCH, DLANGE
305 EXTERNAL lsame, ilaenv, dlamch, dlange
306* ..
307* .. Intrinsic Functions ..
308 INTRINSIC max, min, sqrt
309* ..
310* .. Executable Statements ..
311*
312* Test the input arguments.
313*
314 ns = 0
315 info = 0
316 abstol = 2*dlamch('S')
317 lquery = ( lwork.EQ.-1 )
318 minmn = min( m, n )
319
320 wantu = lsame( jobu, 'V' )
321 wantvt = lsame( jobvt, 'V' )
322 IF( wantu .OR. wantvt ) THEN
323 jobz = 'V'
324 ELSE
325 jobz = 'N'
326 END IF
327 alls = lsame( range, 'A' )
328 vals = lsame( range, 'V' )
329 inds = lsame( range, 'I' )
330*
331 info = 0
332 IF( .NOT.lsame( jobu, 'V' ) .AND.
333 $ .NOT.lsame( jobu, 'N' ) ) THEN
334 info = -1
335 ELSE IF( .NOT.lsame( jobvt, 'V' ) .AND.
336 $ .NOT.lsame( jobvt, 'N' ) ) THEN
337 info = -2
338 ELSE IF( .NOT.( alls .OR. vals .OR. inds ) ) THEN
339 info = -3
340 ELSE IF( m.LT.0 ) THEN
341 info = -4
342 ELSE IF( n.LT.0 ) THEN
343 info = -5
344 ELSE IF( m.GT.lda ) THEN
345 info = -7
346 ELSE IF( minmn.GT.0 ) THEN
347 IF( vals ) THEN
348 IF( vl.LT.zero ) THEN
349 info = -8
350 ELSE IF( vu.LE.vl ) THEN
351 info = -9
352 END IF
353 ELSE IF( inds ) THEN
354 IF( il.LT.1 .OR. il.GT.max( 1, minmn ) ) THEN
355 info = -10
356 ELSE IF( iu.LT.min( minmn, il ) .OR. iu.GT.minmn ) THEN
357 info = -11
358 END IF
359 END IF
360 IF( info.EQ.0 ) THEN
361 IF( wantu .AND. ldu.LT.m ) THEN
362 info = -15
363 ELSE IF( wantvt ) THEN
364 IF( inds ) THEN
365 IF( ldvt.LT.iu-il+1 ) THEN
366 info = -17
367 END IF
368 ELSE IF( ldvt.LT.minmn ) THEN
369 info = -17
370 END IF
371 END IF
372 END IF
373 END IF
374*
375* Compute workspace
376* (Note: Comments in the code beginning "Workspace:" describe the
377* minimal amount of workspace needed at that point in the code,
378* as well as the preferred amount for good performance.
379* NB refers to the optimal block size for the immediately
380* following subroutine, as returned by ILAENV.)
381*
382 IF( info.EQ.0 ) THEN
383 minwrk = 1
384 maxwrk = 1
385 IF( minmn.GT.0 ) THEN
386 IF( m.GE.n ) THEN
387 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
388 IF( m.GE.mnthr ) THEN
389*
390* Path 1 (M much larger than N)
391*
392 maxwrk = n +
393 $ n*ilaenv( 1, 'DGEQRF', ' ', m, n, -1, -1 )
394 maxwrk = max( maxwrk, n*(n+5) + 2*n*
395 $ ilaenv( 1, 'DGEBRD', ' ', n, n, -1, -1 ) )
396 IF (wantu) THEN
397 maxwrk = max(maxwrk,n*(n*3+6)+n*
398 $ ilaenv( 1, 'DORMQR', ' ', n, n, -1, -1 ) )
399 END IF
400 IF (wantvt) THEN
401 maxwrk = max(maxwrk,n*(n*3+6)+n*
402 $ ilaenv( 1, 'DORMLQ', ' ', n, n, -1, -1 ) )
403 END IF
404 minwrk = n*(n*3+20)
405 ELSE
406*
407* Path 2 (M at least N, but not much larger)
408*
409 maxwrk = 4*n + ( m+n )*
410 $ ilaenv( 1, 'DGEBRD', ' ', m, n, -1, -1 )
411 IF (wantu) THEN
412 maxwrk = max(maxwrk,n*(n*2+5)+n*
413 $ ilaenv( 1, 'DORMQR', ' ', n, n, -1, -1 ) )
414 END IF
415 IF (wantvt) THEN
416 maxwrk = max(maxwrk,n*(n*2+5)+n*
417 $ ilaenv( 1, 'DORMLQ', ' ', n, n, -1, -1 ) )
418 END IF
419 minwrk = max(n*(n*2+19),4*n+m)
420 END IF
421 ELSE
422 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
423 IF( n.GE.mnthr ) THEN
424*
425* Path 1t (N much larger than M)
426*
427 maxwrk = m +
428 $ m*ilaenv( 1, 'DGELQF', ' ', m, n, -1, -1 )
429 maxwrk = max( maxwrk, m*(m+5) + 2*m*
430 $ ilaenv( 1, 'DGEBRD', ' ', m, m, -1, -1 ) )
431 IF (wantu) THEN
432 maxwrk = max(maxwrk,m*(m*3+6)+m*
433 $ ilaenv( 1, 'DORMQR', ' ', m, m, -1, -1 ) )
434 END IF
435 IF (wantvt) THEN
436 maxwrk = max(maxwrk,m*(m*3+6)+m*
437 $ ilaenv( 1, 'DORMLQ', ' ', m, m, -1, -1 ) )
438 END IF
439 minwrk = m*(m*3+20)
440 ELSE
441*
442* Path 2t (N at least M, but not much larger)
443*
444 maxwrk = 4*m + ( m+n )*
445 $ ilaenv( 1, 'DGEBRD', ' ', m, n, -1, -1 )
446 IF (wantu) THEN
447 maxwrk = max(maxwrk,m*(m*2+5)+m*
448 $ ilaenv( 1, 'DORMQR', ' ', m, m, -1, -1 ) )
449 END IF
450 IF (wantvt) THEN
451 maxwrk = max(maxwrk,m*(m*2+5)+m*
452 $ ilaenv( 1, 'DORMLQ', ' ', m, m, -1, -1 ) )
453 END IF
454 minwrk = max(m*(m*2+19),4*m+n)
455 END IF
456 END IF
457 END IF
458 maxwrk = max( maxwrk, minwrk )
459 work( 1 ) = dble( maxwrk )
460*
461 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
462 info = -19
463 END IF
464 END IF
465*
466 IF( info.NE.0 ) THEN
467 CALL xerbla( 'DGESVDX', -info )
468 RETURN
469 ELSE IF( lquery ) THEN
470 RETURN
471 END IF
472*
473* Quick return if possible
474*
475 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
476 RETURN
477 END IF
478*
479* Set singular values indices accord to RANGE.
480*
481 IF( alls ) THEN
482 rngtgk = 'I'
483 iltgk = 1
484 iutgk = min( m, n )
485 ELSE IF( inds ) THEN
486 rngtgk = 'I'
487 iltgk = il
488 iutgk = iu
489 ELSE
490 rngtgk = 'V'
491 iltgk = 0
492 iutgk = 0
493 END IF
494*
495* Get machine constants
496*
497 eps = dlamch( 'P' )
498 smlnum = sqrt( dlamch( 'S' ) ) / eps
499 bignum = one / smlnum
500*
501* Scale A if max element outside range [SMLNUM,BIGNUM]
502*
503 anrm = dlange( 'M', m, n, a, lda, dum )
504 iscl = 0
505 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
506 iscl = 1
507 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, info )
508 ELSE IF( anrm.GT.bignum ) THEN
509 iscl = 1
510 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, info )
511 END IF
512*
513 IF( m.GE.n ) THEN
514*
515* A has at least as many rows as columns. If A has sufficiently
516* more rows than columns, first reduce A using the QR
517* decomposition.
518*
519 IF( m.GE.mnthr ) THEN
520*
521* Path 1 (M much larger than N):
522* A = Q * R = Q * ( QB * B * PB**T )
523* = Q * ( QB * ( UB * S * VB**T ) * PB**T )
524* U = Q * QB * UB; V**T = VB**T * PB**T
525*
526* Compute A=Q*R
527* (Workspace: need 2*N, prefer N+N*NB)
528*
529 itau = 1
530 itemp = itau + n
531 CALL dgeqrf( m, n, a, lda, work( itau ), work( itemp ),
532 $ lwork-itemp+1, info )
533*
534* Copy R into WORK and bidiagonalize it:
535* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB)
536*
537 iqrf = itemp
538 id = iqrf + n*n
539 ie = id + n
540 itauq = ie + n
541 itaup = itauq + n
542 itemp = itaup + n
543 CALL dlacpy( 'U', n, n, a, lda, work( iqrf ), n )
544 CALL dlaset( 'L', n-1, n-1, zero, zero, work( iqrf+1 ), n )
545 CALL dgebrd( n, n, work( iqrf ), n, work( id ), work( ie ),
546 $ work( itauq ), work( itaup ), work( itemp ),
547 $ lwork-itemp+1, info )
548*
549* Solve eigenvalue problem TGK*Z=Z*S.
550* (Workspace: need 14*N + 2*N*(N+1))
551*
552 itgkz = itemp
553 itemp = itgkz + n*(n*2+1)
554 CALL dbdsvdx( 'U', jobz, rngtgk, n, work( id ), work( ie ),
555 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
556 $ n*2, work( itemp ), iwork, info)
557*
558* If needed, compute left singular vectors.
559*
560 IF( wantu ) THEN
561 j = itgkz
562 DO i = 1, ns
563 CALL dcopy( n, work( j ), 1, u( 1,i ), 1 )
564 j = j + n*2
565 END DO
566 CALL dlaset( 'A', m-n, ns, zero, zero, u( n+1,1 ), ldu )
567*
568* Call DORMBR to compute QB*UB.
569* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
570*
571 CALL dormbr( 'Q', 'L', 'N', n, ns, n, work( iqrf ), n,
572 $ work( itauq ), u, ldu, work( itemp ),
573 $ lwork-itemp+1, info )
574*
575* Call DORMQR to compute Q*(QB*UB).
576* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
577*
578 CALL dormqr( 'L', 'N', m, ns, n, a, lda,
579 $ work( itau ), u, ldu, work( itemp ),
580 $ lwork-itemp+1, info )
581 END IF
582*
583* If needed, compute right singular vectors.
584*
585 IF( wantvt) THEN
586 j = itgkz + n
587 DO i = 1, ns
588 CALL dcopy( n, work( j ), 1, vt( i,1 ), ldvt )
589 j = j + n*2
590 END DO
591*
592* Call DORMBR to compute VB**T * PB**T
593* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
594*
595 CALL dormbr( 'P', 'R', 'T', ns, n, n, work( iqrf ), n,
596 $ work( itaup ), vt, ldvt, work( itemp ),
597 $ lwork-itemp+1, info )
598 END IF
599 ELSE
600*
601* Path 2 (M at least N, but not much larger)
602* Reduce A to bidiagonal form without QR decomposition
603* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
604* U = QB * UB; V**T = VB**T * PB**T
605*
606* Bidiagonalize A
607* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB)
608*
609 id = 1
610 ie = id + n
611 itauq = ie + n
612 itaup = itauq + n
613 itemp = itaup + n
614 CALL dgebrd( m, n, a, lda, work( id ), work( ie ),
615 $ work( itauq ), work( itaup ), work( itemp ),
616 $ lwork-itemp+1, info )
617*
618* Solve eigenvalue problem TGK*Z=Z*S.
619* (Workspace: need 14*N + 2*N*(N+1))
620*
621 itgkz = itemp
622 itemp = itgkz + n*(n*2+1)
623 CALL dbdsvdx( 'U', jobz, rngtgk, n, work( id ), work( ie ),
624 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
625 $ n*2, work( itemp ), iwork, info)
626*
627* If needed, compute left singular vectors.
628*
629 IF( wantu ) THEN
630 j = itgkz
631 DO i = 1, ns
632 CALL dcopy( n, work( j ), 1, u( 1,i ), 1 )
633 j = j + n*2
634 END DO
635 CALL dlaset( 'A', m-n, ns, zero, zero, u( n+1,1 ), ldu )
636*
637* Call DORMBR to compute QB*UB.
638* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
639*
640 CALL dormbr( 'Q', 'L', 'N', m, ns, n, a, lda,
641 $ work( itauq ), u, ldu, work( itemp ),
642 $ lwork-itemp+1, ierr )
643 END IF
644*
645* If needed, compute right singular vectors.
646*
647 IF( wantvt) THEN
648 j = itgkz + n
649 DO i = 1, ns
650 CALL dcopy( n, work( j ), 1, vt( i,1 ), ldvt )
651 j = j + n*2
652 END DO
653*
654* Call DORMBR to compute VB**T * PB**T
655* (Workspace in WORK( ITEMP ): need N, prefer N*NB)
656*
657 CALL dormbr( 'P', 'R', 'T', ns, n, n, a, lda,
658 $ work( itaup ), vt, ldvt, work( itemp ),
659 $ lwork-itemp+1, ierr )
660 END IF
661 END IF
662 ELSE
663*
664* A has more columns than rows. If A has sufficiently more
665* columns than rows, first reduce A using the LQ decomposition.
666*
667 IF( n.GE.mnthr ) THEN
668*
669* Path 1t (N much larger than M):
670* A = L * Q = ( QB * B * PB**T ) * Q
671* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q
672* U = QB * UB ; V**T = VB**T * PB**T * Q
673*
674* Compute A=L*Q
675* (Workspace: need 2*M, prefer M+M*NB)
676*
677 itau = 1
678 itemp = itau + m
679 CALL dgelqf( m, n, a, lda, work( itau ), work( itemp ),
680 $ lwork-itemp+1, info )
681
682* Copy L into WORK and bidiagonalize it:
683* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB)
684*
685 ilqf = itemp
686 id = ilqf + m*m
687 ie = id + m
688 itauq = ie + m
689 itaup = itauq + m
690 itemp = itaup + m
691 CALL dlacpy( 'L', m, m, a, lda, work( ilqf ), m )
692 CALL dlaset( 'U', m-1, m-1, zero, zero, work( ilqf+m ), m )
693 CALL dgebrd( m, m, work( ilqf ), m, work( id ), work( ie ),
694 $ work( itauq ), work( itaup ), work( itemp ),
695 $ lwork-itemp+1, info )
696*
697* Solve eigenvalue problem TGK*Z=Z*S.
698* (Workspace: need 2*M*M+14*M)
699*
700 itgkz = itemp
701 itemp = itgkz + m*(m*2+1)
702 CALL dbdsvdx( 'U', jobz, rngtgk, m, work( id ), work( ie ),
703 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
704 $ m*2, work( itemp ), iwork, info)
705*
706* If needed, compute left singular vectors.
707*
708 IF( wantu ) THEN
709 j = itgkz
710 DO i = 1, ns
711 CALL dcopy( m, work( j ), 1, u( 1,i ), 1 )
712 j = j + m*2
713 END DO
714*
715* Call DORMBR to compute QB*UB.
716* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
717*
718 CALL dormbr( 'Q', 'L', 'N', m, ns, m, work( ilqf ), m,
719 $ work( itauq ), u, ldu, work( itemp ),
720 $ lwork-itemp+1, info )
721 END IF
722*
723* If needed, compute right singular vectors.
724*
725 IF( wantvt) THEN
726 j = itgkz + m
727 DO i = 1, ns
728 CALL dcopy( m, work( j ), 1, vt( i,1 ), ldvt )
729 j = j + m*2
730 END DO
731 CALL dlaset( 'A', ns, n-m, zero, zero, vt( 1,m+1 ), ldvt)
732*
733* Call DORMBR to compute (VB**T)*(PB**T)
734* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
735*
736 CALL dormbr( 'P', 'R', 'T', ns, m, m, work( ilqf ), m,
737 $ work( itaup ), vt, ldvt, work( itemp ),
738 $ lwork-itemp+1, info )
739*
740* Call DORMLQ to compute ((VB**T)*(PB**T))*Q.
741* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
742*
743 CALL dormlq( 'R', 'N', ns, n, m, a, lda,
744 $ work( itau ), vt, ldvt, work( itemp ),
745 $ lwork-itemp+1, info )
746 END IF
747 ELSE
748*
749* Path 2t (N greater than M, but not much larger)
750* Reduce to bidiagonal form without LQ decomposition
751* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T
752* U = QB * UB; V**T = VB**T * PB**T
753*
754* Bidiagonalize A
755* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB)
756*
757 id = 1
758 ie = id + m
759 itauq = ie + m
760 itaup = itauq + m
761 itemp = itaup + m
762 CALL dgebrd( m, n, a, lda, work( id ), work( ie ),
763 $ work( itauq ), work( itaup ), work( itemp ),
764 $ lwork-itemp+1, info )
765*
766* Solve eigenvalue problem TGK*Z=Z*S.
767* (Workspace: need 2*M*M+14*M)
768*
769 itgkz = itemp
770 itemp = itgkz + m*(m*2+1)
771 CALL dbdsvdx( 'L', jobz, rngtgk, m, work( id ), work( ie ),
772 $ vl, vu, iltgk, iutgk, ns, s, work( itgkz ),
773 $ m*2, work( itemp ), iwork, info)
774*
775* If needed, compute left singular vectors.
776*
777 IF( wantu ) THEN
778 j = itgkz
779 DO i = 1, ns
780 CALL dcopy( m, work( j ), 1, u( 1,i ), 1 )
781 j = j + m*2
782 END DO
783*
784* Call DORMBR to compute QB*UB.
785* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
786*
787 CALL dormbr( 'Q', 'L', 'N', m, ns, n, a, lda,
788 $ work( itauq ), u, ldu, work( itemp ),
789 $ lwork-itemp+1, info )
790 END IF
791*
792* If needed, compute right singular vectors.
793*
794 IF( wantvt) THEN
795 j = itgkz + m
796 DO i = 1, ns
797 CALL dcopy( m, work( j ), 1, vt( i,1 ), ldvt )
798 j = j + m*2
799 END DO
800 CALL dlaset( 'A', ns, n-m, zero, zero, vt( 1,m+1 ), ldvt)
801*
802* Call DORMBR to compute VB**T * PB**T
803* (Workspace in WORK( ITEMP ): need M, prefer M*NB)
804*
805 CALL dormbr( 'P', 'R', 'T', ns, n, m, a, lda,
806 $ work( itaup ), vt, ldvt, work( itemp ),
807 $ lwork-itemp+1, info )
808 END IF
809 END IF
810 END IF
811*
812* Undo scaling if necessary
813*
814 IF( iscl.EQ.1 ) THEN
815 IF( anrm.GT.bignum )
816 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1,
817 $ s, minmn, info )
818 IF( anrm.LT.smlnum )
819 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1,
820 $ s, minmn, info )
821 END IF
822*
823* Return optimal workspace in WORK(1)
824*
825 work( 1 ) = dble( maxwrk )
826*
827 RETURN
828*
829* End of DGESVDX
830*
subroutine dbdsvdx(uplo, jobz, range, n, d, e, vl, vu, il, iu, ns, s, z, ldz, work, iwork, info)
DBDSVDX
Definition dbdsvdx.f:226
initmumps id

◆ dggsvd3()

subroutine dggsvd3 ( character jobu,
character jobv,
character jobq,
integer m,
integer n,
integer p,
integer k,
integer l,
double precision, dimension( lda, * ) a,
integer lda,
double precision, dimension( ldb, * ) b,
integer ldb,
double precision, dimension( * ) alpha,
double precision, dimension( * ) beta,
double precision, dimension( ldu, * ) u,
integer ldu,
double precision, dimension( ldv, * ) v,
integer ldv,
double precision, dimension( ldq, * ) q,
integer ldq,
double precision, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer info )

DGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices

Download DGGSVD3 + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DGGSVD3 computes the generalized singular value decomposition (GSVD)
!> of an M-by-N real matrix A and P-by-N real matrix B:
!>
!>       U**T*A*Q = D1*( 0 R ),    V**T*B*Q = D2*( 0 R )
!>
!> where U, V and Q are orthogonal matrices.
!> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T,
!> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and
!> D2 are M-by-(K+L) and P-by-(K+L)  matrices and of the
!> following structures, respectively:
!>
!> If M-K-L >= 0,
!>
!>                     K  L
!>        D1 =     K ( I  0 )
!>                 L ( 0  C )
!>             M-K-L ( 0  0 )
!>
!>                   K  L
!>        D2 =   L ( 0  S )
!>             P-L ( 0  0 )
!>
!>                 N-K-L  K    L
!>   ( 0 R ) = K (  0   R11  R12 )
!>             L (  0    0   R22 )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
!>   S = diag( BETA(K+1),  ... , BETA(K+L) ),
!>   C**2 + S**2 = I.
!>
!>   R is stored in A(1:K+L,N-K-L+1:N) on exit.
!>
!> If M-K-L < 0,
!>
!>                   K M-K K+L-M
!>        D1 =   K ( I  0    0   )
!>             M-K ( 0  C    0   )
!>
!>                     K M-K K+L-M
!>        D2 =   M-K ( 0  S    0  )
!>             K+L-M ( 0  0    I  )
!>               P-L ( 0  0    0  )
!>
!>                    N-K-L  K   M-K  K+L-M
!>   ( 0 R ) =     K ( 0    R11  R12  R13  )
!>               M-K ( 0     0   R22  R23  )
!>             K+L-M ( 0     0    0   R33  )
!>
!> where
!>
!>   C = diag( ALPHA(K+1), ... , ALPHA(M) ),
!>   S = diag( BETA(K+1),  ... , BETA(M) ),
!>   C**2 + S**2 = I.
!>
!>   (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
!>   ( 0  R22 R23 )
!>   in B(M-K+1:L,N+M-K-L+1:N) on exit.
!>
!> The routine computes C, S, R, and optionally the orthogonal
!> transformation matrices U, V and Q.
!>
!> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
!> A and B implicitly gives the SVD of A*inv(B):
!>                      A*inv(B) = U*(D1*inv(D2))*V**T.
!> If ( A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B is
!> also equal to the CS decomposition of A and B. Furthermore, the GSVD
!> can be used to derive the solution of the eigenvalue problem:
!>                      A**T*A x = lambda* B**T*B x.
!> In some literature, the GSVD of A and B is presented in the form
!>                  U**T*A*X = ( 0 D1 ),   V**T*B*X = ( 0 D2 )
!> where U and V are orthogonal and X is nonsingular, D1 and D2 are
!> ``diagonal''.  The former GSVD form can be converted to the latter
!> form by taking the nonsingular matrix X as
!>
!>                      X = Q*( I   0    )
!>                            ( 0 inv(R) ).
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          = 'U':  Orthogonal matrix U is computed;
!>          = 'N':  U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>          = 'V':  Orthogonal matrix V is computed;
!>          = 'N':  V is not computed.
!> 
[in]JOBQ
!>          JOBQ is CHARACTER*1
!>          = 'Q':  Orthogonal matrix Q is computed;
!>          = 'N':  Q is not computed.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the matrices A and B.  N >= 0.
!> 
[in]P
!>          P is INTEGER
!>          The number of rows of the matrix B.  P >= 0.
!> 
[out]K
!>          K is INTEGER
!> 
[out]L
!>          L is INTEGER
!>
!>          On exit, K and L specify the dimension of the subblocks
!>          described in Purpose.
!>          K + L = effective numerical rank of (A**T,B**T)**T.
!> 
[in,out]A
!>          A is DOUBLE PRECISION array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit, A contains the triangular matrix R, or part of R.
!>          See Purpose for details.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A. LDA >= max(1,M).
!> 
[in,out]B
!>          B is DOUBLE PRECISION array, dimension (LDB,N)
!>          On entry, the P-by-N matrix B.
!>          On exit, B contains the triangular matrix R if M-K-L < 0.
!>          See Purpose for details.
!> 
[in]LDB
!>          LDB is INTEGER
!>          The leading dimension of the array B. LDB >= max(1,P).
!> 
[out]ALPHA
!>          ALPHA is DOUBLE PRECISION array, dimension (N)
!> 
[out]BETA
!>          BETA is DOUBLE PRECISION array, dimension (N)
!>
!>          On exit, ALPHA and BETA contain the generalized singular
!>          value pairs of A and B;
!>            ALPHA(1:K) = 1,
!>            BETA(1:K)  = 0,
!>          and if M-K-L >= 0,
!>            ALPHA(K+1:K+L) = C,
!>            BETA(K+1:K+L)  = S,
!>          or if M-K-L < 0,
!>            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
!>            BETA(K+1:M) =S, BETA(M+1:K+L) =1
!>          and
!>            ALPHA(K+L+1:N) = 0
!>            BETA(K+L+1:N)  = 0
!> 
[out]U
!>          U is DOUBLE PRECISION array, dimension (LDU,M)
!>          If JOBU = 'U', U contains the M-by-M orthogonal matrix U.
!>          If JOBU = 'N', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U. LDU >= max(1,M) if
!>          JOBU = 'U'; LDU >= 1 otherwise.
!> 
[out]V
!>          V is DOUBLE PRECISION array, dimension (LDV,P)
!>          If JOBV = 'V', V contains the P-by-P orthogonal matrix V.
!>          If JOBV = 'N', V is not referenced.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V. LDV >= max(1,P) if
!>          JOBV = 'V'; LDV >= 1 otherwise.
!> 
[out]Q
!>          Q is DOUBLE PRECISION array, dimension (LDQ,N)
!>          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q.
!>          If JOBQ = 'N', Q is not referenced.
!> 
[in]LDQ
!>          LDQ is INTEGER
!>          The leading dimension of the array Q. LDQ >= max(1,N) if
!>          JOBQ = 'Q'; LDQ >= 1 otherwise.
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]IWORK
!>          IWORK is INTEGER array, dimension (N)
!>          On exit, IWORK stores the sorting information. More
!>          precisely, the following loop will sort ALPHA
!>             for I = K+1, min(M,K+L)
!>                 swap ALPHA(I) and ALPHA(IWORK(I))
!>             endfor
!>          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if INFO = 1, the Jacobi-type procedure failed to
!>                converge.  For further details, see subroutine DTGSJA.
!> 
Internal Parameters:
!>  TOLA    DOUBLE PRECISION
!>  TOLB    DOUBLE PRECISION
!>          TOLA and TOLB are the thresholds to determine the effective
!>          rank of (A**T,B**T)**T. Generally, they are set to
!>                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
!>                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
!>          The size of TOLA and TOLB may affect the size of backward
!>          errors of the decomposition.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Contributors:
Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA
Further Details:
DGGSVD3 replaces the deprecated subroutine DGGSVD.

Definition at line 346 of file dggsvd3.f.

349*
350* -- LAPACK driver routine --
351* -- LAPACK is a software package provided by Univ. of Tennessee, --
352* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
353*
354* .. Scalar Arguments ..
355 CHARACTER JOBQ, JOBU, JOBV
356 INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P,
357 $ LWORK
358* ..
359* .. Array Arguments ..
360 INTEGER IWORK( * )
361 DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ),
362 $ BETA( * ), Q( LDQ, * ), U( LDU, * ),
363 $ V( LDV, * ), WORK( * )
364* ..
365*
366* =====================================================================
367*
368* .. Local Scalars ..
369 LOGICAL WANTQ, WANTU, WANTV, LQUERY
370 INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT
371 DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
372* ..
373* .. External Functions ..
374 LOGICAL LSAME
375 DOUBLE PRECISION DLAMCH, DLANGE
376 EXTERNAL lsame, dlamch, dlange
377* ..
378* .. External Subroutines ..
379 EXTERNAL dcopy, dggsvp3, dtgsja, xerbla
380* ..
381* .. Intrinsic Functions ..
382 INTRINSIC max, min
383* ..
384* .. Executable Statements ..
385*
386* Decode and test the input parameters
387*
388 wantu = lsame( jobu, 'U' )
389 wantv = lsame( jobv, 'V' )
390 wantq = lsame( jobq, 'Q' )
391 lquery = ( lwork.EQ.-1 )
392 lwkopt = 1
393*
394* Test the input arguments
395*
396 info = 0
397 IF( .NOT.( wantu .OR. lsame( jobu, 'N' ) ) ) THEN
398 info = -1
399 ELSE IF( .NOT.( wantv .OR. lsame( jobv, 'N' ) ) ) THEN
400 info = -2
401 ELSE IF( .NOT.( wantq .OR. lsame( jobq, 'N' ) ) ) THEN
402 info = -3
403 ELSE IF( m.LT.0 ) THEN
404 info = -4
405 ELSE IF( n.LT.0 ) THEN
406 info = -5
407 ELSE IF( p.LT.0 ) THEN
408 info = -6
409 ELSE IF( lda.LT.max( 1, m ) ) THEN
410 info = -10
411 ELSE IF( ldb.LT.max( 1, p ) ) THEN
412 info = -12
413 ELSE IF( ldu.LT.1 .OR. ( wantu .AND. ldu.LT.m ) ) THEN
414 info = -16
415 ELSE IF( ldv.LT.1 .OR. ( wantv .AND. ldv.LT.p ) ) THEN
416 info = -18
417 ELSE IF( ldq.LT.1 .OR. ( wantq .AND. ldq.LT.n ) ) THEN
418 info = -20
419 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
420 info = -24
421 END IF
422*
423* Compute workspace
424*
425 IF( info.EQ.0 ) THEN
426 CALL dggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
427 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, work,
428 $ work, -1, info )
429 lwkopt = n + int( work( 1 ) )
430 lwkopt = max( 2*n, lwkopt )
431 lwkopt = max( 1, lwkopt )
432 work( 1 ) = dble( lwkopt )
433 END IF
434*
435 IF( info.NE.0 ) THEN
436 CALL xerbla( 'DGGSVD3', -info )
437 RETURN
438 END IF
439 IF( lquery ) THEN
440 RETURN
441 ENDIF
442*
443* Compute the Frobenius norm of matrices A and B
444*
445 anorm = dlange( '1', m, n, a, lda, work )
446 bnorm = dlange( '1', p, n, b, ldb, work )
447*
448* Get machine precision and set up threshold for determining
449* the effective numerical rank of the matrices A and B.
450*
451 ulp = dlamch( 'Precision' )
452 unfl = dlamch( 'Safe Minimum' )
453 tola = max( m, n )*max( anorm, unfl )*ulp
454 tolb = max( p, n )*max( bnorm, unfl )*ulp
455*
456* Preprocessing
457*
458 CALL dggsvp3( jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola,
459 $ tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, work,
460 $ work( n+1 ), lwork-n, info )
461*
462* Compute the GSVD of two upper "triangular" matrices
463*
464 CALL dtgsja( jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb,
465 $ tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq,
466 $ work, ncycle, info )
467*
468* Sort the singular values and store the pivot indices in IWORK
469* Copy ALPHA to WORK, then sort ALPHA in WORK
470*
471 CALL dcopy( n, alpha, 1, work, 1 )
472 ibnd = min( l, m-k )
473 DO 20 i = 1, ibnd
474*
475* Scan for largest ALPHA(K+I)
476*
477 isub = i
478 smax = work( k+i )
479 DO 10 j = i + 1, ibnd
480 temp = work( k+j )
481 IF( temp.GT.smax ) THEN
482 isub = j
483 smax = temp
484 END IF
485 10 CONTINUE
486 IF( isub.NE.i ) THEN
487 work( k+isub ) = work( k+i )
488 work( k+i ) = smax
489 iwork( k+i ) = k + isub
490 ELSE
491 iwork( k+i ) = k + i
492 END IF
493 20 CONTINUE
494*
495 work( 1 ) = dble( lwkopt )
496 RETURN
497*
498* End of DGGSVD3
499*
#define alpha
Definition eval.h:35
subroutine dtgsja(jobu, jobv, jobq, m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle, info)
DTGSJA
Definition dtgsja.f:378
subroutine dggsvp3(jobu, jobv, jobq, m, p, n, a, lda, b, ldb, tola, tolb, k, l, u, ldu, v, ldv, q, ldq, iwork, tau, work, lwork, info)
DGGSVP3
Definition dggsvp3.f:272