*  dgesdd_8f_source.html

*> \brief \b DGESDD

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download DGESDD + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesdd.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesdd.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesdd.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,

*                          WORK, LWORK, IWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER          JOBZ

*       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N

*       ..

*       .. Array Arguments ..

*       INTEGER            IWORK( * )

*       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),

*      $                   VT( LDVT, * ), WORK( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> DGESDD computes the singular value decomposition (SVD) of a real

*> M-by-N matrix A, optionally computing the left and right singular

*> vectors.  If singular vectors are desired, it uses a

*> divide-and-conquer algorithm.

*>

*> The SVD is written

*>

*>      A = U * SIGMA * transpose(V)

*>

*> where SIGMA is an M-by-N matrix which is zero except for its

*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and

*> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA

*> are the singular values of A; they are real and non-negative, and

*> are returned in descending order.  The first min(m,n) columns of

*> U and V are the left and right singular vectors of A.

*>

*> Note that the routine returns VT = V**T, not V.

*>

*> The divide and conquer algorithm makes very mild assumptions about

*> floating point arithmetic. It will work on machines with a guard

*> digit in add/subtract, or on those binary machines without guard

*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or

*> Cray-2. It could conceivably fail on hexadecimal or decimal machines

*> without guard digits, but we know of none.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] JOBZ

*> \verbatim

*>          JOBZ is CHARACTER*1

*>          Specifies options for computing all or part of the matrices U and V**T:

*>          = 'A':  all M columns of U and all N rows of V**T are

*>                  returned in the arrays U and VT;

*>          = 'S':  the first min(M,N) columns of U and the first

*>                  min(M,N) rows of V**T are returned in the arrays U

*>                  and VT;

*>          = 'O':  If M >= N, the first N columns of U are overwritten

*>                  on the array A and all rows of V**T are returned in

*>                  the array VT;

*>                  otherwise, all columns of U are returned in the

*>                  array U and the first M rows of V**T are overwritten

*>                  in the array A;

*>          = 'N':  no columns of U or rows of V**T are computed.

*> \endverbatim

*>

*> \param[in] M

*> \verbatim

*>          M is INTEGER

*>          The number of rows of the input matrix A.  M >= 0.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The number of columns of the input matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is DOUBLE PRECISION array, dimension (LDA,N)

*>          On entry, the M-by-N matrix A.

*>          On exit,

*>          if JOBZ = 'O',  A is overwritten with the first N columns

*>                          of U (the left singular vectors, stored

*>                          columnwise) if M >= N;

*>                          A is overwritten with the first M rows

*>                          of V**T (the right singular vectors, stored

*>                          rowwise) otherwise.

*>          if JOBZ .ne. 'O', the contents of A are destroyed.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,M).

*> \endverbatim

*>

*> \param[out] S

*> \verbatim

*>          S is DOUBLE PRECISION array, dimension (min(M,N))

*>          The singular values of A, sorted so that S(i) >= S(i+1).

*> \endverbatim

*>

*> \param[out] U

*> \verbatim

*>          U is DOUBLE PRECISION array, dimension (LDU,UCOL)

*>          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;

*>          UCOL = min(M,N) if JOBZ = 'S'.

*>          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M

*>          orthogonal matrix U;

*>          if JOBZ = 'S', U contains the first min(M,N) columns of U

*>          (the left singular vectors, stored columnwise);

*>          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.

*> \endverbatim

*>

*> \param[in] LDU

*> \verbatim

*>          LDU is INTEGER

*>          The leading dimension of the array U.  LDU >= 1; if

*>          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.

*> \endverbatim

*>

*> \param[out] VT

*> \verbatim

*>          VT is DOUBLE PRECISION array, dimension (LDVT,N)

*>          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the

*>          N-by-N orthogonal matrix V**T;

*>          if JOBZ = 'S', VT contains the first min(M,N) rows of

*>          V**T (the right singular vectors, stored rowwise);

*>          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.

*> \endverbatim

*>

*> \param[in] LDVT

*> \verbatim

*>          LDVT is INTEGER

*>          The leading dimension of the array VT.  LDVT >= 1;

*>          if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;

*>          if JOBZ = 'S', LDVT >= min(M,N).

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))

*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The dimension of the array WORK. LWORK >= 1.

*>          If LWORK = -1, a workspace query is assumed.  The optimal

*>          size for the WORK array is calculated and stored in WORK(1),

*>          and no other work except argument checking is performed.

*>

*>          Let mx = max(M,N) and mn = min(M,N).

*>          If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).

*>          If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).

*>          If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.

*>          If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.

*>          These are not tight minimums in all cases; see comments inside code.

*>          For good performance, LWORK should generally be larger;

*>          a query is recommended.

*> \endverbatim

*>

*> \param[out] IWORK

*> \verbatim

*>          IWORK is INTEGER array, dimension (8*min(M,N))

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          <  0:  if INFO = -i, the i-th argument had an illegal value.

*>          = -4:  if A had a NAN entry.

*>          >  0:  DBDSDC did not converge, updating process failed.

*>          =  0:  successful exit.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup doubleGEsing

*

*> \par Contributors:

*  ==================

*>

*>     Ming Gu and Huan Ren, Computer Science Division, University of

*>     California at Berkeley, USA

*>

*  =====================================================================


      SUBROUTINE dgesdd( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,

     $                   WORK, LWORK, IWORK, INFO )

      implicit none

*

*  -- LAPACK driver routine --

*  -- LAPACK is a software package provided by Univ. of Tennessee,    --

*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --

*

*     .. Scalar Arguments ..

      CHARACTER          JOBZ

      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N

*     ..

*     .. Array Arguments ..

      INTEGER            IWORK( * )

      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),

     $                   vt( ldvt, * ), work( * )

*     ..

*

*  =====================================================================

*

*     .. Parameters ..

      DOUBLE PRECISION   ZERO, ONE

      parameter( zero = 0.0d0, one = 1.0d0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS

      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,

     $                   ir, iscl, itau, itaup, itauq, iu, ivt, ldwkvt,

     $                   ldwrkl, ldwrkr, ldwrku, maxwrk, minmn, minwrk,

     $                   mnthr, nwork, wrkbl

      INTEGER            LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,

     $                   lwork_dgebrd_nn, lwork_dgelqf_mn,

     $                   lwork_dgeqrf_mn,

     $                   lwork_dorgbr_p_mm, lwork_dorgbr_q_nn,

     $                   lwork_dorglq_mn, lwork_dorglq_nn,

     $                   lwork_dorgqr_mm, lwork_dorgqr_mn,

     $                   lwork_dormbr_prt_mm, lwork_dormbr_qln_mm,

     $                   lwork_dormbr_prt_mn, lwork_dormbr_qln_mn,

     $                   lwork_dormbr_prt_nn, lwork_dormbr_qln_nn

      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM

*     ..

*     .. Local Arrays ..

      INTEGER            IDUM( 1 )

      DOUBLE PRECISION   DUM( 1 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           dbdsdc, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,

     $                   dlascl, dlaset, dorgbr, dorglq, dorgqr, dormbr,

     $                   xerbla

*     ..

*     .. External Functions ..

      LOGICAL            LSAME, DISNAN

      DOUBLE PRECISION   DLAMCH, DLANGE, DROUNDUP_LWORK

      EXTERNAL           dlamch, dlange, lsame, disnan,

     $                   droundup_lwork

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          int, max, min, sqrt

*     ..

*     .. Executable Statements ..

*

*     Test the input arguments

*

      info   = 0

      minmn  = min( m, n )

      wntqa  = lsame( jobz, 'A' )

      wntqs  = lsame( jobz, 'S' )

      wntqas = wntqa .OR. wntqs

      wntqo  = lsame( jobz, 'O' )

      wntqn  = lsame( jobz, 'N' )

      lquery = ( lwork.EQ.-1 )

*

      IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN

         info = -1

      ELSE IF( m.LT.0 ) THEN

         info = -2

      ELSE IF( n.LT.0 ) THEN

         info = -3

      ELSE IF( lda.LT.max( 1, m ) ) THEN

         info = -5

      ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.

     $         ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN

         info = -8

      ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.

     $         ( wntqs .AND. ldvt.LT.minmn ) .OR.

     $         ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN

         info = -10

      END IF

*

*     Compute workspace

*       Note: Comments in the code beginning "Workspace:" describe the

*       minimal amount of workspace allocated at that point in the code,

*       as well as the preferred amount for good performance.

*       NB refers to the optimal block size for the immediately

*       following subroutine, as returned by ILAENV.

*

      IF( info.EQ.0 ) THEN

         minwrk = 1

         maxwrk = 1

         bdspac = 0

         mnthr  = int( minmn*11.0d0 / 6.0d0 )

         IF( m.GE.n .AND. minmn.GT.0 ) THEN

*

*           Compute space needed for DBDSDC

*

            IF( wntqn ) THEN

*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)

*              keep 7*N for backwards compatibility.

               bdspac = 7*n

            ELSE

               bdspac = 3*n*n + 4*n

            END IF

*

*           Compute space preferred for each routine

            CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_dgebrd_mn = int( dum(1) )

*

            CALL dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_dgebrd_nn = int( dum(1) )

*

            CALL dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )

            lwork_dgeqrf_mn = int( dum(1) )

*

            CALL dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,

     $                   ierr )

            lwork_dorgbr_q_nn = int( dum(1) )

*

            CALL dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1, ierr )

            lwork_dorgqr_mm = int( dum(1) )

*

            CALL dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1, ierr )

            lwork_dorgqr_mn = int( dum(1) )

*

            CALL dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,

     $                   dum(1), dum(1), n, dum(1), -1, ierr )

            lwork_dormbr_prt_nn = int( dum(1) )

*

            CALL dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,

     $                   dum(1), dum(1), n, dum(1), -1, ierr )

            lwork_dormbr_qln_nn = int( dum(1) )

*

            CALL dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,

     $                   dum(1), dum(1), m, dum(1), -1, ierr )

            lwork_dormbr_qln_mn = int( dum(1) )

*

            CALL dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,

     $                   dum(1), dum(1), m, dum(1), -1, ierr )

            lwork_dormbr_qln_mm = int( dum(1) )

*

            IF( m.GE.mnthr ) THEN

               IF( wntqn ) THEN

*

*                 Path 1 (M >> N, JOBZ='N')

*

                  wrkbl = n + lwork_dgeqrf_mn

                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                  maxwrk = max( wrkbl, bdspac + n )

                  minwrk = bdspac + n

               ELSE IF( wntqo ) THEN

*

*                 Path 2 (M >> N, JOBZ='O')

*

                  wrkbl = n + lwork_dgeqrf_mn

                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )

                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  wrkbl = max( wrkbl, 3*n + bdspac )

                  maxwrk = wrkbl + 2*n*n

                  minwrk = bdspac + 2*n*n + 3*n

               ELSE IF( wntqs ) THEN

*

*                 Path 3 (M >> N, JOBZ='S')

*

                  wrkbl = n + lwork_dgeqrf_mn

                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )

                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  wrkbl = max( wrkbl, 3*n + bdspac )

                  maxwrk = wrkbl + n*n

                  minwrk = bdspac + n*n + 3*n

               ELSE IF( wntqa ) THEN

*

*                 Path 4 (M >> N, JOBZ='A')

*

                  wrkbl = n + lwork_dgeqrf_mn

                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mm )

                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  wrkbl = max( wrkbl, 3*n + bdspac )

                  maxwrk = wrkbl + n*n

                  minwrk = n*n + max( 3*n + bdspac, n + m )

               END IF

            ELSE

*

*              Path 5 (M >= N, but not much larger)

*

               wrkbl = 3*n + lwork_dgebrd_mn

               IF( wntqn ) THEN

*                 Path 5n (M >= N, jobz='N')

                  maxwrk = max( wrkbl, 3*n + bdspac )

                  minwrk = 3*n + max( m, bdspac )

               ELSE IF( wntqo ) THEN

*                 Path 5o (M >= N, jobz='O')

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )

                  wrkbl = max( wrkbl, 3*n + bdspac )

                  maxwrk = wrkbl + m*n

                  minwrk = 3*n + max( m, n*n + bdspac )

               ELSE IF( wntqs ) THEN

*                 Path 5s (M >= N, jobz='S')

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  maxwrk = max( wrkbl, 3*n + bdspac )

                  minwrk = 3*n + max( m, bdspac )

               ELSE IF( wntqa ) THEN

*                 Path 5a (M >= N, jobz='A')

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )

                  maxwrk = max( wrkbl, 3*n + bdspac )

                  minwrk = 3*n + max( m, bdspac )

               END IF

            END IF

         ELSE IF( minmn.GT.0 ) THEN

*

*           Compute space needed for DBDSDC

*

            IF( wntqn ) THEN

*              dbdsdc needs only 4*M (or 6*M for uplo=L for LAPACK <= 3.6)

*              keep 7*M for backwards compatibility.

               bdspac = 7*m

            ELSE

               bdspac = 3*m*m + 4*m

            END IF

*

*           Compute space preferred for each routine

            CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_dgebrd_mn = int( dum(1) )

*

            CALL dgebrd( m, m, a, m, s, dum(1), dum(1),

     $                   dum(1), dum(1), -1, ierr )

            lwork_dgebrd_mm = int( dum(1) )

*

            CALL dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )

            lwork_dgelqf_mn = int( dum(1) )

*

            CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )

            lwork_dorglq_nn = int( dum(1) )

*

            CALL dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )

            lwork_dorglq_mn = int( dum(1) )

*

            CALL dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1, ierr )

            lwork_dorgbr_p_mm = int( dum(1) )

*

            CALL dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,

     $                   dum(1), dum(1), m, dum(1), -1, ierr )

            lwork_dormbr_prt_mm = int( dum(1) )

*

            CALL dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,

     $                   dum(1), dum(1), m, dum(1), -1, ierr )

            lwork_dormbr_prt_mn = int( dum(1) )

*

            CALL dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,

     $                   dum(1), dum(1), n, dum(1), -1, ierr )

            lwork_dormbr_prt_nn = int( dum(1) )

*

            CALL dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,

     $                   dum(1), dum(1), m, dum(1), -1, ierr )

            lwork_dormbr_qln_mm = int( dum(1) )

*

            IF( n.GE.mnthr ) THEN

               IF( wntqn ) THEN

*

*                 Path 1t (N >> M, JOBZ='N')

*

                  wrkbl = m + lwork_dgelqf_mn

                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                  maxwrk = max( wrkbl, bdspac + m )

                  minwrk = bdspac + m

               ELSE IF( wntqo ) THEN

*

*                 Path 2t (N >> M, JOBZ='O')

*

                  wrkbl = m + lwork_dgelqf_mn

                  wrkbl = max( wrkbl,   m + lwork_dorglq_mn )

                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                  wrkbl = max( wrkbl, 3*m + bdspac )

                  maxwrk = wrkbl + 2*m*m

                  minwrk = bdspac + 2*m*m + 3*m

               ELSE IF( wntqs ) THEN

*

*                 Path 3t (N >> M, JOBZ='S')

*

                  wrkbl = m + lwork_dgelqf_mn

                  wrkbl = max( wrkbl,   m + lwork_dorglq_mn )

                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                  wrkbl = max( wrkbl, 3*m + bdspac )

                  maxwrk = wrkbl + m*m

                  minwrk = bdspac + m*m + 3*m

               ELSE IF( wntqa ) THEN

*

*                 Path 4t (N >> M, JOBZ='A')

*

                  wrkbl = m + lwork_dgelqf_mn

                  wrkbl = max( wrkbl,   m + lwork_dorglq_nn )

                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )

                  wrkbl = max( wrkbl, 3*m + bdspac )

                  maxwrk = wrkbl + m*m

                  minwrk = m*m + max( 3*m + bdspac, m + n )

               END IF

            ELSE

*

*              Path 5t (N > M, but not much larger)

*

               wrkbl = 3*m + lwork_dgebrd_mn

               IF( wntqn ) THEN

*                 Path 5tn (N > M, jobz='N')

                  maxwrk = max( wrkbl, 3*m + bdspac )

                  minwrk = 3*m + max( n, bdspac )

               ELSE IF( wntqo ) THEN

*                 Path 5to (N > M, jobz='O')

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )

                  wrkbl = max( wrkbl, 3*m + bdspac )

                  maxwrk = wrkbl + m*n

                  minwrk = 3*m + max( n, m*m + bdspac )

               ELSE IF( wntqs ) THEN

*                 Path 5ts (N > M, jobz='S')

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )

                  maxwrk = max( wrkbl, 3*m + bdspac )

                  minwrk = 3*m + max( n, bdspac )

               ELSE IF( wntqa ) THEN

*                 Path 5ta (N > M, jobz='A')

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )

                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn )

                  maxwrk = max( wrkbl, 3*m + bdspac )

                  minwrk = 3*m + max( n, bdspac )

               END IF

            END IF

         END IF


         maxwrk = max( maxwrk, minwrk )

         work( 1 ) = droundup_lwork( maxwrk )

*

         IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN

            info = -12

         END IF

      END IF

*

      IF( info.NE.0 ) THEN

         CALL xerbla( 'DGESDD', -info )

         RETURN

      ELSE IF( lquery ) THEN

         RETURN

      END IF

*

*     Quick return if possible

*

      IF( m.EQ.0 .OR. n.EQ.0 ) THEN

         RETURN

      END IF

*

*     Get machine constants

*

      eps = dlamch( 'P' )

      smlnum = sqrt( dlamch( 'S' ) ) / eps

      bignum = one / smlnum

*

*     Scale A if max element outside range [SMLNUM,BIGNUM]

*

      anrm = dlange( 'M', m, n, a, lda, dum )

      IF( disnan( anrm ) ) THEN

          info = -4

          RETURN

      END IF

      iscl = 0

      IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN

         iscl = 1

         CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )

      ELSE IF( anrm.GT.bignum ) THEN

         iscl = 1

         CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )

      END IF

*

      IF( m.GE.n ) THEN

*

*        A has at least as many rows as columns. If A has sufficiently

*        more rows than columns, first reduce using the QR

*        decomposition (if sufficient workspace available)

*

         IF( m.GE.mnthr ) THEN

*

            IF( wntqn ) THEN

*

*              Path 1 (M >> N, JOBZ='N')

*              No singular vectors to be computed

*

               itau = 1

               nwork = itau + n

*

*              Compute A=Q*R

*              Workspace: need   N [tau] + N    [work]

*              Workspace: prefer N [tau] + N*NB [work]

*

               CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

     $                      lwork - nwork + 1, ierr )

*

*              Zero out below R

*

               CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )

               ie = 1

               itauq = ie + n

               itaup = itauq + n

               nwork = itaup + n

*

*              Bidiagonalize R in A

*              Workspace: need   3*N [e, tauq, taup] + N      [work]

*              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]

*

               CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),

     $                      work( itaup ), work( nwork ), lwork-nwork+1,

     $                      ierr )

               nwork = ie + n

*

*              Perform bidiagonal SVD, computing singular values only

*              Workspace: need   N [e] + BDSPAC

*

               CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum, 1,

     $                      dum, idum, work( nwork ), iwork, info )

*

            ELSE IF( wntqo ) THEN

*

*              Path 2 (M >> N, JOBZ = 'O')

*              N left singular vectors to be overwritten on A and

*              N right singular vectors to be computed in VT

*

               ir = 1

*

*              WORK(IR) is LDWRKR by N

*

               IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN

                  ldwrkr = lda

               ELSE

                  ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n

               END IF

               itau = ir + ldwrkr*n

               nwork = itau + n

*

*              Compute A=Q*R

*              Workspace: need   N*N [R] + N [tau] + N    [work]

*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

*

               CALL dgeqrf( m, n, a, lda, work( itau ), work( nwork ),

     $                      lwork - nwork + 1, ierr )

*

*              Copy R to WORK(IR), zeroing out below it

*

               CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )

               CALL dlaset( 'L', n - 1, n - 1, zero, zero, work(ir+1),

     $                      ldwrkr )

*

*              Generate Q in A

*              Workspace: need   N*N [R] + N [tau] + N    [work]

*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

*

               CALL dorgqr( m, n, n, a, lda, work( itau ),

     $                      work( nwork ), lwork - nwork + 1, ierr )

               ie = itau

               itauq = ie + n

               itaup = itauq + n

               nwork = itaup + n

*

*              Bidiagonalize R in WORK(IR)

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]

*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]

*

               CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),

     $                      work( itauq ), work( itaup ), work( nwork ),

     $                      lwork - nwork + 1, ierr )

*

*              WORK(IU) is N by N

*

               iu = nwork

               nwork = iu + n*n

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in WORK(IU) and computing right

*              singular vectors of bidiagonal matrix in VT

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC

*

               CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ), n,

     $                      vt, ldvt, dum, idum, work( nwork ), iwork,

     $                      info )

*

*              Overwrite WORK(IU) by left singular vectors of R

*              and VT by right singular vectors of R

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work]

*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

*

               CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ), ldwrkr,

     $                      work( itauq ), work( iu ), n, work( nwork ),

     $                      lwork - nwork + 1, ierr )

               CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ), ldwrkr,

     $                      work( itaup ), vt, ldvt, work( nwork ),

     $                      lwork - nwork + 1, ierr )

*

*              Multiply Q in A by left singular vectors of R in

*              WORK(IU), storing result in WORK(IR) and copying to A

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U]

*              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]

*

               DO 10 i = 1, m, ldwrkr

                  chunk = min( m - i + 1, ldwrkr )

                  CALL dgemm( 'N', 'n', CHUNK, N, N, ONE, A( I, 1 ),

     $                        LDA, WORK( IU ), N, ZERO, WORK( IR ),

     $                        LDWRKR )

                  CALL DLACPY( 'f', CHUNK, N, WORK( IR ), LDWRKR,

     $                         A( I, 1 ), LDA )

   10          CONTINUE

*

            ELSE IF( WNTQS ) THEN

*

*              Path 3 (M >> N, JOBZ='S')

*              N left singular vectors to be computed in U and

*              N right singular vectors to be computed in VT

*

               IR = 1

*

*              WORK(IR) is N by N

*

               LDWRKR = N

               ITAU = IR + LDWRKR*N

               NWORK = ITAU + N

*

*              Compute A=Q*R

*              Workspace: need   N*N [R] + N [tau] + N    [work]

*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

*

               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Copy R to WORK(IR), zeroing out below it

*

               CALL DLACPY( 'u', N, N, A, LDA, WORK( IR ), LDWRKR )

               CALL DLASET( 'l', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),

     $                      LDWRKR )

*

*              Generate Q in A

*              Workspace: need   N*N [R] + N [tau] + N    [work]

*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]

*

               CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

               IE = ITAU

               ITAUQ = IE + N

               ITAUP = ITAUQ + N

               NWORK = ITAUP + N

*

*              Bidiagonalize R in WORK(IR)

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]

*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]

*

               CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),

     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', N, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Overwrite U by left singular vectors of R and VT

*              by right singular vectors of R

*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work]

*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', N, N, N, WORK( IR ), LDWRKR,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

               CALL DORMBR( 'p', 'r', 't', N, N, N, WORK( IR ), LDWRKR,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Multiply Q in A by left singular vectors of R in

*              WORK(IR), storing result in U

*              Workspace: need   N*N [R]

*

               CALL DLACPY( 'f', N, N, U, LDU, WORK( IR ), LDWRKR )

               CALL DGEMM( 'n', 'n', M, N, N, ONE, A, LDA, WORK( IR ),

     $                     LDWRKR, ZERO, U, LDU )

*

            ELSE IF( WNTQA ) THEN

*

*              Path 4 (M >> N, JOBZ='A')

*              M left singular vectors to be computed in U and

*              N right singular vectors to be computed in VT

*

               IU = 1

*

*              WORK(IU) is N by N

*

               LDWRKU = N

               ITAU = IU + LDWRKU*N

               NWORK = ITAU + N

*

*              Compute A=Q*R, copying result to U

*              Workspace: need   N*N [U] + N [tau] + N    [work]

*              Workspace: prefer N*N [U] + N [tau] + N*NB [work]

*

               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DLACPY( 'l', M, N, A, LDA, U, LDU )

*

*              Generate Q in U

*              Workspace: need   N*N [U] + N [tau] + M    [work]

*              Workspace: prefer N*N [U] + N [tau] + M*NB [work]

               CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*              Produce R in A, zeroing out other entries

*

               CALL DLASET( 'l', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )

               IE = ITAU

               ITAUQ = IE + N

               ITAUP = ITAUQ + N

               NWORK = ITAUP + N

*

*              Bidiagonalize R in A

*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work]

*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]

*

               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),

     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,

     $                      IERR )

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in WORK(IU) and computing right

*              singular vectors of bidiagonal matrix in VT

*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', N, S, WORK( IE ), WORK( IU ), N,

     $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Overwrite WORK(IU) by left singular vectors of R and VT

*              by right singular vectors of R

*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work]

*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', N, N, N, A, LDA,

     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', N, N, N, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Multiply Q in U by left singular vectors of R in

*              WORK(IU), storing result in A

*              Workspace: need   N*N [U]

*

               CALL DGEMM( 'n', 'n', M, N, N, ONE, U, LDU, WORK( IU ),

     $                     LDWRKU, ZERO, A, LDA )

*

*              Copy left singular vectors of A from A to U

*

               CALL DLACPY( 'f', M, N, A, LDA, U, LDU )

*

            END IF

*

         ELSE

*

*           M .LT. MNTHR

*

*           Path 5 (M >= N, but not much larger)

*           Reduce to bidiagonal form without QR decomposition

*

            IE = 1

            ITAUQ = IE + N

            ITAUP = ITAUQ + N

            NWORK = ITAUP + N

*

*           Bidiagonalize A

*           Workspace: need   3*N [e, tauq, taup] + M        [work]

*           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]

*

            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),

     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,

     $                   IERR )

            IF( WNTQN ) THEN

*

*              Path 5n (M >= N, JOBZ='N')

*              Perform bidiagonal SVD, only computing singular values

*              Workspace: need   3*N [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'n', N, S, WORK( IE ), DUM, 1, DUM, 1,

     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )

            ELSE IF( WNTQO ) THEN

*              Path 5o (M >= N, JOBZ='O')

               IU = NWORK

               IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN

*

*                 WORK( IU ) is M by N

*

                  LDWRKU = M

                  NWORK = IU + LDWRKU*N

                  CALL DLASET( 'f', M, N, ZERO, ZERO, WORK( IU ),

     $                         LDWRKU )

*                 IR is unused; silence compile warnings

                  IR = -1

               ELSE

*

*                 WORK( IU ) is N by N

*

                  LDWRKU = N

                  NWORK = IU + LDWRKU*N

*

*                 WORK(IR) is LDWRKR by N

*

                  IR = NWORK

                  LDWRKR = ( LWORK - N*N - 3*N ) / N

               END IF

               NWORK = IU + LDWRKU*N

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in WORK(IU) and computing right

*              singular vectors of bidiagonal matrix in VT

*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', N, S, WORK( IE ), WORK( IU ),

     $                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),

     $                      IWORK, INFO )

*

*              Overwrite VT by right singular vectors of A

*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]

*              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

*

               CALL DORMBR( 'p', 'r', 't', N, N, N, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

               IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN

*

*                 Path 5o-fast

*                 Overwrite WORK(IU) by left singular vectors of A

*                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work]

*                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]

*

                  CALL DORMBR( 'q', 'l', 'n', M, N, N, A, LDA,

     $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,

     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*                 Copy left singular vectors of A from WORK(IU) to A

*

                  CALL DLACPY( 'f', M, N, WORK( IU ), LDWRKU, A, LDA )

               ELSE

*

*                 Path 5o-slow

*                 Generate Q in A

*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]

*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]

*

                  CALL DORGBR( 'q', M, N, N, A, LDA, WORK( ITAUQ ),

     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*                 Multiply Q in A by left singular vectors of

*                 bidiagonal matrix in WORK(IU), storing result in

*                 WORK(IR) and copying to A

*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R]

*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R]

*

                  DO 20 I = 1, M, LDWRKR

                     CHUNK = MIN( M - I + 1, LDWRKR )

                     CALL DGEMM( 'n', 'n', CHUNK, N, N, ONE, A( I, 1 ),

     $                           LDA, WORK( IU ), LDWRKU, ZERO,

     $                           WORK( IR ), LDWRKR )

                     CALL DLACPY( 'f', CHUNK, N, WORK( IR ), LDWRKR,

     $                            A( I, 1 ), LDA )

   20             CONTINUE

               END IF

*

            ELSE IF( WNTQS ) THEN

*

*              Path 5s (M >= N, JOBZ='S')

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   3*N [e, tauq, taup] + BDSPAC

*

               CALL DLASET( 'f', M, N, ZERO, ZERO, U, LDU )

               CALL DBDSDC( 'u', 'i', N, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Overwrite U by left singular vectors of A and VT

*              by right singular vectors of A

*              Workspace: need   3*N [e, tauq, taup] + N    [work]

*              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, N, N, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', N, N, N, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

            ELSE IF( WNTQA ) THEN

*

*              Path 5a (M >= N, JOBZ='A')

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   3*N [e, tauq, taup] + BDSPAC

*

               CALL DLASET( 'f', M, M, ZERO, ZERO, U, LDU )

               CALL DBDSDC( 'u', 'i', N, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Set the right corner of U to identity matrix

*

               IF( M.GT.N ) THEN

                  CALL DLASET( 'f', M - N, M - N, ZERO, ONE, U(N+1,N+1),

     $                         LDU )

               END IF

*

*              Overwrite U by left singular vectors of A and VT

*              by right singular vectors of A

*              Workspace: need   3*N [e, tauq, taup] + M    [work]

*              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, N, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', N, N, M, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

            END IF

*

         END IF

*

      ELSE

*

*        A has more columns than rows. If A has sufficiently more

*        columns than rows, first reduce using the LQ decomposition (if

*        sufficient workspace available)

*

         IF( N.GE.MNTHR ) THEN

*

            IF( WNTQN ) THEN

*

*              Path 1t (N >> M, JOBZ='N')

*              No singular vectors to be computed

*

               ITAU = 1

               NWORK = ITAU + M

*

*              Compute A=L*Q

*              Workspace: need   M [tau] + M [work]

*              Workspace: prefer M [tau] + M*NB [work]

*

               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Zero out above L

*

               CALL DLASET( 'u', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )

               IE = 1

               ITAUQ = IE + M

               ITAUP = ITAUQ + M

               NWORK = ITAUP + M

*

*              Bidiagonalize L in A

*              Workspace: need   3*M [e, tauq, taup] + M      [work]

*              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]

*

               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),

     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,

     $                      IERR )

               NWORK = IE + M

*

*              Perform bidiagonal SVD, computing singular values only

*              Workspace: need   M [e] + BDSPAC

*

               CALL DBDSDC( 'u', 'n', M, S, WORK( IE ), DUM, 1, DUM, 1,

     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )

*

            ELSE IF( WNTQO ) THEN

*

*              Path 2t (N >> M, JOBZ='O')

*              M right singular vectors to be overwritten on A and

*              M left singular vectors to be computed in U

*

               IVT = 1

*

*              WORK(IVT) is M by M

*              WORK(IL)  is M by M; it is later resized to M by chunk for gemm

*

               IL = IVT + M*M

               IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN

                  LDWRKL = M

                  CHUNK = N

               ELSE

                  LDWRKL = M

                  CHUNK = ( LWORK - M*M ) / M

               END IF

               ITAU = IL + LDWRKL*M

               NWORK = ITAU + M

*

*              Compute A=L*Q

*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]

*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]

*

               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Copy L to WORK(IL), zeroing out above it

*

               CALL DLACPY( 'l', M, M, A, LDA, WORK( IL ), LDWRKL )

               CALL DLASET( 'u', M - 1, M - 1, ZERO, ZERO,

     $                      WORK( IL + LDWRKL ), LDWRKL )

*

*              Generate Q in A

*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]

*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]

*

               CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

               IE = ITAU

               ITAUQ = IE + M

               ITAUP = ITAUQ + M

               NWORK = ITAUP + M

*

*              Bidiagonalize L in WORK(IL)

*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work]

*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]

*

               CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),

     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U, and computing right singular

*              vectors of bidiagonal matrix in WORK(IVT)

*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', M, S, WORK( IE ), U, LDU,

     $                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),

     $                      IWORK, INFO )

*

*              Overwrite U by left singular vectors of L and WORK(IVT)

*              by right singular vectors of L

*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work]

*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, M, WORK( IL ), LDWRKL,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', M, M, M, WORK( IL ), LDWRKL,

     $                      WORK( ITAUP ), WORK( IVT ), M,

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*              Multiply right singular vectors of L in WORK(IVT) by Q

*              in A, storing result in WORK(IL) and copying to A

*              Workspace: need   M*M [VT] + M*M [L]

*              Workspace: prefer M*M [VT] + M*N [L]

*              At this point, L is resized as M by chunk.

*

               DO 30 I = 1, N, CHUNK

                  BLK = MIN( N - I + 1, CHUNK )

                  CALL DGEMM( 'n', 'n', M, BLK, M, ONE, WORK( IVT ), M,

     $                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )

                  CALL DLACPY( 'f', M, BLK, WORK( IL ), LDWRKL,

     $                         A( 1, I ), LDA )

   30          CONTINUE

*

            ELSE IF( WNTQS ) THEN

*

*              Path 3t (N >> M, JOBZ='S')

*              M right singular vectors to be computed in VT and

*              M left singular vectors to be computed in U

*

               IL = 1

*

*              WORK(IL) is M by M

*

               LDWRKL = M

               ITAU = IL + LDWRKL*M

               NWORK = ITAU + M

*

*              Compute A=L*Q

*              Workspace: need   M*M [L] + M [tau] + M    [work]

*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]

*

               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Copy L to WORK(IL), zeroing out above it

*

               CALL DLACPY( 'l', M, M, A, LDA, WORK( IL ), LDWRKL )

               CALL DLASET( 'u', M - 1, M - 1, ZERO, ZERO,

     $                      WORK( IL + LDWRKL ), LDWRKL )

*

*              Generate Q in A

*              Workspace: need   M*M [L] + M [tau] + M    [work]

*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]

*

               CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

               IE = ITAU

               ITAUQ = IE + M

               ITAUP = ITAUQ + M

               NWORK = ITAUP + M

*

*              Bidiagonalize L in WORK(IL)

*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work]

*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]

*

               CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),

     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', M, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Overwrite U by left singular vectors of L and VT

*              by right singular vectors of L

*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work]

*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, M, WORK( IL ), LDWRKL,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', M, M, M, WORK( IL ), LDWRKL,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

*              Multiply right singular vectors of L in WORK(IL) by

*              Q in A, storing result in VT

*              Workspace: need   M*M [L]

*

               CALL DLACPY( 'f', M, M, VT, LDVT, WORK( IL ), LDWRKL )

               CALL DGEMM( 'n', 'n', M, N, M, ONE, WORK( IL ), LDWRKL,

     $                     A, LDA, ZERO, VT, LDVT )

*

            ELSE IF( WNTQA ) THEN

*

*              Path 4t (N >> M, JOBZ='A')

*              N right singular vectors to be computed in VT and

*              M left singular vectors to be computed in U

*

               IVT = 1

*

*              WORK(IVT) is M by M

*

               LDWKVT = M

               ITAU = IVT + LDWKVT*M

               NWORK = ITAU + M

*

*              Compute A=L*Q, copying result to VT

*              Workspace: need   M*M [VT] + M [tau] + M    [work]

*              Workspace: prefer M*M [VT] + M [tau] + M*NB [work]

*

               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DLACPY( 'u', M, N, A, LDA, VT, LDVT )

*

*              Generate Q in VT

*              Workspace: need   M*M [VT] + M [tau] + N    [work]

*              Workspace: prefer M*M [VT] + M [tau] + N*NB [work]

*

               CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*              Produce L in A, zeroing out other entries

*

               CALL DLASET( 'u', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )

               IE = ITAU

               ITAUQ = IE + M

               ITAUP = ITAUQ + M

               NWORK = ITAUP + M

*

*              Bidiagonalize L in A

*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work]

*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]

*

               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),

     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,

     $                      IERR )

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in WORK(IVT)

*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'u', 'i', M, S, WORK( IE ), U, LDU,

     $                      WORK( IVT ), LDWKVT, DUM, IDUM,

     $                      WORK( NWORK ), IWORK, INFO )

*

*              Overwrite U by left singular vectors of L and WORK(IVT)

*              by right singular vectors of L

*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work]

*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, M, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', M, M, M, A, LDA,

     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,

     $                      WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*              Multiply right singular vectors of L in WORK(IVT) by

*              Q in VT, storing result in A

*              Workspace: need   M*M [VT]

*

               CALL DGEMM( 'n', 'n', M, N, M, ONE, WORK( IVT ), LDWKVT,

     $                     VT, LDVT, ZERO, A, LDA )

*

*              Copy right singular vectors of A from A to VT

*

               CALL DLACPY( 'f', M, N, A, LDA, VT, LDVT )

*

            END IF

*

         ELSE

*

*           N .LT. MNTHR

*

*           Path 5t (N > M, but not much larger)

*           Reduce to bidiagonal form without LQ decomposition

*

            IE = 1

            ITAUQ = IE + M

            ITAUP = ITAUQ + M

            NWORK = ITAUP + M

*

*           Bidiagonalize A

*           Workspace: need   3*M [e, tauq, taup] + N        [work]

*           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]

*

            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),

     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,

     $                   IERR )

            IF( WNTQN ) THEN

*

*              Path 5tn (N > M, JOBZ='N')

*              Perform bidiagonal SVD, only computing singular values

*              Workspace: need   3*M [e, tauq, taup] + BDSPAC

*

               CALL DBDSDC( 'l', 'n', M, S, WORK( IE ), DUM, 1, DUM, 1,

     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )

            ELSE IF( WNTQO ) THEN

*              Path 5to (N > M, JOBZ='O')

               LDWKVT = M

               IVT = NWORK

               IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN

*

*                 WORK( IVT ) is M by N

*

                  CALL DLASET( 'f', M, N, ZERO, ZERO, WORK( IVT ),

     $                         LDWKVT )

                  NWORK = IVT + LDWKVT*N

*                 IL is unused; silence compile warnings

                  IL = -1

               ELSE

*

*                 WORK( IVT ) is M by M

*

                  NWORK = IVT + LDWKVT*M

                  IL = NWORK

*

*                 WORK(IL) is M by CHUNK

*

                  CHUNK = ( LWORK - M*M - 3*M ) / M

               END IF

*

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in WORK(IVT)

*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC

*

               CALL DBDSDC( 'l', 'i', M, S, WORK( IE ), U, LDU,

     $                      WORK( IVT ), LDWKVT, DUM, IDUM,

     $                      WORK( NWORK ), IWORK, INFO )

*

*              Overwrite U by left singular vectors of A

*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]

*              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, N, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

*

               IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN

*

*                 Path 5to-fast

*                 Overwrite WORK(IVT) by right singular vectors of A

*                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work]

*                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]

*

                  CALL DORMBR( 'p', 'r', 't', M, N, M, A, LDA,

     $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,

     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*                 Copy right singular vectors of A from WORK(IVT) to A

*

                  CALL DLACPY( 'f', M, N, WORK( IVT ), LDWKVT, A, LDA )

               ELSE

*

*                 Path 5to-slow

*                 Generate P**T in A

*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]

*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]

*

                  CALL DORGBR( 'p', M, N, M, A, LDA, WORK( ITAUP ),

     $                         WORK( NWORK ), LWORK - NWORK + 1, IERR )

*

*                 Multiply right singular vectors of bidiagonal matrix

*                 in WORK(IVT) by P**T in A, storing result in

*                 WORK(IL) and copying to A

*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L]

*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L]

*

                  DO 40 I = 1, N, CHUNK

                     BLK = MIN( N - I + 1, CHUNK )

                     CALL DGEMM( 'n', 'n', M, BLK, M, ONE, WORK( IVT ),

     $                           LDWKVT, A( 1, I ), LDA, ZERO,

     $                           WORK( IL ), M )

                     CALL DLACPY( 'f', M, BLK, WORK( IL ), M, A( 1, I ),

     $                            LDA )

   40             CONTINUE

               END IF

            ELSE IF( WNTQS ) THEN

*

*              Path 5ts (N > M, JOBZ='S')

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   3*M [e, tauq, taup] + BDSPAC

*

               CALL DLASET( 'f', M, N, ZERO, ZERO, VT, LDVT )

               CALL DBDSDC( 'l', 'i', M, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Overwrite U by left singular vectors of A and VT

*              by right singular vectors of A

*              Workspace: need   3*M [e, tauq, taup] + M    [work]

*              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, N, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', M, N, M, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

            ELSE IF( WNTQA ) THEN

*

*              Path 5ta (N > M, JOBZ='A')

*              Perform bidiagonal SVD, computing left singular vectors

*              of bidiagonal matrix in U and computing right singular

*              vectors of bidiagonal matrix in VT

*              Workspace: need   3*M [e, tauq, taup] + BDSPAC

*

               CALL DLASET( 'f', N, N, ZERO, ZERO, VT, LDVT )

               CALL DBDSDC( 'l', 'i', M, S, WORK( IE ), U, LDU, VT,

     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,

     $                      INFO )

*

*              Set the right corner of VT to identity matrix

*

               IF( N.GT.M ) THEN

                  CALL DLASET( 'f', N-M, N-M, ZERO, ONE, VT(M+1,M+1),

     $                         LDVT )

               END IF

*

*              Overwrite U by left singular vectors of A and VT

*              by right singular vectors of A

*              Workspace: need   3*M [e, tauq, taup] + N    [work]

*              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]

*

               CALL DORMBR( 'q', 'l', 'n', M, M, N, A, LDA,

     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

               CALL DORMBR( 'p', 'r', 't', N, N, M, A, LDA,

     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),

     $                      LWORK - NWORK + 1, IERR )

            END IF

*

         END IF

*

      END IF

*

*     Undo scaling if necessary

*

      IF( ISCL.EQ.1 ) THEN

         IF( ANRM.GT.BIGNUM )

     $      CALL DLASCL( 'g', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,

     $                   IERR )

         IF( ANRM.LT.SMLNUM )

     $      CALL DLASCL( 'g', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,

     $                   IERR )

      END IF

*

*     Return optimal workspace in WORK(1)

*

      WORK( 1 ) = DROUNDUP_LWORK( MAXWRK )

*

      RETURN

*

*     End of DGESDD

*


      END

dlacpy
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103

dlascl
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:143

dlaset
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110

dbdsdc
subroutine dbdsdc(uplo, compq, n, d, e, u, ldu, vt, ldvt, q, iq, work, iwork, info)
DBDSDC
Definition dbdsdc.f:205

xerbla
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60

dorgbr
subroutine dorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
DORGBR
Definition dorgbr.f:157

dgebrd
subroutine dgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
DGEBRD
Definition dgebrd.f:205

dgelqf
subroutine dgelqf(m, n, a, lda, tau, work, lwork, info)
DGELQF
Definition dgelqf.f:143

dgeqrf
subroutine dgeqrf(m, n, a, lda, tau, work, lwork, info)
DGEQRF
Definition dgeqrf.f:146

dgesdd
subroutine dgesdd(jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, iwork, info)
DGESDD
Definition dgesdd.f:219

dormbr
subroutine dormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMBR
Definition dormbr.f:195

dorgqr
subroutine dorgqr(m, n, k, a, lda, tau, work, lwork, info)
DORGQR
Definition dorgqr.f:128

dorglq
subroutine dorglq(m, n, k, a, lda, tau, work, lwork, info)
DORGLQ
Definition dorglq.f:127

dgemm
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21