VARIANTS_2lu_2REC_2zgetrf_8f_source.html

C> \brief \b ZGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*  Definition:

*  ===========

*

*       SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, LDA, M, N

*       ..

*       .. Array Arguments ..

*       INTEGER            IPIV( * )

*       COMPLEX*16         A( LDA, * )

*       ..

*

*  Purpose

*  =======

*

C>\details \b Purpose:

C>\verbatim

C>

C> ZGETRF computes an LU factorization of a general M-by-N matrix A

C> using partial pivoting with row interchanges.

C>

C> The factorization has the form

C>    A = P * L * U

C> where P is a permutation matrix, L is lower triangular with unit

C> diagonal elements (lower trapezoidal if m > n), and U is upper

C> triangular (upper trapezoidal if m < n).

C>

C> This code implements an iterative version of Sivan Toledo's recursive

C> LU algorithm[1].  For square matrices, this iterative version should

C> be within a factor of two of the optimum number of memory transfers.

C>

C> The pattern is as follows, with the large blocks of U being updated

C> in one call to ZTRSM, and the dotted lines denoting sections that

C> have had all pending permutations applied:

C>

C>  1 2 3 4 5 6 7 8

C> +-+-+---+-------+------

C> | |1|   |       |

C> |.+-+ 2 |       |

C> | | |   |       |

C> |.|.+-+-+   4   |

C> | | | |1|       |

C> | | |.+-+       |

C> | | | | |       |

C> |.|.|.|.+-+-+---+  8

C> | | | | | |1|   |

C> | | | | |.+-+ 2 |

C> | | | | | | |   |

C> | | | | |.|.+-+-+

C> | | | | | | | |1|

C> | | | | | | |.+-+

C> | | | | | | | | |

C> |.|.|.|.|.|.|.|.+-----

C> | | | | | | | | |

C>

C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in

C> the binary expansion of the current column.  Each Schur update is

C> applied as soon as the necessary portion of U is available.

C>

C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with

C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),

C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744

C>

C>\endverbatim

*

*  Arguments:

*  ==========

*

C> \param[in] M

C> \verbatim

C>          M is INTEGER

C>          The number of rows of the matrix A.  M >= 0.

C> \endverbatim

C>

C> \param[in] N

C> \verbatim

C>          N is INTEGER

C>          The number of columns of the matrix A.  N >= 0.

C> \endverbatim

C>

C> \param[in,out] A

C> \verbatim

C>          A is COMPLEX*16 array, dimension (LDA,N)

C>          On entry, the M-by-N matrix to be factored.

C>          On exit, the factors L and U from the factorization

C>          A = P*L*U; the unit diagonal elements of L are not stored.

C> \endverbatim

C>

C> \param[in] LDA

C> \verbatim

C>          LDA is INTEGER

C>          The leading dimension of the array A.  LDA >= max(1,M).

C> \endverbatim

C>

C> \param[out] IPIV

C> \verbatim

C>          IPIV is INTEGER array, dimension (min(M,N))

C>          The pivot indices; for 1 <= i <= min(M,N), row i of the

C>          matrix was interchanged with row IPIV(i).

C> \endverbatim

C>

C> \param[out] INFO

C> \verbatim

C>          INFO is INTEGER

C>          = 0:  successful exit

C>          < 0:  if INFO = -i, the i-th argument had an illegal value

C>          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization

C>                has been completed, but the factor U is exactly

C>                singular, and division by zero will occur if it is used

C>                to solve a system of equations.

C> \endverbatim

C>

*

*  Authors:

*  ========

*

C> \author Univ. of Tennessee

C> \author Univ. of California Berkeley

C> \author Univ. of Colorado Denver

C> \author NAG Ltd.

*

C> \date December 2016

*

C> \ingroup variantsGEcomputational

*

*  =====================================================================

      SUBROUTINE zgetrf( M, N, A, LDA, IPIV, INFO )
*
*  -- LAPACK computational routine (version 3.X) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.--
*
*  Overwrites the M-by-N matrix A with the factors L and U of
*  A = P*L*U, computed with partial pivoting (row interchanges), and
*  records the interchanges in IPIV.
*
*  Arguments
*     M     (in)     number of rows of A; M < 0 gives INFO = -1.
*     N     (in)     number of columns of A; N < 0 gives INFO = -2.
*     A     (in/out) COMPLEX*16 array A( LDA, * ); only columns 1..N
*                    are referenced.
*     LDA   (in)     leading dimension of A; LDA < max(1,M) gives
*                    INFO = -4.
*     IPIV  (out)    IPIV( J ) is the row interchanged with row J,
*                    for J = 1..min(M,N).
*     INFO  (out)    0 on success; negative if an argument is illegal
*                    (XERBLA is called before returning); J > 0 if
*                    the first exactly zero pivot occurred at step J.
*
*     .. Scalar Arguments ..
      INTEGER            INFO, LDA, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      COMPLEX*16         A( LDA, * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      COMPLEX*16         ONE, NEGONE
      DOUBLE PRECISION   ZERO
      parameter( one = (1.0d+0, 0.0d+0) )
      parameter( negone = (-1.0d+0, 0.0d+0) )
      parameter( zero = 0.0d+0 )
*     ..
*     .. Local Scalars ..
      DOUBLE PRECISION   SFMIN, PIVMAG
      COMPLEX*16         TMP
      INTEGER            I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
      INTEGER            KSTART, IPIVSTART, JPIVSTART, KCOLS
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      INTEGER            IZAMAX
      LOGICAL            DISNAN
      EXTERNAL           dlamch, izamax, disnan
*     ..
*     .. External Subroutines ..
      EXTERNAL           ztrsm, zgemm, zscal, xerbla, zlaswp
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, iand, abs
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      info = 0
      IF( m.LT.0 ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, m ) ) THEN
         info = -4
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'ZGETRF', -info )
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 )
     $   RETURN
*
*     Compute machine safe minimum
*
      sfmin = dlamch( 'S' )
*
      nstep = min( m, n )
      DO j = 1, nstep
*
*        KAHEAD = IAND( J, -J ) gives the 1-2-1-4-1-2-1-8 pattern
*        described in the header.  Columns KSTART..J form the block
*        of L that is complete after this step; it is used to update
*        the next KCOLS columns.
*
         kahead = iand( j, -j )
         kstart = j + 1 - kahead
*
*        Bound the look-ahead block by NSTEP = MIN( M, N ), not by M
*        alone.  For M > N a bound of M-J lets J+KCOLS exceed N, so
*        the ZLASWP / ZTRSM / ZGEMM calls below would read and write
*        columns that are not part of A.  For M <= N the two bounds
*        are identical.
*
         kcols = min( kahead, nstep-j )
*
*        Find pivot.
*
         jp = j - 1 + izamax( m-j+1, a( j, j ), 1 )
         ipiv( j ) = jp
*
*        Permute just this column.
*
         IF( jp.NE.j ) THEN
            tmp = a( j, j )
            a( j, j ) = a( jp, j )
            a( jp, j ) = tmp
         END IF
*
*        Apply pending permutations to L: interchanges
*        IPIV( IPIVSTART..J ) are applied to column blocks of width
*        1, 2, 4, ... (while smaller than KAHEAD) lying to the left
*        of column J.
*
         ntopiv = 1
         ipivstart = j
         jpivstart = j - ntopiv
         DO WHILE ( ntopiv.LT.kahead )
            CALL zlaswp( ntopiv, a( 1, jpivstart ), lda, ipivstart, j,
     $           ipiv, 1 )
            ipivstart = ipivstart - ntopiv
            ntopiv = ntopiv * 2
            jpivstart = jpivstart - ntopiv
         END DO
*
*        Permute U block to match L
*
         CALL zlaswp( kcols, a( 1, j+1 ), lda, kstart, j, ipiv, 1 )
*
*        Factor the current column: divide the entries below the
*        diagonal by the pivot.  A pivot of magnitude >= SFMIN is
*        applied as a reciprocal through ZSCAL; a smaller one is
*        divided element by element (presumably so that the
*        reciprocal cannot overflow).  A zero or NaN pivot leaves
*        the column unscaled; only the first zero pivot is reported.
*
         pivmag = abs( a( j, j ) )
         IF( pivmag.NE.zero .AND. .NOT.disnan( pivmag ) ) THEN
            IF( pivmag.GE.sfmin ) THEN
               CALL zscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
            ELSE
               DO i = 1, m-j
                  a( j+i, j ) = a( j+i, j ) / a( j, j )
               END DO
            END IF
         ELSE IF( pivmag.EQ.zero .AND. info.EQ.0 ) THEN
            info = j
         END IF
*
*        Solve for U block: unit lower triangle A( KSTART:J,
*        KSTART:J ) applied to A( KSTART:J, J+1:J+KCOLS ).
*
         CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit', kahead,
     $        kcols, one, a( kstart, kstart ), lda,
     $        a( kstart, j+1 ), lda )
*
*        Schur complement:
*        A( J+1:M, J+1:J+KCOLS ) -= A( J+1:M, KSTART:J ) *
*                                   A( KSTART:J, J+1:J+KCOLS )
*
         CALL zgemm( 'No transpose', 'No transpose', m-j,
     $        kcols, kahead, negone, a( j+1, kstart ), lda,
     $        a( kstart, j+1 ), lda, one, a( j+1, j+1 ), lda )
      END DO
*
*     Handle pivot permutations on the way out of the recursion:
*     interchanges IPIV( J+1..NSTEP ) are applied to the column
*     block J-NTOPIV+1..J, walking J down to zero.
*
      npived = iand( nstep, -nstep )
      j = nstep - npived
      DO WHILE ( j.GT.0 )
         ntopiv = iand( j, -j )
         CALL zlaswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
     $        ipiv, 1 )
         j = j - ntopiv
      END DO
*
*     If short and wide, handle the rest of the columns.  The last
*     loop iteration ( J = M ) always ends with KCOLS = 0, so the
*     remaining block is exactly columns M+1..N.
*
      IF( m.LT.n ) THEN
         CALL zlaswp( n-m, a( 1, m+1 ), lda, 1, m, ipiv, 1 )
         CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
     $        n-m, one, a, lda, a( 1, m+1 ), lda )
      END IF
*
      RETURN
*
*     End of ZGETRF
*
      END

xerbla
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60

zlaswp
subroutine zlaswp(n, a, lda, k1, k2, ipiv, incx)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
Definition zlaswp.f:115

zscal
subroutine zscal(n, za, zx, incx)
ZSCAL
Definition zscal.f:78

zgemm
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:187

ztrsm
subroutine ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRSM
Definition ztrsm.f:180

zgetrf
subroutine zgetrf(m, n, a, lda, ipiv, info)
ZGETRF VARIANT: Crout Level 3 BLAS version of the algorithm.
Definition zgetrf.f:102

min
#define min(a, b)
Definition macros.h:20

max
#define max(a, b)
Definition macros.h:21