OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
zgetrf.f
Go to the documentation of this file.
1C> \brief \b ZGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8* Definition:
9* ===========
10*
11* SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO )
12*
13* .. Scalar Arguments ..
14* INTEGER INFO, LDA, M, N
15* ..
16* .. Array Arguments ..
17* INTEGER IPIV( * )
18* COMPLEX*16 A( LDA, * )
19* ..
20*
21* Purpose
22* =======
23*
24C>\details \b Purpose:
25C>\verbatim
26C>
27C> ZGETRF computes an LU factorization of a general M-by-N matrix A
28C> using partial pivoting with row interchanges.
29C>
30C> The factorization has the form
31C> A = P * L * U
32C> where P is a permutation matrix, L is lower triangular with unit
33C> diagonal elements (lower trapezoidal if m > n), and U is upper
34C> triangular (upper trapezoidal if m < n).
35C>
36C> This code implements an iterative version of Sivan Toledo's recursive
37C> LU algorithm[1]. For square matrices, this iterative versions should
38C> be within a factor of two of the optimum number of memory transfers.
39C>
40C> The pattern is as follows, with the large blocks of U being updated
41C> in one call to DTRSM, and the dotted lines denoting sections that
42C> have had all pending permutations applied:
43C>
44C> 1 2 3 4 5 6 7 8
45C> +-+-+---+-------+------
46C> | |1| | |
47C> |.+-+ 2 | |
48C> | | | | |
49C> |.|.+-+-+ 4 |
50C> | | | |1| |
51C> | | |.+-+ |
52C> | | | | | |
53C> |.|.|.|.+-+-+---+ 8
54C> | | | | | |1| |
55C> | | | | |.+-+ 2 |
56C> | | | | | | | |
57C> | | | | |.|.+-+-+
58C> | | | | | | | |1|
59C> | | | | | | |.+-+
60C> | | | | | | | | |
61C> |.|.|.|.|.|.|.|.+-----
62C> | | | | | | | | |
63C>
64C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
65C> the binary expansion of the current column. Each Schur update is
66C> applied as soon as the necessary portion of U is available.
67C>
68C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
69C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
70C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
71C>
72C>\endverbatim
73*
74* Arguments:
75* ==========
76*
77C> \param[in] M
78C> \verbatim
79C> M is INTEGER
80C> The number of rows of the matrix A. M >= 0.
81C> \endverbatim
82C>
83C> \param[in] N
84C> \verbatim
85C> N is INTEGER
86C> The number of columns of the matrix A. N >= 0.
87C> \endverbatim
88C>
89C> \param[in,out] A
90C> \verbatim
91C> A is COMPLEX*16 array, dimension (LDA,N)
92C> On entry, the M-by-N matrix to be factored.
93C> On exit, the factors L and U from the factorization
94C> A = P*L*U; the unit diagonal elements of L are not stored.
95C> \endverbatim
96C>
97C> \param[in] LDA
98C> \verbatim
99C> LDA is INTEGER
100C> The leading dimension of the array A. LDA >= max(1,M).
101C> \endverbatim
102C>
103C> \param[out] IPIV
104C> \verbatim
105C> IPIV is INTEGER array, dimension (min(M,N))
106C> The pivot indices; for 1 <= i <= min(M,N), row i of the
107C> matrix was interchanged with row IPIV(i).
108C> \endverbatim
109C>
110C> \param[out] INFO
111C> \verbatim
112C> INFO is INTEGER
113C> = 0: successful exit
114C> < 0: if INFO = -i, the i-th argument had an illegal value
115C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
116C> has been completed, but the factor U is exactly
117C> singular, and division by zero will occur if it is used
118C> to solve a system of equations.
119C> \endverbatim
120C>
121*
122* Authors:
123* ========
124*
125C> \author Univ. of Tennessee
126C> \author Univ. of California Berkeley
127C> \author Univ. of Colorado Denver
128C> \author NAG Ltd.
129*
130C> \date December 2016
131*
132C> \ingroup variantsGEcomputational
133*
134* =====================================================================
135 SUBROUTINE zgetrf( M, N, A, LDA, IPIV, INFO )
136*
137* -- LAPACK computational routine (version 3.X) --
138* -- LAPACK is a software package provided by Univ. of Tennessee, --
139* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
140*
141* .. Scalar Arguments ..
142 INTEGER INFO, LDA, M, N
143* ..
144* .. Array Arguments ..
145 INTEGER IPIV( * )
146 COMPLEX*16 A( LDA, * )
147* ..
148*
149* =====================================================================
150*
151* .. Parameters ..
152 COMPLEX*16 ONE, NEGONE
153 DOUBLE PRECISION ZERO
154 parameter( one = (1.0d+0, 0.0d+0) )
155 parameter( negone = (-1.0d+0, 0.0d+0) )
156 parameter( zero = 0.0d+0 )
157* ..
158* .. Local Scalars ..
159 DOUBLE PRECISION SFMIN, PIVMAG
160 COMPLEX*16 TMP
161 INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
162 INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
163* ..
164* .. External Functions ..
165 DOUBLE PRECISION DLAMCH
166 INTEGER IZAMAX
167 LOGICAL DISNAN
168 EXTERNAL dlamch, izamax, disnan
169* ..
170* .. External Subroutines ..
171 EXTERNAL ztrsm, zscal, xerbla, zlaswp
172* ..
173* .. Intrinsic Functions ..
174 INTRINSIC max, min, iand, abs
175* ..
176* .. Executable Statements ..
177*
178* Test the input parameters.
179*
180 info = 0
181 IF( m.LT.0 ) THEN
182 info = -1
183 ELSE IF( n.LT.0 ) THEN
184 info = -2
185 ELSE IF( lda.LT.max( 1, m ) ) THEN
186 info = -4
187 END IF
188 IF( info.NE.0 ) THEN
189 CALL xerbla( 'ZGETRF', -info )
190 RETURN
191 END IF
192*
193* Quick return if possible
194*
195 IF( m.EQ.0 .OR. n.EQ.0 )
196 $ RETURN
197*
198* Compute machine safe minimum
199*
200 sfmin = dlamch( 'S' )
201*
202 nstep = min( m, n )
203 DO j = 1, nstep
204 kahead = iand( j, -j )
205 kstart = j + 1 - kahead
206 kcols = min( kahead, m-j )
207*
208* Find pivot.
209*
210 jp = j - 1 + izamax( m-j+1, a( j, j ), 1 )
211 ipiv( j ) = jp
212
213! Permute just this column.
214 IF (jp .NE. j) THEN
215 tmp = a( j, j )
216 a( j, j ) = a( jp, j )
217 a( jp, j ) = tmp
218 END IF
219
220! Apply pending permutations to L
221 ntopiv = 1
222 ipivstart = j
223 jpivstart = j - ntopiv
224 DO WHILE ( ntopiv .LT. kahead )
225 CALL zlaswp( ntopiv, a( 1, jpivstart ), lda, ipivstart, j,
226 $ ipiv, 1 )
227 ipivstart = ipivstart - ntopiv;
228 ntopiv = ntopiv * 2;
229 jpivstart = jpivstart - ntopiv;
230 END DO
231
232! Permute U block to match L
233 CALL zlaswp( kcols, a( 1,j+1 ), lda, kstart, j, ipiv, 1 )
234
235! Factor the current column
236 pivmag = abs( a( j, j ) )
237 IF( pivmag.NE.zero .AND. .NOT.disnan( pivmag ) ) THEN
238 IF( pivmag .GE. sfmin ) THEN
239 CALL zscal( m-j, one / a( j, j ), a( j+1, j ), 1 )
240 ELSE
241 DO i = 1, m-j
242 a( j+i, j ) = a( j+i, j ) / a( j, j )
243 END DO
244 END IF
245 ELSE IF( pivmag .EQ. zero .AND. info .EQ. 0 ) THEN
246 info = j
247 END IF
248
249! Solve for U block.
250 CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit', kahead,
251 $ kcols, one, a( kstart, kstart ), lda,
252 $ a( kstart, j+1 ), lda )
253! Schur complement.
254 CALL zgemm( 'No transpose', 'No transpose', m-j,
255 $ kcols, kahead, negone, a( j+1, kstart ), lda,
256 $ a( kstart, j+1 ), lda, one, a( j+1, j+1 ), lda )
257 END DO
258
259! Handle pivot permutations on the way out of the recursion
260 npived = iand( nstep, -nstep )
261 j = nstep - npived
262 DO WHILE ( j .GT. 0 )
263 ntopiv = iand( j, -j )
264 CALL zlaswp( ntopiv, a( 1, j-ntopiv+1 ), lda, j+1, nstep,
265 $ ipiv, 1 )
266 j = j - ntopiv
267 END DO
268
269! If short and wide, handle the rest of the columns.
270 IF ( m .LT. n ) THEN
271 CALL zlaswp( n-m, a( 1, m+kcols+1 ), lda, 1, m, ipiv, 1 )
272 CALL ztrsm( 'Left', 'Lower', 'No transpose', 'Unit', m,
273 $ n-m, one, a, lda, a( 1,m+kcols+1 ), lda )
274 END IF
275
276 RETURN
277*
278* End of ZGETRF
279*
280 END
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
subroutine zlaswp(n, a, lda, k1, k2, ipiv, incx)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
Definition zlaswp.f:115
subroutine zscal(n, za, zx, incx)
ZSCAL
Definition zscal.f:78
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:187
subroutine ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRSM
Definition ztrsm.f:180
subroutine zgetrf(m, n, a, lda, ipiv, info)
ZGETRF VARIANT: Crout Level 3 BLAS version of the algorithm.
Definition zgetrf.f:102
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21