OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdlaqr4.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pdlaqr4 (wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, t, ldt, v, ldv, work, lwork, info)

Function/Subroutine Documentation

◆ pdlaqr4()

subroutine pdlaqr4 ( logical wantt,
logical wantz,
integer n,
integer ilo,
integer ihi,
double precision, dimension( * ) a,
integer, dimension( * ) desca,
double precision, dimension( * ) wr,
double precision, dimension( * ) wi,
integer iloz,
integer ihiz,
double precision, dimension( * ) z,
integer, dimension( * ) descz,
double precision, dimension( ldt, * ) t,
integer ldt,
double precision, dimension( ldv, * ) v,
integer ldv,
double precision, dimension( * ) work,
integer lwork,
integer info )

Definition at line 1 of file pdlaqr4.f.

4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 LOGICAL WANTT, WANTZ
16 INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDT, LDV, LWORK, N
17* ..
18* .. Array Arguments ..
19 INTEGER DESCA( * ), DESCZ( * )
20 DOUBLE PRECISION A( * ), T( LDT, * ), V( LDV, * ), WI( * ),
21 $ WORK( * ), WR( * ), Z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PDLAQR4 is an auxiliary routine used to find the Schur decomposition
28* and or eigenvalues of a matrix already in Hessenberg form from cols
29* ILO to IHI. This routine requires that the active block is small
30* enough, i.e. IHI-ILO+1 .LE. LDT, so that it can be solved by LAPACK.
31* Normally, it is called by PDLAQR1. All the inputs are assumed to be
32* valid without checking.
33*
34* Notes
35* =====
36*
37* Each global data object is described by an associated description
38* vector. This vector stores the information required to establish
39* the mapping between an object element and its corresponding process
40* and memory location.
41*
42* Let A be a generic term for any 2D block cyclicly distributed array.
43* Such a global array has an associated description vector DESCA.
44* In the following comments, the character _ should be read as
45* "of the global array".
46*
47* NOTATION STORED IN EXPLANATION
48* --------------- -------------- --------------------------------------
49* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
50* DTYPE_A = 1.
51* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
52* the BLACS process grid A is distribu-
53* ted over. The context itself is glo-
54* bal, but the handle (the integer
55* value) may vary.
56* M_A (global) DESCA( M_ ) The number of rows in the global
57* array A.
58* N_A (global) DESCA( N_ ) The number of columns in the global
59* array A.
60* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
61* the rows of the array.
62* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
63* the columns of the array.
64* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
65* row of the array A is distributed.
66* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
67* first column of the array A is
68* distributed.
69* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
70* array. LLD_A >= MAX(1,LOCr(M_A)).
71*
72* Let K be the number of rows or columns of a distributed matrix,
73* and assume that its process grid has dimension p x q.
74* LOCr( K ) denotes the number of elements of K that a process
75* would receive if K were distributed over the p processes of its
76* process column.
77* Similarly, LOCc( K ) denotes the number of elements of K that a
78* process would receive if K were distributed over the q processes of
79* its process row.
80* The values of LOCr() and LOCc() may be determined via a call to the
81* ScaLAPACK tool function, NUMROC:
82* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
83* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
84* An upper bound for these quantities may be computed by:
85* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
86* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
87*
88* Arguments
89* =========
90*
91* WANTT (global input) LOGICAL
92* = .TRUE. : the full Schur form T is required;
93* = .FALSE.: only eigenvalues are required.
94*
95* WANTZ (global input) LOGICAL
96* = .TRUE. : the matrix of Schur vectors Z is required;
97* = .FALSE.: Schur vectors are not required.
98*
99* N (global input) INTEGER
100* The order of the Hessenberg matrix A (and Z if WANTZ).
101* N >= 0.
102*
103* ILO (global input) INTEGER
104* IHI (global input) INTEGER
105* It is assumed that A is already upper quasi-triangular in
106* rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
107* ILO = 1). PDLAQR4 works primarily with the Hessenberg
108* submatrix in rows and columns ILO to IHI, but applies
109* transformations to all of H if WANTT is .TRUE..
110* 1 <= ILO <= max(1,IHI); IHI <= N.
111*
112* A (global input/output) DOUBLE PRECISION array, dimension
113* (DESCA(LLD_),*)
114* On entry, the upper Hessenberg matrix A.
115* On exit, if WANTT is .TRUE., A is upper quasi-triangular in
116* rows and columns ILO:IHI, with any 2-by-2 or larger diagonal
117* blocks not yet in standard form. If WANTT is .FALSE., the
118* contents of A are unspecified on exit.
119*
120* DESCA (global and local input) INTEGER array of dimension DLEN_.
121* The array descriptor for the distributed matrix A.
122*
123* WR (global replicated output) DOUBLE PRECISION array,
124* dimension (N)
125* WI (global replicated output) DOUBLE PRECISION array,
126* dimension (N)
127* The real and imaginary parts, respectively, of the computed
128* eigenvalues ILO to IHI are stored in the corresponding
129* elements of WR and WI. If two eigenvalues are computed as a
130* complex conjugate pair, they are stored in consecutive
131* elements of WR and WI, say the i-th and (i+1)th, with
132* WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
133* eigenvalues are stored in the same order as on the diagonal
134* of the Schur form returned in A. A may be returned with
135* larger diagonal blocks until the next release.
136*
137* ILOZ (global input) INTEGER
138* IHIZ (global input) INTEGER
139* Specify the rows of Z to which transformations must be
140* applied if WANTZ is .TRUE..
141* 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
142*
143* Z (global input/output) DOUBLE PRECISION array.
144* If WANTZ is .TRUE., on entry Z must contain the current
145* matrix Z of transformations accumulated by PDHSEQR, and on
146* exit Z has been updated; transformations are applied only to
147* the submatrix Z(ILOZ:IHIZ,ILO:IHI).
148* If WANTZ is .FALSE., Z is not referenced.
149*
150* DESCZ (global and local input) INTEGER array of dimension DLEN_.
151* The array descriptor for the distributed matrix Z.
152*
153* T (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
154*
155* LDT (local input) INTEGER
156* The leading dimension of the array T.
157* LDT >= IHI-ILO+1.
158*
159* V (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
160*
161* LDV (local input) INTEGER
162* The leading dimension of the array V.
163* LDV >= IHI-ILO+1.
164*
165* WORK (local workspace) DOUBLE PRECISION array, dimension LWORK.
166*
167* LWORK (local input) INTEGER
168* The dimension of the work array WORK.
169* LWORK >= IHI-ILO+1.
170* WORK(LWORK) is a local array and LWORK is assumed big enough.
171* Typically LWORK >= 4*LDS*LDS if this routine is called by
172* PDLAQR1. (LDS = 385, see PDLAQR1)
173*
174* INFO (global output) INTEGER
175* < 0: parameter number -INFO incorrect or inconsistent;
176* = 0: successful exit;
177* > 0: PDLAQR4 failed to compute all the eigenvalues ILO to IHI
178* in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
179* elements i+1:ihi of WR and WI contain those eigenvalues
180* which have been successfully computed.
181*
182* ================================================================
183* Implemented by
184* Meiyue Shao, Department of Computing Science and HPC2N,
185* Umea University, Sweden
186*
187* ================================================================
188* References:
189* B. Kagstrom, D. Kressner, and M. Shao,
190* On Aggressive Early Deflation in Parallel Variants of the QR
191* Algorithm.
192* Para 2010, to appear.
193*
194* ================================================================
195* .. Parameters ..
196 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
197 $ LLD_, MB_, M_, NB_, N_, RSRC_
198 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
199 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
200 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
201 DOUBLE PRECISION ZERO, ONE
202 parameter( zero = 0.0d+0, one = 1.0d+0 )
203* ..
204* .. Local Scalars ..
205 INTEGER CONTXT, HBL, I, I1, I2, IAFIRST, ICOL, ICOL1,
206 $ ICOL2, II, IROW, IROW1, IROW2, ITMP1, ITMP2,
207 $ IERR, J, JAFIRST, JJ, K, L, LDA, LDZ, LLDTMP,
208 $ MYCOL, MYROW, NODE, NPCOL, NPROW, NH, NMIN, NZ,
209 $ HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP, LEFT,
210 $ RIGHT, UP, DOWN, D1, D2
211* ..
212* .. Local Arrays ..
213 INTEGER DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
214 $ DESCWV( 9 )
215* ..
216* .. External Functions ..
217 INTEGER NUMROC, ILAENV
218 EXTERNAL numroc, ilaenv
219* ..
220* .. External Subroutines ..
221 EXTERNAL blacs_gridinfo, infog2l, dlaset,
222 $ dlahqr, dlaqr4, descinit, pdgemm, pdgemr2d,
223 $ dgemm, dlamov, dgesd2d, dgerv2d,
224 $ dgebs2d, dgebr2d, igebs2d, igebr2d
225* ..
226* .. Intrinsic Functions ..
227 INTRINSIC max, min, mod
228* ..
229* .. Executable Statements ..
230*
231 info = 0
232*
233 nh = ihi - ilo + 1
234 nz = ihiz - iloz + 1
235 IF( n.EQ.0 .OR. nh.EQ.0 )
236 $ RETURN
237*
238* NODE (IAFIRST,JAFIRST) OWNS A(1,1)
239*
240 hbl = desca( mb_ )
241 contxt = desca( ctxt_ )
242 lda = desca( lld_ )
243 iafirst = desca( rsrc_ )
244 jafirst = desca( csrc_ )
245 ldz = descz( lld_ )
246 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
247 node = myrow*npcol + mycol
248 left = mod( mycol+npcol-1, npcol )
249 right = mod( mycol+1, npcol )
250 up = mod( myrow+nprow-1, nprow )
251 down = mod( myrow+1, nprow )
252*
253* I1 and I2 are the indices of the first row and last column of A
254* to which transformations must be applied.
255*
256 i = ihi
257 l = ilo
258 IF( wantt ) THEN
259 i1 = 1
260 i2 = n
261 ltop = 1
262 ELSE
263 i1 = l
264 i2 = i
265 ltop = l
266 END IF
267*
268* Copy the diagonal block to local and call LAPACK.
269*
270 CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
271 $ irow, icol, ii, jj )
272 IF ( myrow .EQ. ii ) THEN
273 CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
274 $ ldt, ierr )
275 CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
276 $ ldv, ierr )
277 ELSE
278 CALL descinit( desct, nh, nh, nh, nh, ii, jj, contxt,
279 $ 1, ierr )
280 CALL descinit( descv, nh, nh, nh, nh, ii, jj, contxt,
281 $ 1, ierr )
282 END IF
283 CALL pdgemr2d( nh, nh, a, ilo, ilo, desca, t, 1, 1, desct,
284 $ contxt )
285 IF ( myrow .EQ. ii .AND. mycol .EQ. jj ) THEN
286 CALL dlaset( 'all', NH, NH, ZERO, ONE, V, LDV )
287 NMIN = ILAENV( 12, 'dlaqr3', 'sv', NH, 1, NH, LWORK )
288.GT. IF( NH NMIN ) THEN
289 CALL DLAQR4( .TRUE., .TRUE., NH, 1, NH, T, LDT, WR( ILO ),
290 $ WI( ILO ), 1, NH, V, LDV, WORK, LWORK, INFO )
291* Clean up the scratch used by DLAQR4.
292 CALL DLASET( 'l', NH-2, NH-2, ZERO, ZERO, T( 3, 1 ), LDT )
293 ELSE
294 CALL DLAHQR( .TRUE., .TRUE., NH, 1, NH, T, LDT, WR( ILO ),
295 $ WI( ILO ), 1, NH, V, LDV, INFO )
296 END IF
297 CALL DGEBS2D( CONTXT, 'all', ' ', NH, NH, V, LDV )
298 CALL IGEBS2D( CONTXT, 'all', ' ', 1, 1, INFO, 1 )
299 ELSE
300 CALL DGEBR2D( CONTXT, 'all', ' ', NH, NH, V, LDV, II, JJ )
301 CALL IGEBR2D( CONTXT, 'all', ' ', 1, 1, INFO, 1, II, JJ )
302 END IF
303.NE. IF( INFO 0 ) INFO = INFO+ILO-1
304*
305* Copy the local matrix back to the diagonal block.
306*
307 CALL PDGEMR2D( NH, NH, T, 1, 1, DESCT, A, ILO, ILO, DESCA,
308 $ CONTXT )
309*
310* Update T and Z.
311*
312.LE. IF( MOD( ILO-1, HBL )+NH HBL ) THEN
313*
314* Simplest case: the diagonal block is located on one processor.
315* Call DGEMM directly to perform the update.
316*
317 HSTEP = LWORK / NH
318 VSTEP = HSTEP
319*
320 IF( WANTT ) THEN
321*
322* Update horizontal slab in A.
323*
324 CALL INFOG2L( ILO, I+1, DESCA, NPROW, NPCOL, MYROW,
325 $ MYCOL, IROW, ICOL, II, JJ )
326.EQ. IF( MYROW II ) THEN
327 ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
328 DO 10 KKCOL = ICOL, ICOL1, HSTEP
329 KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
330 CALL DGEMM( 't', 'n', NH, KLN, NH, ONE, V,
331 $ LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
332 $ NH )
333 CALL DLAMOV( 'a', NH, KLN, WORK, NH,
334 $ A( IROW+(KKCOL-1)*LDA ), LDA )
335 10 CONTINUE
336 END IF
337*
338* Update vertical slab in A.
339*
340 CALL INFOG2L( LTOP, ILO, DESCA, NPROW, NPCOL, MYROW,
341 $ MYCOL, IROW, ICOL, II, JJ )
342.EQ. IF( MYCOL JJ ) THEN
343 CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
344 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
345.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
346 DO 20 KKROW = IROW, IROW1, VSTEP
347 KLN = MIN( VSTEP, IROW1-KKROW+1 )
348 CALL DGEMM( 'n', 'n', KLN, NH, NH, ONE,
349 $ A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
350 $ WORK, KLN )
351 CALL DLAMOV( 'a', KLN, NH, WORK, KLN,
352 $ A( KKROW+(ICOL-1)*LDA ), LDA )
353 20 CONTINUE
354 END IF
355 END IF
356*
357* Update vertical slab in Z.
358*
359 IF( WANTZ ) THEN
360 CALL INFOG2L( ILOZ, ILO, DESCZ, NPROW, NPCOL, MYROW,
361 $ MYCOL, IROW, ICOL, II, JJ )
362.EQ. IF( MYCOL JJ ) THEN
363 CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
364 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
365.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
366 DO 30 KKROW = IROW, IROW1, VSTEP
367 KLN = MIN( VSTEP, IROW1-KKROW+1 )
368 CALL DGEMM( 'n', 'n', KLN, NH, NH, ONE,
369 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
370 $ WORK, KLN )
371 CALL DLAMOV( 'a', KLN, NH, WORK, KLN,
372 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ )
373 30 CONTINUE
374 END IF
375 END IF
376*
377.LE. ELSE IF( MOD( ILO-1, HBL )+NH 2*HBL ) THEN
378*
379* More complicated case: the diagonal block lay on a 2x2
380* processor mesh.
381* Call DGEMM locally and communicate by pair.
382*
383 D1 = HBL - MOD( ILO-1, HBL )
384 D2 = NH - D1
385 HSTEP = LWORK / NH
386 VSTEP = HSTEP
387*
388 IF( WANTT ) THEN
389*
390* Update horizontal slab in A.
391*
392 CALL INFOG2L( ILO, I+1, DESCA, NPROW, NPCOL, MYROW,
393 $ MYCOL, IROW, ICOL, II, JJ )
394.EQ. IF( MYROW UP ) THEN
395.EQ. IF( MYROW II ) THEN
396 ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
397 DO 40 KKCOL = ICOL, ICOL1, HSTEP
398 KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
399 CALL DGEMM( 't', 'n', NH, KLN, NH, ONE, V,
400 $ NH, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
401 $ WORK, NH )
402 CALL DLAMOV( 'a', NH, KLN, WORK, NH,
403 $ A( IROW+(KKCOL-1)*LDA ), LDA )
404 40 CONTINUE
405 END IF
406 ELSE
407.EQ. IF( MYROW II ) THEN
408 ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
409 DO 50 KKCOL = ICOL, ICOL1, HSTEP
410 KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
411 CALL DGEMM( 't', 'n', D2, KLN, D1, ONE,
412 $ V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
413 $ LDA, ZERO, WORK( D1+1 ), NH )
414 CALL DGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
415 $ NH, DOWN, MYCOL )
416 CALL DGERV2D( CONTXT, D1, KLN, WORK, NH, DOWN,
417 $ MYCOL )
418 CALL DGEMM( 't', 'n', D1, KLN, D1, ONE,
419 $ V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
420 $ WORK, NH )
421 CALL DLAMOV( 'a', D1, KLN, WORK, NH,
422 $ A( IROW+(KKCOL-1)*LDA ), LDA )
423 50 CONTINUE
424.EQ. ELSE IF( UP II ) THEN
425 ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
426 DO 60 KKCOL = ICOL, ICOL1, HSTEP
427 KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
428 CALL DGEMM( 't', 'n', D1, KLN, D2, ONE,
429 $ V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
430 $ LDA, ZERO, WORK, NH )
431 CALL DGESD2D( CONTXT, D1, KLN, WORK, NH, UP,
432 $ MYCOL )
433 CALL DGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
434 $ NH, UP, MYCOL )
435 CALL DGEMM( 't', 'n', D2, KLN, D2, ONE,
436 $ V( D1+1, D1+1 ), LDV,
437 $ A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
438 $ WORK( D1+1 ), NH )
439 CALL DLAMOV( 'a', D2, KLN, WORK( D1+1 ), NH,
440 $ A( IROW+(KKCOL-1)*LDA ), LDA )
441 60 CONTINUE
442 END IF
443 END IF
444*
445* Update vertical slab in A.
446*
447 CALL INFOG2L( LTOP, ILO, DESCA, NPROW, NPCOL, MYROW,
448 $ MYCOL, IROW, ICOL, II, JJ )
449.EQ. IF( MYCOL LEFT ) THEN
450.EQ. IF( MYCOL JJ ) THEN
451 CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
452 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
453.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
454 DO 70 KKROW = IROW, IROW1, VSTEP
455 KLN = MIN( VSTEP, IROW1-KKROW+1 )
456 CALL DGEMM( 'n', 'n', KLN, NH, NH, ONE,
457 $ A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV,
458 $ ZERO, WORK, KLN )
459 CALL DLAMOV( 'a', KLN, NH, WORK, KLN,
460 $ A( KKROW+(ICOL-1)*LDA ), LDA )
461 70 CONTINUE
462 END IF
463 ELSE
464.EQ. IF( MYCOL JJ ) THEN
465 CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
466 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
467.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
468 DO 80 KKROW = IROW, IROW1, VSTEP
469 KLN = MIN( VSTEP, IROW1-KKROW+1 )
470 CALL DGEMM( 'n', 'n', KLN, D2, D1, ONE,
471 $ A( KKROW+(ICOL-1)*LDA ), LDA, V( 1, D1+1 ),
472 $ LDV, ZERO, WORK( 1+D1*KLN ), KLN )
473 CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
474 $ KLN, MYROW, RIGHT )
475 CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
476 $ RIGHT )
477 CALL DGEMM( 'n', 'n', KLN, D1, D1, ONE,
478 $ A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
479 $ WORK, KLN )
480 CALL DLAMOV( 'a', KLN, D1, WORK, KLN,
481 $ A( KKROW+(ICOL-1)*LDA ), LDA )
482 80 CONTINUE
483.EQ. ELSE IF ( LEFT JJ ) THEN
484 CALL INFOG2L( ILO-1, ILO, DESCA, NPROW, NPCOL,
485 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
486.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
487 DO 90 KKROW = IROW, IROW1, VSTEP
488 KLN = MIN( VSTEP, IROW1-KKROW+1 )
489 CALL DGEMM( 'n', 'n', KLN, D1, D2, ONE,
490 $ A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
491 $ LDV, ZERO, WORK, KLN )
492 CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
493 $ LEFT )
494 CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
495 $ KLN, MYROW, LEFT )
496 CALL DGEMM( 'n', 'n', KLN, D2, D2, ONE,
497 $ A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
498 $ LDV, ONE, WORK( 1+D1*KLN ), KLN )
499 CALL DLAMOV( 'a', KLN, D2, WORK( 1+D1*KLN ), KLN,
500 $ A( KKROW+(ICOL-1)*LDA ), LDA )
501 90 CONTINUE
502 END IF
503 END IF
504 END IF
505*
506* Update vertical slab in Z.
507*
508 IF( WANTZ ) THEN
509 CALL INFOG2L( ILOZ, ILO, DESCZ, NPROW, NPCOL, MYROW,
510 $ MYCOL, IROW, ICOL, II, JJ )
511.EQ. IF( MYCOL LEFT ) THEN
512.EQ. IF( MYCOL JJ ) THEN
513 CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
514 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
515.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
516 DO 100 KKROW = IROW, IROW1, VSTEP
517 KLN = MIN( VSTEP, IROW1-KKROW+1 )
518 CALL DGEMM( 'n', 'n', KLN, NH, NH, ONE,
519 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
520 $ WORK, KLN )
521 CALL DLAMOV( 'a', KLN, NH, WORK, KLN,
522 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ )
523 100 CONTINUE
524 END IF
525 ELSE
526.EQ. IF( MYCOL JJ ) THEN
527 CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
528 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
529.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
530 DO 110 KKROW = IROW, IROW1, VSTEP
531 KLN = MIN( VSTEP, IROW1-KKROW+1 )
532 CALL DGEMM( 'n', 'n', KLN, D2, D1, ONE,
533 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ, V( 1, D1+1 ),
534 $ LDV, ZERO, WORK( 1+D1*KLN ), KLN )
535 CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
536 $ KLN, MYROW, RIGHT )
537 CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
538 $ RIGHT )
539 CALL DGEMM( 'n', 'n', KLN, D1, D1, ONE,
540 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
541 $ WORK, KLN )
542 CALL DLAMOV( 'a', KLN, D1, WORK, KLN,
543 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ )
544 110 CONTINUE
545.EQ. ELSE IF( LEFT JJ ) THEN
546 CALL INFOG2L( IHIZ, ILO, DESCZ, NPROW, NPCOL,
547 $ MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
548.NE. IF( MYROW ITMP1 ) IROW1 = IROW1-1
549 DO 120 KKROW = IROW, IROW1, VSTEP
550 KLN = MIN( VSTEP, IROW1-KKROW+1 )
551 CALL DGEMM( 'n', 'n', KLN, D1, D2, ONE,
552 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ, V( D1+1, 1 ),
553 $ LDV, ZERO, WORK, KLN )
554 CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
555 $ LEFT )
556 CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
557 $ KLN, MYROW, LEFT )
558 CALL DGEMM( 'n', 'n', KLN, D2, D2, ONE,
559 $ Z( KKROW+(ICOL-1)*LDZ ), LDZ,
560 $ V( D1+1, D1+1 ), LDV, ONE, WORK( 1+D1*KLN ),
561 $ KLN )
562 CALL DLAMOV( 'a', KLN, D2, WORK( 1+D1*KLN ),
563 $ KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
564 120 CONTINUE
565 END IF
566 END IF
567 END IF
568*
569 ELSE
570*
571* Most complicated case: the diagonal block lay across the border
572* of the processor mesh.
573* Treat V as a distributed matrix and call PDGEMM.
574*
575 HSTEP = LWORK / NH * NPCOL
576 VSTEP = LWORK / NH * NPROW
577 LLDTMP = NUMROC( NH, NH, MYROW, 0, NPROW )
578 LLDTMP = MAX( 1, LLDTMP )
579 CALL DESCINIT( DESCV, NH, NH, NH, NH, 0, 0, CONTXT,
580 $ LLDTMP, IERR )
581 CALL DESCINIT( DESCWH, NH, HSTEP, NH, LWORK / NH, 0, 0,
582 $ CONTXT, LLDTMP, IERR )
583*
584 IF( WANTT ) THEN
585*
586* Update horizontal slab in A.
587*
588 DO 130 KKCOL = I+1, N, HSTEP
589 KLN = MIN( HSTEP, N-KKCOL+1 )
590 CALL PDGEMM( 't', 'n', NH, KLN, NH, ONE, V, 1, 1,
591 $ DESCV, A, ILO, KKCOL, DESCA, ZERO, WORK, 1, 1,
592 $ DESCWH )
593 CALL PDGEMR2D( NH, KLN, WORK, 1, 1, DESCWH, A,
594 $ ILO, KKCOL, DESCA, CONTXT )
595 130 CONTINUE
596*
597* Update vertical slab in A.
598*
599 DO 140 KKROW = LTOP, ILO-1, VSTEP
600 KLN = MIN( VSTEP, ILO-KKROW )
601 LLDTMP = NUMROC( KLN, LWORK / NH, MYROW, 0, NPROW )
602 LLDTMP = MAX( 1, LLDTMP )
603 CALL DESCINIT( DESCWV, KLN, NH, LWORK / NH, NH, 0, 0,
604 $ CONTXT, LLDTMP, IERR )
605 CALL PDGEMM( 'n', 'n', KLN, NH, NH, ONE, A, KKROW,
606 $ ILO, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
607 $ DESCWV )
608 CALL PDGEMR2D( KLN, NH, WORK, 1, 1, DESCWV, A, KKROW,
609 $ ILO, DESCA, CONTXT )
610 140 CONTINUE
611 END IF
612*
613* Update vertical slab in Z.
614*
615 IF( WANTZ ) THEN
616 DO 150 KKROW = ILOZ, IHIZ, VSTEP
617 KLN = MIN( VSTEP, IHIZ-KKROW+1 )
618 LLDTMP = NUMROC( KLN, LWORK / NH, MYROW, 0, NPROW )
619 LLDTMP = MAX( 1, LLDTMP )
620 CALL DESCINIT( DESCWV, KLN, NH, LWORK / NH, NH, 0, 0,
621 $ CONTXT, LLDTMP, IERR )
622 CALL PDGEMM( 'n', 'n', KLN, NH, NH, ONE, Z, KKROW,
623 $ ILO, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
624 $ DESCWV )
625 CALL PDGEMR2D( KLN, NH, WORK, 1, 1, DESCWV, Z,
626 $ KKROW, ILO, DESCZ, CONTXT )
627 150 CONTINUE
628 END IF
629 END IF
630*
631* END OF PDLAQR4
632*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz, info)
DLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix,...
Definition dlahqr.f:207
subroutine dlaqr4(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz, work, lwork, info)
DLAQR4 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur de...
Definition dlaqr4.f:263
subroutine dlaqr3(wantt, wantz, n, ktop, kbot, nw, h, ldh, iloz, ihiz, z, ldz, ns, nd, sr, si, v, ldv, nh, t, ldt, nv, wv, ldwv, work, lwork)
DLAQR3 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate...
Definition dlaqr3.f:275
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786