OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdlaqr5.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pdlaqr5 (wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, desch, iloz, ihiz, z, descz, work, lwork, iwork, liwork)

Function/Subroutine Documentation

◆ pdlaqr5()

subroutine pdlaqr5 ( logical wantt,
logical wantz,
integer kacc22,
integer n,
integer ktop,
integer kbot,
integer nshfts,
double precision, dimension( * ) sr,
double precision, dimension( * ) si,
double precision, dimension( * ) h,
integer, dimension( * ) desch,
integer iloz,
integer ihiz,
double precision, dimension( * ) z,
integer, dimension( * ) descz,
double precision, dimension( * ) work,
integer lwork,
integer, dimension( * ) iwork,
integer liwork )

Definition at line 1 of file pdlaqr5.f.

4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,
16 $ LWORK, LIWORK
17 LOGICAL WANTT, WANTZ
18* ..
19* .. Array Arguments ..
20 INTEGER DESCH( * ), DESCZ( * ), IWORK( * )
21 DOUBLE PRECISION H( * ), SI( * ), SR( * ), Z( * ), WORK( * )
22* ..
23*
24* Purpose
25* =======
26*
27* This auxiliary subroutine called by PDLAQR0 performs a
28* single small-bulge multi-shift QR sweep by chasing separated
29* groups of bulges along the main block diagonal of H.
30*
31* WANTT (global input) logical scalar
32* WANTT = .TRUE. if the quasi-triangular Schur factor
33* is being computed. WANTT is set to .FALSE. otherwise.
34*
35* WANTZ (global input) logical scalar
36* WANTZ = .TRUE. if the orthogonal Schur factor is being
37* computed. WANTZ is set to .FALSE. otherwise.
38*
39* KACC22 (global input) integer with value 0, 1, or 2.
40* Specifies the computation mode of far-from-diagonal
41* orthogonal updates.
42* = 1: PDLAQR5 accumulates reflections and uses matrix-matrix
43* multiply to update the far-from-diagonal matrix entries.
44* = 2: PDLAQR5 accumulates reflections, uses matrix-matrix
45* multiply to update the far-from-diagonal matrix entries,
46* and takes advantage of 2-by-2 block structure during
47* matrix multiplies.
48*
49* N (global input) integer scalar
50* N is the order of the Hessenberg matrix H upon which this
51* subroutine operates.
52*
53* KTOP (global input) integer scalar
54* KBOT (global input) integer scalar
55* These are the first and last rows and columns of an
56* isolated diagonal block upon which the QR sweep is to be
57* applied. It is assumed without a check that
58* either KTOP = 1 or H(KTOP,KTOP-1) = 0
59* and
60* either KBOT = N or H(KBOT+1,KBOT) = 0.
61*
62* NSHFTS (global input) integer scalar
63* NSHFTS gives the number of simultaneous shifts. NSHFTS
64* must be positive and even.
65*
66* SR (global input) DOUBLE PRECISION array of size (NSHFTS)
67* SI (global input) DOUBLE PRECISION array of size (NSHFTS)
68* SR contains the real parts and SI contains the imaginary
69* parts of the NSHFTS shifts of origin that define the
70* multi-shift QR sweep.
71*
72* H (local input/output) DOUBLE PRECISION array of size
73* (DESCH(LLD_),*)
74* On input H contains a Hessenberg matrix. On output a
75* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
76* to the isolated diagonal block in rows and columns KTOP
77* through KBOT.
78*
79* DESCH (global and local input) INTEGER array of dimension DLEN_.
80* The array descriptor for the distributed matrix H.
81*
82* ILOZ (global input) INTEGER
83* IHIZ (global input) INTEGER
84* Specify the rows of Z to which transformations must be
85* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
86*
87* Z (local input/output) DOUBLE PRECISION array of size
88* (DESCZ(LLD_),*)
89* If WANTZ = .TRUE., then the QR Sweep orthogonal
90* similarity transformation is accumulated into
91* Z(ILOZ:IHIZ,ILO:IHI) from the right.
92* If WANTZ = .FALSE., then Z is unreferenced.
93*
94* DESCZ (global and local input) INTEGER array of dimension DLEN_.
95* The array descriptor for the distributed matrix Z.
96*
97* WORK (local workspace) DOUBLE PRECISION array, dimension(DWORK)
98*
99* LWORK (local input) INTEGER
100* The length of the workspace array WORK.
101*
102* IWORK (local workspace) INTEGER array, dimension (LIWORK)
103*
104* LIWORK (local input) INTEGER
105* The length of the workspace array IWORK.
106*
107* ================================================================
108* Based on contributions by
109* Robert Granat, Department of Computing Science and HPC2N,
110* University of Umea, Sweden.
111*
112* ============================================================
113* References:
114* K. Braman, R. Byers, and R. Mathias,
115* The Multi-Shift QR Algorithm Part I: Maintaining Well Focused
116* Shifts, and Level 3 Performance.
117* SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.
118*
119* R. Granat, B. Kagstrom, and D. Kressner,
120* A Novel Parallel QR Algorithm for Hybrid Distributed Momory HPC
121* Systems.
122* SIAM J. Sci. Comput., 32(4):2345--2378, 2010.
123*
124* ============================================================
125* .. Parameters ..
126 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
127 $ LLD_, MB_, M_, NB_, N_, RSRC_
128 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
129 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
130 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
131 DOUBLE PRECISION ZERO, ONE
132 parameter( zero = 0.0d0, one = 1.0d0 )
133 INTEGER NTINY
134 parameter( ntiny = 11 )
135* ..
136* .. Local Scalars ..
137 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
138 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
139 $ ULP, TAU, ELEM, STAMP, DDUM, ORTH
140 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
141 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
142 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
143 $ NS, NU, LLDH, LLDZ, LLDU, LLDV, LLDW, LLDWH,
144 $ INFO, ICTXT, NPROW, NPCOL, NB, IROFFH, ITOP,
145 $ NWIN, MYROW, MYCOL, LNS, NUMWIN, LKACC22,
146 $ LCHAIN, WIN, IDONEJOB, IPNEXT, ANMWIN, LENRBUF,
147 $ LENCBUF, ICHOFF, LRSRC, LCSRC, LKTOP, LKBOT,
148 $ II, JJ, SWIN, EWIN, LNWIN, DIM, LLKTOP, LLKBOT,
149 $ IPV, IPU, IPH, IPW, KU, KWH, KWV, NVE, LKS,
150 $ IDUM, NHO, DIR, WINID, INDX, ILOC, JLOC, RSRC1,
151 $ CSRC1, RSRC2, CSRC2, RSRC3, CSRC3, RSRC4, IPUU,
152 $ CSRC4, LROWS, LCOLS, INDXS, KS, JLOC1, ILOC1,
153 $ LKTOP1, LKTOP2, WCHUNK, NUMCHUNK, ODDEVEN,
154 $ CHUNKNUM, DIM1, DIM4, IPW3, HROWS, ZROWS,
155 $ HCOLS, IPW1, IPW2, RSRC, EAST, JLOC4, ILOC4,
156 $ WEST, CSRC, SOUTH, NORHT, INDXE, NORTH,
157 $ IHH, IPIW, LKBOT1, NPROCS, LIROFFH,
158 $ WINFIN, RWS3, CLS3, INDX2, HROWS2,
159 $ ZROWS2, HCOLS2, MNRBUF,
160 $ MXRBUF, MNCBUF, MXCBUF, LWKOPT
161 LOGICAL BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,
162 $ ODDNPCOL, LQUERY, BCDONE
163 CHARACTER JBCMPZ*2, JOB
164* ..
165* .. External Functions ..
166 LOGICAL LSAME
167 INTEGER PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC
168 DOUBLE PRECISION DLAMCH, DLANGE
169 EXTERNAL dlamch, pilaenvx, iceil, indxg2p, indxg2l,
171* ..
172* .. Intrinsic Functions ..
173 INTRINSIC abs, dble, max, min, mod
174* ..
175* .. Local Arrays ..
176 DOUBLE PRECISION VT( 3 )
177* ..
178* .. External Subroutines ..
179 EXTERNAL dgemm, dlabad, dlamov, dlaqr1, dlarfg, dlaset,
180 $ dtrmm, dlaqr6
181* ..
182* .. Executable Statements ..
183*
184 info = 0
185 ictxt = desch( ctxt_ )
186 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
187 nprocs = nprow*npcol
188 lldh = desch( lld_ )
189 lldz = descz( lld_ )
190 nb = desch( mb_ )
191 iroffh = mod( ktop - 1, nb )
192 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
193*
194* If there are no shifts, then there is nothing to do.
195*
196 IF( .NOT. lquery .AND. nshfts.LT.2 )
197 $ RETURN
198*
199* If the active block is empty or 1-by-1, then there
200* is nothing to do.
201*
202 IF( .NOT. lquery .AND. ktop.GE.kbot )
203 $ RETURN
204*
205* Shuffle shifts into pairs of real shifts and pairs of
206* complex conjugate shifts assuming complex conjugate
207* shifts are already adjacent to one another.
208*
209 IF( .NOT. lquery ) THEN
210 DO 10 i = 1, nshfts - 2, 2
211 IF( si( i ).NE.-si( i+1 ) ) THEN
212*
213 swap = sr( i )
214 sr( i ) = sr( i+1 )
215 sr( i+1 ) = sr( i+2 )
216 sr( i+2 ) = swap
217*
218 swap = si( i )
219 si( i ) = si( i+1 )
220 si( i+1 ) = si( i+2 )
221 si( i+2 ) = swap
222 END IF
223 10 CONTINUE
224 END IF
225*
226* NSHFTS is supposed to be even, but if is odd,
227* then simply reduce it by one. The shuffle above
228* ensures that the dropped shift is real and that
229* the remaining shifts are paired.
230*
231 ns = nshfts - mod( nshfts, 2 )
232*
233* Extract the size of the computational window.
234*
235 nwin = pilaenvx( ictxt, 19, 'PDLAQR5', jbcmpz, n, nb, nb, nb )
236 nwin = min( nwin, kbot-ktop+1 )
237*
238* Adjust number of simultaneous shifts if it exceeds the limit
239* set by the number of diagonal blocks in the active submatrix
240* H(KTOP:KBOT,KTOP:KBOT).
241*
242 ns = max( 2, min( ns, iceil( kbot-ktop+1, nb )*nwin/3 ) )
243 ns = ns - mod( ns, 2 )
244
245*
246* Decide the number of simultaneous computational windows
247* from the number of shifts - each window should contain up to
248* (NWIN / 3) shifts. Also compute the number of shifts per
249* window and make sure that number is even.
250*
251 lns = min( max( 2, nwin / 3 ), max( 2, ns / min(nprow,npcol) ) )
252 lns = lns - mod( lns, 2 )
253 numwin = max( 1, min( iceil( ns, lns ),
254 $ iceil( kbot-ktop+1, nb ) - 1 ) )
255 IF( nprow.NE.npcol ) THEN
256 numwin = min( numwin, min(nprow,npcol) )
257 lns = min( lns, max( 2, ns / min(nprow,npcol) ) )
258 lns = lns - mod( lns, 2 )
259 END IF
260*
261* Machine constants for deflation.
262*
263 safmin = dlamch( 'SAFE MINIMUM' )
264 safmax = one / safmin
265 CALL dlabad( safmin, safmax )
266 ulp = dlamch( 'PRECISION' )
267 smlnum = safmin*( dble( n ) / ulp )
268*
269* Use accumulated reflections to update far-from-diagonal
270* entries on a local level?
271*
272 IF( lns.LT.14 ) THEN
273 lkacc22 = 1
274 ELSE
275 lkacc22 = 2
276 END IF
277*
278* If so, exploit the 2-by-2 block structure?
279* ( Usually it is not efficient to exploit the 2-by-2 structure
280* because the block size is too small. )
281*
282 blk22 = ( lns.GT.2 ) .AND. ( kacc22.EQ.2 )
283*
284* Clear trash.
285*
286 IF( .NOT. lquery .AND. ktop+2.LE.kbot )
287 $ CALL pdelset( h, ktop+2, ktop, desch, zero )
288*
289* NBMPS = number of 2-shift bulges in each chain
290*
291 nbmps = lns / 2
292*
293* KDU = width of slab
294*
295 kdu = 6*nbmps - 3
296*
297* LCHAIN = length of each chain
298*
299 lchain = 3 * nbmps + 1
300*
301* Check if workspace query.
302*
303 IF( lquery ) THEN
304 hrows = numroc( n, nb, myrow, desch(rsrc_), nprow )
305 hcols = numroc( n, nb, mycol, desch(csrc_), npcol )
306 lwkopt = (5+2*numwin)*nb**2 + 2*hrows*nb + hcols*nb +
307 $ max( hrows*nb, hcols*nb )
308 work(1) = dble(lwkopt)
309 iwork(1) = 5*numwin
310 RETURN
311 END IF
312*
313* Check if KTOP and KBOT are valid.
314*
315 IF( ktop.LT.1 .OR. kbot.GT.n ) RETURN
316*
317* Create and chase NUMWIN chains of NBMPS bulges.
318*
319* Set up window introduction.
320*
321 anmwin = 0
322 intro = .true.
323 ipiw = 1
324*
325* Main loop:
326* While-loop over the computational windows which is
327* terminated when all windows have been introduced,
328* chased down to the bottom of the considered submatrix
329* and chased off.
330*
331 20 CONTINUE
332*
333* Set up next window as long as we have less than the prescribed
334* number of windows. Each window is described an integer quadruple:
335* 1. Local value of KTOP (below denoted by LKTOP)
336* 2. Local value of KBOT (below denoted by LKBOT)
337* 3-4. Processor indices (LRSRC,LCSRC) associated with the window.
338* (5. Mark that decides if a window is fully processed or not)
339*
340* Notice - the next window is only introduced if the first block
341* in the active submatrix does not contain any other windows.
342*
343 IF( anmwin.GT.0 ) THEN
344 lktop = iwork( 1+(anmwin-1)*5 )
345 ELSE
346 lktop = ktop
347 END IF
348 IF( intro .AND. (anmwin.EQ.0 .OR. lktop.GT.iceil(ktop,nb)*nb) )
349 $ THEN
350 anmwin = anmwin + 1
351*
352* Structure of IWORK:
353* IWORK( 1+(WIN-1)*5 ): start position
354* IWORK( 2+(WIN-1)*5 ): stop position
355* IWORK( 3+(WIN-1)*5 ): processor row id
356* IWORK( 4+(WIN-1)*5 ): processor col id
357* IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)
358*
359 iwork( 1+(anmwin-1)*5 ) = ktop
360 iwork( 2+(anmwin-1)*5 ) = ktop +
361 $ min( nwin,nb-iroffh,kbot-ktop+1 ) - 1
362 iwork( 3+(anmwin-1)*5 ) = indxg2p( iwork(1+(anmwin-1)*5), nb,
363 $ myrow, desch(rsrc_), nprow )
364 iwork( 4+(anmwin-1)*5 ) = indxg2p( iwork(2+(anmwin-1)*5), nb,
365 $ mycol, desch(csrc_), npcol )
366 iwork( 5+(anmwin-1)*5 ) = 0
367 ipiw = 6+(anmwin-1)*5
368 IF( anmwin.EQ.numwin ) intro = .false.
369 END IF
370*
371* Do-loop over the number of windows.
372*
373 ipnext = 1
374 donejob = .false.
375 idonejob = 0
376 lenrbuf = 0
377 lencbuf = 0
378 ichoff = 0
379 DO 40 win = 1, anmwin
380*
381* Extract window information to simplify the rest.
382*
383 lrsrc = iwork( 3+(win-1)*5 )
384 lcsrc = iwork( 4+(win-1)*5 )
385 lktop = iwork( 1+(win-1)*5 )
386 lkbot = iwork( 2+(win-1)*5 )
387 lnwin = lkbot - lktop + 1
388*
389* Check if anything to do for current window, i.e., if the local
390* chain of bulges has reached the next block border etc.
391*
392 IF( iwork(5+(win-1)*5).LT.2 .AND. lnwin.GT.1 .AND.
393 $ (lnwin.GT.lchain .OR. lkbot.EQ.kbot ) ) THEN
394 liroffh = mod(lktop-1,nb)
395 swin = lktop-liroffh
396 ewin = min(kbot,lktop-liroffh+nb-1)
397 dim = ewin-swin+1
398 IF( dim.LE.ntiny .AND. .NOT.lkbot.EQ.kbot ) THEN
399 iwork( 5+(win-1)*5 ) = 2
400 GO TO 45
401 END IF
402 idonejob = 1
403 IF( iwork(5+(win-1)*5).EQ.0 ) THEN
404 iwork(5+(win-1)*5) = 1
405 END IF
406*
407* Let the process that owns the corresponding window do the
408* local bulge chase.
409*
410 IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
411*
412* Set the kind of job to do in DLAQR6:
413* 1. JOB = 'I': Introduce and chase bulges in window WIN
414* 2. JOB = 'C': Chase bulges from top to bottom of window WIN
415* 3. JOB = 'O': Chase bulges off window WIN
416* 4. JOB = 'A': All of 1-3 above is done - this will for
417* example happen for very small active
418* submatrices (like 2-by-2)
419*
420 llkbot = llktop + lnwin - 1
421 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
422 job = 'All steps'
423 ichoff = 1
424 ELSEIF( lktop.EQ.ktop ) THEN
425 job = 'Introduce and chase'
426 ELSEIF( lkbot.EQ.kbot ) THEN
427 job = 'Off-chase bulges'
428 ichoff = 1
429 ELSE
430 job = 'Chase bulges'
431 END IF
432*
433* Copy submatrix of H corresponding to window WIN into
434* workspace and set out additional workspace for storing
435* orthogonal transformations. This submatrix must be at
436* least (NTINY+1)-by-(NTINY+1) to fit into DLAQR6 - if not,
437* abort and go for cross border bulge chasing with this
438* particular window.
439*
440 ii = indxg2l( swin, nb, myrow, desch(rsrc_), nprow )
441 jj = indxg2l( swin, nb, mycol, desch(csrc_), npcol )
442 llktop = 1 + liroffh
443 llkbot = llktop + lnwin - 1
444*
445 ipu = ipnext
446 iph = ipu + lnwin**2
447 ipuu = iph + max(ntiny+1,dim)**2
448 ipv = ipuu + max(ntiny+1,dim)**2
449 ipnext = iph
450*
451 IF( lsame( job, 'a.OR.' ) LSAME( JOB, 'o.AND.' )
452.LT. $ DIMNTINY+1 ) THEN
453 CALL DLASET( 'all', NTINY+1, NTINY+1, ZERO, ONE,
454 $ WORK(IPH), NTINY+1 )
455 END IF
456 CALL DLAMOV( 'upper', DIM, DIM, H(II+(JJ-1)*LLDH), LLDH,
457 $ WORK(IPH), MAX(NTINY+1,DIM) )
458 CALL DCOPY( DIM-1, H(II+(JJ-1)*LLDH+1), LLDH+1,
459 $ WORK(IPH+1), MAX(NTINY+1,DIM)+1 )
460 IF( LSAME( JOB, 'c.OR.' ) LSAME( JOB, 'o') ) THEN
461 CALL DCOPY( DIM-2, H(II+(JJ-1)*LLDH+2), LLDH+1,
462 $ WORK(IPH+2), MAX(NTINY+1,DIM)+1 )
463 CALL DCOPY( DIM-3, H(II+(JJ-1)*LLDH+3), LLDH+1,
464 $ WORK(IPH+3), MAX(NTINY+1,DIM)+1 )
465 CALL DLASET( 'lower', DIM-4, DIM-4, ZERO,
466 $ ZERO, WORK(IPH+4), MAX(NTINY+1,DIM) )
467 ELSE
468 CALL DLASET( 'lower', DIM-2, DIM-2, ZERO,
469 $ ZERO, WORK(IPH+2), MAX(NTINY+1,DIM) )
470 END IF
471*
472 KU = MAX(NTINY+1,DIM) - KDU + 1
473 KWH = KDU + 1
474 NHO = ( MAX(NTINY+1,DIM)-KDU+1-4 ) - ( KDU+1 ) + 1
475 KWV = KDU + 4
476 NVE = MAX(NTINY+1,DIM) - KDU - KWV + 1
477 CALL DLASET( 'all', MAX(NTINY+1,DIM),
478 $ MAX(NTINY+1,DIM), ZERO, ONE, WORK(IPUU),
479 $ MAX(NTINY+1,DIM) )
480*
481* Small-bulge multi-shift QR sweep.
482*
483 LKS = MAX( 1, NS - WIN*LNS + 1 )
484 CALL DLAQR6( JOB, WANTT, .TRUE., LKACC22,
485 $ MAX(NTINY+1,DIM), LLKTOP, LLKBOT, LNS, SR( LKS ),
486 $ SI( LKS ), WORK(IPH), MAX(NTINY+1,DIM), LLKTOP,
487 $ LLKBOT, WORK(IPUU), MAX(NTINY+1,DIM), WORK(IPU),
488 $ 3, WORK( IPH+KU-1 ),
489 $ MAX(NTINY+1,DIM), NVE, WORK( IPH+KWV-1 ),
490 $ MAX(NTINY+1,DIM), NHO, WORK( IPH-1+KU+(KWH-1)*
491 $ MAX(NTINY+1,DIM) ), MAX(NTINY+1,DIM) )
492*
493* Copy submatrix of H back.
494*
495 CALL DLAMOV( 'upper', DIM, DIM, WORK(IPH),
496 $ MAX(NTINY+1,DIM), H(II+(JJ-1)*LLDH), LLDH )
497 CALL DCOPY( DIM-1, WORK(IPH+1), MAX(NTINY+1,DIM)+1,
498 $ H(II+(JJ-1)*LLDH+1), LLDH+1 )
499 IF( LSAME( JOB, 'i.OR.' ) LSAME( JOB, 'c' ) ) THEN
500 CALL DCOPY( DIM-2, WORK(IPH+2), DIM+1,
501 $ H(II+(JJ-1)*LLDH+2), LLDH+1 )
502 CALL DCOPY( DIM-3, WORK(IPH+3), DIM+1,
503 $ H(II+(JJ-1)*LLDH+3), LLDH+1 )
504 ELSE
505 CALL DLASET( 'lower', dim-2, dim-2, zero,
506 $ zero, h(ii+(jj-1)*lldh+2), lldh )
507 END IF
508*
509* Copy actual submatrix of U to the correct place
510* of the buffer.
511*
512 CALL dlamov( 'All', lnwin, lnwin,
513 $ work(ipuu+(max(ntiny+1,dim)*liroffh)+liroffh),
514 $ max(ntiny+1,dim), work(ipu), lnwin )
515 END IF
516*
517* In case the local submatrix was smaller than
518* (NTINY+1)-by-(NTINY+1) we go here and proceed.
519*
520 45 CONTINUE
521 ELSE
522 iwork( 5+(win-1)*5 ) = 2
523 END IF
524*
525* Increment counter for buffers of orthogonal transformations.
526*
527 IF( myrow.EQ.lrsrc .OR. mycol.EQ.lcsrc ) THEN
528 IF( idonejob.EQ.1 .AND. iwork(5+(win-1)*5).LT.2 ) THEN
529 IF( myrow.EQ.lrsrc ) lenrbuf = lenrbuf + lnwin*lnwin
530 IF( mycol.EQ.lcsrc ) lencbuf = lencbuf + lnwin*lnwin
531 END IF
532 END IF
533 40 CONTINUE
534*
535* Did some work in the above do-loop?
536*
537 CALL igsum2d( ictxt, 'All', '1-Tree', 1, 1, idonejob, 1, -1, -1 )
538 donejob = idonejob.GT.0
539*
540* Chased off bulges from first window?
541*
542 IF( nprocs.GT.1 )
543 $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1, -1,
544 $ -1, -1, -1, -1 )
545*
546* If work was done in the do-loop over local windows, perform
547* updates, otherwise go for cross border bulge chasing and updates.
548*
549 IF( donejob ) THEN
550*
551* Broadcast orthogonal transformations.
552*
553 49 CONTINUE
554 IF( lenrbuf.GT.0 .OR. lencbuf.GT.0 ) THEN
555 DO 50 dir = 1, 2
556 bcdone = .false.
557 DO 60 win = 1, anmwin
558 IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.
559 $ bcdone ) GO TO 62
560 lrsrc = iwork( 3+(win-1)*5 )
561 lcsrc = iwork( 4+(win-1)*5 )
562 IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN
563 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
564 $ npcol.GT.1 ) THEN
565 CALL dgebs2d( ictxt, 'Row', '1-Tree', lenrbuf,
566 $ 1, work, lenrbuf )
567 ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
568 $ nprow.GT.1 ) THEN
569 CALL dgebs2d( ictxt, 'Col', '1-Tree', lencbuf,
570 $ 1, work, lencbuf )
571 END IF
572 IF( lenrbuf.GT.0 )
573 $ CALL dlamov( 'All', lenrbuf, 1, work, lenrbuf,
574 $ work(1+lenrbuf), lencbuf )
575 bcdone = .true.
576 ELSEIF( myrow.EQ.lrsrc .AND. dir.EQ.1 ) THEN
577 IF( lenrbuf.GT.0 .AND. npcol.GT.1 ) THEN
578 CALL dgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,
579 $ 1, work, lenrbuf, lrsrc, lcsrc )
580 bcdone = .true.
581 END IF
582 ELSEIF( mycol.EQ.lcsrc .AND. dir.EQ.2 ) THEN
583 IF( lencbuf.GT.0 .AND. nprow.GT.1 ) THEN
584 CALL dgebr2d( ictxt, 'Col', '1-Tree', lencbuf,
585 $ 1, work(1+lenrbuf), lencbuf, lrsrc, lcsrc )
586 bcdone = .true.
587 END IF
588 END IF
589 62 CONTINUE
590 60 CONTINUE
591 50 CONTINUE
592 END IF
593*
594* Compute updates - make sure to skip windows that was skipped
595* regarding local bulge chasing.
596*
597 DO 65 dir = 1, 2
598 winid = 0
599 IF( dir.EQ.1 ) THEN
600 ipnext = 1
601 ELSE
602 ipnext = 1 + lenrbuf
603 END IF
604 DO 70 win = 1, anmwin
605 IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 75
606 lrsrc = iwork( 3+(win-1)*5 )
607 lcsrc = iwork( 4+(win-1)*5 )
608 lktop = iwork( 1+(win-1)*5 )
609 lkbot = iwork( 2+(win-1)*5 )
610 lnwin = lkbot - lktop + 1
611 IF( (myrow.EQ.lrsrc.AND.lenrbuf.GT.0.AND.dir.EQ.1) .OR.
612 $ (mycol.EQ.lcsrc.AND.lencbuf.GT.0.AND.dir.EQ.2 ) )
613 $ THEN
614*
615* Set up workspaces.
616*
617 ipu = ipnext
618 ipnext = ipu + lnwin*lnwin
619 ipw = 1 + lenrbuf + lencbuf
620 liroffh = mod(lktop-1,nb)
621 winid = winid + 1
622*
623* Recompute JOB to see if block structure of U could
624* possibly be exploited or not.
625*
626 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN
627 job = 'All steps'
628 ELSEIF( lktop.EQ.ktop ) THEN
629 job = 'Introduce and chase'
630 ELSEIF( lkbot.EQ.kbot ) THEN
631 job = 'Off-chase bulges'
632 ELSE
633 job = 'Chase bulges'
634 END IF
635 END IF
636*
637* Use U to update far-from-diagonal entries in H.
638* If required, use U to update Z as well.
639*
640 IF( .NOT. blk22 .OR. .NOT. lsame(job,'C')
641 $ .OR. lns.LE.2 ) THEN
642*
643 IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
644 $ mycol.EQ.lcsrc ) THEN
645 IF( wantt ) THEN
646 DO 80 indx = 1, lktop-liroffh-1, nb
647 CALL infog2l( indx, lktop, desch, nprow,
648 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
649 $ csrc1 )
650 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
651 lrows = min( nb, lktop-indx )
652 CALL dgemm('No transpose', 'No transpose',
653 $ lrows, lnwin, lnwin, one,
654 $ h((jloc-1)*lldh+iloc), lldh,
655 $ work( ipu ), lnwin, zero,
656 $ work(ipw),
657 $ lrows )
658 CALL dlamov( 'All', lrows, lnwin,
659 $ work(ipw), lrows,
660 $ h((jloc-1)*lldh+iloc), lldh )
661 END IF
662 80 CONTINUE
663 END IF
664 IF( wantz ) THEN
665 DO 90 indx = 1, n, nb
666 CALL infog2l( indx, lktop, descz, nprow,
667 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
668 $ csrc1 )
669 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
670 lrows = min(nb,n-indx+1)
671 CALL dgemm( 'No transpose',
672 $ 'No transpose', lrows, lnwin, lnwin,
673 $ one, z((jloc-1)*lldz+iloc), lldz,
674 $ work( ipu ), lnwin, zero,
675 $ work(ipw), lrows )
676 CALL dlamov( 'All', lrows, lnwin,
677 $ work(ipw), lrows,
678 $ z((jloc-1)*lldz+iloc), lldz )
679 END IF
680 90 CONTINUE
681 END IF
682 END IF
683*
684* Update the rows of H affected by the bulge-chase.
685*
686 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
687 $ myrow.EQ.lrsrc ) THEN
688 IF( wantt ) THEN
689 IF( iceil(lkbot,nb).EQ.iceil(kbot,nb) ) THEN
690 lcols = min(iceil(kbot,nb)*nb,n) - kbot
691 ELSE
692 lcols = 0
693 END IF
694 IF( lcols.GT.0 ) THEN
695 indx = kbot + 1
696 CALL infog2l( lktop, indx, desch, nprow,
697 $ npcol, myrow, mycol, iloc, jloc,
698 $ rsrc1, csrc1 )
699 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
700 CALL dgemm( 'Transpose', 'No Transpose',
701 $ lnwin, lcols, lnwin, one, work(ipu),
702 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
703 $ zero, work(ipw), lnwin )
704 CALL dlamov( 'All', lnwin, lcols,
705 $ work(ipw), lnwin,
706 $ h((jloc-1)*lldh+iloc), lldh )
707 END IF
708 END IF
709 93 CONTINUE
710 indxs = iceil(lkbot,nb)*nb + 1
711 DO 95 indx = indxs, n, nb
712 CALL infog2l( lktop, indx,
713 $ desch, nprow, npcol, myrow, mycol,
714 $ iloc, jloc, rsrc1, csrc1 )
715 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
716 lcols = min( nb, n-indx+1 )
717 CALL dgemm( 'Transpose', 'No Transpose',
718 $ lnwin, lcols, lnwin, one, work(ipu),
719 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
720 $ zero, work(ipw),
721 $ lnwin )
722 CALL dlamov( 'All', lnwin, lcols,
723 $ work(ipw), lnwin,
724 $ h((jloc-1)*lldh+iloc), lldh )
725 END IF
726 95 CONTINUE
727 END IF
728 END IF
729 ELSE
730 ks = lnwin-lns/2*3
731*
732* The LNWIN-by-LNWIN matrix U containing the accumulated
733* orthogonal transformations has the following structure:
734*
735* [ U11 U12 ]
736* U = [ ],
737* [ U21 U22 ]
738*
739* where U21 is KS-by-KS upper triangular and U12 is
740* (LNWIN-KS)-by-(LNWIN-KS) lower triangular.
741* Here, KS = LNS.
742*
743* Update the columns of H and Z affected by the bulge
744* chasing.
745*
746* Compute H2*U21 + H1*U11 in workspace.
747*
748 IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.
749 $ mycol.EQ.lcsrc ) THEN
750 IF( wantt ) THEN
751 DO 100 indx = 1, lktop-liroffh-1, nb
752 CALL infog2l( indx, lktop, desch, nprow,
753 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
754 $ csrc1 )
755 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
756 jloc1 = indxg2l( lktop+lnwin-ks, nb,
757 $ mycol, desch( csrc_ ), npcol )
758 lrows = min( nb, lktop-indx )
759 CALL dlamov( 'All', lrows, ks,
760 $ h((jloc1-1)*lldh+iloc ), lldh,
761 $ work(ipw), lrows )
762 CALL dtrmm( 'Right', 'Upper',
763 $ 'No transpose','Non-unit', lrows,
764 $ ks, one, work( ipu+lnwin-ks ), lnwin,
765 $ work(ipw), lrows )
766 CALL dgemm('No transpose', 'No transpose',
767 $ lrows, ks, lnwin-ks, one,
768 $ h((jloc-1)*lldh+iloc), lldh,
769 $ work( ipu ), lnwin, one, work(ipw),
770 $ lrows )
771*
772* Compute H1*U12 + H2*U22 in workspace.
773*
774 CALL dlamov( 'All', lrows, lnwin-ks,
775 $ h((jloc-1)*lldh+iloc), lldh,
776 $ work( ipw+ks*lrows ), lrows )
777 CALL dtrmm( 'Right', 'Lower',
778 $ 'No transpose', 'Non-Unit',
779 $ lrows, lnwin-ks, one,
780 $ work( ipu+lnwin*ks ), lnwin,
781 $ work( ipw+ks*lrows ), lrows )
782 CALL dgemm('No transpose', 'No transpose',
783 $ lrows, lnwin-ks, ks, one,
784 $ h((jloc1-1)*lldh+iloc), lldh,
785 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
786 $ one, work( ipw+ks*lrows ), lrows )
787*
788* Copy workspace to H.
789*
790 CALL dlamov( 'All', lrows, lnwin,
791 $ work(ipw), lrows,
792 $ h((jloc-1)*lldh+iloc), lldh )
793 END IF
794 100 CONTINUE
795 END IF
796*
797 IF( wantz ) THEN
798*
799* Compute Z2*U21 + Z1*U11 in workspace.
800*
801 DO 110 indx = 1, n, nb
802 CALL infog2l( indx, lktop, descz, nprow,
803 $ npcol, myrow, mycol, iloc, jloc, rsrc1,
804 $ csrc1 )
805 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
806 jloc1 = indxg2l( lktop+lnwin-ks, nb,
807 $ mycol, descz( csrc_ ), npcol )
808 lrows = min(nb,n-indx+1)
809 CALL dlamov( 'All', lrows, ks,
810 $ z((jloc1-1)*lldz+iloc ), lldz,
811 $ work(ipw), lrows )
812 CALL dtrmm( 'Right', 'Upper',
813 $ 'No transpose', 'Non-unit',
814 $ lrows, ks, one, work( ipu+lnwin-ks ),
815 $ lnwin, work(ipw), lrows )
816 CALL dgemm( 'No transpose',
817 $ 'No transpose', lrows, ks, lnwin-ks,
818 $ one, z((jloc-1)*lldz+iloc), lldz,
819 $ work( ipu ), lnwin, one, work(ipw),
820 $ lrows )
821*
822* Compute Z1*U12 + Z2*U22 in workspace.
823*
824 CALL dlamov( 'All', lrows, lnwin-ks,
825 $ z((jloc-1)*lldz+iloc), lldz,
826 $ work( ipw+ks*lrows ), lrows)
827 CALL dtrmm( 'Right', 'Lower',
828 $ 'No transpose', 'Non-unit',
829 $ lrows, lnwin-ks, one,
830 $ work( ipu+lnwin*ks ), lnwin,
831 $ work( ipw+ks*lrows ), lrows )
832 CALL dgemm( 'No transpose',
833 $ 'No transpose', lrows, lnwin-ks, ks,
834 $ one, z((jloc1-1)*lldz+iloc), lldz,
835 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
836 $ one, work( ipw+ks*lrows ),
837 $ lrows )
838*
839* Copy workspace to Z.
840*
841 CALL dlamov( 'All', lrows, lnwin,
842 $ work(ipw), lrows,
843 $ z((jloc-1)*lldz+iloc), lldz )
844 END IF
845 110 CONTINUE
846 END IF
847 END IF
848*
849 IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.
850 $ myrow.EQ.lrsrc ) THEN
851 IF( wantt ) THEN
852 indxs = iceil(lkbot,nb)*nb + 1
853 DO 120 indx = indxs, n, nb
854 CALL infog2l( lktop, indx,
855 $ desch, nprow, npcol, myrow, mycol, iloc,
856 $ jloc, rsrc1, csrc1 )
857 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN
858*
859* Compute U21**T*H2 + U11**T*H1 in workspace.
860*
861 iloc1 = indxg2l( lktop+lnwin-ks, nb,
862 $ myrow, desch( rsrc_ ), nprow )
863 lcols = min( nb, n-indx+1 )
864 CALL dlamov( 'All', ks, lcols,
865 $ h((jloc-1)*lldh+iloc1), lldh,
866 $ work(ipw), lnwin )
867 CALL dtrmm( 'Left', 'Upper', 'Transpose',
868 $ 'Non-unit', ks, lcols, one,
869 $ work( ipu+lnwin-ks ), lnwin,
870 $ work(ipw), lnwin )
871 CALL dgemm( 'Transpose', 'No transpose',
872 $ ks, lcols, lnwin-ks, one, work(ipu),
873 $ lnwin, h((jloc-1)*lldh+iloc), lldh,
874 $ one, work(ipw), lnwin )
875*
876* Compute U12**T*H1 + U22**T*H2 in workspace.
877*
878 CALL dlamov( 'All', lnwin-ks, lcols,
879 $ h((jloc-1)*lldh+iloc), lldh,
880 $ work( ipw+ks ), lnwin )
881 CALL dtrmm( 'Left', 'Lower', 'Transpose',
882 $ 'Non-unit', lnwin-ks, lcols, one,
883 $ work( ipu+lnwin*ks ), lnwin,
884 $ work( ipw+ks ), lnwin )
885 CALL dgemm( 'Transpose', 'No Transpose',
886 $ lnwin-ks, lcols, ks, one,
887 $ work( ipu+lnwin*ks+lnwin-ks ), lnwin,
888 $ h((jloc-1)*lldh+iloc1), lldh,
889 $ one, work( ipw+ks ), lnwin )
890*
891* Copy workspace to H.
892*
893 CALL dlamov( 'All', lnwin, lcols,
894 $ work(ipw), lnwin,
895 $ h((jloc-1)*lldh+iloc), lldh )
896 END IF
897 120 CONTINUE
898 END IF
899 END IF
900 END IF
901*
902* Update position information about current window.
903*
904 IF( dir.EQ.2 ) THEN
905 IF( lkbot.EQ.kbot ) THEN
906 lktop = kbot+1
907 lkbot = kbot+1
908 iwork( 1+(win-1)*5 ) = lktop
909 iwork( 2+(win-1)*5 ) = lkbot
910 iwork( 5+(win-1)*5 ) = 2
911 ELSE
912 lktop = min( lktop + lnwin - lchain,
913 $ iceil( lktop, nb )*nb - lchain + 1,
914 $ kbot )
915 iwork( 1+(win-1)*5 ) = lktop
916 lkbot = min( lkbot + lnwin - lchain,
917 $ iceil( lkbot, nb )*nb, kbot )
918 iwork( 2+(win-1)*5 ) = lkbot
919 lnwin = lkbot-lktop+1
920 IF( lnwin.EQ.lchain ) iwork(5+(win-1)*5) = 2
921 END IF
922 END IF
923 75 CONTINUE
924 70 CONTINUE
925 65 CONTINUE
926*
927* If bulges were chasen off from first window, the window is
928* removed.
929*
930 IF( ichoff.GT.0 ) THEN
931 DO 128 win = 2, anmwin
932 iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
933 iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
934 iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
935 iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
936 iwork( 5+(win-2)*5 ) = iwork( 5+(win-1)*5 )
937 128 CONTINUE
938 anmwin = anmwin - 1
939 ipiw = 6+(anmwin-1)*5
940 END IF
941*
942* If we have no more windows, return.
943*
944 IF( anmwin.LT.1 ) RETURN
945*
946 ELSE
947*
948* Set up windows such that as many bulges as possible can be
949* moved over the border to the next block. Make sure that the
950* cross border window is at least (NTINY+1)-by-(NTINY+1), unless
951* we are chasing off the bulges from the last window. This is
952* accomplished by setting the bottom index LKBOT such that the
953* local window has the correct size.
954*
955* If LKBOT then becomes larger than KBOT, the endpoint of the whole
956* global submatrix, or LKTOP from a window located already residing
957* at the other side of the border, this is taken care of by some
958* dirty tricks.
959*
960 DO 130 win = 1, anmwin
961 lktop1 = iwork( 1+(win-1)*5 )
962 lkbot = iwork( 2+(win-1)*5 )
963 lnwin = max( 6, min( lkbot - lktop1 + 1, lchain ) )
964 lkbot1 = max( min( kbot, iceil(lktop1,nb)*nb+lchain),
965 $ min( kbot, min( lktop1+2*lnwin-1,
966 $ (iceil(lktop1,nb)+1)*nb ) ) )
967 iwork( 2+(win-1)*5 ) = lkbot1
968 130 CONTINUE
969 ichoff = 0
970*
971* Keep a record over what windows that were moved over the borders
972* such that we can delay some windows due to lack of space on the
973* other side of the border; we do not want to leave any of the
974* bulges behind...
975*
976* IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed
977* IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to
978* know for updates)
979* IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed
980*
981* So, start by marking all windows as not processed.
982*
983 DO 135 win = 1, anmwin
984 iwork( 5+(win-1)*5 ) = 0
985 135 CONTINUE
986*
987* Do the cross border bulge-chase as follows: Start from the
988* first window (the one that is closest to be chased off the
989* diagonal of H) and take the odd windows first followed by the
990* even ones. To not get into hang-problems on processor meshes
991* with at least one odd dimension, the windows will in such a case
992* be processed in chunks of {the minimum odd process dimension}-1
993* windows to avoid overlapping processor scopes in forming the
994* cross border computational windows and the cross border update
995* regions.
996*
997 wchunk = max( 1, min( anmwin, nprow-1, npcol-1 ) )
998 numchunk = iceil( anmwin, wchunk )
999*
1000* Based on the computed chunk of windows, start working with
1001* crossborder bulge-chasing. Repeat this as long as there is
1002* still work left to do (137 is a kind of do-while statement).
1003*
1004 137 CONTINUE
1005*
1006* Zero out LENRBUF and LENCBUF each time we restart this loop.
1007*
1008 lenrbuf = 0
1009 lencbuf = 0
1010*
1011 DO 140 oddeven = 1, min( 2, anmwin )
1012 DO 150 chunknum = 1, numchunk
1013 ipnext = 1
1014 DO 160 win = oddeven+(chunknum-1)*wchunk,
1015 $ min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2
1016*
1017* Get position and size of the WIN:th active window and
1018* make sure that we skip the cross border bulge for this
1019* window if the window is not shared between several data
1020* layout blocks (and processors).
1021*
1022* Also, delay windows that do not have sufficient size of
1023* the other side of the border. Moreover, make sure to skip
1024* windows that was already processed in the last round of
1025* the do-while loop (137).
1026*
1027 IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 165
1028 lktop = iwork( 1+(win-1)*5 )
1029 lkbot = iwork( 2+(win-1)*5 )
1030 IF( win.GT.1 ) THEN
1031 lktop2 = iwork( 1+(win-2)*5 )
1032 ELSE
1033 lktop2 = kbot+1
1034 END IF
1035 IF( iceil(lktop,nb).EQ.iceil(lkbot,nb) .OR.
1036 $ lkbot.GE.lktop2 ) GO TO 165
1037 lnwin = lkbot - lktop + 1
1038 IF( lnwin.LE.ntiny .AND. lkbot.NE.kbot .AND.
1039 $ .NOT. mod(lkbot,nb).EQ.0 ) GO TO 165
1040*
1041* If window is going to be processed, mark it as processed.
1042*
1043 iwork( 5+(win-1)*5 ) = 1
1044*
1045* Extract processors for current cross border window,
1046* as below:
1047*
1048* 1 | 2
1049* --+--
1050* 3 | 4
1051*
1052 rsrc1 = iwork( 3+(win-1)*5 )
1053 csrc1 = iwork( 4+(win-1)*5 )
1054 rsrc2 = rsrc1
1055 csrc2 = mod( csrc1+1, npcol )
1056 rsrc3 = mod( rsrc1+1, nprow )
1057 csrc3 = csrc1
1058 rsrc4 = mod( rsrc1+1, nprow )
1059 csrc4 = mod( csrc1+1, npcol )
1060*
1061* Form group of four processors for cross border window.
1062*
1063 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1064 $ ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.
1065 $ ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.
1066 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN
1067*
1068* Compute the upper and lower parts of the active
1069* window.
1070*
1071 dim1 = nb - mod(lktop-1,nb)
1072 dim4 = lnwin - dim1
1073*
1074* Temporarily compute a new value of the size of the
1075* computational window that is larger than or equal to
1076* NTINY+1; call the *real* value DIM.
1077*
1078 dim = lnwin
1079 lnwin = max(ntiny+1,lnwin)
1080*
1081* Divide workspace.
1082*
1083 ipu = ipnext
1084 iph = ipu + dim**2
1085 ipuu = iph + lnwin**2
1086 ipv = ipuu + lnwin**2
1087 ipnext = iph
1088 IF( dim.LT.lnwin ) THEN
1089 CALL dlaset( 'All', lnwin, lnwin, zero,
1090 $ one, work( iph ), lnwin )
1091 ELSE
1092 CALL dlaset( 'All', dim, dim, zero,
1093 $ zero, work( iph ), lnwin )
1094 END IF
1095*
1096* Form the active window.
1097*
1098 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1099 iloc = indxg2l( lktop, nb, myrow,
1100 $ desch( rsrc_ ), nprow )
1101 jloc = indxg2l( lktop, nb, mycol,
1102 $ desch( csrc_ ), npcol )
1103 CALL dlamov( 'All', dim1, dim1,
1104 $ h((jloc-1)*lldh+iloc), lldh, work(iph),
1105 $ lnwin )
1106 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN
1107* Proc#1 <==> Proc#4
1108 CALL dgesd2d( ictxt, dim1, dim1,
1109 $ work(iph), lnwin, rsrc4, csrc4 )
1110 CALL dgerv2d( ictxt, dim4, dim4,
1111 $ work(iph+dim1*lnwin+dim1),
1112 $ lnwin, rsrc4, csrc4 )
1113 END IF
1114 END IF
1115 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1116 iloc = indxg2l( lktop+dim1, nb, myrow,
1117 $ desch( rsrc_ ), nprow )
1118 jloc = indxg2l( lktop+dim1, nb, mycol,
1119 $ desch( csrc_ ), npcol )
1120 CALL dlamov( 'All', dim4, dim4,
1121 $ h((jloc-1)*lldh+iloc), lldh,
1122 $ work(iph+dim1*lnwin+dim1),
1123 $ lnwin )
1124 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN
1125* Proc#4 <==> Proc#1
1126 CALL dgesd2d( ictxt, dim4, dim4,
1127 $ work(iph+dim1*lnwin+dim1),
1128 $ lnwin, rsrc1, csrc1 )
1129 CALL dgerv2d( ictxt, dim1, dim1,
1130 $ work(iph), lnwin, rsrc1, csrc1 )
1131 END IF
1132 END IF
1133 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1134 iloc = indxg2l( lktop, nb, myrow,
1135 $ desch( rsrc_ ), nprow )
1136 jloc = indxg2l( lktop+dim1, nb, mycol,
1137 $ desch( csrc_ ), npcol )
1138 CALL dlamov( 'All', dim1, dim4,
1139 $ h((jloc-1)*lldh+iloc), lldh,
1140 $ work(iph+dim1*lnwin), lnwin )
1141 IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN
1142* Proc#2 ==> Proc#1
1143 CALL dgesd2d( ictxt, dim1, dim4,
1144 $ work(iph+dim1*lnwin),
1145 $ lnwin, rsrc1, csrc1 )
1146 END IF
1147 END IF
1148 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN
1149 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN
1150* Proc#2 ==> Proc#4
1151 CALL dgesd2d( ictxt, dim1, dim4,
1152 $ work(iph+dim1*lnwin),
1153 $ lnwin, rsrc4, csrc4 )
1154 END IF
1155 END IF
1156 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1157 iloc = indxg2l( lktop+dim1, nb, myrow,
1158 $ desch( rsrc_ ), nprow )
1159 jloc = indxg2l( lktop+dim1-1, nb, mycol,
1160 $ desch( csrc_ ), npcol )
1161 CALL dlamov( 'All', 1, 1,
1162 $ h((jloc-1)*lldh+iloc), lldh,
1163 $ work(iph+(dim1-1)*lnwin+dim1),
1164 $ lnwin )
1165 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN
1166* Proc#3 ==> Proc#1
1167 CALL dgesd2d( ictxt, 1, 1,
1168 $ work(iph+(dim1-1)*lnwin+dim1),
1169 $ lnwin, rsrc1, csrc1 )
1170 END IF
1171 END IF
1172 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN
1173 IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN
1174* Proc#3 ==> Proc#4
1175 CALL dgesd2d( ictxt, 1, 1,
1176 $ work(iph+(dim1-1)*lnwin+dim1),
1177 $ lnwin, rsrc4, csrc4 )
1178 END IF
1179 END IF
1180 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN
1181 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN
1182* Proc#1 <== Proc#2
1183 CALL dgerv2d( ictxt, dim1, dim4,
1184 $ work(iph+dim1*lnwin),
1185 $ lnwin, rsrc2, csrc2 )
1186 END IF
1187 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN
1188* Proc#1 <== Proc#3
1189 CALL dgerv2d( ictxt, 1, 1,
1190 $ work(iph+(dim1-1)*lnwin+dim1),
1191 $ lnwin, rsrc3, csrc3 )
1192 END IF
1193 END IF
1194 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN
1195 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN
1196* Proc#4 <== Proc#2
1197 CALL dgerv2d( ictxt, dim1, dim4,
1198 $ work(iph+dim1*lnwin),
1199 $ lnwin, rsrc2, csrc2 )
1200 END IF
1201 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN
1202* Proc#4 <== Proc#3
1203 CALL dgerv2d( ictxt, 1, 1,
1204 $ work(iph+(dim1-1)*lnwin+dim1),
1205 $ lnwin, rsrc3, csrc3 )
1206 END IF
1207 END IF
1208*
1209* Prepare for call to DLAQR6 - it could happen that no
1210* bulges where introduced in the pre-cross border step
1211* since the chain was too long to fit in the top-left
1212* part of the cross border window. In such a case, the
1213* bulges are introduced here instead. It could also
1214* happen that the bottom-right part is too small to hold
1215* the whole chain -- in such a case, the bulges are
1216* chasen off immediately, as well.
1217*
1218 IF( (myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1) .OR.
1219 $ (myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4) ) THEN
1220 IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot .AND.
1221 $ (dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1222 job = 'All steps'
1223 ichoff = 1
1224 ELSEIF( lktop.EQ.ktop .AND.
1225 $ ( dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN
1226 job = 'Introduce and chase'
1227 ELSEIF( lkbot.EQ.kbot ) THEN
1228 job = 'Off-chase bulges'
1229 ichoff = 1
1230 ELSE
1231 job = 'chase bulges'
1232 END IF
1233 KU = LNWIN - KDU + 1
1234 KWH = KDU + 1
1235 NHO = ( LNWIN-KDU+1-4 ) - ( KDU+1 ) + 1
1236 KWV = KDU + 4
1237 NVE = LNWIN - KDU - KWV + 1
1238 CALL DLASET( 'all', LNWIN, LNWIN,
1239 $ ZERO, ONE, WORK(IPUU), LNWIN )
1240*
1241* Small-bulge multi-shift QR sweep.
1242*
1243 LKS = MAX(1, NS - WIN*LNS + 1)
1244 CALL DLAQR6( JOB, WANTT, .TRUE., LKACC22, LNWIN,
1245 $ 1, DIM, LNS, SR( LKS ), SI( LKS ),
1246 $ WORK(IPH), LNWIN, 1, DIM,
1247 $ WORK(IPUU), LNWIN, WORK(IPU), 3,
1248 $ WORK( IPH+KU-1 ), LNWIN, NVE,
1249 $ WORK( IPH+KWV-1 ), LNWIN, NHO,
1250 $ WORK( IPH-1+KU+(KWH-1)*LNWIN ), LNWIN )
1251*
1252* Copy local submatrices of H back to global matrix.
1253*
1254.EQ..AND..EQ. IF( MYROWRSRC1 MYCOLCSRC1 ) THEN
1255 ILOC = INDXG2L( LKTOP, NB, MYROW,
1256 $ DESCH( RSRC_ ), NPROW )
1257 JLOC = INDXG2L( LKTOP, NB, MYCOL,
1258 $ DESCH( CSRC_ ), NPCOL )
1259 CALL DLAMOV( 'all', DIM1, DIM1, WORK(IPH),
1260 $ LNWIN, H((JLOC-1)*LLDH+ILOC),
1261 $ LLDH )
1262 END IF
1263.EQ..AND..EQ. IF( MYROWRSRC4 MYCOLCSRC4 ) THEN
1264 ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1265 $ DESCH( RSRC_ ), NPROW )
1266 JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1267 $ DESCH( CSRC_ ), NPCOL )
1268 CALL DLAMOV( 'all', DIM4, DIM4,
1269 $ WORK(IPH+DIM1*LNWIN+DIM1),
1270 $ LNWIN, H((JLOC-1)*LLDH+ILOC), LLDH )
1271 END IF
1272*
1273* Copy actual submatrix of U to the correct place of
1274* the buffer.
1275*
1276 CALL DLAMOV( 'all', DIM, DIM,
1277 $ WORK(IPUU), LNWIN, WORK(IPU), DIM )
1278 END IF
1279*
1280* Return data to process 2 and 3.
1281*
1282 RWS3 = MIN(3,DIM4)
1283 CLS3 = MIN(3,DIM1)
1284.EQ..AND..EQ. IF( MYROWRSRC1 MYCOLCSRC1 ) THEN
1285.NE..OR..NE. IF( RSRC1RSRC3 CSRC1CSRC3 ) THEN
1286* Proc#1 ==> Proc#3
1287 CALL DGESD2D( ICTXT, RWS3, CLS3,
1288 $ WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1289 $ LNWIN, RSRC3, CSRC3 )
1290 END IF
1291 END IF
1292.EQ..AND..EQ. IF( MYROWRSRC4 MYCOLCSRC4 ) THEN
1293.NE..OR..NE. IF( RSRC4RSRC2 CSRC4CSRC2 ) THEN
1294* Proc#4 ==> Proc#2
1295 CALL DGESD2D( ICTXT, DIM1, DIM4,
1296 $ WORK( IPH+DIM1*LNWIN),
1297 $ LNWIN, RSRC2, CSRC2 )
1298 END IF
1299 END IF
1300.EQ..AND..EQ. IF( MYROWRSRC2 MYCOLCSRC2 ) THEN
1301 ILOC = INDXG2L( LKTOP, NB, MYROW,
1302 $ DESCH( RSRC_ ), NPROW )
1303 JLOC = INDXG2L( LKTOP+DIM1, NB, MYCOL,
1304 $ DESCH( CSRC_ ), NPCOL )
1305.NE..OR..NE. IF( RSRC2RSRC4 CSRC2CSRC4 ) THEN
1306* Proc#2 <== Proc#4
1307 CALL DGERV2D( ICTXT, DIM1, DIM4,
1308 $ WORK(IPH+DIM1*LNWIN),
1309 $ LNWIN, RSRC4, CSRC4 )
1310 END IF
1311 CALL DLAMOV( 'all', DIM1, DIM4,
1312 $ WORK( IPH+DIM1*LNWIN ), LNWIN,
1313 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1314 END IF
1315.EQ..AND..EQ. IF( MYROWRSRC3 MYCOLCSRC3 ) THEN
1316 ILOC = INDXG2L( LKTOP+DIM1, NB, MYROW,
1317 $ DESCH( RSRC_ ), NPROW )
1318 JLOC = INDXG2L( LKTOP+DIM1-CLS3, NB, MYCOL,
1319 $ DESCH( CSRC_ ), NPCOL )
1320.NE..OR..NE. IF( RSRC3RSRC1 CSRC3CSRC1 ) THEN
1321* Proc#3 <== Proc#1
1322 CALL DGERV2D( ICTXT, RWS3, CLS3,
1323 $ WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1324 $ LNWIN, RSRC1, CSRC1 )
1325 END IF
1326 CALL DLAMOV( 'upper', RWS3, CLS3,
1327 $ WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1 ),
1328 $ LNWIN, H((JLOC-1)*LLDH+ILOC),
1329 $ LLDH )
1330.GT..AND..GT. IF( RWS31 CLS31 ) THEN
1331 ELEM = WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1+1 )
1332.NE. IF( ELEMZERO ) THEN
1333 CALL DLAMOV( 'lower', RWS3-1, CLS3-1,
1334 $ WORK( IPH+(DIM1-CLS3)*LNWIN+DIM1+1 ),
1335 $ LNWIN, H((JLOC-1)*LLDH+ILOC+1), LLDH )
1336 END IF
1337 END IF
1338 END IF
1339*
1340* Restore correct value of LNWIN.
1341*
1342 LNWIN = DIM
1343*
1344 END IF
1345*
1346* Increment counter for buffers of orthogonal
1347* transformations.
1348*
1349.EQ..OR..EQ..OR. IF( MYROWRSRC1 MYCOLCSRC1
1350.EQ..OR..EQ. $ MYROWRSRC4 MYCOLCSRC4 ) THEN
1351.EQ..OR..EQ. IF( MYROWRSRC1 MYROWRSRC4 )
1352 $ LENRBUF = LENRBUF + LNWIN*LNWIN
1353.EQ..OR..EQ. IF( MYCOLCSRC1 MYCOLCSRC4 )
1354 $ LENCBUF = LENCBUF + LNWIN*LNWIN
1355 END IF
1356*
1357* If no cross border bulge chasing was performed for the
1358* current WIN:th window, the processor jump to this point
1359* and consider the next one.
1360*
1361 165 CONTINUE
1362*
1363 160 CONTINUE
1364*
1365* Broadcast orthogonal transformations -- this will only happen
1366* if the buffer associated with the orthogonal transformations
1367* is not empty (controlled by LENRBUF, for row-wise
1368* broadcasts, and LENCBUF, for column-wise broadcasts).
1369*
1370 DO 170 DIR = 1, 2
1371 BCDONE = .FALSE.
1372 DO 180 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1373 $ MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1374.EQ..AND..EQ..OR. IF( ( LENRBUF0 LENCBUF0 )
1375 $ BCDONE ) GO TO 185
1376 RSRC1 = IWORK( 3+(WIN-1)*5 )
1377 CSRC1 = IWORK( 4+(WIN-1)*5 )
1378 RSRC4 = MOD( RSRC1+1, NPROW )
1379 CSRC4 = MOD( CSRC1+1, NPCOL )
1380.EQ..AND..EQ..OR. IF( ( MYROWRSRC1 MYCOLCSRC1 )
1381.EQ..AND..EQ. $ ( MYROWRSRC4 MYCOLCSRC4 ) ) THEN
1382.EQ..AND..GT..AND. IF( DIR1 LENRBUF0
1383.GT..AND..GT. $ NPCOL1 NPROCS2 ) THEN
1384.EQ..OR..EQ. IF( MYROWRSRC1 ( MYROWRSRC4
1385.AND..NE. $ RSRC4RSRC1 ) ) THEN
1386 CALL DGEBS2D( ICTXT, 'row', '1-tree',
1387 $ LENRBUF, 1, WORK, LENRBUF )
1388 ELSE
1389 CALL DGEBR2D( ICTXT, 'row', '1-tree',
1390 $ LENRBUF, 1, WORK, LENRBUF, RSRC1,
1391 $ CSRC1 )
1392 END IF
1393.EQ..AND..GT..AND. ELSEIF( DIR2 LENCBUF0
1394.GT..AND..GT. $ NPROW1 NPROCS2 ) THEN
1395.EQ..OR..EQ. IF( MYCOLCSRC1 ( MYCOLCSRC4
1396.AND..NE. $ CSRC4CSRC1 ) ) THEN
1397 CALL DGEBS2D( ICTXT, 'col', '1-tree',
1398 $ LENCBUF, 1, WORK, LENCBUF )
1399 ELSE
1400 CALL DGEBR2D( ICTXT, 'col', '1-tree',
1401 $ LENCBUF, 1, WORK(1+LENRBUF), LENCBUF,
1402 $ RSRC1, CSRC1 )
1403 END IF
1404 END IF
1405.GT..AND..EQ..OR. IF( LENRBUF0 ( MYCOLCSRC1
1406.EQ..AND..NE. $ ( MYCOLCSRC4 CSRC4CSRC1 ) ) )
1407 $ CALL DLAMOV( 'all', LENRBUF, 1, WORK, LENRBUF,
1408 $ WORK(1+LENRBUF), LENCBUF )
1409 BCDONE = .TRUE.
1410.EQ..AND..EQ. ELSEIF( MYROWRSRC1 DIR1 ) THEN
1411.GT..AND..GT. IF( LENRBUF0 NPCOL1 )
1412 $ CALL DGEBR2D( ICTXT, 'row', '1-tree', LENRBUF,
1413 $ 1, WORK, LENRBUF, RSRC1, CSRC1 )
1414 BCDONE = .TRUE.
1415.EQ..AND..EQ. ELSEIF( MYCOLCSRC1 DIR2 ) THEN
1416.GT..AND..GT. IF( LENCBUF0 NPROW1 )
1417 $ CALL DGEBR2D( ICTXT, 'col', '1-tree', LENCBUF,
1418 $ 1, WORK(1+LENRBUF), LENCBUF, RSRC1, CSRC1 )
1419 BCDONE = .TRUE.
1420.EQ..AND..EQ. ELSEIF( MYROWRSRC4 DIR1 ) THEN
1421.GT..AND..GT. IF( LENRBUF0 NPCOL1 )
1422 $ CALL DGEBR2D( ICTXT, 'row', '1-tree', LENRBUF,
1423 $ 1, WORK, LENRBUF, RSRC4, CSRC4 )
1424 BCDONE = .TRUE.
1425.EQ..AND..EQ. ELSEIF( MYCOLCSRC4 DIR2 ) THEN
1426.GT..AND..GT. IF( LENCBUF0 NPROW1 )
1427 $ CALL DGEBR2D( ICTXT, 'col', '1-tree', LENCBUF,
1428 $ 1, WORK(1+LENRBUF), LENCBUF, RSRC4, CSRC4 )
1429 BCDONE = .TRUE.
1430 END IF
1431 185 CONTINUE
1432 180 CONTINUE
1433 170 CONTINUE
1434*
1435* Prepare for computing cross border updates by exchanging
1436* data in cross border update regions in H and Z.
1437*
1438 DO 190 DIR = 1, 2
1439 WINID = 0
1440 IPW3 = 1
1441 DO 200 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1442 $ MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1443.NE. IF( IWORK( 5+(WIN-1)*5 )1 ) GO TO 205
1444*
1445* Make sure this part of the code is only executed when
1446* there has been some work performed on the WIN:th
1447* window.
1448*
1449 LKTOP = IWORK( 1+(WIN-1)*5 )
1450 LKBOT = IWORK( 2+(WIN-1)*5 )
1451*
1452* Extract processor indices associated with
1453* the current window.
1454*
1455 RSRC1 = IWORK( 3+(WIN-1)*5 )
1456 CSRC1 = IWORK( 4+(WIN-1)*5 )
1457 RSRC4 = MOD( RSRC1+1, NPROW )
1458 CSRC4 = MOD( CSRC1+1, NPCOL )
1459*
1460* Compute local number of rows and columns
1461* of H and Z to exchange.
1462*
1463.EQ..OR..EQ..AND..EQ. IF(((MYCOLCSRC1MYCOLCSRC4)DIR2)
1464.OR..EQ..OR..EQ..AND. $ ((MYROWRSRC1MYROWRSRC4)
1465.EQ. $ DIR1)) THEN
1466 WINID = WINID + 1
1467 LNWIN = LKBOT - LKTOP + 1
1468 IPU = IPNEXT
1469 DIM1 = NB - MOD(LKTOP-1,NB)
1470 DIM4 = LNWIN - DIM1
1471 IPNEXT = IPU + LNWIN*LNWIN
1472.EQ. IF( DIR2 ) THEN
1473 IF( WANTZ ) THEN
1474 ZROWS = NUMROC( N, NB, MYROW, DESCZ( RSRC_ ),
1475 $ NPROW )
1476 ELSE
1477 ZROWS = 0
1478 END IF
1479 IF( WANTT ) THEN
1480 HROWS = NUMROC( LKTOP-1, NB, MYROW,
1481 $ DESCH( RSRC_ ), NPROW )
1482 ELSE
1483 HROWS = 0
1484 END IF
1485 ELSE
1486 ZROWS = 0
1487 HROWS = 0
1488 END IF
1489.EQ. IF( DIR1 ) THEN
1490 IF( WANTT ) THEN
1491 HCOLS = NUMROC( N - (LKTOP+DIM1-1), NB,
1492 $ MYCOL, CSRC4, NPCOL )
1493.EQ. IF( MYCOLCSRC4 ) HCOLS = HCOLS - DIM4
1494 ELSE
1495 HCOLS = 0
1496 END IF
1497 ELSE
1498 HCOLS = 0
1499 END IF
1500 IPW = MAX( 1 + LENRBUF + LENCBUF, IPW3 )
1501 IPW1 = IPW + HROWS * LNWIN
1502 IF( WANTZ ) THEN
1503 IPW2 = IPW1 + LNWIN * HCOLS
1504 IPW3 = IPW2 + ZROWS * LNWIN
1505 ELSE
1506 IPW3 = IPW1 + LNWIN * HCOLS
1507 END IF
1508 END IF
1509*
1510* Let each process row and column involved in the updates
1511* exchange data in H and Z with their neighbours.
1512*
1513.EQ..AND..AND..GT. IF( DIR2 WANTT LENCBUF0 ) THEN
1514.EQ..OR..EQ. IF( MYCOLCSRC1 MYCOLCSRC4 ) THEN
1515 DO 210 INDX = 1, NPROW
1516.EQ. IF( MYCOLCSRC1 ) THEN
1517 CALL INFOG2L( 1+(INDX-1)*NB, LKTOP, DESCH,
1518 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1519 $ JLOC1, RSRC, CSRC1 )
1520.EQ. IF( MYROWRSRC ) THEN
1521 CALL DLAMOV( 'all', HROWS, DIM1,
1522 $ H((JLOC1-1)*LLDH+ILOC), LLDH,
1523 $ WORK(IPW), HROWS )
1524.GT. IF( NPCOL1 ) THEN
1525 EAST = MOD( MYCOL + 1, NPCOL )
1526 CALL DGESD2D( ICTXT, HROWS, DIM1,
1527 $ WORK(IPW), HROWS, RSRC, EAST )
1528 CALL DGERV2D( ICTXT, HROWS, DIM4,
1529 $ WORK(IPW+HROWS*DIM1), HROWS,
1530 $ RSRC, EAST )
1531 END IF
1532 END IF
1533 END IF
1534.EQ. IF( MYCOLCSRC4 ) THEN
1535 CALL INFOG2L( 1+(INDX-1)*NB, LKTOP+DIM1,
1536 $ DESCH, NPROW, NPCOL, MYROW, MYCOL,
1537 $ ILOC, JLOC4, RSRC, CSRC4 )
1538.EQ. IF( MYROWRSRC ) THEN
1539 CALL DLAMOV( 'all', HROWS, DIM4,
1540 $ H((JLOC4-1)*LLDH+ILOC), LLDH,
1541 $ WORK(IPW+HROWS*DIM1), HROWS )
1542.GT. IF( NPCOL1 ) THEN
1543 WEST = MOD( MYCOL - 1 + NPCOL,
1544 $ NPCOL )
1545 CALL DGESD2D( ICTXT, HROWS, DIM4,
1546 $ WORK(IPW+HROWS*DIM1), HROWS,
1547 $ RSRC, WEST )
1548 CALL DGERV2D( ICTXT, HROWS, DIM1,
1549 $ WORK(IPW), HROWS, RSRC, WEST )
1550 END IF
1551 END IF
1552 END IF
1553 210 CONTINUE
1554 END IF
1555 END IF
1556*
1557.EQ..AND..AND..GT. IF( DIR1 WANTT LENRBUF0 ) THEN
1558.EQ..OR..EQ. IF( MYROWRSRC1 MYROWRSRC4 ) THEN
1559 DO 220 INDX = 1, NPCOL
1560.EQ. IF( MYROWRSRC1 ) THEN
1561.EQ. IF( INDX1 ) THEN
1562.LT. IF( LKBOTN ) THEN
1563 CALL INFOG2L( LKTOP, LKBOT+1, DESCH,
1564 $ NPROW, NPCOL, MYROW, MYCOL,
1565 $ ILOC1, JLOC, RSRC1, CSRC )
1566 ELSE
1567 CSRC = -1
1568 END IF
1569.NE. ELSEIF( MOD(LKBOT,NB)0 ) THEN
1570 CALL INFOG2L( LKTOP,
1571 $ (ICEIL(LKBOT,NB)+(INDX-2))*NB+1,
1572 $ DESCH, NPROW, NPCOL, MYROW, MYCOL,
1573 $ ILOC1, JLOC, RSRC1, CSRC )
1574 ELSE
1575 CALL INFOG2L( LKTOP,
1576 $ (ICEIL(LKBOT,NB)+(INDX-1))*NB+1,
1577 $ DESCH, NPROW, NPCOL, MYROW, MYCOL,
1578 $ ILOC1, JLOC, RSRC1, CSRC )
1579 END IF
1580.EQ. IF( MYCOLCSRC ) THEN
1581 CALL DLAMOV( 'all', DIM1, HCOLS,
1582 $ H((JLOC-1)*LLDH+ILOC1), LLDH,
1583 $ WORK(IPW1), LNWIN )
1584.GT. IF( NPROW1 ) THEN
1585 SOUTH = MOD( MYROW + 1, NPROW )
1586 CALL DGESD2D( ICTXT, DIM1, HCOLS,
1587 $ WORK(IPW1), LNWIN, SOUTH,
1588 $ CSRC )
1589 CALL DGERV2D( ICTXT, DIM4, HCOLS,
1590 $ WORK(IPW1+DIM1), LNWIN, SOUTH,
1591 $ CSRC )
1592 END IF
1593 END IF
1594 END IF
1595.EQ. IF( MYROWRSRC4 ) THEN
1596.EQ. IF( INDX1 ) THEN
1597.LT. IF( LKBOTN ) THEN
1598 CALL INFOG2L( LKTOP+DIM1, LKBOT+1,
1599 $ DESCH, NPROW, NPCOL, MYROW,
1600 $ MYCOL, ILOC4, JLOC, RSRC4,
1601 $ CSRC )
1602 ELSE
1603 CSRC = -1
1604 END IF
1605.NE. ELSEIF( MOD(LKBOT,NB)0 ) THEN
1606 CALL INFOG2L( LKTOP+DIM1,
1607 $ (ICEIL(LKBOT,NB)+(INDX-2))*NB+1,
1608 $ DESCH, NPROW, NPCOL, MYROW, MYCOL,
1609 $ ILOC4, JLOC, RSRC4, CSRC )
1610 ELSE
1611 CALL INFOG2L( LKTOP+DIM1,
1612 $ (ICEIL(LKBOT,NB)+(INDX-1))*NB+1,
1613 $ DESCH, NPROW, NPCOL, MYROW, MYCOL,
1614 $ ILOC4, JLOC, RSRC4, CSRC )
1615 END IF
1616.EQ. IF( MYCOLCSRC ) THEN
1617 CALL DLAMOV( 'all', DIM4, HCOLS,
1618 $ H((JLOC-1)*LLDH+ILOC4), LLDH,
1619 $ WORK(IPW1+DIM1), LNWIN )
1620.GT. IF( NPROW1 ) THEN
1621 NORTH = MOD( MYROW - 1 + NPROW,
1622 $ NPROW )
1623 CALL DGESD2D( ICTXT, DIM4, HCOLS,
1624 $ WORK(IPW1+DIM1), LNWIN, NORTH,
1625 $ CSRC )
1626 CALL DGERV2D( ICTXT, DIM1, HCOLS,
1627 $ WORK(IPW1), LNWIN, NORTH,
1628 $ CSRC )
1629 END IF
1630 END IF
1631 END IF
1632 220 CONTINUE
1633 END IF
1634 END IF
1635*
1636.EQ..AND..AND..GT. IF( DIR2 WANTZ LENCBUF0) THEN
1637.EQ..OR..EQ. IF( MYCOLCSRC1 MYCOLCSRC4 ) THEN
1638 DO 230 INDX = 1, NPROW
1639.EQ. IF( MYCOLCSRC1 ) THEN
1640 CALL INFOG2L( 1+(INDX-1)*NB, LKTOP,
1641 $ DESCZ, NPROW, NPCOL, MYROW, MYCOL,
1642 $ ILOC, JLOC1, RSRC, CSRC1 )
1643.EQ. IF( MYROWRSRC ) THEN
1644 CALL DLAMOV( 'all', ZROWS, DIM1,
1645 $ Z((JLOC1-1)*LLDZ+ILOC), LLDZ,
1646 $ WORK(IPW2), ZROWS )
1647.GT. IF( NPCOL1 ) THEN
1648 EAST = MOD( MYCOL + 1, NPCOL )
1649 CALL DGESD2D( ICTXT, ZROWS, DIM1,
1650 $ WORK(IPW2), ZROWS, RSRC,
1651 $ EAST )
1652 CALL DGERV2D( ICTXT, ZROWS, DIM4,
1653 $ WORK(IPW2+ZROWS*DIM1),
1654 $ ZROWS, RSRC, EAST )
1655 END IF
1656 END IF
1657 END IF
1658.EQ. IF( MYCOLCSRC4 ) THEN
1659 CALL INFOG2L( 1+(INDX-1)*NB,
1660 $ LKTOP+DIM1, DESCZ, NPROW, NPCOL,
1661 $ MYROW, MYCOL, ILOC, JLOC4, RSRC,
1662 $ CSRC4 )
1663.EQ. IF( MYROWRSRC ) THEN
1664 CALL DLAMOV( 'all', ZROWS, DIM4,
1665 $ Z((JLOC4-1)*LLDZ+ILOC), LLDZ,
1666 $ WORK(IPW2+ZROWS*DIM1), ZROWS )
1667.GT. IF( NPCOL1 ) THEN
1668 WEST = MOD( MYCOL - 1 + NPCOL,
1669 $ NPCOL )
1670 CALL DGESD2D( ICTXT, ZROWS, DIM4,
1671 $ WORK(IPW2+ZROWS*DIM1),
1672 $ ZROWS, RSRC, WEST )
1673 CALL DGERV2D( ICTXT, ZROWS, DIM1,
1674 $ WORK(IPW2), ZROWS, RSRC,
1675 $ WEST )
1676 END IF
1677 END IF
1678 END IF
1679 230 CONTINUE
1680 END IF
1681 END IF
1682*
1683* If no exchanges was performed for the current window,
1684* all processors jump to this point and try the next
1685* one.
1686*
1687 205 CONTINUE
1688*
1689 200 CONTINUE
1690*
1691* Compute crossborder bulge-chase updates.
1692*
1693 WINID = 0
1694.EQ. IF( DIR1 ) THEN
1695 IPNEXT = 1
1696 ELSE
1697 IPNEXT = 1 + LENRBUF
1698 END IF
1699 IPW3 = 1
1700 DO 240 WIN = ODDEVEN+(CHUNKNUM-1)*WCHUNK,
1701 $ MIN(ANMWIN,MAX(1,ODDEVEN+(CHUNKNUM)*WCHUNK-1)), 2
1702.NE. IF( IWORK( 5+(WIN-1)*5 )1 ) GO TO 245
1703*
1704* Only perform this part of the code if there was really
1705* some work performed on the WIN:th window.
1706*
1707 LKTOP = IWORK( 1+(WIN-1)*5 )
1708 LKBOT = IWORK( 2+(WIN-1)*5 )
1709 LNWIN = LKBOT - LKTOP + 1
1710*
1711* Extract the processor indices associated with
1712* the current window.
1713*
1714 RSRC1 = IWORK( 3+(WIN-1)*5 )
1715 CSRC1 = IWORK( 4+(WIN-1)*5 )
1716 RSRC4 = MOD( RSRC1+1, NPROW )
1717 CSRC4 = MOD( CSRC1+1, NPCOL )
1718*
1719.EQ..OR..EQ..AND..EQ. IF(((MYCOLCSRC1MYCOLCSRC4)DIR2)
1720.OR..EQ..OR..EQ..AND. $ ((MYROWRSRC1MYROWRSRC4)
1721.EQ. $ DIR1)) THEN
1722*
1723* Set up workspaces.
1724*
1725 WINID = WINID + 1
1726 LKTOP = IWORK( 1+(WIN-1)*5 )
1727 LKBOT = IWORK( 2+(WIN-1)*5 )
1728 LNWIN = LKBOT - LKTOP + 1
1729 DIM1 = NB - MOD(LKTOP-1,NB)
1730 DIM4 = LNWIN - DIM1
1731 IPU = IPNEXT + (WINID-1)*LNWIN*LNWIN
1732.EQ. IF( DIR2 ) THEN
1733 IF( WANTZ ) THEN
1734 ZROWS = NUMROC( N, NB, MYROW, DESCZ( RSRC_ ),
1735 $ NPROW )
1736 ELSE
1737 ZROWS = 0
1738 END IF
1739 IF( WANTT ) THEN
1740 HROWS = NUMROC( LKTOP-1, NB, MYROW,
1741 $ DESCH( RSRC_ ), NPROW )
1742 ELSE
1743 HROWS = 0
1744 END IF
1745 ELSE
1746 ZROWS = 0
1747 HROWS = 0
1748 END IF
1749.EQ. IF( DIR1 ) THEN
1750 IF( WANTT ) THEN
1751 HCOLS = NUMROC( N - (LKTOP+DIM1-1), NB,
1752 $ MYCOL, CSRC4, NPCOL )
1753.EQ. IF( MYCOLCSRC4 ) HCOLS = HCOLS - DIM4
1754 ELSE
1755 HCOLS = 0
1756 END IF
1757 ELSE
1758 HCOLS = 0
1759 END IF
1760*
1761* IPW = local copy of overlapping column block of H
1762* IPW1 = local copy of overlapping row block of H
1763* IPW2 = local copy of overlapping column block of Z
1764* IPW3 = workspace for right hand side of matrix
1765* multiplication
1766*
1767 IPW = MAX( 1 + LENRBUF + LENCBUF, IPW3 )
1768 IPW1 = IPW + HROWS * LNWIN
1769 IF( WANTZ ) THEN
1770 IPW2 = IPW1 + LNWIN * HCOLS
1771 IPW3 = IPW2 + ZROWS * LNWIN
1772 ELSE
1773 IPW3 = IPW1 + LNWIN * HCOLS
1774 END IF
1775*
1776* Recompute job to see if special structure of U
1777* could possibly be exploited.
1778*
1779.EQ..AND..EQ. IF( LKTOPKTOP LKBOTKBOT ) THEN
1780 JOB = 'all steps'
1781.EQ..AND. ELSEIF( LKTOPKTOP
1782.LT..OR..LE. $ ( DIM1LCHAIN+1 DIM1NTINY ) )
1783 $ THEN
1784 JOB = 'introduce and chase'
1785.EQ. ELSEIF( LKBOTKBOT ) THEN
1786 JOB = 'off-chase bulges'
1787 ELSE
1788 JOB = 'chase bulges'
1789 END IF
1790 END IF
1791*
1792* Test if to exploit sparsity structure of
1793* orthogonal matrix U.
1794*
1795 KS = DIM1+DIM4-LNS/2*3
1796.NOT..OR..NE..OR. IF( BLK22 DIM1KS
1797.NE..OR. $ DIM4KS LSAME(JOB,'i.OR.')
1798 $ LSAME(JOB,'o.OR..LE.') LNS2 ) THEN
1799*
1800* Update the columns of H and Z.
1801*
1802.EQ..AND..AND..GT. IF( DIR2 WANTT LENCBUF0 ) THEN
1803 DO 250 INDX = 1, MIN(LKTOP-1,1+(NPROW-1)*NB), NB
1804.EQ. IF( MYCOLCSRC1 ) THEN
1805 CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
1806 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
1807 $ RSRC, CSRC1 )
1808.EQ. IF( MYROWRSRC ) THEN
1809 CALL DGEMM( 'no transpose',
1810 $ 'no transpose', HROWS, DIM1,
1811 $ LNWIN, ONE, WORK( IPW ), HROWS,
1812 $ WORK( IPU ), LNWIN, ZERO,
1813 $ WORK(IPW3), HROWS )
1814 CALL DLAMOV( 'all', HROWS, DIM1,
1815 $ WORK(IPW3), HROWS,
1816 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1817 END IF
1818 END IF
1819.EQ. IF( MYCOLCSRC4 ) THEN
1820 CALL INFOG2L( INDX, LKTOP+DIM1, DESCH,
1821 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1822 $ JLOC, RSRC, CSRC4 )
1823.EQ. IF( MYROWRSRC ) THEN
1824 CALL DGEMM( 'no transpose',
1825 $ 'no transpose', HROWS, DIM4,
1826 $ LNWIN, ONE, WORK( IPW ), HROWS,
1827 $ WORK( IPU+LNWIN*DIM1 ), LNWIN,
1828 $ ZERO, WORK(IPW3), HROWS )
1829 CALL DLAMOV( 'all', HROWS, DIM4,
1830 $ WORK(IPW3), HROWS,
1831 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1832 END IF
1833 END IF
1834 250 CONTINUE
1835 END IF
1836*
1837.EQ..AND..AND..GT. IF( DIR2 WANTZ LENCBUF0 ) THEN
1838 DO 260 INDX = 1, MIN(N,1+(NPROW-1)*NB), NB
1839.EQ. IF( MYCOLCSRC1 ) THEN
1840 CALL INFOG2L( INDX, LKTOP, DESCZ, NPROW,
1841 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
1842 $ RSRC, CSRC1 )
1843.EQ. IF( MYROWRSRC ) THEN
1844 CALL DGEMM( 'no transpose',
1845 $ 'no transpose', ZROWS, DIM1,
1846 $ LNWIN, ONE, WORK( IPW2 ),
1847 $ ZROWS, WORK( IPU ), LNWIN,
1848 $ ZERO, WORK(IPW3), ZROWS )
1849 CALL DLAMOV( 'all', ZROWS, DIM1,
1850 $ WORK(IPW3), ZROWS,
1851 $ Z((JLOC-1)*LLDZ+ILOC), LLDZ )
1852 END IF
1853 END IF
1854.EQ. IF( MYCOLCSRC4 ) THEN
1855 CALL INFOG2L( INDX, LKTOP+DIM1, DESCZ,
1856 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1857 $ JLOC, RSRC, CSRC4 )
1858.EQ. IF( MYROWRSRC ) THEN
1859 CALL DGEMM( 'no transpose',
1860 $ 'no transpose', ZROWS, DIM4,
1861 $ LNWIN, ONE, WORK( IPW2 ),
1862 $ ZROWS,
1863 $ WORK( IPU+LNWIN*DIM1 ), LNWIN,
1864 $ ZERO, WORK(IPW3), ZROWS )
1865 CALL DLAMOV( 'all', ZROWS, DIM4,
1866 $ WORK(IPW3), ZROWS,
1867 $ Z((JLOC-1)*LLDZ+ILOC), LLDZ )
1868 END IF
1869 END IF
1870 260 CONTINUE
1871 END IF
1872*
1873* Update the rows of H.
1874*
1875.EQ..AND..AND..GT. IF( DIR1 WANTT LENRBUF0 ) THEN
1876.LT. IF( LKBOTN ) THEN
1877.EQ..AND..EQ..AND. IF( MYROWRSRC1MYCOLCSRC4
1878.NE. $ MOD(LKBOT,NB)0 ) THEN
1879 INDX = LKBOT + 1
1880 CALL INFOG2L( LKTOP, INDX, DESCH, NPROW,
1881 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
1882 $ RSRC1, CSRC4 )
1883 CALL DGEMM( 'transpose', 'no transpose',
1884 $ DIM1, HCOLS, LNWIN, ONE, WORK(IPU),
1885 $ LNWIN, WORK( IPW1 ), LNWIN, ZERO,
1886 $ WORK(IPW3), DIM1 )
1887 CALL DLAMOV( 'all', DIM1, HCOLS,
1888 $ WORK(IPW3), DIM1,
1889 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1890 END IF
1891.EQ..AND..EQ..AND. IF( MYROWRSRC4MYCOLCSRC4
1892.NE. $ MOD(LKBOT,NB)0 ) THEN
1893 INDX = LKBOT + 1
1894 CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
1895 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1896 $ JLOC, RSRC4, CSRC4 )
1897 CALL DGEMM( 'transpose', 'no transpose',
1898 $ DIM4, HCOLS, LNWIN, ONE,
1899 $ WORK( IPU+DIM1*LNWIN ), LNWIN,
1900 $ WORK( IPW1), LNWIN, ZERO,
1901 $ WORK(IPW3), DIM4 )
1902 CALL DLAMOV( 'all', DIM4, HCOLS,
1903 $ WORK(IPW3), DIM4,
1904 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1905 END IF
1906 INDXS = ICEIL(LKBOT,NB)*NB + 1
1907.NE. IF( MOD(LKBOT,NB)0 ) THEN
1908 INDXE = MIN(N,INDXS+(NPCOL-2)*NB)
1909 ELSE
1910 INDXE = MIN(N,INDXS+(NPCOL-1)*NB)
1911 END IF
1912 DO 270 INDX = INDXS, INDXE, NB
1913.EQ. IF( MYROWRSRC1 ) THEN
1914 CALL INFOG2L( LKTOP, INDX, DESCH,
1915 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1916 $ JLOC, RSRC1, CSRC )
1917.EQ. IF( MYCOLCSRC ) THEN
1918 CALL DGEMM( 'transpose',
1919 $ 'no transpose', DIM1, HCOLS,
1920 $ LNWIN, ONE, WORK( IPU ), LNWIN,
1921 $ WORK( IPW1 ), LNWIN, ZERO,
1922 $ WORK(IPW3), DIM1 )
1923 CALL DLAMOV( 'all', DIM1, HCOLS,
1924 $ WORK(IPW3), DIM1,
1925 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1926 END IF
1927 END IF
1928.EQ. IF( MYROWRSRC4 ) THEN
1929 CALL INFOG2L( LKTOP+DIM1, INDX, DESCH,
1930 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1931 $ JLOC, RSRC4, CSRC )
1932.EQ. IF( MYCOLCSRC ) THEN
1933 CALL DGEMM( 'transpose',
1934 $ 'no transpose', DIM4, HCOLS,
1935 $ LNWIN, ONE,
1936 $ WORK( IPU+LNWIN*DIM1 ), LNWIN,
1937 $ WORK( IPW1 ), LNWIN,
1938 $ ZERO, WORK(IPW3), DIM4 )
1939 CALL DLAMOV( 'all', DIM4, HCOLS,
1940 $ WORK(IPW3), DIM4,
1941 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1942 END IF
1943 END IF
1944 270 CONTINUE
1945 END IF
1946 END IF
1947 ELSE
1948*
1949* Update the columns of H and Z.
1950*
1951* Compute H2*U21 + H1*U11 on the left side of the border.
1952*
1953.EQ..AND..AND..GT. IF( DIR2 WANTT LENCBUF0 ) THEN
1954 INDXE = MIN(LKTOP-1,1+(NPROW-1)*NB)
1955 DO 280 INDX = 1, INDXE, NB
1956.EQ. IF( MYCOLCSRC1 ) THEN
1957 CALL INFOG2L( INDX, LKTOP, DESCH, NPROW,
1958 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
1959 $ RSRC, CSRC1 )
1960.EQ. IF( MYROWRSRC ) THEN
1961 CALL DLAMOV( 'all', HROWS, KS,
1962 $ WORK( IPW+HROWS*DIM4), HROWS,
1963 $ WORK(IPW3), HROWS )
1964 CALL DTRMM( 'right', 'upper',
1965 $ 'no transpose',
1966 $ 'non-unit', HROWS, KS, ONE,
1967 $ WORK( IPU+DIM4 ), LNWIN,
1968 $ WORK(IPW3), HROWS )
1969 CALL DGEMM( 'no transpose',
1970 $ 'no transpose', HROWS, KS, DIM4,
1971 $ ONE, WORK( IPW ), HROWS,
1972 $ WORK( IPU ), LNWIN, ONE,
1973 $ WORK(IPW3), HROWS )
1974 CALL DLAMOV( 'all', HROWS, KS,
1975 $ WORK(IPW3), HROWS,
1976 $ H((JLOC-1)*LLDH+ILOC), LLDH )
1977 END IF
1978 END IF
1979*
1980* Compute H1*U12 + H2*U22 on the right side of
1981* the border.
1982*
1983.EQ. IF( MYCOLCSRC4 ) THEN
1984 CALL INFOG2L( INDX, LKTOP+DIM1, DESCH,
1985 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
1986 $ JLOC, RSRC, CSRC4 )
1987.EQ. IF( MYROWRSRC ) THEN
1988 CALL DLAMOV( 'all', HROWS, DIM4,
1989 $ WORK(IPW), HROWS, WORK( IPW3 ),
1990 $ HROWS )
1991 CALL DTRMM( 'right', 'lower',
1992 $ 'no transpose',
1993 $ 'non-unit', HROWS, DIM4, ONE,
1994 $ WORK( IPU+LNWIN*KS ), LNWIN,
1995 $ WORK( IPW3 ), HROWS )
1996 CALL DGEMM( 'no transpose',
1997 $ 'no transpose', HROWS, DIM4, KS,
1998 $ ONE, WORK( IPW+HROWS*DIM4),
1999 $ HROWS,
2000 $ WORK( IPU+LNWIN*KS+DIM4 ), LNWIN,
2001 $ ONE, WORK( IPW3 ), HROWS )
2002 CALL DLAMOV( 'all', HROWS, DIM4,
2003 $ WORK(IPW3), HROWS,
2004 $ H((JLOC-1)*LLDH+ILOC), LLDH )
2005 END IF
2006 END IF
2007 280 CONTINUE
2008 END IF
2009*
2010.EQ..AND..AND..GT. IF( DIR2 WANTZ LENCBUF0 ) THEN
2011*
2012* Compute Z2*U21 + Z1*U11 on the left side
2013* of border.
2014*
2015 INDXE = MIN(N,1+(NPROW-1)*NB)
2016 DO 290 INDX = 1, INDXE, NB
2017.EQ. IF( MYCOLCSRC1 ) THEN
2018 CALL INFOG2L( INDX, I, DESCZ, NPROW,
2019 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
2020 $ RSRC, CSRC1 )
2021.EQ. IF( MYROWRSRC ) THEN
2022 CALL DLAMOV( 'all', ZROWS, KS,
2023 $ WORK( IPW2+ZROWS*DIM4),
2024 $ ZROWS, WORK(IPW3), ZROWS )
2025 CALL DTRMM( 'right', 'upper',
2026 $ 'no transpose',
2027 $ 'non-unit', ZROWS, KS, ONE,
2028 $ WORK( IPU+DIM4 ), LNWIN,
2029 $ WORK(IPW3), ZROWS )
2030 CALL DGEMM( 'no transpose',
2031 $ 'no transpose', ZROWS, KS,
2032 $ DIM4, ONE, WORK( IPW2 ),
2033 $ ZROWS, WORK( IPU ), LNWIN,
2034 $ ONE, WORK(IPW3), ZROWS )
2035 CALL DLAMOV( 'all', ZROWS, KS,
2036 $ WORK(IPW3), ZROWS,
2037 $ Z((JLOC-1)*LLDZ+ILOC), LLDZ )
2038 END IF
2039 END IF
2040*
2041* Compute Z1*U12 + Z2*U22 on the right side
2042* of border.
2043*
2044.EQ. IF( MYCOLCSRC4 ) THEN
2045 CALL INFOG2L( INDX, I+DIM1, DESCZ,
2046 $ NPROW, NPCOL, MYROW, MYCOL, ILOC,
2047 $ JLOC, RSRC, CSRC4 )
2048.EQ. IF( MYROWRSRC ) THEN
2049 CALL DLAMOV( 'all', ZROWS, DIM4,
2050 $ WORK(IPW2), ZROWS,
2051 $ WORK( IPW3 ), ZROWS )
2052 CALL DTRMM( 'right', 'lower',
2053 $ 'no transpose',
2054 $ 'non-unit', ZROWS, DIM4,
2055 $ ONE, WORK( IPU+LNWIN*KS ),
2056 $ LNWIN, WORK( IPW3 ), ZROWS )
2057 CALL DGEMM( 'no transpose',
2058 $ 'no transpose', ZROWS, DIM4,
2059 $ KS, ONE,
2060 $ WORK( IPW2+ZROWS*(DIM4)),
2061 $ ZROWS,
2062 $ WORK( IPU+LNWIN*KS+DIM4 ),
2063 $ LNWIN, ONE, WORK( IPW3 ),
2064 $ ZROWS )
2065 CALL DLAMOV( 'all', ZROWS, DIM4,
2066 $ WORK(IPW3), ZROWS,
2067 $ Z((JLOC-1)*LLDZ+ILOC), LLDZ )
2068 END IF
2069 END IF
2070 290 CONTINUE
2071 END IF
2072*
2073.EQ..AND..AND..GT. IF( DIR1 WANTT LENRBUF0) THEN
2074.LT. IF ( LKBOTN ) THEN
2075*
2076* Compute U21**T*H2 + U11**T*H1 on the upper
2077* side of the border.
2078*
2079.EQ..AND..EQ..AND. IF( MYROWRSRC1MYCOLCSRC4
2080.NE. $ MOD(LKBOT,NB)0 ) THEN
2081 INDX = LKBOT + 1
2082 CALL INFOG2L( LKTOP, INDX, DESCH, NPROW,
2083 $ NPCOL, MYROW, MYCOL, ILOC, JLOC,
2084 $ RSRC1, CSRC4 )
2085 CALL DLAMOV( 'all', KS, HCOLS,
2086 $ WORK( IPW1+DIM4 ), LNWIN,
2087 $ WORK(IPW3), KS )
2088 CALL DTRMM( 'left', 'upper', 'Transpose',
2089 $ 'Non-unit', ks, hcols, one,
2090 $ work( ipu+dim4 ), lnwin,
2091 $ work(ipw3), ks )
2092 CALL dgemm( 'Transpose', 'No transpose',
2093 $ ks, hcols, dim4, one, work(ipu),
2094 $ lnwin, work(ipw1), lnwin,
2095 $ one, work(ipw3), ks )
2096 CALL dlamov( 'All', ks, hcols,
2097 $ work(ipw3), ks,
2098 $ h((jloc-1)*lldh+iloc), lldh )
2099 END IF
2100*
2101* Compute U12**T*H1 + U22**T*H2 one the lower
2102* side of the border.
2103*
2104 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4.AND.
2105 $ mod(lkbot,nb).NE.0 ) THEN
2106 indx = lkbot + 1
2107 CALL infog2l( lktop+dim1, indx, desch,
2108 $ nprow, npcol, myrow, mycol, iloc,
2109 $ jloc, rsrc4, csrc4 )
2110 CALL dlamov( 'All', dim4, hcols,
2111 $ work( ipw1 ), lnwin,
2112 $ work( ipw3 ), dim4 )
2113 CALL dtrmm( 'Left', 'Lower', 'Transpose',
2114 $ 'Non-unit', dim4, hcols, one,
2115 $ work( ipu+lnwin*ks ), lnwin,
2116 $ work( ipw3 ), dim4 )
2117 CALL dgemm( 'Transpose', 'No Transpose',
2118 $ dim4, hcols, ks, one,
2119 $ work( ipu+lnwin*ks+dim4 ), lnwin,
2120 $ work( ipw1+dim1 ), lnwin,
2121 $ one, work( ipw3), dim4 )
2122 CALL dlamov( 'All', dim4, hcols,
2123 $ work(ipw3), dim4,
2124 $ h((jloc-1)*lldh+iloc), lldh )
2125 END IF
2126*
2127* Compute U21**T*H2 + U11**T*H1 on upper side
2128* on border.
2129*
2130 indxs = iceil(lkbot,nb)*nb+1
2131 IF( mod(lkbot,nb).NE.0 ) THEN
2132 indxe = min(n,indxs+(npcol-2)*nb)
2133 ELSE
2134 indxe = min(n,indxs+(npcol-1)*nb)
2135 END IF
2136 DO 300 indx = indxs, indxe, nb
2137 IF( myrow.EQ.rsrc1 ) THEN
2138 CALL infog2l( lktop, indx, desch,
2139 $ nprow, npcol, myrow, mycol, iloc,
2140 $ jloc, rsrc1, csrc )
2141 IF( mycol.EQ.csrc ) THEN
2142 CALL dlamov( 'All', ks, hcols,
2143 $ work( ipw1+dim4 ), lnwin,
2144 $ work(ipw3), ks )
2145 CALL dtrmm( 'Left', 'Upper',
2146 $ 'Transpose', 'Non-unit',
2147 $ ks, hcols, one,
2148 $ work( ipu+dim4 ), lnwin,
2149 $ work(ipw3), ks )
2150 CALL dgemm( 'Transpose',
2151 $ 'No transpose', ks, hcols,
2152 $ dim4, one, work(ipu), lnwin,
2153 $ work(ipw1), lnwin, one,
2154 $ work(ipw3), ks )
2155 CALL dlamov( 'All', ks, hcols,
2156 $ work(ipw3), ks,
2157 $ h((jloc-1)*lldh+iloc), lldh )
2158 END IF
2159 END IF
2160*
2161* Compute U12**T*H1 + U22**T*H2 on lower
2162* side of border.
2163*
2164 IF( myrow.EQ.rsrc4 ) THEN
2165 CALL infog2l( lktop+dim1, indx, desch,
2166 $ nprow, npcol, myrow, mycol, iloc,
2167 $ jloc, rsrc4, csrc )
2168 IF( mycol.EQ.csrc ) THEN
2169 CALL dlamov( 'All', dim4, hcols,
2170 $ work( ipw1 ), lnwin,
2171 $ work( ipw3 ), dim4 )
2172 CALL dtrmm( 'Left', 'Lower',
2173 $ 'Transpose','Non-unit',
2174 $ dim4, hcols, one,
2175 $ work( ipu+lnwin*ks ), lnwin,
2176 $ work( ipw3 ), dim4 )
2177 CALL dgemm( 'Transpose',
2178 $ 'No Transpose', dim4, hcols,
2179 $ ks, one,
2180 $ work( ipu+lnwin*ks+dim4 ),
2181 $ lnwin, work( ipw1+dim1 ),
2182 $ lnwin, one, work( ipw3),
2183 $ dim4 )
2184 CALL dlamov( 'All', dim4, hcols,
2185 $ work(ipw3), dim4,
2186 $ h((jloc-1)*lldh+iloc), lldh )
2187 END IF
2188 END IF
2189 300 CONTINUE
2190 END IF
2191 END IF
2192 END IF
2193*
2194* Update window information - mark processed windows are
2195* completed.
2196*
2197 IF( dir.EQ.2 ) THEN
2198 IF( lkbot.EQ.kbot ) THEN
2199 lktop = kbot+1
2200 lkbot = kbot+1
2201 iwork( 1+(win-1)*5 ) = lktop
2202 iwork( 2+(win-1)*5 ) = lkbot
2203 ELSE
2204 lktop = min( lktop + lnwin - lchain,
2205 $ min( kbot, iceil( lkbot, nb )*nb ) -
2206 $ lchain + 1 )
2207 iwork( 1+(win-1)*5 ) = lktop
2208 lkbot = min( max( lkbot + lnwin - lchain,
2209 $ lktop + nwin - 1), min( kbot,
2210 $ iceil( lkbot, nb )*nb ) )
2211 iwork( 2+(win-1)*5 ) = lkbot
2212 END IF
2213 IF( iwork( 5+(win-1)*5 ).EQ.1 )
2214 $ iwork( 5+(win-1)*5 ) = 2
2215 iwork( 3+(win-1)*5 ) = rsrc4
2216 iwork( 4+(win-1)*5 ) = csrc4
2217 END IF
2218*
2219* If nothing was done for the WIN:th window, all
2220* processors come here and consider the next one
2221* instead.
2222*
2223 245 CONTINUE
2224 240 CONTINUE
2225 190 CONTINUE
2226 150 CONTINUE
2227 140 CONTINUE
2228*
2229* Chased off bulges from first window?
2230*
2231 IF( nprocs.GT.1 )
2232 $ CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1,
2233 $ -1, -1, -1, -1, -1 )
2234*
2235* If the bulge was chasen off from first window it is removed.
2236*
2237 IF( ichoff.GT.0 ) THEN
2238 DO 198 win = 2, anmwin
2239 iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )
2240 iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )
2241 iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )
2242 iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )
2243 198 CONTINUE
2244 anmwin = anmwin - 1
2245 ipiw = 6+(anmwin-1)*5
2246 END IF
2247*
2248* If we have no more windows, return.
2249*
2250 IF( anmwin.LT.1 ) RETURN
2251*
2252* Check for any more windows to bring over the border.
2253*
2254 winfin = 0
2255 DO 199 win = 1, anmwin
2256 winfin = winfin+iwork( 5+(win-1)*5 )
2257 199 CONTINUE
2258 IF( winfin.LT.2*anmwin ) GO TO 137
2259*
2260* Zero out process mark for each window - this is legal now when
2261* the process starts over with local bulge-chasing etc.
2262*
2263 DO 201 win = 1, anmwin
2264 iwork( 5+(win-1)*5 ) = 0
2265 201 CONTINUE
2266*
2267 END IF
2268*
2269* Go back to local bulge-chase and see if there is more work to do.
2270*
2271 GO TO 20
2272*
2273* End of PDLAQR5
2274*
subroutine dlaqr6(job, wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
Definition dlaqr6.f:4
subroutine dlabad(small, large)
DLABAD
Definition dlabad.f:74
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
double precision function dlange(norm, m, n, a, lda, work)
DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition dlange.f:114
subroutine dlaqr1(n, h, ldh, sr1, si1, sr2, si2, v)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition dlaqr1.f:121
subroutine dlarfg(n, alpha, x, incx, tau)
DLARFG generates an elementary reflector (Householder matrix).
Definition dlarfg.f:106
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
subroutine dtrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRMM
Definition dtrmm.f:177
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function indxg2l(indxglob, nb, iproc, isrcproc, nprocs)
Definition indxg2l.f:2
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
#define swap(a, b, tmp)
Definition macros.h:40
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
integer function indxg2p(indxglob, nb, iproc, isrcproc, nprocs)
Definition mpi.f:947
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pdelset(a, ia, ja, desca, alpha)
Definition pdelset.f:2
integer function pilaenvx(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pilaenvx.f:3