OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
zlaqr5.f
Go to the documentation of this file.
1*> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZLAQR5 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
22* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
23* WV, LDWV, NH, WH, LDWH )
24*
25* .. Scalar Arguments ..
26* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28* LOGICAL WANTT, WANTZ
29* ..
30* .. Array Arguments ..
31* COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
32* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
33* ..
34*
35*
36*> \par Purpose:
37* =============
38*>
39*> \verbatim
40*>
41*> ZLAQR5, called by ZLAQR0, performs a
42*> single small-bulge multi-shift QR sweep.
43*> \endverbatim
44*
45* Arguments:
46* ==========
47*
48*> \param[in] WANTT
49*> \verbatim
50*> WANTT is LOGICAL
51*> WANTT = .true. if the triangular Schur factor
52*> is being computed. WANTT is set to .false. otherwise.
53*> \endverbatim
54*>
55*> \param[in] WANTZ
56*> \verbatim
57*> WANTZ is LOGICAL
58*> WANTZ = .true. if the unitary Schur factor is being
59*> computed. WANTZ is set to .false. otherwise.
60*> \endverbatim
61*>
62*> \param[in] KACC22
63*> \verbatim
64*> KACC22 is INTEGER with value 0, 1, or 2.
65*> Specifies the computation mode of far-from-diagonal
66*> orthogonal updates.
67*> = 0: ZLAQR5 does not accumulate reflections and does not
68*> use matrix-matrix multiply to update far-from-diagonal
69*> matrix entries.
70*> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
71*> multiply to update the far-from-diagonal matrix entries.
72*> = 2: Same as KACC22 = 1. This option used to enable exploiting
73*> the 2-by-2 structure during matrix multiplications, but
74*> this is no longer supported.
75*> \endverbatim
76*>
77*> \param[in] N
78*> \verbatim
79*> N is INTEGER
80*> N is the order of the Hessenberg matrix H upon which this
81*> subroutine operates.
82*> \endverbatim
83*>
84*> \param[in] KTOP
85*> \verbatim
86*> KTOP is INTEGER
87*> \endverbatim
88*>
89*> \param[in] KBOT
90*> \verbatim
91*> KBOT is INTEGER
92*> These are the first and last rows and columns of an
93*> isolated diagonal block upon which the QR sweep is to be
94*> applied. It is assumed without a check that
95*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
96*> and
97*> either KBOT = N or H(KBOT+1,KBOT) = 0.
98*> \endverbatim
99*>
100*> \param[in] NSHFTS
101*> \verbatim
102*> NSHFTS is INTEGER
103*> NSHFTS gives the number of simultaneous shifts. NSHFTS
104*> must be positive and even.
105*> \endverbatim
106*>
107*> \param[in,out] S
108*> \verbatim
109*> S is COMPLEX*16 array, dimension (NSHFTS)
110*> S contains the shifts of origin that define the multi-
111*> shift QR sweep. On output S may be reordered.
112*> \endverbatim
113*>
114*> \param[in,out] H
115*> \verbatim
116*> H is COMPLEX*16 array, dimension (LDH,N)
117*> On input H contains a Hessenberg matrix. On output a
118*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
119*> to the isolated diagonal block in rows and columns KTOP
120*> through KBOT.
121*> \endverbatim
122*>
123*> \param[in] LDH
124*> \verbatim
125*> LDH is INTEGER
126*> LDH is the leading dimension of H just as declared in the
127*> calling procedure. LDH >= MAX(1,N).
128*> \endverbatim
129*>
130*> \param[in] ILOZ
131*> \verbatim
132*> ILOZ is INTEGER
133*> \endverbatim
134*>
135*> \param[in] IHIZ
136*> \verbatim
137*> IHIZ is INTEGER
138*> Specify the rows of Z to which transformations must be
139*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
140*> \endverbatim
141*>
142*> \param[in,out] Z
143*> \verbatim
144*> Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
145*> If WANTZ = .TRUE., then the QR Sweep unitary
146*> similarity transformation is accumulated into
147*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
148*> If WANTZ = .FALSE., then Z is unreferenced.
149*> \endverbatim
150*>
151*> \param[in] LDZ
152*> \verbatim
153*> LDZ is INTEGER
154*> LDA is the leading dimension of Z just as declared in
155*> the calling procedure. LDZ >= N.
156*> \endverbatim
157*>
158*> \param[out] V
159*> \verbatim
160*> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
161*> \endverbatim
162*>
163*> \param[in] LDV
164*> \verbatim
165*> LDV is INTEGER
166*> LDV is the leading dimension of V as declared in the
167*> calling procedure. LDV >= 3.
168*> \endverbatim
169*>
170*> \param[out] U
171*> \verbatim
172*> U is COMPLEX*16 array, dimension (LDU,2*NSHFTS)
173*> \endverbatim
174*>
175*> \param[in] LDU
176*> \verbatim
177*> LDU is INTEGER
178*> LDU is the leading dimension of U just as declared in the
179*> in the calling subroutine. LDU >= 2*NSHFTS.
180*> \endverbatim
181*>
182*> \param[in] NV
183*> \verbatim
184*> NV is INTEGER
185*> NV is the number of rows in WV agailable for workspace.
186*> NV >= 1.
187*> \endverbatim
188*>
189*> \param[out] WV
190*> \verbatim
191*> WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS)
192*> \endverbatim
193*>
194*> \param[in] LDWV
195*> \verbatim
196*> LDWV is INTEGER
197*> LDWV is the leading dimension of WV as declared in the
198*> in the calling subroutine. LDWV >= NV.
199*> \endverbatim
200*
201*> \param[in] NH
202*> \verbatim
203*> NH is INTEGER
204*> NH is the number of columns in array WH available for
205*> workspace. NH >= 1.
206*> \endverbatim
207*>
208*> \param[out] WH
209*> \verbatim
210*> WH is COMPLEX*16 array, dimension (LDWH,NH)
211*> \endverbatim
212*>
213*> \param[in] LDWH
214*> \verbatim
215*> LDWH is INTEGER
216*> Leading dimension of WH just as declared in the
217*> calling procedure. LDWH >= 2*NSHFTS.
218*> \endverbatim
219*>
220* Authors:
221* ========
222*
223*> \author Univ. of Tennessee
224*> \author Univ. of California Berkeley
225*> \author Univ. of Colorado Denver
226*> \author NAG Ltd.
227*
228*> \ingroup complex16OTHERauxiliary
229*
230*> \par Contributors:
231* ==================
232*>
233*> Karen Braman and Ralph Byers, Department of Mathematics,
234*> University of Kansas, USA
235*>
236*> Lars Karlsson, Daniel Kressner, and Bruno Lang
237*>
238*> Thijs Steel, Department of Computer science,
239*> KU Leuven, Belgium
240*
241*> \par References:
242* ================
243*>
244*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
245*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
246*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
247*> 929--947, 2002.
248*>
249*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
250*> chains of bulges in multishift QR algorithms.
251*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
252*>
253* =====================================================================
254 SUBROUTINE zlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
255 $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
256 $ WV, LDWV, NH, WH, LDWH )
257 IMPLICIT NONE
258*
259* -- LAPACK auxiliary routine --
260* -- LAPACK is a software package provided by Univ. of Tennessee, --
261* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
262*
263* .. Scalar Arguments ..
264 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
265 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
266 LOGICAL WANTT, WANTZ
267* ..
268* .. Array Arguments ..
269 COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
270 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
271* ..
272*
273* ================================================================
274* .. Parameters ..
275 COMPLEX*16 ZERO, ONE
276 PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
277 $ one = ( 1.0d0, 0.0d0 ) )
278 DOUBLE PRECISION RZERO, RONE
279 PARAMETER ( RZERO = 0.0d0, rone = 1.0d0 )
280* ..
281* .. Local Scalars ..
282 COMPLEX*16 ALPHA, BETA, CDUM, REFSUM
283 DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
284 $ smlnum, tst1, tst2, ulp
285 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
286 $ JROW, JTOP, K, K1, KDU, KMS, KRCOL,
287 $ m, m22, mbot, mtop, nbmps, ndcol,
288 $ ns, nu
289 LOGICAL ACCUM, BMP22
290* ..
291* .. External Functions ..
292 DOUBLE PRECISION DLAMCH
293 EXTERNAL DLAMCH
294* ..
295* .. Intrinsic Functions ..
296*
297 INTRINSIC abs, dble, dconjg, dimag, max, min, mod
298* ..
299* .. Local Arrays ..
300 COMPLEX*16 VT( 3 )
301* ..
302* .. External Subroutines ..
303 EXTERNAL dlabad, zgemm, zlacpy, zlaqr1, zlarfg, zlaset,
304 $ ztrmm
305* ..
306* .. Statement Functions ..
307 DOUBLE PRECISION CABS1
308* ..
309* .. Statement Function definitions ..
310 cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
311* ..
312* .. Executable Statements ..
313*
314* ==== If there are no shifts, then there is nothing to do. ====
315*
316 IF( nshfts.LT.2 )
317 $ RETURN
318*
319* ==== If the active block is empty or 1-by-1, then there
320* . is nothing to do. ====
321*
322 IF( ktop.GE.kbot )
323 $ RETURN
324*
325* ==== NSHFTS is supposed to be even, but if it is odd,
326* . then simply reduce it by one. ====
327*
328 ns = nshfts - mod( nshfts, 2 )
329*
330* ==== Machine constants for deflation ====
331*
332 safmin = dlamch( 'SAFE MINIMUM' )
333 safmax = rone / safmin
334 CALL dlabad( safmin, safmax )
335 ulp = dlamch( 'PRECISION' )
336 smlnum = safmin*( dble( n ) / ulp )
337*
338* ==== Use accumulated reflections to update far-from-diagonal
339* . entries ? ====
340*
341 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
342*
343* ==== clear trash ====
344*
345 IF( ktop+2.LE.kbot )
346 $ h( ktop+2, ktop ) = zero
347*
348* ==== NBMPS = number of 2-shift bulges in the chain ====
349*
350 nbmps = ns / 2
351*
352* ==== KDU = width of slab ====
353*
354 kdu = 4*nbmps
355*
356* ==== Create and chase chains of NBMPS bulges ====
357*
358 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
359*
360* JTOP = Index from which updates from the right start.
361*
362 IF( accum ) THEN
363 jtop = max( ktop, incol )
364 ELSE IF( wantt ) THEN
365 jtop = 1
366 ELSE
367 jtop = ktop
368 END IF
369*
370 ndcol = incol + kdu
371 IF( accum )
372 $ CALL zlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
373*
374* ==== Near-the-diagonal bulge chase. The following loop
375* . performs the near-the-diagonal part of a small bulge
376* . multi-shift QR sweep. Each 4*NBMPS column diagonal
377* . chunk extends from column INCOL to column NDCOL
378* . (including both column INCOL and column NDCOL). The
379* . following loop chases a 2*NBMPS+1 column long chain of
380* . NBMPS bulges 2*NBMPS columns to the right. (INCOL
381* . may be less than KTOP and and NDCOL may be greater than
382* . KBOT indicating phantom columns from which to chase
383* . bulges before they are actually introduced or to which
384* . to chase bulges beyond column KBOT.) ====
385*
386 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
387*
388* ==== Bulges number MTOP to MBOT are active double implicit
389* . shift bulges. There may or may not also be small
390* . 2-by-2 bulge, if there is room. The inactive bulges
391* . (if any) must wait until the active bulges have moved
392* . down the diagonal to make room. The phantom matrix
393* . paradigm described above helps keep track. ====
394*
395 mtop = max( 1, ( ktop-krcol ) / 2+1 )
396 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
397 m22 = mbot + 1
398 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
399 $ ( kbot-2 )
400*
401* ==== Generate reflections to chase the chain right
402* . one column. (The minimum value of K is KTOP-1.) ====
403*
404 IF ( bmp22 ) THEN
405*
406* ==== Special case: 2-by-2 reflection at bottom treated
407* . separately ====
408*
409 k = krcol + 2*( m22-1 )
410 IF( k.EQ.ktop-1 ) THEN
411 CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
412 $ s( 2*m22 ), v( 1, m22 ) )
413 beta = v( 1, m22 )
414 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
415 ELSE
416 beta = h( k+1, k )
417 v( 2, m22 ) = h( k+2, k )
418 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
419 h( k+1, k ) = beta
420 h( k+2, k ) = zero
421 END IF
422
423*
424* ==== Perform update from right within
425* . computational window. ====
426*
427 DO 30 j = jtop, min( kbot, k+3 )
428 refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
429 $ h( j, k+2 ) )
430 h( j, k+1 ) = h( j, k+1 ) - refsum
431 h( j, k+2 ) = h( j, k+2 ) -
432 $ refsum*dconjg( v( 2, m22 ) )
433 30 CONTINUE
434*
435* ==== Perform update from left within
436* . computational window. ====
437*
438 IF( accum ) THEN
439 jbot = min( ndcol, kbot )
440 ELSE IF( wantt ) THEN
441 jbot = n
442 ELSE
443 jbot = kbot
444 END IF
445 DO 40 j = k+1, jbot
446 refsum = dconjg( v( 1, m22 ) )*
447 $ ( h( k+1, j )+dconjg( v( 2, m22 ) )*
448 $ h( k+2, j ) )
449 h( k+1, j ) = h( k+1, j ) - refsum
450 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
451 40 CONTINUE
452*
453* ==== The following convergence test requires that
454* . the tradition small-compared-to-nearby-diagonals
455* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
456* . criteria both be satisfied. The latter improves
457* . accuracy in some examples. Falling back on an
458* . alternate convergence criterion when TST1 or TST2
459* . is zero (as done here) is traditional but probably
460* . unnecessary. ====
461*
462 IF( k.GE.ktop ) THEN
463 IF( h( k+1, k ).NE.zero ) THEN
464 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
465 IF( tst1.EQ.rzero ) THEN
466 IF( k.GE.ktop+1 )
467 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
468 IF( k.GE.ktop+2 )
469 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
470 IF( k.GE.ktop+3 )
471 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
472 IF( k.LE.kbot-2 )
473 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
474 IF( k.LE.kbot-3 )
475 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
476 IF( k.LE.kbot-4 )
477 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
478 END IF
479 IF( cabs1( h( k+1, k ) )
480 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
481 h12 = max( cabs1( h( k+1, k ) ),
482 $ cabs1( h( k, k+1 ) ) )
483 h21 = min( cabs1( h( k+1, k ) ),
484 $ cabs1( h( k, k+1 ) ) )
485 h11 = max( cabs1( h( k+1, k+1 ) ),
486 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
487 h22 = min( cabs1( h( k+1, k+1 ) ),
488 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
489 scl = h11 + h12
490 tst2 = h22*( h11 / scl )
491*
492 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
493 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
494 END IF
495 END IF
496 END IF
497*
498* ==== Accumulate orthogonal transformations. ====
499*
500 IF( accum ) THEN
501 kms = k - incol
502 DO 50 j = max( 1, ktop-incol ), kdu
503 refsum = v( 1, m22 )*( u( j, kms+1 )+
504 $ v( 2, m22 )*u( j, kms+2 ) )
505 u( j, kms+1 ) = u( j, kms+1 ) - refsum
506 u( j, kms+2 ) = u( j, kms+2 ) -
507 $ refsum*dconjg( v( 2, m22 ) )
508 50 CONTINUE
509 ELSE IF( wantz ) THEN
510 DO 60 j = iloz, ihiz
511 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
512 $ z( j, k+2 ) )
513 z( j, k+1 ) = z( j, k+1 ) - refsum
514 z( j, k+2 ) = z( j, k+2 ) -
515 $ refsum*dconjg( v( 2, m22 ) )
516 60 CONTINUE
517 END IF
518 END IF
519*
520* ==== Normal case: Chain of 3-by-3 reflections ====
521*
522 DO 80 m = mbot, mtop, -1
523 k = krcol + 2*( m-1 )
524 IF( k.EQ.ktop-1 ) THEN
525 CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
526 $ s( 2*m ), v( 1, m ) )
527 alpha = v( 1, m )
528 CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
529 ELSE
530*
531* ==== Perform delayed transformation of row below
532* . Mth bulge. Exploit fact that first two elements
533* . of row are actually zero. ====
534*
535 refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
536 h( k+3, k ) = -refsum
537 h( k+3, k+1 ) = -refsum*dconjg( v( 2, m ) )
538 h( k+3, k+2 ) = h( k+3, k+2 ) -
539 $ refsum*dconjg( v( 3, m ) )
540*
541* ==== Calculate reflection to move
542* . Mth bulge one step. ====
543*
544 beta = h( k+1, k )
545 v( 2, m ) = h( k+2, k )
546 v( 3, m ) = h( k+3, k )
547 CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
548*
549* ==== A Bulge may collapse because of vigilant
550* . deflation or destructive underflow. In the
551* . underflow case, try the two-small-subdiagonals
552* . trick to try to reinflate the bulge. ====
553*
554 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
555 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
556*
557* ==== Typical case: not collapsed (yet). ====
558*
559 h( k+1, k ) = beta
560 h( k+2, k ) = zero
561 h( k+3, k ) = zero
562 ELSE
563*
564* ==== Atypical case: collapsed. Attempt to
565* . reintroduce ignoring H(K+1,K) and H(K+2,K).
566* . If the fill resulting from the new
567* . reflector is too large, then abandon it.
568* . Otherwise, use the new one. ====
569*
570 CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
571 $ s( 2*m ), vt )
572 alpha = vt( 1 )
573 CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
574 refsum = dconjg( vt( 1 ) )*
575 $ ( h( k+1, k )+dconjg( vt( 2 ) )*
576 $ h( k+2, k ) )
577*
578 IF( cabs1( h( k+2, k )-refsum*vt( 2 ) )+
579 $ cabs1( refsum*vt( 3 ) ).GT.ulp*
580 $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
581 $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
582*
583* ==== Starting a new bulge here would
584* . create non-negligible fill. Use
585* . the old one with trepidation. ====
586*
587 h( k+1, k ) = beta
588 h( k+2, k ) = zero
589 h( k+3, k ) = zero
590 ELSE
591*
592* ==== Starting a new bulge here would
593* . create only negligible fill.
594* . Replace the old reflector with
595* . the new one. ====
596*
597 h( k+1, k ) = h( k+1, k ) - refsum
598 h( k+2, k ) = zero
599 h( k+3, k ) = zero
600 v( 1, m ) = vt( 1 )
601 v( 2, m ) = vt( 2 )
602 v( 3, m ) = vt( 3 )
603 END IF
604 END IF
605 END IF
606*
607* ==== Apply reflection from the right and
608* . the first column of update from the left.
609* . These updates are required for the vigilant
610* . deflation check. We still delay most of the
611* . updates from the left for efficiency. ====
612*
613 DO 70 j = jtop, min( kbot, k+3 )
614 refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
615 $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
616 h( j, k+1 ) = h( j, k+1 ) - refsum
617 h( j, k+2 ) = h( j, k+2 ) -
618 $ refsum*dconjg( v( 2, m ) )
619 h( j, k+3 ) = h( j, k+3 ) -
620 $ refsum*dconjg( v( 3, m ) )
621 70 CONTINUE
622*
623* ==== Perform update from left for subsequent
624* . column. ====
625*
626 refsum = dconjg( v( 1, m ) )*( h( k+1, k+1 )
627 $ +dconjg( v( 2, m ) )*h( k+2, k+1 )
628 $ +dconjg( v( 3, m ) )*h( k+3, k+1 ) )
629 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum
630 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*v( 2, m )
631 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*v( 3, m )
632*
633* ==== The following convergence test requires that
634* . the tradition small-compared-to-nearby-diagonals
635* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
636* . criteria both be satisfied. The latter improves
637* . accuracy in some examples. Falling back on an
638* . alternate convergence criterion when TST1 or TST2
639* . is zero (as done here) is traditional but probably
640* . unnecessary. ====
641*
642 IF( k.LT.ktop)
643 $ cycle
644 IF( h( k+1, k ).NE.zero ) THEN
645 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
646 IF( tst1.EQ.rzero ) THEN
647 IF( k.GE.ktop+1 )
648 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
649 IF( k.GE.ktop+2 )
650 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
651 IF( k.GE.ktop+3 )
652 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
653 IF( k.LE.kbot-2 )
654 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
655 IF( k.LE.kbot-3 )
656 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
657 IF( k.LE.kbot-4 )
658 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
659 END IF
660 IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
661 $ THEN
662 h12 = max( cabs1( h( k+1, k ) ),
663 $ cabs1( h( k, k+1 ) ) )
664 h21 = min( cabs1( h( k+1, k ) ),
665 $ cabs1( h( k, k+1 ) ) )
666 h11 = max( cabs1( h( k+1, k+1 ) ),
667 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
668 h22 = min( cabs1( h( k+1, k+1 ) ),
669 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
670 scl = h11 + h12
671 tst2 = h22*( h11 / scl )
672*
673 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
674 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
675 END IF
676 END IF
677 80 CONTINUE
678*
679* ==== Multiply H by reflections from the left ====
680*
681 IF( accum ) THEN
682 jbot = min( ndcol, kbot )
683 ELSE IF( wantt ) THEN
684 jbot = n
685 ELSE
686 jbot = kbot
687 END IF
688*
689 DO 100 m = mbot, mtop, -1
690 k = krcol + 2*( m-1 )
691 DO 90 j = max( ktop, krcol + 2*m ), jbot
692 refsum = dconjg( v( 1, m ) )*
693 $ ( h( k+1, j )+dconjg( v( 2, m ) )*
694 $ h( k+2, j )+dconjg( v( 3, m ) )*h( k+3, j ) )
695 h( k+1, j ) = h( k+1, j ) - refsum
696 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
697 h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
698 90 CONTINUE
699 100 CONTINUE
700*
701* ==== Accumulate orthogonal transformations. ====
702*
703 IF( accum ) THEN
704*
705* ==== Accumulate U. (If needed, update Z later
706* . with an efficient matrix-matrix
707* . multiply.) ====
708*
709 DO 120 m = mbot, mtop, -1
710 k = krcol + 2*( m-1 )
711 kms = k - incol
712 i2 = max( 1, ktop-incol )
713 i2 = max( i2, kms-(krcol-incol)+1 )
714 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
715 DO 110 j = i2, i4
716 refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
717 $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
718 u( j, kms+1 ) = u( j, kms+1 ) - refsum
719 u( j, kms+2 ) = u( j, kms+2 ) -
720 $ refsum*dconjg( v( 2, m ) )
721 u( j, kms+3 ) = u( j, kms+3 ) -
722 $ refsum*dconjg( v( 3, m ) )
723 110 CONTINUE
724 120 CONTINUE
725 ELSE IF( wantz ) THEN
726*
727* ==== U is not accumulated, so update Z
728* . now by multiplying by reflections
729* . from the right. ====
730*
731 DO 140 m = mbot, mtop, -1
732 k = krcol + 2*( m-1 )
733 DO 130 j = iloz, ihiz
734 refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
735 $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
736 z( j, k+1 ) = z( j, k+1 ) - refsum
737 z( j, k+2 ) = z( j, k+2 ) -
738 $ refsum*dconjg( v( 2, m ) )
739 z( j, k+3 ) = z( j, k+3 ) -
740 $ refsum*dconjg( v( 3, m ) )
741 130 CONTINUE
742 140 CONTINUE
743 END IF
744*
745* ==== End of near-the-diagonal bulge chase. ====
746*
747 145 CONTINUE
748*
749* ==== Use U (if accumulated) to update far-from-diagonal
750* . entries in H. If required, use U to update Z as
751* . well. ====
752*
753 IF( accum ) THEN
754 IF( wantt ) THEN
755 jtop = 1
756 jbot = n
757 ELSE
758 jtop = ktop
759 jbot = kbot
760 END IF
761 k1 = max( 1, ktop-incol )
762 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
763*
764* ==== Horizontal Multiply ====
765*
766 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
767 jlen = min( nh, jbot-jcol+1 )
768 CALL zgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
769 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
770 $ ldwh )
771 CALL zlacpy( 'ALL', nu, jlen, wh, ldwh,
772 $ h( incol+k1, jcol ), ldh )
773 150 CONTINUE
774*
775* ==== Vertical multiply ====
776*
777 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
778 jlen = min( nv, max( ktop, incol )-jrow )
779 CALL zgemm( 'N', 'N', jlen, nu, nu, one,
780 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
781 $ ldu, zero, wv, ldwv )
782 CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
783 $ h( jrow, incol+k1 ), ldh )
784 160 CONTINUE
785*
786* ==== Z multiply (also vertical) ====
787*
788 IF( wantz ) THEN
789 DO 170 jrow = iloz, ihiz, nv
790 jlen = min( nv, ihiz-jrow+1 )
791 CALL zgemm( 'N', 'N', jlen, nu, nu, one,
792 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
793 $ ldu, zero, wv, ldwv )
794 CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
795 $ z( jrow, incol+k1 ), ldz )
796 170 CONTINUE
797 END IF
798 END IF
799 180 CONTINUE
800*
801* ==== End of ZLAQR5 ====
802*
803 END
subroutine dlabad(small, large)
DLABAD
Definition dlabad.f:74
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:103
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
Definition zlarfg.f:106
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:106
subroutine zlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, s, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
ZLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition zlaqr5.f:257
subroutine zlaqr1(n, h, ldh, s1, s2, v)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition zlaqr1.f:107
subroutine ztrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRMM
Definition ztrmm.f:177
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:187
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21