OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
dgghd3.f
Go to the documentation of this file.
1*> \brief \b DGGHD3
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGGHD3 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgghd3.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgghd3.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgghd3.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
22* LDQ, Z, LDZ, WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER COMPQ, COMPZ
26* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
27* ..
28* .. Array Arguments ..
29* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
30* $ Z( LDZ, * ), WORK( * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper
40*> Hessenberg form using orthogonal transformations, where A is a
41*> general matrix and B is upper triangular. The form of the
42*> generalized eigenvalue problem is
43*> A*x = lambda*B*x,
44*> and B is typically made upper triangular by computing its QR
45*> factorization and moving the orthogonal matrix Q to the left side
46*> of the equation.
47*>
48*> This subroutine simultaneously reduces A to a Hessenberg matrix H:
49*> Q**T*A*Z = H
50*> and transforms B to another upper triangular matrix T:
51*> Q**T*B*Z = T
52*> in order to reduce the problem to its standard form
53*> H*y = lambda*T*y
54*> where y = Z**T*x.
55*>
56*> The orthogonal matrices Q and Z are determined as products of Givens
57*> rotations. They may either be formed explicitly, or they may be
58*> postmultiplied into input matrices Q1 and Z1, so that
59*>
60*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T
61*>
62*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T
63*>
64*> If Q1 is the orthogonal matrix from the QR factorization of B in the
65*> original equation A*x = lambda*B*x, then DGGHD3 reduces the original
66*> problem to generalized Hessenberg form.
67*>
68*> This is a blocked variant of DGGHRD, using matrix-matrix
69*> multiplications for parts of the computation to enhance performance.
70*> \endverbatim
71*
72* Arguments:
73* ==========
74*
75*> \param[in] COMPQ
76*> \verbatim
77*> COMPQ is CHARACTER*1
78*> = 'N': do not compute Q;
79*> = 'I': Q is initialized to the unit matrix, and the
80*> orthogonal matrix Q is returned;
81*> = 'V': Q must contain an orthogonal matrix Q1 on entry,
82*> and the product Q1*Q is returned.
83*> \endverbatim
84*>
85*> \param[in] COMPZ
86*> \verbatim
87*> COMPZ is CHARACTER*1
88*> = 'N': do not compute Z;
89*> = 'I': Z is initialized to the unit matrix, and the
90*> orthogonal matrix Z is returned;
91*> = 'V': Z must contain an orthogonal matrix Z1 on entry,
92*> and the product Z1*Z is returned.
93*> \endverbatim
94*>
95*> \param[in] N
96*> \verbatim
97*> N is INTEGER
98*> The order of the matrices A and B. N >= 0.
99*> \endverbatim
100*>
101*> \param[in] ILO
102*> \verbatim
103*> ILO is INTEGER
104*> \endverbatim
105*>
106*> \param[in] IHI
107*> \verbatim
108*> IHI is INTEGER
109*>
110*> ILO and IHI mark the rows and columns of A which are to be
111*> reduced. It is assumed that A is already upper triangular
112*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are
113*> normally set by a previous call to DGGBAL; otherwise they
114*> should be set to 1 and N respectively.
115*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
116*> \endverbatim
117*>
118*> \param[in,out] A
119*> \verbatim
120*> A is DOUBLE PRECISION array, dimension (LDA, N)
121*> On entry, the N-by-N general matrix to be reduced.
122*> On exit, the upper triangle and the first subdiagonal of A
123*> are overwritten with the upper Hessenberg matrix H, and the
124*> rest is set to zero.
125*> \endverbatim
126*>
127*> \param[in] LDA
128*> \verbatim
129*> LDA is INTEGER
130*> The leading dimension of the array A. LDA >= max(1,N).
131*> \endverbatim
132*>
133*> \param[in,out] B
134*> \verbatim
135*> B is DOUBLE PRECISION array, dimension (LDB, N)
136*> On entry, the N-by-N upper triangular matrix B.
137*> On exit, the upper triangular matrix T = Q**T B Z. The
138*> elements below the diagonal are set to zero.
139*> \endverbatim
140*>
141*> \param[in] LDB
142*> \verbatim
143*> LDB is INTEGER
144*> The leading dimension of the array B. LDB >= max(1,N).
145*> \endverbatim
146*>
147*> \param[in,out] Q
148*> \verbatim
149*> Q is DOUBLE PRECISION array, dimension (LDQ, N)
150*> On entry, if COMPQ = 'V', the orthogonal matrix Q1,
151*> typically from the QR factorization of B.
152*> On exit, if COMPQ='I', the orthogonal matrix Q, and if
153*> COMPQ = 'V', the product Q1*Q.
154*> Not referenced if COMPQ='N'.
155*> \endverbatim
156*>
157*> \param[in] LDQ
158*> \verbatim
159*> LDQ is INTEGER
160*> The leading dimension of the array Q.
161*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
162*> \endverbatim
163*>
164*> \param[in,out] Z
165*> \verbatim
166*> Z is DOUBLE PRECISION array, dimension (LDZ, N)
167*> On entry, if COMPZ = 'V', the orthogonal matrix Z1.
168*> On exit, if COMPZ='I', the orthogonal matrix Z, and if
169*> COMPZ = 'V', the product Z1*Z.
170*> Not referenced if COMPZ='N'.
171*> \endverbatim
172*>
173*> \param[in] LDZ
174*> \verbatim
175*> LDZ is INTEGER
176*> The leading dimension of the array Z.
177*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
178*> \endverbatim
179*>
180*> \param[out] WORK
181*> \verbatim
182*> WORK is DOUBLE PRECISION array, dimension (LWORK)
183*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
184*> \endverbatim
185*>
186*> \param[in] LWORK
187*> \verbatim
188*> LWORK is INTEGER
189*> The length of the array WORK. LWORK >= 1.
190*> For optimum performance LWORK >= 6*N*NB, where NB is the
191*> optimal blocksize.
192*>
193*> If LWORK = -1, then a workspace query is assumed; the routine
194*> only calculates the optimal size of the WORK array, returns
195*> this value as the first entry of the WORK array, and no error
196*> message related to LWORK is issued by XERBLA.
197*> \endverbatim
198*>
199*> \param[out] INFO
200*> \verbatim
201*> INFO is INTEGER
202*> = 0: successful exit.
203*> < 0: if INFO = -i, the i-th argument had an illegal value.
204*> \endverbatim
205*
206* Authors:
207* ========
208*
209*> \author Univ. of Tennessee
210*> \author Univ. of California Berkeley
211*> \author Univ. of Colorado Denver
212*> \author NAG Ltd.
213*
214*> \ingroup doubleOTHERcomputational
215*
216*> \par Further Details:
217* =====================
218*>
219*> \verbatim
220*>
221*> This routine reduces A to Hessenberg form and maintains B in triangular form
222*> using a blocked variant of Moler and Stewart's original algorithm,
223*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti
224*> (BIT 2008).
225*> \endverbatim
226*>
227* =====================================================================
228 SUBROUTINE dgghd3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
229 $ LDQ, Z, LDZ, WORK, LWORK, INFO )
230*
231* -- LAPACK computational routine --
232* -- LAPACK is a software package provided by Univ. of Tennessee, --
233* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
234*
235 IMPLICIT NONE
236*
237* .. Scalar Arguments ..
238 CHARACTER COMPQ, COMPZ
239 INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK
240* ..
241* .. Array Arguments ..
242 DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
243 $ z( ldz, * ), work( * )
244* ..
245*
246* =====================================================================
247*
248* .. Parameters ..
249 DOUBLE PRECISION ZERO, ONE
250 parameter( zero = 0.0d+0, one = 1.0d+0 )
251* ..
252* .. Local Scalars ..
253 LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ
254 CHARACTER*1 COMPQ2, COMPZ2
255 INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K,
256 $ kacc22, len, lwkopt, n2nb, nb, nblst, nbmin,
257 $ nh, nnb, nx, ppw, ppwo, pw, top, topq
258 DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3
259* ..
260* .. External Functions ..
261 LOGICAL LSAME
262 INTEGER ILAENV
263 EXTERNAL ilaenv, lsame
264* ..
265* .. External Subroutines ..
266 EXTERNAL dgghrd, dlartg, dlaset, dorm22, drot, dgemm,
268* ..
269* .. Intrinsic Functions ..
270 INTRINSIC dble, max
271* ..
272* .. Executable Statements ..
273*
274* Decode and test the input parameters.
275*
276 info = 0
277 nb = ilaenv( 1, 'DGGHD3', ' ', n, ilo, ihi, -1 )
278 lwkopt = max( 6*n*nb, 1 )
279 work( 1 ) = dble( lwkopt )
280 initq = lsame( compq, 'I' )
281 wantq = initq .OR. lsame( compq, 'V' )
282 initz = lsame( compz, 'I' )
283 wantz = initz .OR. lsame( compz, 'V' )
284 lquery = ( lwork.EQ.-1 )
285*
286 IF( .NOT.lsame( compq, 'N' ) .AND. .NOT.wantq ) THEN
287 info = -1
288 ELSE IF( .NOT.lsame( compz, 'N' ) .AND. .NOT.wantz ) THEN
289 info = -2
290 ELSE IF( n.LT.0 ) THEN
291 info = -3
292 ELSE IF( ilo.LT.1 ) THEN
293 info = -4
294 ELSE IF( ihi.GT.n .OR. ihi.LT.ilo-1 ) THEN
295 info = -5
296 ELSE IF( lda.LT.max( 1, n ) ) THEN
297 info = -7
298 ELSE IF( ldb.LT.max( 1, n ) ) THEN
299 info = -9
300 ELSE IF( ( wantq .AND. ldq.LT.n ) .OR. ldq.LT.1 ) THEN
301 info = -11
302 ELSE IF( ( wantz .AND. ldz.LT.n ) .OR. ldz.LT.1 ) THEN
303 info = -13
304 ELSE IF( lwork.LT.1 .AND. .NOT.lquery ) THEN
305 info = -15
306 END IF
307 IF( info.NE.0 ) THEN
308 CALL xerbla( 'DGGHD3', -info )
309 RETURN
310 ELSE IF( lquery ) THEN
311 RETURN
312 END IF
313*
314* Initialize Q and Z if desired.
315*
316 IF( initq )
317 $ CALL dlaset( 'All', n, n, zero, one, q, ldq )
318 IF( initz )
319 $ CALL dlaset( 'All', n, n, zero, one, z, ldz )
320*
321* Zero out lower triangle of B.
322*
323 IF( n.GT.1 )
324 $ CALL dlaset( 'Lower', n-1, n-1, zero, zero, b(2, 1), ldb )
325*
326* Quick return if possible
327*
328 nh = ihi - ilo + 1
329 IF( nh.LE.1 ) THEN
330 work( 1 ) = one
331 RETURN
332 END IF
333*
334* Determine the blocksize.
335*
336 nbmin = ilaenv( 2, 'DGGHD3', ' ', n, ilo, ihi, -1 )
337 IF( nb.GT.1 .AND. nb.LT.nh ) THEN
338*
339* Determine when to use unblocked instead of blocked code.
340*
341 nx = max( nb, ilaenv( 3, 'DGGHD3', ' ', n, ilo, ihi, -1 ) )
342 IF( nx.LT.nh ) THEN
343*
344* Determine if workspace is large enough for blocked code.
345*
346 IF( lwork.LT.lwkopt ) THEN
347*
348* Not enough workspace to use optimal NB: determine the
349* minimum value of NB, and reduce NB or force use of
350* unblocked code.
351*
352 nbmin = max( 2, ilaenv( 2, 'dgghd3', ' ', N, ILO, IHI,
353 $ -1 ) )
354.GE. IF( LWORK6*N*NBMIN ) THEN
355 NB = LWORK / ( 6*N )
356 ELSE
357 NB = 1
358 END IF
359 END IF
360 END IF
361 END IF
362*
363.LT..OR..GE. IF( NBNBMIN NBNH ) THEN
364*
365* Use unblocked code below
366*
367 JCOL = ILO
368*
369 ELSE
370*
371* Use blocked code
372*
373 KACC22 = ILAENV( 16, 'dgghd3', ' ', N, ILO, IHI, -1 )
374.EQ. BLK22 = KACC222
375 DO JCOL = ILO, IHI-2, NB
376 NNB = MIN( NB, IHI-JCOL-1 )
377*
378* Initialize small orthogonal factors that will hold the
379* accumulated Givens rotations in workspace.
380* N2NB denotes the number of 2*NNB-by-2*NNB factors
381* NBLST denotes the (possibly smaller) order of the last
382* factor.
383*
384 N2NB = ( IHI-JCOL-1 ) / NNB - 1
385 NBLST = IHI - JCOL - N2NB*NNB
386 CALL DLASET( 'all', NBLST, NBLST, ZERO, ONE, WORK, NBLST )
387 PW = NBLST * NBLST + 1
388 DO I = 1, N2NB
389 CALL DLASET( 'all', 2*NNB, 2*NNB, ZERO, ONE,
390 $ WORK( PW ), 2*NNB )
391 PW = PW + 4*NNB*NNB
392 END DO
393*
394* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form.
395*
396 DO J = JCOL, JCOL+NNB-1
397*
398* Reduce Jth column of A. Store cosines and sines in Jth
399* column of A and B, respectively.
400*
401 DO I = IHI, J+2, -1
402 TEMP = A( I-1, J )
403 CALL DLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) )
404 A( I, J ) = C
405 B( I, J ) = S
406 END DO
407*
408* Accumulate Givens rotations into workspace array.
409*
410 PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1
411 LEN = 2 + J - JCOL
412 JROW = J + N2NB*NNB + 2
413 DO I = IHI, JROW, -1
414 C = A( I, J )
415 S = B( I, J )
416 DO JJ = PPW, PPW+LEN-1
417 TEMP = WORK( JJ + NBLST )
418 WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ )
419 WORK( JJ ) = S*TEMP + C*WORK( JJ )
420 END DO
421 LEN = LEN + 1
422 PPW = PPW - NBLST - 1
423 END DO
424*
425 PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB
426 J0 = JROW - NNB
427 DO JROW = J0, J+2, -NNB
428 PPW = PPWO
429 LEN = 2 + J - JCOL
430 DO I = JROW+NNB-1, JROW, -1
431 C = A( I, J )
432 S = B( I, J )
433 DO JJ = PPW, PPW+LEN-1
434 TEMP = WORK( JJ + 2*NNB )
435 WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ )
436 WORK( JJ ) = S*TEMP + C*WORK( JJ )
437 END DO
438 LEN = LEN + 1
439 PPW = PPW - 2*NNB - 1
440 END DO
441 PPWO = PPWO + 4*NNB*NNB
442 END DO
443*
444* TOP denotes the number of top rows in A and B that will
445* not be updated during the next steps.
446*
447.LE. IF( JCOL2 ) THEN
448 TOP = 0
449 ELSE
450 TOP = JCOL
451 END IF
452*
453* Propagate transformations through B and replace stored
454* left sines/cosines by right sines/cosines.
455*
456 DO JJ = N, J+1, -1
457*
458* Update JJth column of B.
459*
460 DO I = MIN( JJ+1, IHI ), J+2, -1
461 C = A( I, J )
462 S = B( I, J )
463 TEMP = B( I, JJ )
464 B( I, JJ ) = C*TEMP - S*B( I-1, JJ )
465 B( I-1, JJ ) = S*TEMP + C*B( I-1, JJ )
466 END DO
467*
468* Annihilate B( JJ+1, JJ ).
469*
470.LT. IF( JJIHI ) THEN
471 TEMP = B( JJ+1, JJ+1 )
472 CALL DLARTG( TEMP, B( JJ+1, JJ ), C, S,
473 $ B( JJ+1, JJ+1 ) )
474 B( JJ+1, JJ ) = ZERO
475 CALL DROT( JJ-TOP, B( TOP+1, JJ+1 ), 1,
476 $ B( TOP+1, JJ ), 1, C, S )
477 A( JJ+1, J ) = C
478 B( JJ+1, J ) = -S
479 END IF
480 END DO
481*
482* Update A by transformations from right.
483* Explicit loop unrolling provides better performance
484* compared to DLASR.
485* CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP,
486* $ IHI-J, A( J+2, J ), B( J+2, J ),
487* $ A( TOP+1, J+1 ), LDA )
488*
489 JJ = MOD( IHI-J-1, 3 )
490 DO I = IHI-J-3, JJ+1, -3
491 C = A( J+1+I, J )
492 S = -B( J+1+I, J )
493 C1 = A( J+2+I, J )
494 S1 = -B( J+2+I, J )
495 C2 = A( J+3+I, J )
496 S2 = -B( J+3+I, J )
497*
498 DO K = TOP+1, IHI
499 TEMP = A( K, J+I )
500 TEMP1 = A( K, J+I+1 )
501 TEMP2 = A( K, J+I+2 )
502 TEMP3 = A( K, J+I+3 )
503 A( K, J+I+3 ) = C2*TEMP3 + S2*TEMP2
504 TEMP2 = -S2*TEMP3 + C2*TEMP2
505 A( K, J+I+2 ) = C1*TEMP2 + S1*TEMP1
506 TEMP1 = -S1*TEMP2 + C1*TEMP1
507 A( K, J+I+1 ) = C*TEMP1 + S*TEMP
508 A( K, J+I ) = -S*TEMP1 + C*TEMP
509 END DO
510 END DO
511*
512.GT. IF( JJ0 ) THEN
513 DO I = JJ, 1, -1
514 CALL DROT( IHI-TOP, A( TOP+1, J+I+1 ), 1,
515 $ A( TOP+1, J+I ), 1, A( J+1+I, J ),
516 $ -B( J+1+I, J ) )
517 END DO
518 END IF
519*
520* Update (J+1)th column of A by transformations from left.
521*
522.LT. IF ( J JCOL + NNB - 1 ) THEN
523 LEN = 1 + J - JCOL
524*
525* Multiply with the trailing accumulated orthogonal
526* matrix, which takes the form
527*
528* [ U11 U12 ]
529* U = [ ],
530* [ U21 U22 ]
531*
532* where U21 is a LEN-by-LEN matrix and U12 is lower
533* triangular.
534*
535 JROW = IHI - NBLST + 1
536 CALL DGEMV( 'transpose', NBLST, LEN, ONE, WORK,
537 $ NBLST, A( JROW, J+1 ), 1, ZERO,
538 $ WORK( PW ), 1 )
539 PPW = PW + LEN
540 DO I = JROW, JROW+NBLST-LEN-1
541 WORK( PPW ) = A( I, J+1 )
542 PPW = PPW + 1
543 END DO
544 CALL DTRMV( 'lower', 'transpose', 'non-unit',
545 $ NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST,
546 $ WORK( PW+LEN ), 1 )
547 CALL DGEMV( 'transpose', LEN, NBLST-LEN, ONE,
548 $ WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST,
549 $ A( JROW+NBLST-LEN, J+1 ), 1, ONE,
550 $ WORK( PW+LEN ), 1 )
551 PPW = PW
552 DO I = JROW, JROW+NBLST-1
553 A( I, J+1 ) = WORK( PPW )
554 PPW = PPW + 1
555 END DO
556*
557* Multiply with the other accumulated orthogonal
558* matrices, which take the form
559*
560* [ U11 U12 0 ]
561* [ ]
562* U = [ U21 U22 0 ],
563* [ ]
564* [ 0 0 I ]
565*
566* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity
567* matrix, U21 is a LEN-by-LEN upper triangular matrix
568* and U12 is an NNB-by-NNB lower triangular matrix.
569*
570 PPWO = 1 + NBLST*NBLST
571 J0 = JROW - NNB
572 DO JROW = J0, JCOL+1, -NNB
573 PPW = PW + LEN
574 DO I = JROW, JROW+NNB-1
575 WORK( PPW ) = A( I, J+1 )
576 PPW = PPW + 1
577 END DO
578 PPW = PW
579 DO I = JROW+NNB, JROW+NNB+LEN-1
580 WORK( PPW ) = A( I, J+1 )
581 PPW = PPW + 1
582 END DO
583 CALL DTRMV( 'upper', 'transpose', 'non-unit', LEN,
584 $ WORK( PPWO + NNB ), 2*NNB, WORK( PW ),
585 $ 1 )
586 CALL DTRMV( 'lower', 'transpose', 'non-unit', NNB,
587 $ WORK( PPWO + 2*LEN*NNB ),
588 $ 2*NNB, WORK( PW + LEN ), 1 )
589 CALL DGEMV( 'transpose', NNB, LEN, ONE,
590 $ WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1,
591 $ ONE, WORK( PW ), 1 )
592 CALL DGEMV( 'transpose', LEN, NNB, ONE,
593 $ WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB,
594 $ A( JROW+NNB, J+1 ), 1, ONE,
595 $ WORK( PW+LEN ), 1 )
596 PPW = PW
597 DO I = JROW, JROW+LEN+NNB-1
598 A( I, J+1 ) = WORK( PPW )
599 PPW = PPW + 1
600 END DO
601 PPWO = PPWO + 4*NNB*NNB
602 END DO
603 END IF
604 END DO
605*
606* Apply accumulated orthogonal matrices to A.
607*
608 COLA = N - JCOL - NNB + 1
609 J = IHI - NBLST + 1
610 CALL DGEMM( 'transpose', 'no transpose', NBLST,
611 $ COLA, NBLST, ONE, WORK, NBLST,
612 $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ),
613 $ NBLST )
614 CALL DLACPY( 'all', NBLST, COLA, WORK( PW ), NBLST,
615 $ A( J, JCOL+NNB ), LDA )
616 PPWO = NBLST*NBLST + 1
617 J0 = J - NNB
618 DO J = J0, JCOL+1, -NNB
619 IF ( BLK22 ) THEN
620*
621* Exploit the structure of
622*
623* [ U11 U12 ]
624* U = [ ]
625* [ U21 U22 ],
626*
627* where all blocks are NNB-by-NNB, U21 is upper
628* triangular and U12 is lower triangular.
629*
630 CALL DORM22( 'left', 'transpose', 2*NNB, COLA, NNB,
631 $ NNB, WORK( PPWO ), 2*NNB,
632 $ A( J, JCOL+NNB ), LDA, WORK( PW ),
633 $ LWORK-PW+1, IERR )
634 ELSE
635*
636* Ignore the structure of U.
637*
638 CALL DGEMM( 'transpose', 'no transpose', 2*NNB,
639 $ COLA, 2*NNB, ONE, WORK( PPWO ), 2*NNB,
640 $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ),
641 $ 2*NNB )
642 CALL DLACPY( 'all', 2*NNB, COLA, WORK( PW ), 2*NNB,
643 $ A( J, JCOL+NNB ), LDA )
644 END IF
645 PPWO = PPWO + 4*NNB*NNB
646 END DO
647*
648* Apply accumulated orthogonal matrices to Q.
649*
650 IF( WANTQ ) THEN
651 J = IHI - NBLST + 1
652 IF ( INITQ ) THEN
653 TOPQ = MAX( 2, J - JCOL + 1 )
654 NH = IHI - TOPQ + 1
655 ELSE
656 TOPQ = 1
657 NH = N
658 END IF
659 CALL DGEMM( 'no transpose', 'no transpose', NH,
660 $ NBLST, NBLST, ONE, Q( TOPQ, J ), LDQ,
661 $ WORK, NBLST, ZERO, WORK( PW ), NH )
662 CALL DLACPY( 'all', NH, NBLST, WORK( PW ), NH,
663 $ Q( TOPQ, J ), LDQ )
664 PPWO = NBLST*NBLST + 1
665 J0 = J - NNB
666 DO J = J0, JCOL+1, -NNB
667 IF ( INITQ ) THEN
668 TOPQ = MAX( 2, J - JCOL + 1 )
669 NH = IHI - TOPQ + 1
670 END IF
671 IF ( BLK22 ) THEN
672*
673* Exploit the structure of U.
674*
675 CALL DORM22( 'right', 'no transpose', NH, 2*NNB,
676 $ NNB, NNB, WORK( PPWO ), 2*NNB,
677 $ Q( TOPQ, J ), LDQ, WORK( PW ),
678 $ LWORK-PW+1, IERR )
679 ELSE
680*
681* Ignore the structure of U.
682*
683 CALL DGEMM( 'no transpose', 'no transpose', NH,
684 $ 2*NNB, 2*NNB, ONE, Q( TOPQ, J ), LDQ,
685 $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ),
686 $ NH )
687 CALL DLACPY( 'all', NH, 2*NNB, WORK( PW ), NH,
688 $ Q( TOPQ, J ), LDQ )
689 END IF
690 PPWO = PPWO + 4*NNB*NNB
691 END DO
692 END IF
693*
694* Accumulate right Givens rotations if required.
695*
696.OR..GT. IF ( WANTZ TOP0 ) THEN
697*
698* Initialize small orthogonal factors that will hold the
699* accumulated Givens rotations in workspace.
700*
701 CALL DLASET( 'all', NBLST, NBLST, ZERO, ONE, WORK,
702 $ NBLST )
703 PW = NBLST * NBLST + 1
704 DO I = 1, N2NB
705 CALL DLASET( 'all', 2*NNB, 2*NNB, ZERO, ONE,
706 $ WORK( PW ), 2*NNB )
707 PW = PW + 4*NNB*NNB
708 END DO
709*
710* Accumulate Givens rotations into workspace array.
711*
712 DO J = JCOL, JCOL+NNB-1
713 PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1
714 LEN = 2 + J - JCOL
715 JROW = J + N2NB*NNB + 2
716 DO I = IHI, JROW, -1
717 C = A( I, J )
718 A( I, J ) = ZERO
719 S = B( I, J )
720 B( I, J ) = ZERO
721 DO JJ = PPW, PPW+LEN-1
722 TEMP = WORK( JJ + NBLST )
723 WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ )
724 WORK( JJ ) = S*TEMP + C*WORK( JJ )
725 END DO
726 LEN = LEN + 1
727 PPW = PPW - NBLST - 1
728 END DO
729*
730 PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB
731 J0 = JROW - NNB
732 DO JROW = J0, J+2, -NNB
733 PPW = PPWO
734 LEN = 2 + J - JCOL
735 DO I = JROW+NNB-1, JROW, -1
736 C = A( I, J )
737 A( I, J ) = ZERO
738 S = B( I, J )
739 B( I, J ) = ZERO
740 DO JJ = PPW, PPW+LEN-1
741 TEMP = WORK( JJ + 2*NNB )
742 WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ )
743 WORK( JJ ) = S*TEMP + C*WORK( JJ )
744 END DO
745 LEN = LEN + 1
746 PPW = PPW - 2*NNB - 1
747 END DO
748 PPWO = PPWO + 4*NNB*NNB
749 END DO
750 END DO
751 ELSE
752*
753 CALL DLASET( 'lower', IHI - JCOL - 1, NNB, ZERO, ZERO,
754 $ A( JCOL + 2, JCOL ), LDA )
755 CALL DLASET( 'lower', IHI - JCOL - 1, NNB, ZERO, ZERO,
756 $ B( JCOL + 2, JCOL ), LDB )
757 END IF
758*
759* Apply accumulated orthogonal matrices to A and B.
760*
761.GT. IF ( TOP0 ) THEN
762 J = IHI - NBLST + 1
763 CALL DGEMM( 'no transpose', 'no transpose', TOP,
764 $ NBLST, NBLST, ONE, A( 1, J ), LDA,
765 $ WORK, NBLST, ZERO, WORK( PW ), TOP )
766 CALL DLACPY( 'all', TOP, NBLST, WORK( PW ), TOP,
767 $ A( 1, J ), LDA )
768 PPWO = NBLST*NBLST + 1
769 J0 = J - NNB
770 DO J = J0, JCOL+1, -NNB
771 IF ( BLK22 ) THEN
772*
773* Exploit the structure of U.
774*
775 CALL DORM22( 'right', 'no transpose', TOP, 2*NNB,
776 $ NNB, NNB, WORK( PPWO ), 2*NNB,
777 $ A( 1, J ), LDA, WORK( PW ),
778 $ LWORK-PW+1, IERR )
779 ELSE
780*
781* Ignore the structure of U.
782*
783 CALL DGEMM( 'no transpose', 'No Transpose', top,
784 $ 2*nnb, 2*nnb, one, a( 1, j ), lda,
785 $ work( ppwo ), 2*nnb, zero,
786 $ work( pw ), top )
787 CALL dlacpy( 'All', top, 2*nnb, work( pw ), top,
788 $ a( 1, j ), lda )
789 END IF
790 ppwo = ppwo + 4*nnb*nnb
791 END DO
792*
793 j = ihi - nblst + 1
794 CALL dgemm( 'No Transpose', 'No Transpose', top,
795 $ nblst, nblst, one, b( 1, j ), ldb,
796 $ work, nblst, zero, work( pw ), top )
797 CALL dlacpy( 'All', top, nblst, work( pw ), top,
798 $ b( 1, j ), ldb )
799 ppwo = nblst*nblst + 1
800 j0 = j - nnb
801 DO j = j0, jcol+1, -nnb
802 IF ( blk22 ) THEN
803*
804* Exploit the structure of U.
805*
806 CALL dorm22( 'Right', 'No Transpose', top, 2*nnb,
807 $ nnb, nnb, work( ppwo ), 2*nnb,
808 $ b( 1, j ), ldb, work( pw ),
809 $ lwork-pw+1, ierr )
810 ELSE
811*
812* Ignore the structure of U.
813*
814 CALL dgemm( 'No Transpose', 'no transpose', TOP,
815 $ 2*NNB, 2*NNB, ONE, B( 1, J ), LDB,
816 $ WORK( PPWO ), 2*NNB, ZERO,
817 $ WORK( PW ), TOP )
818 CALL DLACPY( 'all', TOP, 2*NNB, WORK( PW ), TOP,
819 $ B( 1, J ), LDB )
820 END IF
821 PPWO = PPWO + 4*NNB*NNB
822 END DO
823 END IF
824*
825* Apply accumulated orthogonal matrices to Z.
826*
827 IF( WANTZ ) THEN
828 J = IHI - NBLST + 1
829 IF ( INITQ ) THEN
830 TOPQ = MAX( 2, J - JCOL + 1 )
831 NH = IHI - TOPQ + 1
832 ELSE
833 TOPQ = 1
834 NH = N
835 END IF
836 CALL DGEMM( 'no transpose', 'no transpose', NH,
837 $ NBLST, NBLST, ONE, Z( TOPQ, J ), LDZ,
838 $ WORK, NBLST, ZERO, WORK( PW ), NH )
839 CALL DLACPY( 'all', NH, NBLST, WORK( PW ), NH,
840 $ Z( TOPQ, J ), LDZ )
841 PPWO = NBLST*NBLST + 1
842 J0 = J - NNB
843 DO J = J0, JCOL+1, -NNB
844 IF ( INITQ ) THEN
845 TOPQ = MAX( 2, J - JCOL + 1 )
846 NH = IHI - TOPQ + 1
847 END IF
848 IF ( BLK22 ) THEN
849*
850* Exploit the structure of U.
851*
852 CALL DORM22( 'right', 'no transpose', NH, 2*NNB,
853 $ NNB, NNB, WORK( PPWO ), 2*NNB,
854 $ Z( TOPQ, J ), LDZ, WORK( PW ),
855 $ LWORK-PW+1, IERR )
856 ELSE
857*
858* Ignore the structure of U.
859*
860 CALL DGEMM( 'no transpose', 'no transpose', NH,
861 $ 2*NNB, 2*NNB, ONE, Z( TOPQ, J ), LDZ,
862 $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ),
863 $ NH )
864 CALL DLACPY( 'all', NH, 2*NNB, WORK( PW ), NH,
865 $ Z( TOPQ, J ), LDZ )
866 END IF
867 PPWO = PPWO + 4*NNB*NNB
868 END DO
869 END IF
870 END DO
871 END IF
872*
873* Use unblocked code to reduce the rest of the matrix
874* Avoid re-initialization of modified Q and Z.
875*
876 COMPQ2 = COMPQ
877 COMPZ2 = COMPZ
878.NE. IF ( JCOLILO ) THEN
879 IF ( WANTQ )
880 $ COMPQ2 = 'v'
881 IF ( WANTZ )
882 $ COMPZ2 = 'v'
883 END IF
884*
885.LT. IF ( JCOLIHI )
886 $ CALL DGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q,
887 $ LDQ, Z, LDZ, IERR )
888 WORK( 1 ) = DBLE( LWKOPT )
889*
890 RETURN
891*
892* End of DGGHD3
893*
894 END
subroutine dlartg(f, g, c, s, r)
DLARTG generates a plane rotation with real cosine and real sine.
Definition dlartg.f90:113
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
subroutine dorm22(side, trans, m, n, n1, n2, q, ldq, c, ldc, work, lwork, info)
DORM22 multiplies a general matrix by a banded orthogonal matrix.
Definition dorm22.f:163
subroutine dgghrd(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, info)
DGGHRD
Definition dgghrd.f:207
subroutine dgghd3(compq, compz, n, ilo, ihi, a, lda, b, ldb, q, ldq, z, ldz, work, lwork, info)
DGGHD3
Definition dgghd3.f:230
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
Definition drot.f:92
subroutine dgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
DGEMV
Definition dgemv.f:156
subroutine dtrmv(uplo, trans, diag, n, a, lda, x, incx)
DTRMV
Definition dtrmv.f:147
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
#define max(a, b)
Definition macros.h:21