OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pzhettrd.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pzhettrd (uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)

Function/Subroutine Documentation

◆ pzhettrd()

subroutine pzhettrd ( character uplo,
integer n,
complex*16, dimension( * ) a,
integer ia,
integer ja,
integer, dimension( * ) desca,
double precision, dimension( * ) d,
double precision, dimension( * ) e,
complex*16, dimension( * ) tau,
complex*16, dimension( * ) work,
integer lwork,
integer info )

Definition at line 1 of file pzhettrd.f.

3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION D( * ), E( * )
15 COMPLEX*16 A( * ), TAU( * ), WORK( * )
16* ..
17*
18* Purpose
19*
20* =======
21*
22* PZHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
23* tridiagonal form T by an unitary similarity transformation:
24* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding
32* process and memory location.
33*
34* Let A be a generic term for any 2D block cyclicly distributed
35* array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- -----------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
45* indicating the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to
54* distribute the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to
56* distribute the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
58* first row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCp(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCp( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCq( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes
72* of its process row.
73* The values of LOCp() and LOCq() may be determined via a call to
74* the ScaLAPACK tool function, NUMROC:
75* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* UPLO (global input) CHARACTER
85* Specifies whether the upper or lower triangular part of the
86* Hermitian matrix sub( A ) is stored:
87* = 'U': Upper triangular
88* = 'L': Lower triangular
89*
90* N (global input) INTEGER
91* The number of rows and columns to be operated on, i.e. the
92* order of the distributed submatrix sub( A ). N >= 0.
93*
94* A (local input/local output) COMPLEX*16 pointer into the
95* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
96* On entry, this array contains the local pieces of the
97* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
98* leading N-by-N upper triangular part of sub( A ) contains
99* the upper triangular part of the matrix, and its strictly
100* lower triangular part is not referenced. If UPLO = 'L', the
101* leading N-by-N lower triangular part of sub( A ) contains the
102* lower triangular part of the matrix, and its strictly upper
103* triangular part is not referenced. On exit, if UPLO = 'U',
104* the diagonal and first superdiagonal of sub( A ) are over-
105* written by the corresponding elements of the tridiagonal
106* matrix T, and the elements above the first superdiagonal,
107* with the array TAU, represent the unitary matrix Q as a
108* product of elementary reflectors; if UPLO = 'L', the diagonal
109* and first subdiagonal of sub( A ) are overwritten by the
110* corresponding elements of the tridiagonal matrix T, and the
111* elements below the first subdiagonal, with the array TAU,
112* represent the unitary matrix Q as a product of elementary
113* reflectors. See Further Details.
114*
115* IA (global input) INTEGER
116* The row index in the global array A indicating the first
117* row of sub( A ).
118*
119* JA (global input) INTEGER
120* The column index in the global array A indicating the
121* first column of sub( A ).
122*
123* DESCA (global and local input) INTEGER array of dimension DLEN_.
124* The array descriptor for the distributed matrix A.
125*
126* D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
127* The diagonal elements of the tridiagonal matrix T:
128* D(i) = A(i,i). D is tied to the distributed matrix A.
129*
130* E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
131* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
132* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
133* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
134* distributed matrix A.
135*
136* TAU (local output) COMPLEX*16, array, dimension
137* LOCq(JA+N-1). This array contains the scalar factors TAU of
138* the elementary reflectors. TAU is tied to the distributed
139* matrix A.
140*
141* WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
142* On exit, WORK( 1 ) returns the minimal and optimal workspace
143*
144* LWORK (local input) INTEGER
145* The dimension of the array WORK.
146* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
147* Where:
148* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
149* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PZHETTRD', 'L', 0, 0,
150* 0, 0 )
151*
152* NUMROC is a ScaLAPACK tool function;
153* PJLAENV is a ScaLAPACK envionmental inquiry function
154* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155* the subroutine BLACS_GRIDINFO.
156*
157* INFO (global output) INTEGER
158* = 0: successful exit
159* < 0: If the i-th argument is an array and the j-entry had
160* an illegal value, then INFO = -(i*100+j), if the i-th
161* argument is a scalar and had an illegal value, then
162* INFO = -i.
163*
164* Further Details
165* ===============
166*
167* If UPLO = 'U', the matrix Q is represented as a product of
168* elementary reflectors
169*
170* Q = H(n-1) . . . H(2) H(1).
171*
172* Each H(i) has the form
173*
174* H(i) = I - tau * v * v'
175*
176* where tau is a complex scalar, and v is a complex vector with
177* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
178* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
179*
180* If UPLO = 'L', the matrix Q is represented as a product of
181* elementary reflectors
182*
183* Q = H(1) H(2) . . . H(n-1).
184*
185* Each H(i) has the form
186*
187* H(i) = I - tau * v * v'
188*
189* where tau is a complex scalar, and v is a complex vector with
190* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
191* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
192*
193* The contents of sub( A ) on exit are illustrated by the following
194* examples with n = 5:
195*
196* if UPLO = 'U': if UPLO = 'L':
197*
198* ( d e v2 v3 v4 ) ( d )
199* ( d e v3 v4 ) ( e d )
200* ( d e v4 ) ( v1 e d )
201* ( d e ) ( v1 v2 e d )
202* ( d ) ( v1 v2 v3 e d )
203*
204* where d and e denote diagonal and off-diagonal elements of T, and
205* vi denotes an element of the vector defining H(i).
206*
207* Data storage requirements
208* =========================
209*
210* PZHETTRD is not intended to be called directly. All users are
211* encourage to call PZHETRD which will then call PZHETTRD if
212* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
213* the process grid must be square ( i.e. NPROW = NPCOL ) and
214* only lower triangular storage is supported.
215*
216* Local variables
217* ===============
218*
219* PZHETTRD uses five local arrays:
220* WORK ( InV ) dimension ( NP, ANB+1): array V
221* WORK ( InH ) dimension ( NP, ANB+1): array H
222* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
223* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
224* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
225*
226* Arrays V and H are replicated across all processor columns.
227* Arrays V^T and H^T are replicated across all processor rows.
228*
229* WORK ( InVT ), or V^T, is stored as a tall skinny
230* array ( NQ x ANB-1 ) for efficiency. Since only the lower
231* triangular portion of A is updated, Av is computed as:
232* tril(A) * v + v^T * tril(A,-1). This is performed as
233* two local triangular matrix-vector multiplications (both in
234* MVR2) followed by a transpose and a sum across the columns.
235* In the local computation, WORK( InVT ) is used to compute
236* tril(A) * v and WORK( InV ) is used to compute
237* v^T * tril(A,-1)
238*
239* The following variables are global indices into A:
240* INDEX: The current global row and column number.
241* MAXINDEX: The global row and column for the first row and
242* column in the trailing block of A.
243* LIIB, LIJB: The first row, column in
244*
245* The following variables point into the arrays A, V, H, V^T, H^T:
246* BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
247* LII: local index I: The local row number for row INDEX
248* LIJ: local index J: The local column number for column INDEX
249* LIIP1: local index I+1: The local row number for row INDEX+1
250* LIJP1: local index J+1: The local col number for col INDEX+1
251* LTLI: lower triangular local index I: The local row for the
252* upper left entry in tril( A(INDEX, INDEX) )
253* LTLIP1: lower triangular local index I+1: The local row for the
254* upper left entry in tril( A(INDEX+1, INDEX+1) )
255*
256* Details: The distinction between LII and LTLI (and between
257* LIIP1 and LTLIP1) is subtle. Within the current processor
258* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
259* on some processors, A( LII, LIJ ) points to an element
260* above the diagonal, on these processors, LTLI = LII+1.
261*
262* The following variables give the number of rows and/or columns
263* in various matrices:
264* NP: The number of local rows in A( 1:N, 1:N )
265* NQ: The number of local columns in A( 1:N, 1:N )
266* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
267* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
268* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
269* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
270* LTNM0: The number of local rows & columns in
271* tril( A( INDEX:N, INDEX:N ) )
272* LTNM1: The number of local rows & columns in
273* tril( A( INDEX+1:N, INDEX+1:N ) )
274* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
275* processors, i.e. those where MYCOL == MYROW.
276*
277* Invariants:
278* NP = NPM0 + LII - 1
279* NQ = NQM0 + LIJ - 1
280* NP = NPM1 + LIIP1 - 1
281* NQ = NQM1 + LIJP1 - 1
282* NP = LTLI + LTNM0 - 1
283* NP = LTLIP1 + LTNM1 - 1
284*
285* Temporary variables. The following variables are used within
286* a few lines after they are set and do hold state from one loop
287* iteration to the next:
288*
289* The matrix A:
290* The matrix A does not hold the same values that it would
291* in an unblocked code nor the values that it would hold in
292* in a blocked code.
293*
294* The value of A is confusing. It is easiest to state the
295* difference between trueA and A at the point that MVR2 is called,
296* so we will start there.
297*
298* Let trueA be the value that A would
299* have at a given point in an unblocked code and A
300* be the value that A has in this code at the same point.
301*
302* At the time of the call to MVR2,
303* trueA = A + V' * H + H' * V
304* where H = H( MAXINDEX:N, 1:BINDEX ) and
305* V = V( MAXINDEX:N, 1:BINDEX ).
306*
307* At the bottom of the inner loop,
308* trueA = A + V' * H + H' * V + v' * h + h' * v
309* where H = H( MAXINDEX:N, 1:BINDEX ) and
310* V = V( MAXINDEX:N, 1:BINDEX ) and
311* v = V( liip1:N, BINDEX+1 ) and
312* h = H( liip1:N, BINDEX+1 )
313*
314* At the top of the loop, BINDEX gets incremented, hence:
315* trueA = A + V' * H + H' * V + v' * h + h' * v
316* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
317* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
318* v = V( liip1:N, BINDEX ) and
319* h = H( liip1:N, BINDEX )
320*
321*
322* A gets updated at the bottom of the outer loop
323* After this update, trueA = A + v' * h + h' * v
324* where v = V( liip1:N, BINDEX ) and
325* h = H( liip1:N, BINDEX ) and BINDEX = 0
326* Indeed, the previous loop invariant as stated above for the
327* top of the loop still holds, but with BINDEX = 0, H and V
328* are null matrices.
329*
330* After the current column of A is updated,
331* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
332* the rest of A is untouched.
333*
334* After the current block column of A is updated,
335* trueA = A + V' * H + H' * V
336* where H = H( MAXINDEX:N, 1:BINDEX ) and
337* V = V( MAXINDEX:N, 1:BINDEX )
338*
339* This brings us back to the point at which mvr2 is called.
340*
341*
342* Details of the parallelization:
343*
344* We delay spreading v across to all processor columns (which
345* would naturally happen at the bottom of the loop) in order to
346* combine the spread of v( : , i-1 ) with the spread of h( : , i )
347*
348* In order to compute h( :, i ), we must update A( :, i )
349* which means that the processor column owning A( :, i ) must
350* have: c, tau, v( i, i ) and h( i, i ).
351*
352* The traditional
353* way of computing v (and the one used in pzlatrd.f and
354* zlatrd.f) is:
355* v = tau * v
356* c = v' * h
357* alpha = - tau * c / 2
358* v = v + alpha * h
359* However, the traditional way of computing v requires that tau
360* be broadcast to all processors in the current column (to compute
361* v = tau * v) and then a sum-to-all is required (to
362* compute v' * h ). We use the following formula instead:
363* c = v' * h
364* v = tau * ( v - c * tau' * h / 2 )
365* The above formula allows tau to be spread down in the
366* same call to DGSUM2D which performs the sum-to-all of c.
367*
368* The computation of v, which could be performed in any processor
369* column (or other procesor subsets), is performed in the
370* processor column that owns A( :, i+1 ) so that A( :, i+1 )
371* can be updated prior to spreading v across.
372*
373* We keep the block column of A up-to-date to minimize the
374* work required in updating the current column of A. Updating
375* the block column of A is reasonably load balanced whereas
376* updating the current column of A is not (only the current
377* processor column is involved).
378*
379* In the following overview of the steps performed, M in the
380* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
381* or more flops per processor.
382*
383* Inner loop:
384* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
385*M h = house( A(index:n, index) )
386*M Spread v, h across
387*M vt = v^T; ht = h^T
388* A( index+1:n, index+1:maxindex ) -=
389* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
390*C v = tril(A) * h; vt = ht * tril(A,-1)
391*MorC v = v - H*V*h - V*H*h
392*M v = v + vt^T
393*M c = v' * h
394* v = tau * ( v - c * tau' * h / 2 )
395*C A = A - H*V - V*H
396*
397*
398*
399* =================================================================
400*
401* .. Parameters ..
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ MB_, NB_, RSRC_, CSRC_, LLD_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 DOUBLE PRECISION ONE
408 PARAMETER ( one = 1.0d0 )
409 COMPLEX*16 Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0d0, z_negone = -1.0d0,
411 $ z_zero = 0.0d0 )
412 DOUBLE PRECISION ZERO
413 parameter( zero = 0.0d+0 )
414* ..
415*
416*
417* .. Local Scalars ..
418*
419*
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT,
423 $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA,
424 $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1,
425 $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX,
426 $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP,
427 $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ,
428 $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX,
429 $ PBMIN, PBSIZE, PNB, ROWSPERPROC
430 DOUBLE PRECISION NORM, SAFMAX, SAFMIN
431 COMPLEX*16 ALPHA, BETA, C, CONJTOPH, CONJTOPV,
432 $ ONEOVERBETA, TOPH, TOPNV, TOPTAU, TOPV
433* ..
434* .. Local Arrays ..
435*
436*
437*
438*
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
440 DOUBLE PRECISION DTMP( 5 )
441 COMPLEX*16 CC( 3 )
442* ..
443* .. External Subroutines ..
445 $ dgebs2d, dgsum2d, pchk1mat, pdtreecomb,
447 $ zgerv2d, zgesd2d, zgsum2d, zlamov, zscal,
448 $ ztrmvt
449* ..
450* .. External Functions ..
451*
452 LOGICAL LSAME
453 INTEGER ICEIL, NUMROC, PJLAENV
454 DOUBLE PRECISION DZNRM2, PDLAMCH
455 EXTERNAL lsame, iceil, numroc, pjlaenv, dznrm2, pdlamch
456* ..
457* .. Intrinsic Functions ..
458 INTRINSIC dble, dcmplx, dconjg, dimag, ichar, max, min,
459 $ mod, sign, sqrt
460* ..
461*
462*
463* .. Executable Statements ..
464* This is just to keep ftnchek and toolpack/1 happy
465 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
466 $ rsrc_.LT.0 )RETURN
467*
468*
469*
470* Further details
471* ===============
472*
473* At the top of the loop, v and nh have been computed but not
474* spread across. Hence, A is out-of-date even after the
475* rank 2k update. Furthermore, we compute the next v before
476* nh is spread across.
477*
478* I claim that if we used a sum-to-all on NV, by summing CC within
479* each column, that we could compute NV locally and could avoid
480* spreading V across. Bruce claims that sum-to-all can be made
481* to cost no more than sum-to-one on the Paragon. If that is
482* true, this would be a win. But,
483* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
484* and hence the present scheme is better for now.
485*
486* Get grid parameters
487*
488 ictxt = desca( ctxt_ )
489 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
490*
491 safmax = sqrt( pdlamch( ictxt, 'O' ) ) / n
492 safmin = sqrt( pdlamch( ictxt, 's' ) )
493*
494* Test the input parameters
495*
496 INFO = 0
497.EQ. IF( NPROW-1 ) THEN
498 INFO = -( 600+CTXT_ )
499 ELSE
500*
501* Here we set execution options for PZHETTRD
502*
503 PNB = PJLAENV( ICTXT, 2, 'pzhettrd', 'l', 0, 0, 0, 0 )
504 ANB = PJLAENV( ICTXT, 3, 'pzhettrd', 'l', 0, 0, 0, 0 )
505*
506 INTERLEAVE = ( PJLAENV( ICTXT, 4, 'pzhettrd', 'l', 1, 0, 0,
507.EQ. $ 0 )1 )
508 TWOGEMMS = ( PJLAENV( ICTXT, 4, 'pzhettrd', 'l', 2, 0, 0,
509.EQ. $ 0 )1 )
510 BALANCED = ( PJLAENV( ICTXT, 4, 'pzhettrd', 'l', 3, 0, 0,
511.EQ. $ 0 )1 )
512*
513 CALL CHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, INFO )
514*
515*
516 UPPER = LSAME( UPLO, 'u' )
517.EQ..AND..NE. IF( INFO0 DESCA( NB_ )1 )
518 $ INFO = 600 + NB_
519.EQ. IF( INFO0 ) THEN
520*
521*
522* Here is the arithmetic:
523* Let maxnpq = max( np, nq, 2 * ANB )
524* LDV = 4 * max( np, nq ) + 2
525* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
526* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
527*
528* This overestimates memory requirements when ANB > NP/2
529* Memory requirements are lower when interleave = .false.
530* Hence, we could have two sets of memory requirements,
531* one for interleave and one for
532*
533*
534 NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
535 LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
536*
537 WORK( 1 ) = DCMPLX( LWMIN )
538.NOT. IF( LSAME( UPLO, 'l' ) ) THEN
539 INFO = -1
540.NE. ELSE IF( IA1 ) THEN
541 INFO = -4
542.NE. ELSE IF( JA1 ) THEN
543 INFO = -5
544.NE. ELSE IF( NPROWNPCOL ) THEN
545 INFO = -( 600+CTXT_ )
546.NE. ELSE IF( DESCA( DTYPE_ )1 ) THEN
547 INFO = -( 600+DTYPE_ )
548.NE. ELSE IF( DESCA( MB_ )1 ) THEN
549 INFO = -( 600+MB_ )
550.NE. ELSE IF( DESCA( NB_ )1 ) THEN
551 INFO = -( 600+NB_ )
552.NE. ELSE IF( DESCA( RSRC_ )0 ) THEN
553 INFO = -( 600+RSRC_ )
554.NE. ELSE IF( DESCA( CSRC_ )0 ) THEN
555 INFO = -( 600+CSRC_ )
556.LT. ELSE IF( LWORKLWMIN ) THEN
557 INFO = -11
558 END IF
559 END IF
560 IF( UPPER ) THEN
561 IDUM1( 1 ) = ICHAR( 'u' )
562 ELSE
563 IDUM1( 1 ) = ICHAR( 'l' )
564 END IF
565 IDUM2( 1 ) = 1
566*
567 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2,
568 $ INFO )
569 END IF
570*
571.NE. IF( INFO0 ) THEN
572 CALL PXERBLA( ICTXT, 'pzhettrd', -INFO )
573 RETURN
574 END IF
575*
576* Quick return if possible
577*
578.EQ. IF( N0 )
579 $ RETURN
580*
581*
582*
583* Reduce the lower triangle of sub( A )
584 NP = NUMROC( N, 1, MYROW, 0, NPROW )
585 NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )
586*
587 NXTROW = 0
588 NXTCOL = 0
589*
590 LIIP1 = 1
591 LIJP1 = 1
592 NPM1 = NP
593 NQM1 = NQ
594*
595 LDA = DESCA( LLD_ )
596 ICTXT = DESCA( CTXT_ )
597*
598*
599*
600* Miscellaneous details:
601* Put tau, D and E in the right places
602* Check signs
603* Place all the arrays in WORK, control their placement
604* in memory.
605*
606*
607*
608* Loop invariants
609* A(LIIP1, LIJ) points to the first element of A(I+1,J)
610* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
611* A(LII:N,LIJ:N) is one step out of date.
612* proc( CURROW, CURCOL ) owns A(LII,LIJ)
613* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
614*
615 INH = 1
616*
617 IF( INTERLEAVE ) THEN
618*
619* H and V are interleaved to minimize memory movement
620* LDV has to be twice as large to accomodate interleaving.
621* In addition, LDV is doubled again to allow v, h and
622* toptau to be spreaad across and transposed in a
623* single communication operation with minimum memory
624* movement.
625*
626* We could reduce LDV back to 2*MAX(NPM1,NQM1)
627* by increasing the memory movement required in
628* the spread and transpose of v, h and toptau.
629* However, since the non-interleaved path already
630* provides a mear minimum memory requirement option,
631* we did not provide this additional path.
632*
633 LDV = 4*( MAX( NPM1, NQM1 ) ) + 2
634*
635 INH = 1
636*
637 INV = INH + LDV / 2
638 INVT = INH + ( ANB+1 )*LDV
639*
640 INHT = INVT + LDV / 2
641 INTMP = INVT + LDV*( ANB+1 )
642*
643 ELSE
644 LDV = MAX( NPM1, NQM1 )
645*
646 INHT = INH + LDV*( ANB+1 )
647 INV = INHT + LDV*( ANB+1 )
648*
649* The code works without this +1, but only because of a
650* coincidence. Without the +1, WORK(INVT) gets trashed, but
651* WORK(INVT) is only used once and when it is used, it is
652* multiplied by WORK( INH ) which is zero. Hence, the fact
653* that WORK(INVT) is trashed has no effect.
654*
655 INVT = INV + LDV*( ANB+1 ) + 1
656 INTMP = INVT + LDV*( 2*ANB )
657*
658 END IF
659*
660.NE. IF( INFO0 ) THEN
661 CALL PXERBLA( ICTXT, 'pzhettrd', -INFO )
662 WORK( 1 ) = DCMPLX( LWMIN )
663 RETURN
664 END IF
665*
666*
667* The satisfies the loop invariant: trueA = A - V * HT - H * VT,
668* (where V, H, VT and HT all have BINDEX+1 rows/columns)
669* the first ANB times through the loop.
670*
671*
672*
673* Setting either ( InH and InHT ) or InV to Z_ZERO
674* is adequate except in the face of NaNs.
675*
676*
677 DO 10 I = 1, NP
678 WORK( INH+I-1 ) = Z_ZERO
679 WORK( INV+I-1 ) = Z_ZERO
680 10 CONTINUE
681 DO 20 I = 1, NQ
682 WORK( INHT+I-1 ) = Z_ZERO
683 20 CONTINUE
684*
685*
686*
687 TOPNV = Z_ZERO
688*
689 LTLIP1 = LIJP1
690 LTNM1 = NPM1
691.GT. IF( MYCOLMYROW ) THEN
692 LTLIP1 = LTLIP1 + 1
693 LTNM1 = LTNM1 - 1
694 END IF
695*
696*
697 DO 210 MININDEX = 1, N - 1, ANB
698*
699*
700 MAXINDEX = MIN( MININDEX+ANB-1, N )
701 LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1
702 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1
703*
704 NQB = NQ - LIJB + 1
705 NPB = NP - LIIB + 1
706 INHTB = INHT + LIJB - 1
707 INVTB = INVT + LIJB - 1
708 INHB = INH + LIIB - 1
709 INVB = INV + LIIB - 1
710*
711*
712*
713*
714 DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 )
715*
716 BINDEX = INDEX - MININDEX
717*
718 CURROW = NXTROW
719 CURCOL = NXTCOL
720*
721 NXTROW = MOD( CURROW+1, NPROW )
722 NXTCOL = MOD( CURCOL+1, NPCOL )
723*
724 LII = LIIP1
725 LIJ = LIJP1
726 NPM0 = NPM1
727*
728.EQ. IF( MYROWCURROW ) THEN
729 NPM1 = NPM1 - 1
730 LIIP1 = LIIP1 + 1
731 END IF
732.EQ. IF( MYCOLCURCOL ) THEN
733 NQM1 = NQM1 - 1
734 LIJP1 = LIJP1 + 1
735 LTLIP1 = LTLIP1 + 1
736 LTNM1 = LTNM1 - 1
737 END IF
738*
739*
740*
741*
742* V = NV, VT = NVT, H = NH, HT = NHT
743*
744*
745* Update the current column of A
746*
747*
748.EQ. IF( MYCOLCURCOL ) THEN
749*
750 INDEXA = LII + ( LIJ-1 )*LDA
751 INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV
752 INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV
753 CONJTOPH = DCONJG( WORK( INHT+LIJ-1+BINDEX*LDV ) )
754 CONJTOPV = DCONJG( TOPNV )
755*
756.GT. IF( INDEX1 ) THEN
757 DO 30 I = 0, NPM0 - 1
758* A( INDEXA+I ) = A( INDEXA+I )
759 A( INDEXA+I ) = A( INDEXA+I ) -
760 $ WORK( INDEXINV+LDV+I )*CONJTOPH -
761 $ WORK( INDEXINH+LDV+I )*CONJTOPV
762 30 CONTINUE
763 END IF
764*
765*
766 END IF
767*
768*
769.EQ. IF( MYCOLCURCOL ) THEN
770*
771* Compute the householder vector
772*
773.EQ. IF( MYROWCURROW ) THEN
774 DTMP( 2 ) = DBLE( A( LII+( LIJ-1 )*LDA ) )
775 ELSE
776 DTMP( 2 ) = ZERO
777 END IF
778.EQ. IF( MYROWNXTROW ) THEN
779 DTMP( 3 ) = DBLE( A( LIIP1+( LIJ-1 )*LDA ) )
780 DTMP( 4 ) = DIMAG( A( LIIP1+( LIJ-1 )*LDA ) )
781 ELSE
782 DTMP( 3 ) = ZERO
783 DTMP( 4 ) = ZERO
784 END IF
785*
786 NORM = DZNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 )
787 DTMP( 1 ) = NORM
788*
789* IF DTMP(5) = 1.0, NORM is too large and might cause
790* overflow, hence PDTREECOMB must be called. IF DTMP(5)
791* is zero on output, DTMP(1) can be trusted.
792*
793 DTMP( 5 ) = ZERO
794.GE..OR..LT. IF( DTMP( 1 )SAFMAX DTMP( 1 )SAFMIN ) THEN
795 DTMP( 5 ) = ONE
796 DTMP( 1 ) = ZERO
797 END IF
798*
799 DTMP( 1 ) = DTMP( 1 )*DTMP( 1 )
800 CALL DGSUM2D( ICTXT, 'c', ' ', 5, 1, DTMP, 5, -1,
801 $ CURCOL )
802.EQ. IF( DTMP( 5 )ZERO ) THEN
803 DTMP( 1 ) = SQRT( DTMP( 1 ) )
804 ELSE
805 DTMP( 1 ) = NORM
806 CALL PDTREECOMB( ICTXT, 'c', 1, DTMP, -1, MYCOL,
807 $ DCOMBNRM2 )
808 END IF
809*
810 NORM = DTMP( 1 )
811*
812 D( LIJ ) = DTMP( 2 )
813.EQ..AND..EQ. IF( MYROWCURROW MYCOLCURCOL ) THEN
814 A( LII+( LIJ-1 )*LDA ) = DCMPLX( D( LIJ ), ZERO )
815 END IF
816*
817*
818 ALPHA = DCMPLX( DTMP( 3 ), DTMP( 4 ) )
819*
820 NORM = SIGN( NORM, DBLE( ALPHA ) )
821*
822.EQ. IF( NORMZERO ) THEN
823 TOPTAU = ZERO
824 ELSE
825 BETA = NORM + ALPHA
826 TOPTAU = BETA / NORM
827 ONEOVERBETA = 1.0D0 / BETA
828*
829 CALL ZSCAL( NPM1, ONEOVERBETA,
830 $ A( LIIP1+( LIJ-1 )*LDA ), 1 )
831 END IF
832*
833.EQ. IF( MYROWNXTROW ) THEN
834 A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE
835 END IF
836*
837 TAU( LIJ ) = TOPTAU
838 E( LIJ ) = -NORM
839*
840 END IF
841*
842*
843* Spread v, nh, toptau across
844*
845 DO 40 I = 0, NPM1 - 1
846 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+
847 $ ( LIJ-1 )*LDA )
848 40 CONTINUE
849*
850.EQ. IF( MYCOLCURCOL ) THEN
851 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU
852 CALL ZGEBS2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
853 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
854 $ NPM1+NPM1+1 )
855 ELSE
856 CALL ZGEBR2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
857 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
858 $ NPM1+NPM1+1, MYROW, CURCOL )
859 TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 )
860 END IF
861 DO 50 I = 0, NPM1 - 1
862 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1-
863 $ 1+BINDEX*LDV+NPM1+I )
864 50 CONTINUE
865*
866.LT. IF( INDEXN ) THEN
867.EQ..AND..EQ. IF( MYROWNXTROW MYCOLCURCOL )
868 $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ )
869 END IF
870*
871* Transpose v, nh
872*
873*
874.EQ. IF( MYROWMYCOL ) THEN
875 DO 60 I = 0, NPM1 + NPM1
876 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+
877 $ BINDEX*LDV+I )
878 60 CONTINUE
879 ELSE
880 CALL ZGESD2D( ICTXT, NPM1+NPM1, 1,
881 $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1,
882 $ MYCOL, MYROW )
883 CALL ZGERV2D( ICTXT, NQM1+NQM1, 1,
884 $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1,
885 $ MYCOL, MYROW )
886 END IF
887*
888 DO 70 I = 0, NQM1 - 1
889 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+
890 $ LIJP1-1+BINDEX*LDV+NQM1+I )
891 70 CONTINUE
892*
893*
894* Update the current block column of A
895*
896.GT. IF( INDEX1 ) THEN
897 DO 90 J = LIJP1, LIJB - 1
898 DO 80 I = 0, NPM1 - 1
899*
900 A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA )
901 $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )*
902 $ DCONJG( WORK( INHT+J-1+BINDEX*LDV ) ) -
903 $ WORK( INH+LIIP1-1+BINDEX*LDV+I )*
904 $ DCONJG( WORK( INVT+J-1+BINDEX*LDV ) )
905 80 CONTINUE
906 90 CONTINUE
907 END IF
908*
909*
910*
911* Compute NV = A * NHT; NVT = A * NH
912*
913* These two lines are necessary because these elements
914* are not always involved in the calls to ZTRMVT
915* for two reasons:
916* 1) On diagonal processors, the call to TRMVT
917* involves only LTNM1-1 elements
918* 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1
919* and when the results are combined across all processes,
920* uninitialized values may be included.
921 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO
922 WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO
923*
924*
925.EQ. IF( MYROWMYCOL ) THEN
926.GT. IF( LTNM11 ) THEN
927 CALL ZTRMVT( 'l', ltnm1-1,
928 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
929 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
930 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
931 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
932 $ ldv ), 1, work( inht+lijp1-1+( bindex+
933 $ 1 )*ldv ), 1 )
934 END IF
935 DO 100 i = 1, ltnm1
936 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
937 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
938 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
939 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
940 100 CONTINUE
941 ELSE
942 IF( ltnm1.GT.0 )
943 $ CALL ztrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
944 $ lda, work( invt+lijp1-1+( bindex+1 )*
945 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
946 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
947 $ ( bindex+1 )*ldv ), 1,
948 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
949 $ 1 )
950*
951 END IF
952*
953*
954* We take advantage of the fact that:
955* A * sum( B ) = sum ( A * B ) for matrices A,B
956*
957* trueA = A + V * HT + H * VT
958* hence: (trueA)v = Av' + V * HT * v + H * VT * v
959* VT * v = sum_p_in_NPROW ( VTp * v )
960* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
961*
962* v = v + V * HT * h + H * VT * h
963*
964*
965*
966* tmp = HT * nh1
967 DO 110 i = 1, 2*( bindex+1 )
968 work( intmp-1+i ) = 0
969 110 CONTINUE
970*
971 IF( balanced ) THEN
972 npset = nprow
973 mysetnum = myrow
974 rowsperproc = iceil( nqb, npset )
975 myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
976 numrows = min( rowsperproc, nqb-myfirstrow+1 )
977*
978*
979* tmp = HT * v
980*
981 CALL zgemv( 'C', numrows, bindex+1, z_one,
982 $ work( inhtb+myfirstrow-1 ), ldv,
983 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
984 $ 1, z_zero, work( intmp ), 1 )
985* tmp2 = VT * v
986 CALL zgemv( 'C', numrows, bindex+1, z_one,
987 $ work( invtb+myfirstrow-1 ), ldv,
988 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
989 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
990*
991*
992 CALL zgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
993 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
994 ELSE
995* tmp = HT * v
996*
997 CALL zgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
998 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999 $ z_zero, work( intmp ), 1 )
1000* tmp2 = VT * v
1001 CALL zgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
1002 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1003 $ z_zero, work( intmp+bindex+1 ), 1 )
1004*
1005 END IF
1006*
1007*
1008*
1009 IF( balanced ) THEN
1010 mysetnum = mycol
1011*
1012 rowsperproc = iceil( npb, npset )
1013 myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1014 numrows = min( rowsperproc, npb-myfirstrow+1 )
1015*
1016 CALL zgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1017 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1018*
1019*
1020* v = v + V * tmp
1021 IF( index.GT.1. ) THEN
1022 CALL zgemv( 'N', numrows, bindex+1, z_negone,
1023 $ work( invb+myfirstrow-1 ), ldv,
1024 $ work( intmp ), 1, z_one,
1025 $ work( invb+myfirstrow-1+( bindex+1 )*
1026 $ ldv ), 1 )
1027*
1028* v = v + H * tmp2
1029 CALL zgemv( 'N', numrows, bindex+1, z_negone,
1030 $ work( inhb+myfirstrow-1 ), ldv,
1031 $ work( intmp+bindex+1 ), 1, z_one,
1032 $ work( invb+myfirstrow-1+( bindex+1 )*
1033 $ ldv ), 1 )
1034 END IF
1035*
1036 ELSE
1037* v = v + V * tmp
1038 CALL zgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1039 $ ldv, work( intmp ), 1, z_one,
1040 $ work( invb+( bindex+1 )*ldv ), 1 )
1041*
1042*
1043* v = v + H * tmp2
1044 CALL zgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1045 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1046 $ work( invb+( bindex+1 )*ldv ), 1 )
1047*
1048 END IF
1049*
1050*
1051* Transpose NV and add it back into NVT
1052*
1053 IF( myrow.EQ.mycol ) THEN
1054 DO 120 i = 0, nqm1 - 1
1055 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1056 $ i )
1057 120 CONTINUE
1058 ELSE
1059 CALL zgesd2d( ictxt, nqm1, 1,
1060 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1061 $ nqm1, mycol, myrow )
1062 CALL zgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1063 $ myrow )
1064*
1065 END IF
1066 DO 130 i = 0, npm1 - 1
1067 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1068 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1069 130 CONTINUE
1070*
1071* Sum-to-one NV rowwise (within a row)
1072*
1073 CALL zgsum2d( ictxt, 'R', ' ', npm1, 1,
1074 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1075 $ myrow, nxtcol )
1076*
1077*
1078* Dot product c = NV * NH
1079* Sum-to-all c within next processor column
1080*
1081*
1082 IF( mycol.EQ.nxtcol ) THEN
1083 cc( 1 ) = z_zero
1084 DO 140 i = 0, npm1 - 1
1085 cc( 1 ) = cc( 1 ) + dconjg( work( inv+liip1-1+
1086 $ ( bindex+1 )*ldv+i ) )*
1087 $ work( inh+liip1-1+( bindex+1 )*ldv+i )
1088 140 CONTINUE
1089 IF( myrow.EQ.nxtrow ) THEN
1090 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1091 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1092 ELSE
1093 cc( 2 ) = z_zero
1094 cc( 3 ) = z_zero
1095 END IF
1096 CALL zgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1097*
1098 topv = cc( 2 )
1099 c = cc( 1 )
1100 toph = cc( 3 )
1101*
1102 topnv = toptau*( topv-c*dconjg( toptau ) / 2*toph )
1103*
1104*
1105* Compute V = Tau * (V - C * Tau' / 2 * H )
1106*
1107*
1108 DO 150 i = 0, npm1 - 1
1109 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1110 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1111 $ dconjg( toptau ) / 2*work( inh+liip1-1+( bindex+
1112 $ 1 )*ldv+i ) )
1113 150 CONTINUE
1114*
1115 END IF
1116*
1117*
1118 160 CONTINUE
1119*
1120*
1121* Perform the rank2k update
1122*
1123 IF( maxindex.LT.n ) THEN
1124*
1125 DO 170 i = 0, npm1 - 1
1126 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1127 170 CONTINUE
1128*
1129*
1130*
1131 IF( .NOT.twogemms ) THEN
1132 IF( interleave ) THEN
1133 ldzg = ldv / 2
1134 ELSE
1135 CALL zlamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1136 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1137*
1138 CALL zlamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1139 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1140 ldzg = ldv
1141 END IF
1142 nbzg = anb*2
1143 ELSE
1144 ldzg = ldv
1145 nbzg = anb
1146 END IF
1147*
1148*
1149 DO 180 pbmin = 1, ltnm1, pnb
1150*
1151 pbsize = min( pnb, ltnm1-pbmin+1 )
1152 pbmax = min( ltnm1, pbmin+pnb-1 )
1153 CALL zgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1154 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1155 $ work( invt+lijp1-1 ), ldzg, z_one,
1156 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1157 IF( twogemms ) THEN
1158 CALL zgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1159 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1160 $ work( inht+lijp1-1 ), ldzg, z_one,
1161 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1162 END IF
1163 180 CONTINUE
1164*
1165*
1166*
1167 DO 190 i = 0, npm1 - 1
1168 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1169 work( inh+liip1-1+i ) = work( intmp+i )
1170 190 CONTINUE
1171 DO 200 i = 0, nqm1 - 1
1172 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1173 200 CONTINUE
1174*
1175*
1176 END IF
1177*
1178* End of the update A code
1179*
1180 210 CONTINUE
1181*
1182 IF( mycol.EQ.nxtcol ) THEN
1183 IF( myrow.EQ.nxtrow ) THEN
1184*
1185 d( nq ) = dble( a( np+( nq-1 )*lda ) )
1186 a( np+( nq-1 )*lda ) = d( nq )
1187*
1188 CALL dgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1189 ELSE
1190 CALL dgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1191 $ nxtcol )
1192 END IF
1193 END IF
1194*
1195*
1196*
1197*
1198 work( 1 ) = dcmplx( lwmin )
1199 RETURN
1200*
1201* End of PZHETTRD
1202*
1203*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
subroutine zscal(n, za, zx, incx)
ZSCAL
Definition zscal.f:78
subroutine zgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
ZGEMV
Definition zgemv.f:158
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:187
real(wp) function dznrm2(n, x, incx)
DZNRM2
Definition dznrm2.f90:90
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine zgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1092
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition mpi.f:1577
subroutine zgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1051
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
double precision function pdlamch(ictxt, cmach)
Definition pdblastst.f:6769
subroutine dcombnrm2(x, y)
Definition pdtreecomb.f:307
subroutine pdtreecomb(ictxt, scope, n, mine, rdest0, cdest0, subptr)
Definition pdtreecomb.f:3
integer function pjlaenv(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pjlaenv.f:3
subroutine pzhettrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pzhettrd.f:3
subroutine ztrmvt(uplo, n, t, ldt, x, incx, y, incy, w, incw, z, incz)
Definition ztrmvt.f:3