OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pssyttrd.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pssyttrd (uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)

Function/Subroutine Documentation

◆ pssyttrd()

subroutine pssyttrd ( character uplo,
integer n,
real, dimension( * ) a,
integer ia,
integer ja,
integer, dimension( * ) desca,
real, dimension( * ) d,
real, dimension( * ) e,
real, dimension( * ) tau,
real, dimension( * ) work,
integer lwork,
integer info )

Definition at line 1 of file pssyttrd.f.

3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * )
15* ..
16*
17* Purpose
18*
19* =======
20*
21* PSSYTTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
22* tridiagonal form T by an unitary similarity transformation:
23* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
24*
25* Notes
26* =====
27*
28* Each global data object is described by an associated description
29* vector. This vector stores the information required to establish
30* the mapping between an object element and its corresponding
31* process and memory location.
32*
33* Let A be a generic term for any 2D block cyclicly distributed
34* array.
35* Such a global array has an associated description vector DESCA.
36* In the following comments, the character _ should be read as
37* "of the global array".
38*
39* NOTATION STORED IN EXPLANATION
40* --------------- -------------- -----------------------------------
41* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42* DTYPE_A = 1.
43* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
44* indicating the BLACS process grid A is distribu-
45* ted over. The context itself is glo-
46* bal, but the handle (the integer
47* value) may vary.
48* M_A (global) DESCA( M_ ) The number of rows in the global
49* array A.
50* N_A (global) DESCA( N_ ) The number of columns in the global
51* array A.
52* MB_A (global) DESCA( MB_ ) The blocking factor used to
53* distribute the rows of the array.
54* NB_A (global) DESCA( NB_ ) The blocking factor used to
55* distribute the columns of the array.
56* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
57* first row of the array A is distributed.
58* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59* first column of the array A is
60* distributed.
61* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62* array. LLD_A >= MAX(1,LOCp(M_A)).
63*
64* Let K be the number of rows or columns of a distributed matrix,
65* and assume that its process grid has dimension p x q.
66* LOCp( K ) denotes the number of elements of K that a process
67* would receive if K were distributed over the p processes of its
68* process column.
69* Similarly, LOCq( K ) denotes the number of elements of K that a
70* process would receive if K were distributed over the q processes
71* of its process row.
72* The values of LOCp() and LOCq() may be determined via a call to
73* the ScaLAPACK tool function, NUMROC:
74* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76* An upper bound for these quantities may be computed by:
77* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79*
80* Arguments
81* =========
82*
83* UPLO (global input) CHARACTER
84* Specifies whether the upper or lower triangular part of the
85* Hermitian matrix sub( A ) is stored:
86* = 'U': Upper triangular
87* = 'L': Lower triangular
88*
89* N (global input) INTEGER
90* The number of rows and columns to be operated on, i.e. the
91* order of the distributed submatrix sub( A ). N >= 0.
92*
93* A (local input/local output) REAL pointer into the
94* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
95* On entry, this array contains the local pieces of the
96* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
97* leading N-by-N upper triangular part of sub( A ) contains
98* the upper triangular part of the matrix, and its strictly
99* lower triangular part is not referenced. If UPLO = 'L', the
100* leading N-by-N lower triangular part of sub( A ) contains the
101* lower triangular part of the matrix, and its strictly upper
102* triangular part is not referenced. On exit, if UPLO = 'U',
103* the diagonal and first superdiagonal of sub( A ) are over-
104* written by the corresponding elements of the tridiagonal
105* matrix T, and the elements above the first superdiagonal,
106* with the array TAU, represent the unitary matrix Q as a
107* product of elementary reflectors; if UPLO = 'L', the diagonal
108* and first subdiagonal of sub( A ) are overwritten by the
109* corresponding elements of the tridiagonal matrix T, and the
110* elements below the first subdiagonal, with the array TAU,
111* represent the unitary matrix Q as a product of elementary
112* reflectors. See Further Details.
113*
114* IA (global input) INTEGER
115* The row index in the global array A indicating the first
116* row of sub( A ).
117*
118* JA (global input) INTEGER
119* The column index in the global array A indicating the
120* first column of sub( A ).
121*
122* DESCA (global and local input) INTEGER array of dimension DLEN_.
123* The array descriptor for the distributed matrix A.
124*
125* D (local output) REAL array, dim LOCq(JA+N-1)
126* The diagonal elements of the tridiagonal matrix T:
127* D(i) = A(i,i). D is tied to the distributed matrix A.
128*
129* E (local output) REAL array, dim LOCq(JA+N-1)
130* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
131* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133* distributed matrix A.
134*
135* TAU (local output) REAL, array, dimension
136* LOCq(JA+N-1). This array contains the scalar factors TAU of
137* the elementary reflectors. TAU is tied to the distributed
138* matrix A.
139*
140* WORK (local workspace) REAL array, dimension (LWORK)
141* On exit, WORK( 1 ) returns the minimal and optimal workspace
142*
143* LWORK (local input) INTEGER
144* The dimension of the array WORK.
145* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
146* Where:
147* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
148* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PSSYTTRD', 'L', 0, 0,
149* 0, 0 )
150*
151* NUMROC is a ScaLAPACK tool function;
152* PJLAENV is a ScaLAPACK envionmental inquiry function
153* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
154* the subroutine BLACS_GRIDINFO.
155*
156* INFO (global output) INTEGER
157* = 0: successful exit
158* < 0: If the i-th argument is an array and the j-entry had
159* an illegal value, then INFO = -(i*100+j), if the i-th
160* argument is a scalar and had an illegal value, then
161* INFO = -i.
162*
163* Further Details
164* ===============
165*
166* If UPLO = 'U', the matrix Q is represented as a product of
167* elementary reflectors
168*
169* Q = H(n-1) . . . H(2) H(1).
170*
171* Each H(i) has the form
172*
173* H(i) = I - tau * v * v'
174*
175* where tau is a complex scalar, and v is a complex vector with
176* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
177* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
178*
179* If UPLO = 'L', the matrix Q is represented as a product of
180* elementary reflectors
181*
182* Q = H(1) H(2) . . . H(n-1).
183*
184* Each H(i) has the form
185*
186* H(i) = I - tau * v * v'
187*
188* where tau is a complex scalar, and v is a complex vector with
189* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
190* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
191*
192* The contents of sub( A ) on exit are illustrated by the following
193* examples with n = 5:
194*
195* if UPLO = 'U': if UPLO = 'L':
196*
197* ( d e v2 v3 v4 ) ( d )
198* ( d e v3 v4 ) ( e d )
199* ( d e v4 ) ( v1 e d )
200* ( d e ) ( v1 v2 e d )
201* ( d ) ( v1 v2 v3 e d )
202*
203* where d and e denote diagonal and off-diagonal elements of T, and
204* vi denotes an element of the vector defining H(i).
205*
206* Data storage requirements
207* =========================
208*
209* PSSYTTRD is not intended to be called directly. All users are
210* encourage to call PSSYTRD which will then call PSHETTRD if
211* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
212* the process grid must be square ( i.e. NPROW = NPCOL ) and
213* only lower triangular storage is supported.
214*
215* Local variables
216* ===============
217*
218* PSSYTTRD uses five local arrays:
219* WORK ( InV ) dimension ( NP, ANB+1): array V
220* WORK ( InH ) dimension ( NP, ANB+1): array H
221* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
222* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
223* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
224*
225* Arrays V and H are replicated across all processor columns.
226* Arrays V^T and H^T are replicated across all processor rows.
227*
228* WORK ( InVT ), or V^T, is stored as a tall skinny
229* array ( NQ x ANB-1 ) for efficiency. Since only the lower
230* triangular portion of A is updated, Av is computed as:
231* tril(A) * v + v^T * tril(A,-1). This is performed as
232* two local triangular matrix-vector multiplications (both in
233* MVR2) followed by a transpose and a sum across the columns.
234* In the local computation, WORK( InVT ) is used to compute
235* tril(A) * v and WORK( InV ) is used to compute
236* v^T * tril(A,-1)
237*
238* The following variables are global indices into A:
239* INDEX: The current global row and column number.
240* MAXINDEX: The global row and column for the first row and
241* column in the trailing block of A.
242* LIIB, LIJB: The first row, column in
243*
244* The following variables point into the arrays A, V, H, V^T, H^T:
245* BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
246* LII: local index I: The local row number for row INDEX
247* LIJ: local index J: The local column number for column INDEX
248* LIIP1: local index I+1: The local row number for row INDEX+1
249* LIJP1: local index J+1: The local col number for col INDEX+1
250* LTLI: lower triangular local index I: The local row for the
251* upper left entry in tril( A(INDEX, INDEX) )
252* LTLIP1: lower triangular local index I+1: The local row for the
253* upper left entry in tril( A(INDEX+1, INDEX+1) )
254*
255* Details: The distinction between LII and LTLI (and between
256* LIIP1 and LTLIP1) is subtle. Within the current processor
257* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
258* on some processors, A( LII, LIJ ) points to an element
259* above the diagonal, on these processors, LTLI = LII+1.
260*
261* The following variables give the number of rows and/or columns
262* in various matrices:
263* NP: The number of local rows in A( 1:N, 1:N )
264* NQ: The number of local columns in A( 1:N, 1:N )
265* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
266* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
267* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
268* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
269* LTNM0: The number of local rows & columns in
270* tril( A( INDEX:N, INDEX:N ) )
271* LTNM1: The number of local rows & columns in
272* tril( A( INDEX+1:N, INDEX+1:N ) )
273* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
274* processors, i.e. those where MYCOL == MYROW.
275*
276* Invariants:
277* NP = NPM0 + LII - 1
278* NQ = NQM0 + LIJ - 1
279* NP = NPM1 + LIIP1 - 1
280* NQ = NQM1 + LIJP1 - 1
281* NP = LTLI + LTNM0 - 1
282* NP = LTLIP1 + LTNM1 - 1
283*
284* Temporary variables. The following variables are used within
285* a few lines after they are set and do hold state from one loop
286* iteration to the next:
287*
288* The matrix A:
289* The matrix A does not hold the same values that it would
290* in an unblocked code nor the values that it would hold in
291* in a blocked code.
292*
293* The value of A is confusing. It is easiest to state the
294* difference between trueA and A at the point that MVR2 is called,
295* so we will start there.
296*
297* Let trueA be the value that A would
298* have at a given point in an unblocked code and A
299* be the value that A has in this code at the same point.
300*
301* At the time of the call to MVR2,
302* trueA = A + V' * H + H' * V
303* where H = H( MAXINDEX:N, 1:BINDEX ) and
304* V = V( MAXINDEX:N, 1:BINDEX ).
305*
306* At the bottom of the inner loop,
307* trueA = A + V' * H + H' * V + v' * h + h' * v
308* where H = H( MAXINDEX:N, 1:BINDEX ) and
309* V = V( MAXINDEX:N, 1:BINDEX ) and
310* v = V( liip1:N, BINDEX+1 ) and
311* h = H( liip1:N, BINDEX+1 )
312*
313* At the top of the loop, BINDEX gets incremented, hence:
314* trueA = A + V' * H + H' * V + v' * h + h' * v
315* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
316* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
317* v = V( liip1:N, BINDEX ) and
318* h = H( liip1:N, BINDEX )
319*
320*
321* A gets updated at the bottom of the outer loop
322* After this update, trueA = A + v' * h + h' * v
323* where v = V( liip1:N, BINDEX ) and
324* h = H( liip1:N, BINDEX ) and BINDEX = 0
325* Indeed, the previous loop invariant as stated above for the
326* top of the loop still holds, but with BINDEX = 0, H and V
327* are null matrices.
328*
329* After the current column of A is updated,
330* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
331* the rest of A is untouched.
332*
333* After the current block column of A is updated,
334* trueA = A + V' * H + H' * V
335* where H = H( MAXINDEX:N, 1:BINDEX ) and
336* V = V( MAXINDEX:N, 1:BINDEX )
337*
338* This brings us back to the point at which mvr2 is called.
339*
340*
341* Details of the parallelization:
342*
343* We delay spreading v across to all processor columns (which
344* would naturally happen at the bottom of the loop) in order to
345* combine the spread of v( : , i-1 ) with the spread of h( : , i )
346*
347* In order to compute h( :, i ), we must update A( :, i )
348* which means that the processor column owning A( :, i ) must
349* have: c, tau, v( i, i ) and h( i, i ).
350*
351* The traditional
352* way of computing v (and the one used in pzlatrd.f and
353* zlatrd.f) is:
354* v = tau * v
355* c = v' * h
356* alpha = - tau * c / 2
357* v = v + alpha * h
358* However, the traditional way of computing v requires that tau
359* be broadcast to all processors in the current column (to compute
360* v = tau * v) and then a sum-to-all is required (to
361* compute v' * h ). We use the following formula instead:
362* c = v' * h
363* v = tau * ( v - c * tau' * h / 2 )
364* The above formula allows tau to be spread down in the
365* same call to SGSUM2D which performs the sum-to-all of c.
366*
367* The computation of v, which could be performed in any processor
368* column (or other procesor subsets), is performed in the
369* processor column that owns A( :, i+1 ) so that A( :, i+1 )
370* can be updated prior to spreading v across.
371*
372* We keep the block column of A up-to-date to minimize the
373* work required in updating the current column of A. Updating
374* the block column of A is reasonably load balanced whereas
375* updating the current column of A is not (only the current
376* processor column is involved).
377*
378* In the following overview of the steps performed, M in the
379* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
380* or more flops per processor.
381*
382* Inner loop:
383* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
384*M h = house( A(index:n, index) )
385*M Spread v, h across
386*M vt = v^T; ht = h^T
387* A( index+1:n, index+1:maxindex ) -=
388* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
389*C v = tril(A) * h; vt = ht * tril(A,-1)
390*MorC v = v - H*V*h - V*H*h
391*M v = v + vt^T
392*M c = v' * h
393* v = tau * ( v - c * tau' * h / 2 )
394*C A = A - H*V - V*H
395*
396*
397*
398* =================================================================
399*
400* .. Parameters ..
401 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402 $ MB_, NB_, RSRC_, CSRC_, LLD_
403 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
406 REAL ONE
407 parameter( one = 1.0e0 )
408 REAL Z_ONE, Z_NEGONE, Z_ZERO
409 parameter( z_one = 1.0e0, z_negone = -1.0e0,
410 $ z_zero = 0.0e0 )
411 REAL ZERO
412 parameter( zero = 0.0e+0 )
413* ..
414*
415*
416* .. Local Scalars ..
417*
418*
419 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421 $ INDEXA, INDEXINH, INDEXINV, INH, INHB, INHT,
422 $ INHTB, INTMP, INV, INVB, INVT, INVTB, J, LDA,
423 $ LDV, LDZG, LII, LIIB, LIIP1, LIJ, LIJB, LIJP1,
424 $ LTLIP1, LTNM1, LWMIN, MAXINDEX, MININDEX,
425 $ MYCOL, MYFIRSTROW, MYROW, MYSETNUM, NBZG, NP,
426 $ NPB, NPCOL, NPM0, NPM1, NPROW, NPS, NPSET, NQ,
427 $ NQB, NQM1, NUMROWS, NXTCOL, NXTROW, PBMAX,
428 $ PBMIN, PBSIZE, PNB, ROWSPERPROC
429 REAL ALPHA, BETA, C, NORM, ONEOVERBETA, SAFMAX,
430 $ SAFMIN, TOPH, TOPNV, TOPTAU, TOPV, TTOPH, TTOPV
431* ..
432* .. Local Arrays ..
433*
434*
435*
436*
437 INTEGER IDUM1( 1 ), IDUM2( 1 )
438 REAL CC( 3 ), DTMP( 5 )
439* ..
440* .. External Subroutines ..
443 $ sgemv, sgerv2d, sgesd2d, sgsum2d, slamov,
444 $ sscal, strmvt
445* ..
446* .. External Functions ..
447*
448 LOGICAL LSAME
449 INTEGER ICEIL, NUMROC, PJLAENV
450 REAL PSLAMCH, SNRM2
451 EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, snrm2
452* ..
453* .. Intrinsic Functions ..
454 INTRINSIC ichar, max, min, mod, real, sign, sqrt
455* ..
456*
457*
458* .. Executable Statements ..
459* This is just to keep ftnchek and toolpack/1 happy
460 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
461 $ rsrc_.LT.0 )RETURN
462*
463*
464*
465* Further details
466* ===============
467*
468* At the top of the loop, v and nh have been computed but not
469* spread across. Hence, A is out-of-date even after the
470* rank 2k update. Furthermore, we compute the next v before
471* nh is spread across.
472*
473* I claim that if we used a sum-to-all on NV, by summing CC within
474* each column, that we could compute NV locally and could avoid
475* spreading V across. Bruce claims that sum-to-all can be made
476* to cost no more than sum-to-one on the Paragon. If that is
477* true, this would be a win. But,
478* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
479* and hence the present scheme is better for now.
480*
481* Get grid parameters
482*
483 ictxt = desca( ctxt_ )
484 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
485*
486 safmax = sqrt( pslamch( ictxt, 'O' ) ) / n
487 safmin = sqrt( pslamch( ictxt, 'S' ) )
488*
489* Test the input parameters
490*
491 info = 0
492 IF( nprow.EQ.-1 ) THEN
493 info = -( 600+ctxt_ )
494 ELSE
495*
496* Here we set execution options for PSSYTTRD
497*
498 pnb = pjlaenv( ictxt, 2, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
499 anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
500*
501 interleave = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 1, 0, 0,
502 $ 0 ).EQ.1 )
503 twogemms = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 2, 0, 0,
504 $ 0 ).EQ.1 )
505 balanced = ( pjlaenv( ictxt, 4, 'PSSYTTRD', 'L', 3, 0, 0,
506 $ 0 ).EQ.1 )
507*
508 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
509*
510*
511 upper = lsame( uplo, 'u' )
512.EQ..AND..NE. IF( INFO0 DESCA( NB_ )1 )
513 $ INFO = 600 + NB_
514.EQ. IF( INFO0 ) THEN
515*
516*
517* Here is the arithmetic:
518* Let maxnpq = max( np, nq, 2 * ANB )
519* LDV = 4 * max( np, nq ) + 2
520* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
521* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
522*
523* This overestimates memory requirements when ANB > NP/2
524* Memory requirements are lower when interleave = .false.
525* Hence, we could have two sets of memory requirements,
526* one for interleave and one for
527*
528*
529 NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
530 LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
531*
532 WORK( 1 ) = REAL( LWMIN )
533.NOT. IF( LSAME( UPLO, 'l' ) ) THEN
534 INFO = -1
535.NE. ELSE IF( IA1 ) THEN
536 INFO = -4
537.NE. ELSE IF( JA1 ) THEN
538 INFO = -5
539.NE. ELSE IF( NPROWNPCOL ) THEN
540 INFO = -( 600+CTXT_ )
541.NE. ELSE IF( DESCA( DTYPE_ )1 ) THEN
542 INFO = -( 600+DTYPE_ )
543.NE. ELSE IF( DESCA( MB_ )1 ) THEN
544 INFO = -( 600+MB_ )
545.NE. ELSE IF( DESCA( NB_ )1 ) THEN
546 INFO = -( 600+NB_ )
547.NE. ELSE IF( DESCA( RSRC_ )0 ) THEN
548 INFO = -( 600+RSRC_ )
549.NE. ELSE IF( DESCA( CSRC_ )0 ) THEN
550 INFO = -( 600+CSRC_ )
551.LT. ELSE IF( LWORKLWMIN ) THEN
552 INFO = -11
553 END IF
554 END IF
555 IF( UPPER ) THEN
556 IDUM1( 1 ) = ICHAR( 'u' )
557 ELSE
558 IDUM1( 1 ) = ICHAR( 'l' )
559 END IF
560 IDUM2( 1 ) = 1
561*
562 CALL PCHK1MAT( N, 2, N, 2, IA, JA, DESCA, 6, 1, IDUM1, IDUM2,
563 $ INFO )
564 END IF
565*
566.NE. IF( INFO0 ) THEN
567 CALL PXERBLA( ICTXT, 'pssyttrd', -INFO )
568 RETURN
569 END IF
570*
571* Quick return if possible
572*
573.EQ. IF( N0 )
574 $ RETURN
575*
576*
577*
578* Reduce the lower triangle of sub( A )
579 NP = NUMROC( N, 1, MYROW, 0, NPROW )
580 NQ = NUMROC( N, 1, MYCOL, 0, NPCOL )
581*
582 NXTROW = 0
583 NXTCOL = 0
584*
585 LIIP1 = 1
586 LIJP1 = 1
587 NPM1 = NP
588 NQM1 = NQ
589*
590 LDA = DESCA( LLD_ )
591 ICTXT = DESCA( CTXT_ )
592*
593*
594*
595* Miscellaneous details:
596* Put tau, D and E in the right places
597* Check signs
598* Place all the arrays in WORK, control their placement
599* in memory.
600*
601*
602*
603* Loop invariants
604* A(LIIP1, LIJ) points to the first element of A(I+1,J)
605* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
606* A(LII:N,LIJ:N) is one step out of date.
607* proc( CURROW, CURCOL ) owns A(LII,LIJ)
608* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
609*
610 INH = 1
611*
612 IF( INTERLEAVE ) THEN
613*
614* H and V are interleaved to minimize memory movement
615* LDV has to be twice as large to accomodate interleaving.
616* In addition, LDV is doubled again to allow v, h and
617* toptau to be spreaad across and transposed in a
618* single communication operation with minimum memory
619* movement.
620*
621* We could reduce LDV back to 2*MAX(NPM1,NQM1)
622* by increasing the memory movement required in
623* the spread and transpose of v, h and toptau.
624* However, since the non-interleaved path already
625* provides a mear minimum memory requirement option,
626* we did not provide this additional path.
627*
628 LDV = 4*( MAX( NPM1, NQM1 ) ) + 2
629*
630 INH = 1
631*
632 INV = INH + LDV / 2
633 INVT = INH + ( ANB+1 )*LDV
634*
635 INHT = INVT + LDV / 2
636 INTMP = INVT + LDV*( ANB+1 )
637*
638 ELSE
639 LDV = MAX( NPM1, NQM1 )
640*
641 INHT = INH + LDV*( ANB+1 )
642 INV = INHT + LDV*( ANB+1 )
643*
644* The code works without this +1, but only because of a
645* coincidence. Without the +1, WORK(INVT) gets trashed, but
646* WORK(INVT) is only used once and when it is used, it is
647* multiplied by WORK( INH ) which is zero. Hence, the fact
648* that WORK(INVT) is trashed has no effect.
649*
650 INVT = INV + LDV*( ANB+1 ) + 1
651 INTMP = INVT + LDV*( 2*ANB )
652*
653 END IF
654*
655.NE. IF( INFO0 ) THEN
656 CALL PXERBLA( ICTXT, 'pssyttrd', -INFO )
657 WORK( 1 ) = REAL( LWMIN )
658 RETURN
659 END IF
660*
661*
662* The satisfies the loop invariant: trueA = A - V * HT - H * VT,
663* (where V, H, VT and HT all have BINDEX+1 rows/columns)
664* the first ANB times through the loop.
665*
666*
667*
668* Setting either ( InH and InHT ) or InV to Z_ZERO
669* is adequate except in the face of NaNs.
670*
671*
672 DO 10 I = 1, NP
673 WORK( INH+I-1 ) = Z_ZERO
674 WORK( INV+I-1 ) = Z_ZERO
675 10 CONTINUE
676 DO 20 I = 1, NQ
677 WORK( INHT+I-1 ) = Z_ZERO
678 20 CONTINUE
679*
680*
681*
682 TOPNV = Z_ZERO
683*
684 LTLIP1 = LIJP1
685 LTNM1 = NPM1
686.GT. IF( MYCOLMYROW ) THEN
687 LTLIP1 = LTLIP1 + 1
688 LTNM1 = LTNM1 - 1
689 END IF
690*
691*
692 DO 210 MININDEX = 1, N - 1, ANB
693*
694*
695 MAXINDEX = MIN( MININDEX+ANB-1, N )
696 LIJB = NUMROC( MAXINDEX, 1, MYCOL, 0, NPCOL ) + 1
697 LIIB = NUMROC( MAXINDEX, 1, MYROW, 0, NPROW ) + 1
698*
699 NQB = NQ - LIJB + 1
700 NPB = NP - LIIB + 1
701 INHTB = INHT + LIJB - 1
702 INVTB = INVT + LIJB - 1
703 INHB = INH + LIIB - 1
704 INVB = INV + LIIB - 1
705*
706*
707*
708*
709 DO 160 INDEX = MININDEX, MIN( MAXINDEX, N-1 )
710*
711 BINDEX = INDEX - MININDEX
712*
713 CURROW = NXTROW
714 CURCOL = NXTCOL
715*
716 NXTROW = MOD( CURROW+1, NPROW )
717 NXTCOL = MOD( CURCOL+1, NPCOL )
718*
719 LII = LIIP1
720 LIJ = LIJP1
721 NPM0 = NPM1
722*
723.EQ. IF( MYROWCURROW ) THEN
724 NPM1 = NPM1 - 1
725 LIIP1 = LIIP1 + 1
726 END IF
727.EQ. IF( MYCOLCURCOL ) THEN
728 NQM1 = NQM1 - 1
729 LIJP1 = LIJP1 + 1
730 LTLIP1 = LTLIP1 + 1
731 LTNM1 = LTNM1 - 1
732 END IF
733*
734*
735*
736*
737* V = NV, VT = NVT, H = NH, HT = NHT
738*
739*
740* Update the current column of A
741*
742*
743.EQ. IF( MYCOLCURCOL ) THEN
744*
745 INDEXA = LII + ( LIJ-1 )*LDA
746 INDEXINV = INV + LII - 1 + ( BINDEX-1 )*LDV
747 INDEXINH = INH + LII - 1 + ( BINDEX-1 )*LDV
748 TTOPH = WORK( INHT+LIJ-1+BINDEX*LDV )
749 TTOPV = TOPNV
750*
751.GT. IF( INDEX1 ) THEN
752 DO 30 I = 0, NPM0 - 1
753* A( INDEXA+I ) = A( INDEXA+I )
754 A( INDEXA+I ) = A( INDEXA+I ) -
755 $ WORK( INDEXINV+LDV+I )*TTOPH -
756 $ WORK( INDEXINH+LDV+I )*TTOPV
757 30 CONTINUE
758 END IF
759*
760*
761 END IF
762*
763*
764.EQ. IF( MYCOLCURCOL ) THEN
765*
766* Compute the householder vector
767*
768.EQ. IF( MYROWCURROW ) THEN
769 DTMP( 2 ) = A( LII+( LIJ-1 )*LDA )
770 ELSE
771 DTMP( 2 ) = ZERO
772 END IF
773.EQ. IF( MYROWNXTROW ) THEN
774 DTMP( 3 ) = A( LIIP1+( LIJ-1 )*LDA )
775 DTMP( 4 ) = ZERO
776 ELSE
777 DTMP( 3 ) = ZERO
778 DTMP( 4 ) = ZERO
779 END IF
780*
781 NORM = SNRM2( NPM1, A( LIIP1+( LIJ-1 )*LDA ), 1 )
782 DTMP( 1 ) = NORM
783*
784* IF DTMP(5) = 1.0, NORM is too large and might cause
785* overflow, hence PSTREECOMB must be called. IF DTMP(5)
786* is zero on output, DTMP(1) can be trusted.
787*
788 DTMP( 5 ) = ZERO
789.GE..OR..LT. IF( DTMP( 1 )SAFMAX DTMP( 1 )SAFMIN ) THEN
790 DTMP( 5 ) = ONE
791 DTMP( 1 ) = ZERO
792 END IF
793*
794 DTMP( 1 ) = DTMP( 1 )*DTMP( 1 )
795 CALL SGSUM2D( ICTXT, 'c', ' ', 5, 1, DTMP, 5, -1,
796 $ CURCOL )
797.EQ. IF( DTMP( 5 )ZERO ) THEN
798 DTMP( 1 ) = SQRT( DTMP( 1 ) )
799 ELSE
800 DTMP( 1 ) = NORM
801 CALL PSTREECOMB( ICTXT, 'c', 1, DTMP, -1, MYCOL,
802 $ SCOMBNRM2 )
803 END IF
804*
805 NORM = DTMP( 1 )
806*
807 D( LIJ ) = DTMP( 2 )
808.EQ..AND..EQ. IF( MYROWCURROW MYCOLCURCOL ) THEN
809 A( LII+( LIJ-1 )*LDA ) = D( LIJ )
810 END IF
811*
812*
813 ALPHA = DTMP( 3 )
814*
815 NORM = SIGN( NORM, ALPHA )
816*
817.EQ. IF( NORMZERO ) THEN
818 TOPTAU = ZERO
819 ELSE
820 BETA = NORM + ALPHA
821 TOPTAU = BETA / NORM
822 ONEOVERBETA = 1.0E0 / BETA
823*
824 CALL SSCAL( NPM1, ONEOVERBETA,
825 $ A( LIIP1+( LIJ-1 )*LDA ), 1 )
826 END IF
827*
828.EQ. IF( MYROWNXTROW ) THEN
829 A( LIIP1+( LIJ-1 )*LDA ) = Z_ONE
830 END IF
831*
832 TAU( LIJ ) = TOPTAU
833 E( LIJ ) = -NORM
834*
835 END IF
836*
837*
838* Spread v, nh, toptau across
839*
840 DO 40 I = 0, NPM1 - 1
841 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+I ) = A( LIIP1+I+
842 $ ( LIJ-1 )*LDA )
843 40 CONTINUE
844*
845.EQ. IF( MYCOLCURCOL ) THEN
846 WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 ) = TOPTAU
847 CALL SGEBS2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
848 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
849 $ NPM1+NPM1+1 )
850 ELSE
851 CALL SGEBR2D( ICTXT, 'r', ' ', NPM1+NPM1+1, 1,
852 $ WORK( INV+LIIP1-1+BINDEX*LDV ),
853 $ NPM1+NPM1+1, MYROW, CURCOL )
854 TOPTAU = WORK( INV+LIIP1-1+BINDEX*LDV+NPM1+NPM1 )
855 END IF
856 DO 50 I = 0, NPM1 - 1
857 WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1-
858 $ 1+BINDEX*LDV+NPM1+I )
859 50 CONTINUE
860*
861.LT. IF( INDEXN ) THEN
862.EQ..AND..EQ. IF( MYROWNXTROW MYCOLCURCOL )
863 $ A( LIIP1+( LIJ-1 )*LDA ) = E( LIJ )
864 END IF
865*
866* Transpose v, nh
867*
868*
869.EQ. IF( MYROWMYCOL ) THEN
870 DO 60 I = 0, NPM1 + NPM1
871 WORK( INVT+LIJP1-1+BINDEX*LDV+I ) = WORK( INV+LIIP1-1+
872 $ BINDEX*LDV+I )
873 60 CONTINUE
874 ELSE
875 CALL SGESD2D( ICTXT, NPM1+NPM1, 1,
876 $ WORK( INV+LIIP1-1+BINDEX*LDV ), NPM1+NPM1,
877 $ MYCOL, MYROW )
878 CALL SGERV2D( ICTXT, NQM1+NQM1, 1,
879 $ WORK( INVT+LIJP1-1+BINDEX*LDV ), NQM1+NQM1,
880 $ MYCOL, MYROW )
881 END IF
882*
883 DO 70 I = 0, NQM1 - 1
884 WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV+I ) = WORK( INVT+
885 $ LIJP1-1+BINDEX*LDV+NQM1+I )
886 70 CONTINUE
887*
888*
889* Update the current block column of A
890*
891.GT. IF( INDEX1 ) THEN
892 DO 90 J = LIJP1, LIJB - 1
893 DO 80 I = 0, NPM1 - 1
894*
895 A( LIIP1+I+( J-1 )*LDA ) = A( LIIP1+I+( J-1 )*LDA )
896 $ - WORK( INV+LIIP1-1+BINDEX*LDV+I )*
897 $ WORK( INHT+J-1+BINDEX*LDV ) -
898 $ WORK( INH+LIIP1-1+BINDEX*LDV+I )*
899 $ WORK( INVT+J-1+BINDEX*LDV )
900 80 CONTINUE
901 90 CONTINUE
902 END IF
903*
904*
905*
906* Compute NV = A * NHT; NVT = A * NH
907*
908* These two lines are necessary because these elements
909* are not always involved in the calls to STRMVT
910* for two reasons:
911* 1) On diagonal processors, the call to TRMVT
912* involves only LTNM1-1 elements
913* 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1
914* and when the results are combined across all processes,
915* uninitialized values may be included.
916 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ) = Z_ZERO
917 WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+NQM1-1 ) = Z_ZERO
918*
919*
920.EQ. IF( MYROWMYCOL ) THEN
921.GT. IF( LTNM11 ) THEN
922 CALL STRMVT( 'l', LTNM1-1,
923 $ A( LTLIP1+1+( LIJP1-1 )*LDA ), LDA,
924 $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ), 1,
925 $ WORK( INH+LTLIP1+1-1+( BINDEX+1 )*LDV ),
926 $ 1, WORK( INV+LTLIP1+1-1+( BINDEX+1 )*
927 $ LDV ), 1, WORK( INHT+LIJP1-1+( BINDEX+
928 $ 1 )*LDV ), 1 )
929 END IF
930 DO 100 I = 1, LTNM1
931 WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV )
932 $ = WORK( INVT+LIJP1+I-1-1+( BINDEX+1 )*LDV ) +
933 $ A( LTLIP1+I-1+( LIJP1+I-1-1 )*LDA )*
934 $ WORK( INH+LTLIP1+I-1-1+( BINDEX+1 )*LDV )
935 100 CONTINUE
936 ELSE
937.GT. IF( LTNM10 )
938 $ CALL STRMVT( 'l', LTNM1, A( LTLIP1+( LIJP1-1 )*LDA ),
939 $ LDA, WORK( INVT+LIJP1-1+( BINDEX+1 )*
940 $ LDV ), 1, WORK( INH+LTLIP1-1+( BINDEX+
941 $ 1 )*LDV ), 1, WORK( INV+LTLIP1-1+
942 $ ( BINDEX+1 )*LDV ), 1,
943 $ WORK( INHT+LIJP1-1+( BINDEX+1 )*LDV ),
944 $ 1 )
945*
946 END IF
947*
948*
949* We take advantage of the fact that:
950* A * sum( B ) = sum ( A * B ) for matrices A,B
951*
952* trueA = A + V * HT + H * VT
953* hence: (trueA)v = Av' + V * HT * v + H * VT * v
954* VT * v = sum_p_in_NPROW ( VTp * v )
955* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
956*
957* v = v + V * HT * h + H * VT * h
958*
959*
960*
961* tmp = HT * nh1
962 DO 110 I = 1, 2*( BINDEX+1 )
963 WORK( INTMP-1+I ) = 0
964 110 CONTINUE
965*
966 IF( BALANCED ) THEN
967 NPSET = NPROW
968 MYSETNUM = MYROW
969 ROWSPERPROC = ICEIL( NQB, NPSET )
970 MYFIRSTROW = MIN( NQB+1, 1+ROWSPERPROC*MYSETNUM )
971 NUMROWS = MIN( ROWSPERPROC, NQB-MYFIRSTROW+1 )
972*
973*
974* tmp = HT * v
975*
976 CALL SGEMV( 'c', NUMROWS, BINDEX+1, Z_ONE,
977 $ WORK( INHTB+MYFIRSTROW-1 ), LDV,
978 $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ),
979 $ 1, Z_ZERO, WORK( INTMP ), 1 )
980* tmp2 = VT * v
981 CALL SGEMV( 'c', NUMROWS, BINDEX+1, Z_ONE,
982 $ WORK( INVTB+MYFIRSTROW-1 ), LDV,
983 $ WORK( INHTB+MYFIRSTROW-1+( BINDEX+1 )*LDV ),
984 $ 1, Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 )
985*
986*
987 CALL SGSUM2D( ICTXT, 'c', ' ', 2*( BINDEX+1 ), 1,
988 $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 )
989 ELSE
990* tmp = HT * v
991*
992 CALL SGEMV( 'c', NQB, BINDEX+1, Z_ONE, WORK( INHTB ),
993 $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1,
994 $ Z_ZERO, WORK( INTMP ), 1 )
995* tmp2 = VT * v
996 CALL SGEMV( 'c', NQB, BINDEX+1, Z_ONE, WORK( INVTB ),
997 $ LDV, WORK( INHTB+( BINDEX+1 )*LDV ), 1,
998 $ Z_ZERO, WORK( INTMP+BINDEX+1 ), 1 )
999*
1000 END IF
1001*
1002*
1003*
1004 IF( BALANCED ) THEN
1005 MYSETNUM = MYCOL
1006*
1007 ROWSPERPROC = ICEIL( NPB, NPSET )
1008 MYFIRSTROW = MIN( NPB+1, 1+ROWSPERPROC*MYSETNUM )
1009 NUMROWS = MIN( ROWSPERPROC, NPB-MYFIRSTROW+1 )
1010*
1011 CALL SGSUM2D( ICTXT, 'r', ' ', 2*( BINDEX+1 ), 1,
1012 $ WORK( INTMP ), 2*( BINDEX+1 ), -1, -1 )
1013*
1014*
1015* v = v + V * tmp
1016.GT. IF( INDEX1. ) THEN
1017 CALL SGEMV( 'n', NUMROWS, BINDEX+1, Z_NEGONE,
1018 $ WORK( INVB+MYFIRSTROW-1 ), LDV,
1019 $ WORK( INTMP ), 1, Z_ONE,
1020 $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )*
1021 $ LDV ), 1 )
1022*
1023* v = v + H * tmp2
1024 CALL SGEMV( 'n', NUMROWS, BINDEX+1, Z_NEGONE,
1025 $ WORK( INHB+MYFIRSTROW-1 ), LDV,
1026 $ WORK( INTMP+BINDEX+1 ), 1, Z_ONE,
1027 $ WORK( INVB+MYFIRSTROW-1+( BINDEX+1 )*
1028 $ LDV ), 1 )
1029 END IF
1030*
1031 ELSE
1032* v = v + V * tmp
1033 CALL SGEMV( 'n', NPB, BINDEX+1, Z_NEGONE, WORK( INVB ),
1034 $ LDV, WORK( INTMP ), 1, Z_ONE,
1035 $ WORK( INVB+( BINDEX+1 )*LDV ), 1 )
1036*
1037*
1038* v = v + H * tmp2
1039 CALL SGEMV( 'n', NPB, BINDEX+1, Z_NEGONE, WORK( INHB ),
1040 $ LDV, WORK( INTMP+BINDEX+1 ), 1, Z_ONE,
1041 $ WORK( INVB+( BINDEX+1 )*LDV ), 1 )
1042*
1043 END IF
1044*
1045*
1046* Transpose NV and add it back into NVT
1047*
1048.EQ. IF( MYROWMYCOL ) THEN
1049 DO 120 I = 0, NQM1 - 1
1050 WORK( INTMP+I ) = WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV+
1051 $ I )
1052 120 CONTINUE
1053 ELSE
1054 CALL SGESD2D( ICTXT, NQM1, 1,
1055 $ WORK( INVT+LIJP1-1+( BINDEX+1 )*LDV ),
1056 $ NQM1, MYCOL, MYROW )
1057 CALL SGERV2D( ICTXT, NPM1, 1, WORK( INTMP ), NPM1, MYCOL,
1058 $ MYROW )
1059*
1060 END IF
1061 DO 130 I = 0, NPM1 - 1
1062 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = WORK( INV+LIIP1-
1063 $ 1+( BINDEX+1 )*LDV+I ) + WORK( INTMP+I )
1064 130 CONTINUE
1065*
1066* Sum-to-one NV rowwise (within a row)
1067*
1068 CALL SGSUM2D( ICTXT, 'r', ' ', NPM1, 1,
1069 $ WORK( INV+LIIP1-1+( BINDEX+1 )*LDV ), NPM1,
1070 $ MYROW, NXTCOL )
1071*
1072*
1073* Dot product c = NV * NH
1074* Sum-to-all c within next processor column
1075*
1076*
1077.EQ. IF( MYCOLNXTCOL ) THEN
1078 CC( 1 ) = Z_ZERO
1079 DO 140 I = 0, NPM1 - 1
1080 CC( 1 ) = CC( 1 ) + WORK( INV+LIIP1-1+( BINDEX+1 )*
1081 $ LDV+I )*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+
1082 $ I )
1083 140 CONTINUE
1084.EQ. IF( MYROWNXTROW ) THEN
1085 CC( 2 ) = WORK( INV+LIIP1-1+( BINDEX+1 )*LDV )
1086 CC( 3 ) = WORK( INH+LIIP1-1+( BINDEX+1 )*LDV )
1087 ELSE
1088 CC( 2 ) = Z_ZERO
1089 CC( 3 ) = Z_ZERO
1090 END IF
1091 CALL SGSUM2D( ICTXT, 'c', ' ', 3, 1, CC, 3, -1, NXTCOL )
1092*
1093 TOPV = CC( 2 )
1094 C = CC( 1 )
1095 TOPH = CC( 3 )
1096*
1097 TOPNV = TOPTAU*( TOPV-C*TOPTAU / 2*TOPH )
1098*
1099*
1100* Compute V = Tau * (V - C * Tau' / 2 * H )
1101*
1102*
1103 DO 150 I = 0, NPM1 - 1
1104 WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I ) = TOPTAU*
1105 $ ( WORK( INV+LIIP1-1+( BINDEX+1 )*LDV+I )-C*TOPTAU /
1106 $ 2*WORK( INH+LIIP1-1+( BINDEX+1 )*LDV+I ) )
1107 150 CONTINUE
1108*
1109 END IF
1110*
1111*
1112 160 CONTINUE
1113*
1114*
1115* Perform the rank2k update
1116*
1117.LT. IF( MAXINDEXN ) THEN
1118*
1119 DO 170 I = 0, NPM1 - 1
1120 WORK( INTMP+I ) = WORK( INH+LIIP1-1+ANB*LDV+I )
1121 170 CONTINUE
1122*
1123*
1124*
1125.NOT. IF( TWOGEMMS ) THEN
1126 IF( INTERLEAVE ) THEN
1127 LDZG = LDV / 2
1128 ELSE
1129 CALL SLAMOV( 'a', LTNM1, ANB, WORK( INHT+LIJP1-1 ),
1130 $ LDV, WORK( INVT+LIJP1-1+ANB*LDV ), LDV )
1131*
1132 CALL SLAMOV( 'a', LTNM1, ANB, WORK( INV+LTLIP1-1 ),
1133 $ LDV, WORK( INH+LTLIP1-1+ANB*LDV ), LDV )
1134 LDZG = LDV
1135 END IF
1136 NBZG = ANB*2
1137 ELSE
1138 LDZG = LDV
1139 NBZG = ANB
1140 END IF
1141*
1142*
1143 DO 180 PBMIN = 1, LTNM1, PNB
1144*
1145 PBSIZE = MIN( PNB, LTNM1-PBMIN+1 )
1146 PBMAX = MIN( LTNM1, PBMIN+PNB-1 )
1147 CALL SGEMM( 'n', 'c', PBSIZE, PBMAX, NBZG, Z_NEGONE,
1148 $ WORK( INH+LTLIP1-1+PBMIN-1 ), LDZG,
1149 $ WORK( INVT+LIJP1-1 ), LDZG, Z_ONE,
1150 $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA )
1151 IF( TWOGEMMS ) THEN
1152 CALL SGEMM( 'n', 'c', PBSIZE, PBMAX, ANB, Z_NEGONE,
1153 $ WORK( INV+LTLIP1-1+PBMIN-1 ), LDZG,
1154 $ WORK( INHT+LIJP1-1 ), LDZG, Z_ONE,
1155 $ A( LTLIP1+PBMIN-1+( LIJP1-1 )*LDA ), LDA )
1156 END IF
1157 180 CONTINUE
1158*
1159*
1160*
1161 DO 190 I = 0, NPM1 - 1
1162 WORK( INV+LIIP1-1+I ) = WORK( INV+LIIP1-1+ANB*LDV+I )
1163 WORK( INH+LIIP1-1+I ) = WORK( INTMP+I )
1164 190 CONTINUE
1165 DO 200 I = 0, NQM1 - 1
1166 WORK( INHT+LIJP1-1+I ) = WORK( INHT+LIJP1-1+ANB*LDV+I )
1167 200 CONTINUE
1168*
1169*
1170 END IF
1171*
1172* End of the update A code
1173*
1174 210 CONTINUE
1175*
1176.EQ. IF( MYCOLNXTCOL ) THEN
1177.EQ. IF( MYROWNXTROW ) THEN
1178*
1179 D( NQ ) = A( NP+( NQ-1 )*LDA )
1180*
1181 CALL SGEBS2D( ICTXT, 'c', ' ', 1, 1, D( NQ ), 1 )
1182 ELSE
1183 CALL SGEBR2D( ICTXT, 'c', ' ', 1, 1, D( NQ ), 1, NXTROW,
1184 $ NXTCOL )
1185 END IF
1186 END IF
1187*
1188*
1189*
1190*
1191 WORK( 1 ) = REAL( LWMIN )
1192 RETURN
1193*
1194* End of PSSYTTRD
1195*
1196*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
real(wp) function snrm2(n, x, incx)
SNRM2
Definition snrm2.f90:89
subroutine sgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
SGEMV
Definition sgemv.f:156
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:187
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition mpi.f:1577
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
integer function pjlaenv(ictxt, ispec, name, opts, n1, n2, n3, n4)
Definition pjlaenv.f:3
subroutine pssyttrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pssyttrd.f:3
subroutine scombnrm2(x, y)
Definition pstreecomb.f:306
subroutine pstreecomb(ictxt, scope, n, mine, rdest0, cdest0, subptr)
Definition pstreecomb.f:3
subroutine strmvt(uplo, n, t, ldt, x, incx, y, incy, w, incw, z, incz)
Definition strmvt.f:3