OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
zhetrd_hb2st.F
Go to the documentation of this file.
1*> \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download ZHETRD_HB2ST + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
22* D, E, HOUS, LHOUS, WORK, LWORK, INFO )
23*
24* #if defined(_OPENMP)
25* use omp_lib
26* #endif
27*
28* IMPLICIT NONE
29*
30* .. Scalar Arguments ..
31* CHARACTER STAGE1, UPLO, VECT
32* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
33* ..
34* .. Array Arguments ..
35* DOUBLE PRECISION D( * ), E( * )
36* COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * )
37* ..
38*
39*
40*> \par Purpose:
41* =============
42*>
43*> \verbatim
44*>
45*> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
46*> tridiagonal form T by a unitary similarity transformation:
47*> Q**H * A * Q = T.
48*> \endverbatim
49*
50* Arguments:
51* ==========
52*
53*> \param[in] STAGE1
54*> \verbatim
55*> STAGE1 is CHARACTER*1
56*> = 'N': "No": to mention that the stage 1 of the reduction
57*> from dense to band using the zhetrd_he2hb routine
58*> was not called before this routine to reproduce AB.
59*> In other term this routine is called as standalone.
60*> = 'Y': "Yes": to mention that the stage 1 of the
61*> reduction from dense to band using the zhetrd_he2hb
62*> routine has been called to produce AB (e.g., AB is
63*> the output of zhetrd_he2hb.
64*> \endverbatim
65*>
66*> \param[in] VECT
67*> \verbatim
68*> VECT is CHARACTER*1
69*> = 'N': No need for the Housholder representation,
70*> and thus LHOUS is of size max(1, 4*N);
71*> = 'V': the Householder representation is needed to
72*> either generate or to apply Q later on,
73*> then LHOUS is to be queried and computed.
74*> (NOT AVAILABLE IN THIS RELEASE).
75*> \endverbatim
76*>
77*> \param[in] UPLO
78*> \verbatim
79*> UPLO is CHARACTER*1
80*> = 'U': Upper triangle of A is stored;
81*> = 'L': Lower triangle of A is stored.
82*> \endverbatim
83*>
84*> \param[in] N
85*> \verbatim
86*> N is INTEGER
87*> The order of the matrix A. N >= 0.
88*> \endverbatim
89*>
90*> \param[in] KD
91*> \verbatim
92*> KD is INTEGER
93*> The number of superdiagonals of the matrix A if UPLO = 'U',
94*> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
95*> \endverbatim
96*>
97*> \param[in,out] AB
98*> \verbatim
99*> AB is COMPLEX*16 array, dimension (LDAB,N)
100*> On entry, the upper or lower triangle of the Hermitian band
101*> matrix A, stored in the first KD+1 rows of the array. The
102*> j-th column of A is stored in the j-th column of the array AB
103*> as follows:
104*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
105*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
106*> On exit, the diagonal elements of AB are overwritten by the
107*> diagonal elements of the tridiagonal matrix T; if KD > 0, the
108*> elements on the first superdiagonal (if UPLO = 'U') or the
109*> first subdiagonal (if UPLO = 'L') are overwritten by the
110*> off-diagonal elements of T; the rest of AB is overwritten by
111*> values generated during the reduction.
112*> \endverbatim
113*>
114*> \param[in] LDAB
115*> \verbatim
116*> LDAB is INTEGER
117*> The leading dimension of the array AB. LDAB >= KD+1.
118*> \endverbatim
119*>
120*> \param[out] D
121*> \verbatim
122*> D is DOUBLE PRECISION array, dimension (N)
123*> The diagonal elements of the tridiagonal matrix T.
124*> \endverbatim
125*>
126*> \param[out] E
127*> \verbatim
128*> E is DOUBLE PRECISION array, dimension (N-1)
129*> The off-diagonal elements of the tridiagonal matrix T:
130*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
131*> \endverbatim
132*>
133*> \param[out] HOUS
134*> \verbatim
135*> HOUS is COMPLEX*16 array, dimension LHOUS, that
136*> store the Householder representation.
137*> \endverbatim
138*>
139*> \param[in] LHOUS
140*> \verbatim
141*> LHOUS is INTEGER
142*> The dimension of the array HOUS. LHOUS = MAX(1, dimension)
143*> If LWORK = -1, or LHOUS=-1,
144*> then a query is assumed; the routine
145*> only calculates the optimal size of the HOUS array, returns
146*> this value as the first entry of the HOUS array, and no error
147*> message related to LHOUS is issued by XERBLA.
148*> LHOUS = MAX(1, dimension) where
149*> dimension = 4*N if VECT='N'
150*> not available now if VECT='H'
151*> \endverbatim
152*>
153*> \param[out] WORK
154*> \verbatim
155*> WORK is COMPLEX*16 array, dimension LWORK.
156*> \endverbatim
157*>
158*> \param[in] LWORK
159*> \verbatim
160*> LWORK is INTEGER
161*> The dimension of the array WORK. LWORK = MAX(1, dimension)
162*> If LWORK = -1, or LHOUS=-1,
163*> then a workspace query is assumed; the routine
164*> only calculates the optimal size of the WORK array, returns
165*> this value as the first entry of the WORK array, and no error
166*> message related to LWORK is issued by XERBLA.
167*> LWORK = MAX(1, dimension) where
168*> dimension = (2KD+1)*N + KD*NTHREADS
169*> where KD is the blocking size of the reduction,
170*> FACTOPTNB is the blocking used by the QR or LQ
171*> algorithm, usually FACTOPTNB=128 is a good choice
172*> NTHREADS is the number of threads used when
173*> openMP compilation is enabled, otherwise =1.
174*> \endverbatim
175*>
176*> \param[out] INFO
177*> \verbatim
178*> INFO is INTEGER
179*> = 0: successful exit
180*> < 0: if INFO = -i, the i-th argument had an illegal value
181*> \endverbatim
182*
183* Authors:
184* ========
185*
186*> \author Univ. of Tennessee
187*> \author Univ. of California Berkeley
188*> \author Univ. of Colorado Denver
189*> \author NAG Ltd.
190*
191*> \ingroup complex16OTHERcomputational
192*
193*> \par Further Details:
194* =====================
195*>
196*> \verbatim
197*>
198*> Implemented by Azzam Haidar.
199*>
200*> All details are available on technical report, SC11, SC13 papers.
201*>
202*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
203*> Parallel reduction to condensed forms for symmetric eigenvalue problems
204*> using aggregated fine-grained and memory-aware kernels. In Proceedings
205*> of 2011 International Conference for High Performance Computing,
206*> Networking, Storage and Analysis (SC '11), New York, NY, USA,
207*> Article 8 , 11 pages.
208*> http://doi.acm.org/10.1145/2063384.2063394
209*>
210*> A. Haidar, J. Kurzak, P. Luszczek, 2013.
211*> An improved parallel singular value algorithm and its implementation
212*> for multicore hardware, In Proceedings of 2013 International Conference
213*> for High Performance Computing, Networking, Storage and Analysis (SC '13).
214*> Denver, Colorado, USA, 2013.
215*> Article 90, 12 pages.
216*> http://doi.acm.org/10.1145/2503210.2503292
217*>
218*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
219*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
220*> calculations based on fine-grained memory aware tasks.
221*> International Journal of High Performance Computing Applications.
222*> Volume 28 Issue 2, Pages 196-209, May 2014.
223*> http://hpc.sagepub.com/content/28/2/196
224*>
225*> \endverbatim
226*>
227* =====================================================================
228 SUBROUTINE zhetrd_hb2st( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
229 $ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
230*
231*
232#if defined(_OPENMP)
233 use omp_lib
234#endif
235*
236 IMPLICIT NONE
237*
238* -- LAPACK computational routine --
239* -- LAPACK is a software package provided by Univ. of Tennessee, --
240* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
241*
242* .. Scalar Arguments ..
243 CHARACTER STAGE1, UPLO, VECT
244 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
245* ..
246* .. Array Arguments ..
247 DOUBLE PRECISION D( * ), E( * )
248 COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * )
249* ..
250*
251* =====================================================================
252*
253* .. Parameters ..
254 DOUBLE PRECISION RZERO
255 COMPLEX*16 ZERO, ONE
256 parameter( rzero = 0.0d+0,
257 $ zero = ( 0.0d+0, 0.0d+0 ),
258 $ one = ( 1.0d+0, 0.0d+0 ) )
259* ..
260* .. Local Scalars ..
261 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
262 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
263 $ ed, stind, edind, blklastind, colpt, thed,
264 $ stepercol, grsiz, thgrsiz, thgrnb, thgrid,
265 $ nbtiles, ttype, tid, nthreads, debug,
266 $ abdpos, abofdpos, dpos, ofdpos, awpos,
267 $ inda, indw, apos, sizea, lda, indv, indtau,
268 $ sizev, sizetau, ldv, lhmin, lwmin
269 DOUBLE PRECISION ABSTMP
270 COMPLEX*16 TMP
271* ..
272* .. External Subroutines ..
274* ..
275* .. Intrinsic Functions ..
276 INTRINSIC min, max, ceiling, dble, real
277* ..
278* .. External Functions ..
279 LOGICAL LSAME
280 INTEGER ILAENV2STAGE
281 EXTERNAL lsame, ilaenv2stage
282* ..
283* .. Executable Statements ..
284*
285* Determine the minimal workspace size required.
286* Test the input parameters
287*
288 debug = 0
289 info = 0
290 afters1 = lsame( stage1, 'Y' )
291 wantq = lsame( vect, 'V' )
292 upper = lsame( uplo, 'U' )
293 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
294*
295* Determine the block size, the workspace size and the hous size.
296*
297 ib = ilaenv2stage( 2, 'ZHETRD_HB2ST', vect, n, kd, -1, -1 )
298 lhmin = ilaenv2stage( 3, 'zhetrd_hb2st', VECT, N, KD, IB, -1 )
299 LWMIN = ILAENV2STAGE( 4, 'zhetrd_hb2st', VECT, N, KD, IB, -1 )
300*
301.NOT..AND..NOT. IF( AFTERS1 LSAME( STAGE1, 'n' ) ) THEN
302 INFO = -1
303.NOT. ELSE IF( LSAME( VECT, 'n' ) ) THEN
304 INFO = -2
305.NOT..AND..NOT. ELSE IF( UPPER LSAME( UPLO, 'l' ) ) THEN
306 INFO = -3
307.LT. ELSE IF( N0 ) THEN
308 INFO = -4
309.LT. ELSE IF( KD0 ) THEN
310 INFO = -5
311.LT. ELSE IF( LDAB(KD+1) ) THEN
312 INFO = -7
313.LT..AND..NOT. ELSE IF( LHOUSLHMIN LQUERY ) THEN
314 INFO = -11
315.LT..AND..NOT. ELSE IF( LWORKLWMIN LQUERY ) THEN
316 INFO = -13
317 END IF
318*
319.EQ. IF( INFO0 ) THEN
320 HOUS( 1 ) = LHMIN
321 WORK( 1 ) = LWMIN
322 END IF
323*
324.NE. IF( INFO0 ) THEN
325 CALL XERBLA( 'zhetrd_hb2st', -INFO )
326 RETURN
327 ELSE IF( LQUERY ) THEN
328 RETURN
329 END IF
330*
331* Quick return if possible
332*
333.EQ. IF( N0 ) THEN
334 HOUS( 1 ) = 1
335 WORK( 1 ) = 1
336 RETURN
337 END IF
338*
339* Determine pointer position
340*
341 LDV = KD + IB
342 SIZETAU = 2 * N
343 SIZEV = 2 * N
344 INDTAU = 1
345 INDV = INDTAU + SIZETAU
346 LDA = 2 * KD + 1
347 SIZEA = LDA * N
348 INDA = 1
349 INDW = INDA + SIZEA
350 NTHREADS = 1
351 TID = 0
352*
353 IF( UPPER ) THEN
354 APOS = INDA + KD
355 AWPOS = INDA
356 DPOS = APOS + KD
357 OFDPOS = DPOS - 1
358 ABDPOS = KD + 1
359 ABOFDPOS = KD
360 ELSE
361 APOS = INDA
362 AWPOS = INDA + KD + 1
363 DPOS = APOS
364 OFDPOS = DPOS + 1
365 ABDPOS = 1
366 ABOFDPOS = 2
367
368 ENDIF
369*
370* Case KD=0:
371* The matrix is diagonal. We just copy it (convert to "real" for
372* complex because D is double and the imaginary part should be 0)
373* and store it in D. A sequential code here is better or
374* in a parallel environment it might need two cores for D and E
375*
376.EQ. IF( KD0 ) THEN
377 DO 30 I = 1, N
378 D( I ) = DBLE( AB( ABDPOS, I ) )
379 30 CONTINUE
380 DO 40 I = 1, N-1
381 E( I ) = RZERO
382 40 CONTINUE
383*
384 HOUS( 1 ) = 1
385 WORK( 1 ) = 1
386 RETURN
387 END IF
388*
389* Case KD=1:
390* The matrix is already Tridiagonal. We have to make diagonal
391* and offdiagonal elements real, and store them in D and E.
392* For that, for real precision just copy the diag and offdiag
393* to D and E while for the COMPLEX case the bulge chasing is
394* performed to convert the hermetian tridiagonal to symmetric
395* tridiagonal. A simpler conversion formula might be used, but then
396* updating the Q matrix will be required and based if Q is generated
397* or not this might complicate the story.
398*
399.EQ. IF( KD1 ) THEN
400 DO 50 I = 1, N
401 D( I ) = DBLE( AB( ABDPOS, I ) )
402 50 CONTINUE
403*
404* make off-diagonal elements real and copy them to E
405*
406 IF( UPPER ) THEN
407 DO 60 I = 1, N - 1
408 TMP = AB( ABOFDPOS, I+1 )
409 ABSTMP = ABS( TMP )
410 AB( ABOFDPOS, I+1 ) = ABSTMP
411 E( I ) = ABSTMP
412.NE. IF( ABSTMPRZERO ) THEN
413 TMP = TMP / ABSTMP
414 ELSE
415 TMP = ONE
416 END IF
417.LT. IF( IN-1 )
418 $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP
419C IF( WANTZ ) THEN
420C CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
421C END IF
422 60 CONTINUE
423 ELSE
424 DO 70 I = 1, N - 1
425 TMP = AB( ABOFDPOS, I )
426 ABSTMP = ABS( TMP )
427 AB( ABOFDPOS, I ) = ABSTMP
428 E( I ) = ABSTMP
429.NE. IF( ABSTMPRZERO ) THEN
430 TMP = TMP / ABSTMP
431 ELSE
432 TMP = ONE
433 END IF
434.LT. IF( IN-1 )
435 $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP
436C IF( WANTQ ) THEN
437C CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
438C END IF
439 70 CONTINUE
440 ENDIF
441*
442 HOUS( 1 ) = 1
443 WORK( 1 ) = 1
444 RETURN
445 END IF
446*
447* Main code start here.
448* Reduce the hermitian band of A to a tridiagonal matrix.
449*
450 THGRSIZ = N
451 GRSIZ = 1
452 SHIFT = 3
453 NBTILES = CEILING( REAL(N)/REAL(KD) )
454 STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
455 THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) )
456*
457 CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
458 CALL ZLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA )
459*
460*
461* openMP parallelisation start here
462*
463#if defined(_OPENMP)
464!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
465!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
466!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
467!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
468!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
469!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
470!$OMP MASTER
471#endif
472*
473* main bulge chasing loop
474*
475 DO 100 THGRID = 1, THGRNB
476 STT = (THGRID-1)*THGRSIZ+1
477 THED = MIN( (STT + THGRSIZ -1), (N-1))
478 DO 110 I = STT, N-1
479 ED = MIN( I, THED )
480.GT. IF( STTED ) EXIT
481 DO 120 M = 1, STEPERCOL
482 ST = STT
483 DO 130 SWEEPID = ST, ED
484 DO 140 K = 1, GRSIZ
485 MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ)
486 $ + (M-1)*GRSIZ + K
487.EQ. IF ( MYID1 ) THEN
488 TTYPE = 1
489 ELSE
490 TTYPE = MOD( MYID, 2 ) + 2
491 ENDIF
492
493.EQ. IF( TTYPE2 ) THEN
494 COLPT = (MYID/2)*KD + SWEEPID
495 STIND = COLPT-KD+1
496 EDIND = MIN(COLPT,N)
497 BLKLASTIND = COLPT
498 ELSE
499 COLPT = ((MYID+1)/2)*KD + SWEEPID
500 STIND = COLPT-KD+1
501 EDIND = MIN(COLPT,N)
502.GE..AND. IF( ( STINDEDIND-1 )
503.EQ. $ ( EDINDN ) ) THEN
504 BLKLASTIND = N
505 ELSE
506 BLKLASTIND = 0
507 ENDIF
508 ENDIF
509*
510* Call the kernel
511*
512#if defined(_OPENMP) && _OPENMP >= 201307
513
514.NE. IF( TTYPE1 ) THEN
515!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
516!$OMP$ DEPEND(in:WORK(MYID-1))
517!$OMP$ DEPEND(out:WORK(MYID))
518 TID = OMP_GET_THREAD_NUM()
519 CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
520 $ STIND, EDIND, SWEEPID, N, KD, IB,
521 $ WORK ( INDA ), LDA,
522 $ HOUS( INDV ), HOUS( INDTAU ), LDV,
523 $ WORK( INDW + TID*KD ) )
524!$OMP END TASK
525 ELSE
526!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
527!$OMP$ DEPEND(out:WORK(MYID))
528 TID = OMP_GET_THREAD_NUM()
529 CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
530 $ STIND, EDIND, SWEEPID, N, KD, IB,
531 $ WORK ( INDA ), LDA,
532 $ HOUS( INDV ), HOUS( INDTAU ), LDV,
533 $ WORK( INDW + TID*KD ) )
534!$OMP END TASK
535 ENDIF
536#else
537 CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
538 $ STIND, EDIND, SWEEPID, N, KD, IB,
539 $ WORK ( INDA ), LDA,
540 $ HOUS( INDV ), HOUS( INDTAU ), LDV,
541 $ WORK( INDW + TID*KD ) )
542#endif
543.GE. IF ( BLKLASTIND(N-1) ) THEN
544 STT = STT + 1
545 EXIT
546 ENDIF
547 140 CONTINUE
548 130 CONTINUE
549 120 CONTINUE
550 110 CONTINUE
551 100 CONTINUE
552*
553#if defined(_OPENMP)
554!$OMP END MASTER
555!$OMP END PARALLEL
556#endif
557*
558* Copy the diagonal from A to D. Note that D is REAL thus only
559* the Real part is needed, the imaginary part should be zero.
560*
561 DO 150 I = 1, N
562 D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) )
563 150 CONTINUE
564*
565* Copy the off diagonal from A to E. Note that E is REAL thus only
566* the Real part is needed, the imaginary part should be zero.
567*
568 IF( UPPER ) THEN
569 DO 160 I = 1, N-1
570 E( I ) = DBLE( WORK( OFDPOS+I*LDA ) )
571 160 CONTINUE
572 ELSE
573 DO 170 I = 1, N-1
574 E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) )
575 170 CONTINUE
576 ENDIF
577*
578 HOUS( 1 ) = LHMIN
579 WORK( 1 ) = LWMIN
580 RETURN
581*
582* End of ZHETRD_HB2ST
583*
584 END
585
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:103
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:106
subroutine zhetrd_hb2st(stage1, vect, uplo, n, kd, ab, ldab, d, e, hous, lhous, work, lwork, info)
ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine zhb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
ZHB2ST_KERNELS