OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
dsytrd_sb2st.F File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine dsytrd_sb2st (stage1, vect, uplo, n, kd, ab, ldab, d, e, hous, lhous, work, lwork, info)
 DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T

Function/Subroutine Documentation

◆ dsytrd_sb2st()

subroutine dsytrd_sb2st ( character stage1,
character vect,
character uplo,
integer n,
integer kd,
double precision, dimension( ldab, * ) ab,
integer ldab,
double precision, dimension( * ) d,
double precision, dimension( * ) e,
double precision, dimension( * ) hous,
integer lhous,
double precision, dimension( * ) work,
integer lwork,
integer info )

DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T

Download DSYTRD_SB2ST + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
!> tridiagonal form T by a orthogonal similarity transformation:
!> Q**T * A * Q = T.
!> 
Parameters
[in]STAGE1
!>          STAGE1 is CHARACTER*1
!>          = 'N':  : to mention that the stage 1 of the reduction  
!>                  from dense to band using the dsytrd_sy2sb routine
!>                  was not called before this routine to reproduce AB. 
!>                  In other term this routine is called as standalone. 
!>          = 'Y':  : to mention that the stage 1 of the 
!>                  reduction from dense to band using the dsytrd_sy2sb 
!>                  routine has been called to produce AB (e.g., AB is
!>                  the output of dsytrd_sy2sb.
!> 
[in]VECT
!>          VECT is CHARACTER*1
!>          = 'N':  No need for the Housholder representation, 
!>                  and thus LHOUS is of size max(1, 4*N);
!>          = 'V':  the Householder representation is needed to 
!>                  either generate or to apply Q later on, 
!>                  then LHOUS is to be queried and computed.
!>                  (NOT AVAILABLE IN THIS RELEASE).
!> 
[in]UPLO
!>          UPLO is CHARACTER*1
!>          = 'U':  Upper triangle of A is stored;
!>          = 'L':  Lower triangle of A is stored.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrix A.  N >= 0.
!> 
[in]KD
!>          KD is INTEGER
!>          The number of superdiagonals of the matrix A if UPLO = 'U',
!>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
!> 
[in,out]AB
!>          AB is DOUBLE PRECISION array, dimension (LDAB,N)
!>          On entry, the upper or lower triangle of the symmetric band
!>          matrix A, stored in the first KD+1 rows of the array.  The
!>          j-th column of A is stored in the j-th column of the array AB
!>          as follows:
!>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
!>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
!>          On exit, the diagonal elements of AB are overwritten by the
!>          diagonal elements of the tridiagonal matrix T; if KD > 0, the
!>          elements on the first superdiagonal (if UPLO = 'U') or the
!>          first subdiagonal (if UPLO = 'L') are overwritten by the
!>          off-diagonal elements of T; the rest of AB is overwritten by
!>          values generated during the reduction.
!> 
[in]LDAB
!>          LDAB is INTEGER
!>          The leading dimension of the array AB.  LDAB >= KD+1.
!> 
[out]D
!>          D is DOUBLE PRECISION array, dimension (N)
!>          The diagonal elements of the tridiagonal matrix T.
!> 
[out]E
!>          E is DOUBLE PRECISION array, dimension (N-1)
!>          The off-diagonal elements of the tridiagonal matrix T:
!>          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
!> 
[out]HOUS
!>          HOUS is DOUBLE PRECISION array, dimension LHOUS, that
!>          store the Householder representation.
!> 
[in]LHOUS
!>          LHOUS is INTEGER
!>          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
!>          If LWORK = -1, or LHOUS=-1,
!>          then a query is assumed; the routine
!>          only calculates the optimal size of the HOUS array, returns
!>          this value as the first entry of the HOUS array, and no error
!>          message related to LHOUS is issued by XERBLA.
!>          LHOUS = MAX(1, dimension) where
!>          dimension = 4*N if VECT='N'
!>          not available now if VECT='H'     
!> 
[out]WORK
!>          WORK is DOUBLE PRECISION array, dimension LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK. LWORK = MAX(1, dimension)
!>          If LWORK = -1, or LHOUS=-1,
!>          then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!>          LWORK = MAX(1, dimension) where
!>          dimension   = (2KD+1)*N + KD*NTHREADS
!>          where KD is the blocking size of the reduction,
!>          FACTOPTNB is the blocking used by the QR or LQ
!>          algorithm, usually FACTOPTNB=128 is a good choice
!>          NTHREADS is the number of threads used when
!>          openMP compilation is enabled, otherwise =1.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit
!>          < 0:  if INFO = -i, the i-th argument had an illegal value
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All details are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation 
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196 
!>
!> 

Definition at line 228 of file dsytrd_sb2st.F.

230*
231#if defined(_OPENMP)
232 use omp_lib
233#endif
234*
235 IMPLICIT NONE
236*
237* -- LAPACK computational routine --
238* -- LAPACK is a software package provided by Univ. of Tennessee, --
239* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
240*
241* .. Scalar Arguments ..
242 CHARACTER STAGE1, UPLO, VECT
243 INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
244* ..
245* .. Array Arguments ..
246 DOUBLE PRECISION D( * ), E( * )
247 DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
248* ..
249*
250* =====================================================================
251*
252* .. Parameters ..
253 DOUBLE PRECISION RZERO
254 DOUBLE PRECISION ZERO, ONE
255 parameter( rzero = 0.0d+0,
256 $ zero = 0.0d+0,
257 $ one = 1.0d+0 )
258* ..
259* .. Local Scalars ..
260 LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
261 INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
262 $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
263 $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
264 $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
265 $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
266 $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
267 $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN
268* ..
269* .. External Subroutines ..
271* ..
272* .. Intrinsic Functions ..
273 INTRINSIC min, max, ceiling, real
274* ..
275* .. External Functions ..
276 LOGICAL LSAME
277 INTEGER ILAENV2STAGE
278 EXTERNAL lsame, ilaenv2stage
279* ..
280* .. Executable Statements ..
281*
282* Determine the minimal workspace size required.
283* Test the input parameters
284*
285 debug = 0
286 info = 0
287 afters1 = lsame( stage1, 'Y' )
288 wantq = lsame( vect, 'V' )
289 upper = lsame( uplo, 'U' )
290 lquery = ( lwork.EQ.-1 ) .OR. ( lhous.EQ.-1 )
291*
292* Determine the block size, the workspace size and the hous size.
293*
294 ib = ilaenv2stage( 2, 'DSYTRD_SB2ST', vect, n, kd, -1, -1 )
295 lhmin = ilaenv2stage( 3, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 )
296 lwmin = ilaenv2stage( 4, 'DSYTRD_SB2ST', vect, n, kd, ib, -1 )
297*
298 IF( .NOT.afters1 .AND. .NOT.lsame( stage1, 'N' ) ) THEN
299 info = -1
300 ELSE IF( .NOT.lsame( vect, 'N' ) ) THEN
301 info = -2
302 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
303 info = -3
304 ELSE IF( n.LT.0 ) THEN
305 info = -4
306 ELSE IF( kd.LT.0 ) THEN
307 info = -5
308 ELSE IF( ldab.LT.(kd+1) ) THEN
309 info = -7
310 ELSE IF( lhous.LT.lhmin .AND. .NOT.lquery ) THEN
311 info = -11
312 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
313 info = -13
314 END IF
315*
316 IF( info.EQ.0 ) THEN
317 hous( 1 ) = lhmin
318 work( 1 ) = lwmin
319 END IF
320*
321 IF( info.NE.0 ) THEN
322 CALL xerbla( 'DSYTRD_SB2ST', -info )
323 RETURN
324 ELSE IF( lquery ) THEN
325 RETURN
326 END IF
327*
328* Quick return if possible
329*
330 IF( n.EQ.0 ) THEN
331 hous( 1 ) = 1
332 work( 1 ) = 1
333 RETURN
334 END IF
335*
336* Determine pointer position
337*
338 ldv = kd + ib
339 sizetau = 2 * n
340 sidev = 2 * n
341 indtau = 1
342 indv = indtau + sizetau
343 lda = 2 * kd + 1
344 sizea = lda * n
345 inda = 1
346 indw = inda + sizea
347 nthreads = 1
348 tid = 0
349*
350 IF( upper ) THEN
351 apos = inda + kd
352 awpos = inda
353 dpos = apos + kd
354 ofdpos = dpos - 1
355 abdpos = kd + 1
356 abofdpos = kd
357 ELSE
358 apos = inda
359 awpos = inda + kd + 1
360 dpos = apos
361 ofdpos = dpos + 1
362 abdpos = 1
363 abofdpos = 2
364
365 ENDIF
366*
367* Case KD=0:
368* The matrix is diagonal. We just copy it (convert to "real" for
369* real because D is double and the imaginary part should be 0)
370* and store it in D. A sequential code here is better or
371* in a parallel environment it might need two cores for D and E
372*
373 IF( kd.EQ.0 ) THEN
374 DO 30 i = 1, n
375 d( i ) = ( ab( abdpos, i ) )
376 30 CONTINUE
377 DO 40 i = 1, n-1
378 e( i ) = rzero
379 40 CONTINUE
380*
381 hous( 1 ) = 1
382 work( 1 ) = 1
383 RETURN
384 END IF
385*
386* Case KD=1:
387* The matrix is already Tridiagonal. We have to make diagonal
388* and offdiagonal elements real, and store them in D and E.
389* For that, for real precision just copy the diag and offdiag
390* to D and E while for the COMPLEX case the bulge chasing is
391* performed to convert the hermetian tridiagonal to symmetric
392* tridiagonal. A simpler conversion formula might be used, but then
393* updating the Q matrix will be required and based if Q is generated
394* or not this might complicate the story.
395*
396 IF( kd.EQ.1 ) THEN
397 DO 50 i = 1, n
398 d( i ) = ( ab( abdpos, i ) )
399 50 CONTINUE
400*
401 IF( upper ) THEN
402 DO 60 i = 1, n-1
403 e( i ) = ( ab( abofdpos, i+1 ) )
404 60 CONTINUE
405 ELSE
406 DO 70 i = 1, n-1
407 e( i ) = ( ab( abofdpos, i ) )
408 70 CONTINUE
409 ENDIF
410*
411 hous( 1 ) = 1
412 work( 1 ) = 1
413 RETURN
414 END IF
415*
416* Main code start here.
417* Reduce the symmetric band of A to a tridiagonal matrix.
418*
419 thgrsiz = n
420 grsiz = 1
421 shift = 3
422 nbtiles = ceiling( real(n)/real(kd) )
423 stepercol = ceiling( real(shift)/real(grsiz) )
424 thgrnb = ceiling( real(n-1)/real(thgrsiz) )
425*
426 CALL dlacpy( "A", kd+1, n, ab, ldab, work( apos ), lda )
427 CALL dlaset( "A", kd, n, zero, zero, work( awpos ), lda )
428*
429*
430* openMP parallelisation start here
431*
432#if defined(_OPENMP)
433!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
434!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
435!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
436!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
437!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
438!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
439!$OMP MASTER
440#endif
441*
442* main bulge chasing loop
443*
444 DO 100 thgrid = 1, thgrnb
445 stt = (thgrid-1)*thgrsiz+1
446 thed = min( (stt + thgrsiz -1), (n-1))
447 DO 110 i = stt, n-1
448 ed = min( i, thed )
449 IF( stt.GT.ed ) EXIT
450 DO 120 m = 1, stepercol
451 st = stt
452 DO 130 sweepid = st, ed
453 DO 140 k = 1, grsiz
454 myid = (i-sweepid)*(stepercol*grsiz)
455 $ + (m-1)*grsiz + k
456 IF ( myid.EQ.1 ) THEN
457 ttype = 1
458 ELSE
459 ttype = mod( myid, 2 ) + 2
460 ENDIF
461
462 IF( ttype.EQ.2 ) THEN
463 colpt = (myid/2)*kd + sweepid
464 stind = colpt-kd+1
465 edind = min(colpt,n)
466 blklastind = colpt
467 ELSE
468 colpt = ((myid+1)/2)*kd + sweepid
469 stind = colpt-kd+1
470 edind = min(colpt,n)
471 IF( ( stind.GE.edind-1 ).AND.
472 $ ( edind.EQ.n ) ) THEN
473 blklastind = n
474 ELSE
475 blklastind = 0
476 ENDIF
477 ENDIF
478*
479* Call the kernel
480*
481#if defined(_OPENMP) && _OPENMP >= 201307
482 IF( ttype.NE.1 ) THEN
483!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
484!$OMP$ DEPEND(in:WORK(MYID-1))
485!$OMP$ DEPEND(out:WORK(MYID))
486 tid = omp_get_thread_num()
487 CALL dsb2st_kernels( uplo, wantq, ttype,
488 $ stind, edind, sweepid, n, kd, ib,
489 $ work( inda ), lda,
490 $ hous( indv ), hous( indtau ), ldv,
491 $ work( indw + tid*kd ) )
492!$OMP END TASK
493 ELSE
494!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
495!$OMP$ DEPEND(out:WORK(MYID))
496 tid = omp_get_thread_num()
497 CALL dsb2st_kernels( uplo, wantq, ttype,
498 $ stind, edind, sweepid, n, kd, ib,
499 $ work( inda ), lda,
500 $ hous( indv ), hous( indtau ), ldv,
501 $ work( indw + tid*kd ) )
502!$OMP END TASK
503 ENDIF
504#else
505 CALL dsb2st_kernels( uplo, wantq, ttype,
506 $ stind, edind, sweepid, n, kd, ib,
507 $ work( inda ), lda,
508 $ hous( indv ), hous( indtau ), ldv,
509 $ work( indw + tid*kd ) )
510#endif
511 IF ( blklastind.GE.(n-1) ) THEN
512 stt = stt + 1
513 EXIT
514 ENDIF
515 140 CONTINUE
516 130 CONTINUE
517 120 CONTINUE
518 110 CONTINUE
519 100 CONTINUE
520*
521#if defined(_OPENMP)
522!$OMP END MASTER
523!$OMP END PARALLEL
524#endif
525*
526* Copy the diagonal from A to D. Note that D is REAL thus only
527* the Real part is needed, the imaginary part should be zero.
528*
529 DO 150 i = 1, n
530 d( i ) = ( work( dpos+(i-1)*lda ) )
531 150 CONTINUE
532*
533* Copy the off diagonal from A to E. Note that E is REAL thus only
534* the Real part is needed, the imaginary part should be zero.
535*
536 IF( upper ) THEN
537 DO 160 i = 1, n-1
538 e( i ) = ( work( ofdpos+i*lda ) )
539 160 CONTINUE
540 ELSE
541 DO 170 i = 1, n-1
542 e( i ) = ( work( ofdpos+(i-1)*lda ) )
543 170 CONTINUE
544 ENDIF
545*
546 hous( 1 ) = lhmin
547 work( 1 ) = lwmin
548 RETURN
549*
550* End of DSYTRD_SB2ST
551*
subroutine dsb2st_kernels(uplo, wantz, ttype, st, ed, sweep, n, nb, ib, a, lda, v, tau, ldvt, work)
DSB2ST_KERNELS
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
integer function ilaenv2stage(ispec, name, opts, n1, n2, n3, n4)
ILAENV2STAGE
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21