1 SUBROUTINE pzttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, , NPROCS,
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
76 INTEGER DBLESZ, ZPLXSZ
78 parameter( dblesz = 8, zplxsz = 16,
79 $ padval = ( -9923.0d+0, -9924.0d+0 ) )
81 parameter( timetests = 11 )
83 parameter( tests = 8 )
85 parameter( mintimen = 8 )
91 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
92 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
93 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
94 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
95 $ nps, nq, splitstimed, worksiz, worktrd
96 DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS
99 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
100 $ baltest( tests ), baltime( timetests ),
101 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
102 $ intertest( tests ), intertime( timetests ),
103 $ pnbtest( tests ), pnbtime( timetests ),
104 $ twogemmtest( tests ), twogemmtime( timetests )
105 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
116 INTEGER ICEIL, ILCM, , PJLAENV
117 DOUBLE PRECISION PZLANHE
118 EXTERNAL lsame, iceil, ilcm,
numroc, pjlaenv, pzlanhe
121 INTRINSIC dble, int,
max, sqrt
125 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
126 $ lltblock, minsz, pnb, timeinternals, timing,
130 COMMON / blocksizes / gstblock, lltblock, bckblock,
132 COMMON / minsize / minsz
133 COMMON / pjlaenvtiming / timing
134 COMMON / tailoredopts / pnb, anb, interleave,
136 COMMON / timecontrol / timeinternals
139 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
140 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
141 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
142 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
144 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
146 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
147 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
148 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
149 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
150 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
154 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
162 memsiz = totmem / zplxsz
167 WRITE( nout, fmt = * )
168 WRITE( nout, fmt = 9995 )
169 WRITE( nout, fmt = 9994 )
170 WRITE( nout, fmt = 9993 )
171 WRITE( nout, fmt = * )
176 ngrids = int( sqrt( dble( nprocs ) ) )
186 CALL blacs_get( -1, 0, ictxt )
192 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
204 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
210 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
212 IF( ierr( 1 ).GT.0 )
THEN
214 $
WRITE( nout, fmt = 9997 )
'matrix'
221 IF( n.GT.mintimen )
THEN
232 maxtests = timetests + 2
239 DO 10 k = 1, maxtests
242 IF( k.GE.maxtests-1 )
THEN
258 dummy = pjlaenv( ictxt, 3,
'PZHETTRD',
'L', 0, 0,
265 balanced = baltime( k )
266 interleave = intertime( k )
267 twogemms = twogemmtime( k )
274 balanced = baltest( k )
275 interleave = intertest( k )
276 twogemms = twogemmtest( k )
284 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
285 CALL igebs2d( ictxt,
'All',
' ', 1, 1, splitstimed,
288 CALL igebr2d( ictxt,
'All',
' ', 1, 1, splitstimed, 1,
293 IF( splitstimed.EQ.0 .AND. k.EQ.maxtests )
305 np =
numroc( n, nb, myrow, 0, nprow )
306 nq =
numroc( n, nb, mycol, 0, npcol )
308 iprepad =
max( nb, np )
310 ipostpad =
max( nb, nq )
320 CALL descinit( desca, n, n, nb, nb, 0, 0, ictxt,
321 $
max( 1, np )+imidpad, ierr( 1 ) )
323 CALL descinit( descd, 1, n, nb, nb, 0, 0, ictxt, 1,
328 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
330 IF( ierr( 1 ).LT.0 )
THEN
332 $
WRITE( nout, fmt = 9997 )
'descriptor'
341 IF( lsame( uplo,
'U' ) )
THEN
344 noffd =
numroc( n-1, nb, mycol, 0, npcol )
346 ndiag = iceil( dblesz*ndiag, zplxsz )
347 noffd = iceil( dblesz*noffd, zplxsz )
350 ipd = ipa + desca( lld_ )*nq + ipostpad + iprepad
351 ipe = ipd + ndiag + ipostpad + iprepad
352 ipt = ipe + noffd + ipostpad + iprepad
353 ipw = ipt + nq + ipostpad + iprepad
358 nps =
max(
numroc( n, 1, 0, 0, nprow ), 2*anb )
359 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
361 worktrd = lwmin + ipostpad
368 IF( nprow.NE.npcol )
THEN
369 lcm = ilcm( nprow, npcol )
370 itemp = nb*iceil( iceil( np, nb ), lcm / nprow ) +
373 itemp =
max( iceil( dblesz*itemp, zplxsz ),
375 worksiz =
max( lwmin, itemp ) + ipostpad
381 IF( ipw+worksiz.GT.memsiz )
THEN
383 $
WRITE( nout, fmt = 9996 )
'Tridiagonal reduction',
384 $ ( ipw+worksiz )*zplxsz
390 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
392 IF( ierr( 1 ).GT.0 )
THEN
394 $
WRITE( nout, fmt = 9997 )
'MEMORY'
403 CALL pzmatgen( ictxt,
'Hemm', 'n
', DESCA( M_ ),
404 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
405 $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ),
406 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ,
407 $ MYROW, MYCOL, NPROW, NPCOL )
413 CALL PZFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
414 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
416 CALL PZFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ),
417 $ NDIAG, IPREPAD, IPOSTPAD, PADVAL )
418 CALL PZFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ),
419 $ NOFFD, IPREPAD, IPOSTPAD, PADVAL )
420 CALL PZFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ,
421 $ IPREPAD, IPOSTPAD, PADVAL )
422 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
423 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
424 $ IPREPAD, IPOSTPAD, PADVAL )
425 ANORM = PZLANHE( 'i
', UPLO, N, MEM( IPA ), 1, 1,
426 $ DESCA, MEM( IPW ) )
427 CALL PZCHEKPAD( ICTXT, 'pzlanhe
', NP, NQ,
428 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
429 $ IPREPAD, IPOSTPAD, PADVAL )
430 CALL PZCHEKPAD( ICTXT, 'pzlanhe
', WORKSIZ-IPOSTPAD, 1,
431 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
432 $ IPREPAD, IPOSTPAD, PADVAL )
433 CALL PZFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1,
434 $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD,
435 $ IPREPAD, IPOSTPAD, PADVAL )
439 CALL BLACS_BARRIER( ICTXT, 'all
' )
444 CALL PZHETTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA,
445 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
446 $ MEM( IPW ), LWMIN, INFO )
454 CALL PZCHEKPAD( ICTXT, 'pzhettrd', NP, NQ,
455 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
456 $ IPREPAD, IPOSTPAD, PADVAL )
457 CALL PZCHEKPAD( ICTXT, 'pzhettrd', NDIAG, 1,
458 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
461 CALL PZCHEKPAD( ICTXT, 'pzhettrdc
', NOFFD, 1,
462 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
464 CALL PZCHEKPAD( ICTXT, 'pzhettrdd
', NQ, 1,
465 $ MEM( IPT-IPREPAD ), NQ, IPREPAD,
467 CALL PZCHEKPAD( ICTXT, 'pzhettrde
', WORKTRD-IPOSTPAD,
468 $ 1, MEM( IPW-IPREPAD ),
469 $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD,
471 CALL PZFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
472 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
473 $ IPREPAD, IPOSTPAD, PADVAL )
477 CALL PZHETDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA,
478 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
479 $ MEM( IPW ), IERR( 1 ) )
485 CALL PZLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA,
487 CALL PZLAFCHK( 'hemm
', 'no
', N, N, MEM( IPA ), 1, 1,
488 $ DESCA, IASEED, ANORM, FRESID,
493 CALL PZCHEKPAD( ICTXT, 'pzhetdrvf
', NP, NQ,
494 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
495 $ IPREPAD, IPOSTPAD, PADVAL )
496 CALL PZCHEKPAD( ICTXT, 'pzhetdrvg
', NDIAG, 1,
497 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
499 CALL PZCHEKPAD( ICTXT, 'pzhetdrvh
', NOFFD, 1,
500 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
502 CALL PZCHEKPAD( ICTXT, 'pzhetdrvi
', WORKSIZ-IPOSTPAD,
503 $ 1, MEM( IPW-IPREPAD ),
504 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD,
509.LE..AND..EQ.
IF( FRESIDTHRESH FRESID-FRESID
510.AND..EQ.
$ 0.0D+0 IERR( 1 )0 ) THEN
514.EQ..AND..EQ.
IF( MYROW0 MYCOL0 )
515 $ WRITE( NOUT, FMT = 9991 )FRESID
523.EQ..AND..EQ..AND..NE.
IF( MYROW0 MYCOL0 IERR( 1 )0 )
524 $ WRITE( NOUT, FMT = * )'d or e copies incorrect ...
'
530 FRESID = FRESID - FRESID
536 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 50, 1, WTIME )
537 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 50, 1, CTIME )
541.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
546 NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3
551.GT.
IF( WTIME( 1 )0.0D+0 ) THEN
552 TMFLOPS = NOPS / WTIME( 1 )
556.GE.
IF( WTIME( 1 )0.0D+0 )
557 $ WRITE( NOUT, FMT = 9992 )'wall
', N, INTERLEAVE,
558 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
559 $ WTIME( 1 ), TMFLOPS, FRESID, PASSED
563.GT.
IF( CTIME( 1 )0.0D+0 ) THEN
564 TMFLOPS = NOPS / CTIME( 1 )
568.GE.
IF( CTIME( 1 )0.0D+0 )
569 $ WRITE( NOUT, FMT = 9992 )'cpu
', N, INTERLEAVE,
570 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
571 $ CTIME( 1 ), TMFLOPS, FRESID, PASSED
577.GT..OR.
IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 )0.0D+0
578.GT.
$ CTIME( 13 )+CTIME( 15 )+CTIME( 16 )0.0D+0 )
582.EQ.
IF( SPLITSTIMED1 ) THEN
583 WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ),
584 $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ),
586 WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ),
587 $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ),
590 WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ),
591 $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ),
593 WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ),
594 $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ),
596 WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB,
597 $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS
603.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
604.EQ.
IF( SPLITSTIMED1 ) THEN
605 WRITE( NOUT, FMT = 9985 )
606 WRITE( NOUT, FMT = 9984 )
607 WRITE( NOUT, FMT = 9983 )
608 WRITE( NOUT, FMT = 9982 )
609 WRITE( NOUT, FMT = 9981 )
610 WRITE( NOUT, FMT = 9980 )
611 WRITE( NOUT, FMT = 9979 )
612 WRITE( NOUT, FMT = 9978 )
613 WRITE( NOUT, FMT = 9977 )
614 WRITE( NOUT, FMT = 9976 )
615 WRITE( NOUT, FMT = 9975 )
616 WRITE( NOUT, FMT = 9974 )
617 WRITE( NOUT, FMT = 9973 )
622 CALL BLACS_GRIDEXIT( ICTXT )
626 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
627 $ '; it should be at least 1
' )
628 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
630 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
631 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
634 9995 FORMAT( 'pzhettrd, tailored reduction to tridiagonal form, test.
'
636 9994 FORMAT( 'time n int 2gm bal anb pnb prcs trd time
',
637 $ ' mflops residual check
' )
638 9993 FORMAT( '---- ---- --- --- --- --- --- ---- --------
',
639 $ '----------- -------- ------
' )
640 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X,
641 $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 )
642 9991 FORMAT( '||a - q*t*q
''|| / (||a|| * n * eps) =
', G25.7 )
643 9990 FORMAT( 'wsplit1=[wsplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
644 $ 1X, F9.2, 1X, F9.2, ' ];
' )
645 9989 FORMAT( 'wsplit2=[wsplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
646 $ 1X, F9.2, 1X, F9.2, ' ];
' )
647 9988 FORMAT( 'csplit1=[csplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
648 $ 1X, F9.2, 1X, F9.2, ' ];
' )
649 9987 FORMAT( 'csplit2=[csplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
650 $ 1X, F9.2, 1X, F9.2, ' ];
' )
651 9986 FORMAT( 'size_opts=[size_opts;
', I4, 1X, I4, 1X, I4, 1X, I4, 1X,
652 $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];
' )
653 9985 FORMAT( 'n=1; nprocs=2; pnb=3; anb=4; interleave=5; balanced=6;
',
654 $ ' twogemms=7; timeinternals=8;
' )
655 9984 FORMAT( 's1_overhead = 1; % Should be mainly cost of barrier
' )
656 9983 FORMAT( 's1_barrier = 2; % Cost of barrier
' )
657 9982 FORMAT( 's1_updcurcol = 3; % Update
the current column
' )
658 9981 FORMAT( 's1_house = 4; % Compute
the householder vector
' )
659 9980 FORMAT( 's1_spread = 5; % Spread across
' )
660 9979 FORMAT( 's1_transpose = 6; % Transpose
' )
661 9978 FORMAT( 's2_updcurblk = 1; % Update
the current block column
' )
662 9977 FORMAT( 's2_trmvt = 2; % TRMVT v = a * h; vt = ht * a
'' ' )
663 9976 FORMAT( 's2_upd_v = 3; % v = v + v * ht * h + h * vt * h
' )
664 9975 FORMAT( 's2_trans_sum = 4; % v = v + vt
'' ' )
665 9974 FORMAT( 's2_dot = 5; % c = v
'' * h
' )
666 9973 FORMAT( 's2_r2k = 6; % A = a - v * h
'' - h * v
'' ' )