1 SUBROUTINE pdttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
17 DOUBLE PRECISION MEM( * )
71 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
77 DOUBLE PRECISION PADVAL
78 parameter( dblesz = 8, padval = -9923.0d+0 )
80 parameter( timetests = 11 )
82 parameter( tests = 8 )
84 parameter( mintimen = 8 )
90 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
91 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
92 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
93 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
95 DOUBLE PRECISION , FRESID, NOPS, TMFLOPS
98 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
99 $ baltest( tests ), baltime( timetests ),
100 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
101 $ intertest( tests ), intertime( timetests ),
102 $ pnbtest( tests ), pnbtime( timetests ),
103 $ twogemmtest( tests ), twogemmtime( timetests )
104 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
115 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
116 DOUBLE PRECISION PDLANSY
117 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv, pdlansy
120 INTRINSIC dble, int,
max, sqrt
124 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
125 $ lltblock, minsz, pnb, timeinternals, timing,
129 COMMON / blocksizes / gstblock, lltblock, bckblock,
131 COMMON / minsize / minsz
132 COMMON / pjlaenvtiming / timing
133 COMMON / tailoredopts / pnb, anb, interleave,
135 COMMON / timecontrol / timeinternals
138 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
139 DATA intertime / 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1 /
140 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
141 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
143 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
145 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
146 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
147 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
148 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
149 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
153 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
161 memsiz = totmem / dblesz
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = 9993 )
170 WRITE( nout, fmt = * )
175 ngrids = int( sqrt( dble( nprocs ) ) )
185 CALL blacs_get( -1, 0, ictxt )
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
203 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
209 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1, -1, 0 )
213 $
WRITE( nout, fmt = 9997 )
'matrix'
220 IF( n.GT.mintimen )
THEN
231 maxtests = timetests + 2
238 DO 10 k = 1, maxtests
241 IF( k.GE.maxtests-1 )
THEN
257 dummy = pjlaenv( ictxt, 3,
'PDSYTTRD',
'L', 0, 0,
264 balanced = baltime( k )
265 interleave = intertime( k )
266 twogemms = twogemmtime( k )
273 balanced = baltest( k )
274 interleave = intertest( k )
275 twogemms = twogemmtest( k )
283 IF( myrow.EQ.0 .AND. mycol.EQ.0 )
THEN
284 CALL igebs2d( ictxt,
'All', '
', 1, 1, SPLITSTIMED,
287 CALL IGEBR2D( ICTXT, 'all
', ' ', 1, 1, SPLITSTIMED, 1,
292.EQ..AND..EQ.
IF( SPLITSTIMED0 KMAXTESTS )
304 NP = NUMROC( N, NB, MYROW, 0, NPROW )
305 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
307 IPREPAD = MAX( NB, NP )
309 IPOSTPAD = MAX( NB, NQ )
319 CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
320 $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) )
322 CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1,
327 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
329.LT.
IF( IERR( 1 )0 ) THEN
331 $ WRITE( NOUT, FMT = 9997 )'descriptor
'
340 IF( LSAME( UPLO, 'u
' ) ) THEN
343 NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL )
347 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
348 IPE = IPD + NDIAG + IPOSTPAD + IPREPAD
349 IPT = IPE + NOFFD + IPOSTPAD + IPREPAD
350 IPW = IPT + NQ + IPOSTPAD + IPREPAD
355 NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
356 LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
358 WORKTRD = LWMIN + IPOSTPAD
365.NE.
IF( NPROWNPCOL ) THEN
366 LCM = ILCM( NPROW, NPCOL )
367 ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) +
370 ITEMP = MAX( ITEMP, 2*( NB+NP )*NB )
371 WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD
377.GT.
IF( IPW+WORKSIZMEMSIZ ) THEN
379 $ WRITE( NOUT, FMT = 9996 )'tridiagonal reduction
',
380 $ ( IPW+WORKSIZ )*DBLESZ
386 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
388.GT.
IF( IERR( 1 )0 ) THEN
390 $ WRITE( NOUT, FMT = 9997 )'memory
'
399 CALL PDMATGEN( ICTXT, 'hemm
', 'n
', DESCA( M_ ),
400 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
401 $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ),
402 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ,
403 $ MYROW, MYCOL, NPROW, NPCOL )
409 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
410 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
412 CALL PDFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ),
413 $ NDIAG, IPREPAD, IPOSTPAD, PADVAL )
414 CALL PDFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ),
415 $ NOFFD, IPREPAD, IPOSTPAD, PADVAL )
416 CALL PDFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ,
417 $ IPREPAD, IPOSTPAD, PADVAL )
418 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
419 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
420 $ IPREPAD, IPOSTPAD, PADVAL )
421 ANORM = PDLANSY( 'i
', UPLO, N, MEM( IPA ), 1, 1,
422 $ DESCA, MEM( IPW ) )
423 CALL PDCHEKPAD( ICTXT, 'pdlansy
', NP, NQ,
424 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
425 $ IPREPAD, IPOSTPAD, PADVAL )
426 CALL PDCHEKPAD( ICTXT, 'pdlansy', worksiz-ipostpad, 1,
427 $ mem( ipw-iprepad ), worksiz-ipostpad,
428 $ iprepad, ipostpad, padval )
429 CALL pdfillpad( ictxt, worktrd-ipostpad, 1,
430 $ mem( ipw-iprepad ), worktrd-ipostpad,
431 $ iprepad, ipostpad, padval )
435 CALL blacs_barrier( ictxt,
'All' )
440 CALL pdsyttrd( uplo, n, mem( ipa ), 1, 1, desca,
441 $ mem( ipd ), mem( ipe ), mem( ipt ),
442 $ mem( ipw ), lwmin, info )
450 CALL pdchekpad( ictxt,
'PDSYTTRD', np, nq,
451 $ mem( ipa-iprepad ), desca( lld_ ),
452 $ iprepad, ipostpad, padval )
453 CALL pdchekpad( ictxt,
'PDSYTTRD', ndiag, 1,
454 $ mem( ipd-iprepad ), ndiag, iprepad,
457 CALL pdchekpad( ictxt,
'PDSYTTRDc', noffd, 1,
458 $ mem( ipe-iprepad ), noffd, iprepad
460 CALL pdchekpad( ictxt,
'PDSYTTRDd', nq, 1,
461 $ mem( ipt-iprepad ), nq, iprepad,
463 CALL pdchekpad( ictxt,
'PDSYTTRDe', worktrd-ipostpad,
464 $ 1, mem( ipw-iprepad ),
465 $ worktrd-ipostpad, iprepad, ipostpad,
467 CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
468 $ mem( ipw-iprepad ), worksiz-ipostpad,
469 $ iprepad, ipostpad, padval )
473 CALL pdsytdrv( uplo, n, mem( ipa ), 1, 1, desca,
474 $ mem( ipd ), mem( ipe ), mem( ipt ),
475 $ mem( ipw ), ierr( 1 ) )
481 CALL pdlatran( n, 1, mem( ipa ), 1, 1, desca,
483 CALL pdlafchk(
'Hemm',
'No', n, n, mem( ipa ), 1, 1,
484 $ desca, iaseed, anorm, fresid,
489 CALL pdchekpad( ictxt,
'PDSYTDRVf', np, nq,
490 $ mem( ipa-iprepad ), desca( lld_ ),
491 $ iprepad, ipostpad, padval )
492 CALL pdchekpad( ictxt,
'PDSYTDRVg', ndiag, 1,
493 $ mem( ipd-iprepad ), ndiag, iprepad,
495 CALL pdchekpad( ictxt, 'pdsytdrvh
', NOFFD, 1,
496 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
498 CALL PDCHEKPAD( ICTXT, 'pdsytdrvi
', WORKSIZ-IPOSTPAD,
499 $ 1, MEM( IPW-IPREPAD ),
500 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD,
505.LE..AND..EQ.
IF( FRESIDTHRESH FRESID-FRESID
506.AND..EQ.
$ 0.0D+0 IERR( 1 )0 ) THEN
510.EQ..AND..EQ.
IF( MYROW0 MYCOL0 )
511 $ WRITE( NOUT, FMT = 9991 )FRESID
519.EQ..AND..EQ..AND..NE.
IF( MYROW0 MYCOL0 IERR( 1 )0 )
520 $ WRITE( NOUT, FMT = * )'d or e copies incorrect ...
'
526 FRESID = FRESID - FRESID
532 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 50, 1, WTIME )
533 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 50, 1, CTIME )
537.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
542 NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3
547.GT.
IF( WTIME( 1 )0.0D+0 ) THEN
548 TMFLOPS = NOPS / WTIME( 1 )
552.GE.
IF( WTIME( 1 )0.0D+0 )
553 $ WRITE( NOUT, FMT = 9992 )'wall
', N, INTERLEAVE,
554 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
555 $ WTIME( 1 ), TMFLOPS, FRESID, PASSED
559.GT.
IF( CTIME( 1 )0.0D+0 ) THEN
560 TMFLOPS = NOPS / CTIME( 1 )
564.GE.
IF( CTIME( 1 )0.0D+0 )
565 $ WRITE( NOUT, FMT = 9992 )'cpu
', N, INTERLEAVE,
566 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
567 $ CTIME( 1 ), TMFLOPS, FRESID, PASSED
573.GT..OR.
IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 )0.0D+0
574.GT.
$ CTIME( 13 )+CTIME( 15 )+CTIME( 16 )0.0D+0 )
578.EQ.
IF( SPLITSTIMED1 ) THEN
579 WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ),
580 $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ),
582 WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ),
583 $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ),
586 WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ),
587 $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ),
589 WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ),
590 $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ),
592 WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB,
593 $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS
599.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
600.EQ.
IF( SPLITSTIMED1 ) THEN
601 WRITE( NOUT, FMT = 9985 )
602 WRITE( NOUT, FMT = 9984 )
603 WRITE( NOUT, FMT = 9983 )
604 WRITE( NOUT, FMT = 9982 )
605 WRITE( NOUT, FMT = 9981 )
606 WRITE( NOUT, FMT = 9980 )
607 WRITE( NOUT, FMT = 9979 )
608 WRITE( NOUT, FMT = 9978 )
609 WRITE( NOUT, FMT = 9977 )
610 WRITE( NOUT, FMT = 9976 )
611 WRITE( NOUT, FMT = 9975 )
612 WRITE( NOUT, FMT = 9974 )
613 WRITE( NOUT, FMT = 9973 )
618 CALL BLACS_GRIDEXIT( ICTXT )
622 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
623 $ '; it should be at least 1' )
624 9998
FORMAT(
'ILLEGAL GRID: nprow*npcol = ', i4,
'. It can be at most',
626 9997
FORMAT(
'Bad ', a6,
' parameters: going on to next test case.' )
627 9996
FORMAT(
'Unable to perform ', a,
': need TOTMEM of at least',
630 9995
FORMAT(
'PDSYTTRD, tailored reduction to tridiagonal form, test.'
632 9994
FORMAT(
'TIME N int 2gm bal anb pnb prcs TRD Time ',
633 $
' MFLOPS Residual CHECK' )
634 9993
FORMAT(
'---- ---- --- --- --- --- --- ---- -------- ',
635 $
'----------- -------- ------' )
636 9992
FORMAT( a4, 1x, i5, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x, i3, 1x,
637 $ i5, 1x, f9.2, 1x, f11.2, 1x, f8.2, 1x, a6 )
638 9991
FORMAT(
'||A - Q*T*Q''|| / (||A|| * N * eps) = ', g25.7 )
639 9990
FORMAT(
'wsplit1=[wsplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
640 $ 1x, f9.2, 1x, f9.2,
' ];' )
641 9989
FORMAT(
'wsplit2=[wsplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
642 $ 1x, f9.2, 1x, f9.2,
' ];' )
643 9988
FORMAT(
'csplit1=[csplit1;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
644 $ 1x, f9.2, 1x, f9.2,
' ];' )
645 9987
FORMAT(
'csplit2=[csplit2;', f9.2, 1x, f9.2, 1x, f9.2, 1x, f9.2,
646 $ 1x, f9.2, 1x, f9.2,
' ];' )
647 9986
FORMAT(
'size_opts=[size_opts;', i4, 1x, i4, 1x, i4, 1x, i4, 1x,
648 $ i4, 1x, i4, 1x, i4, 1x, i4, 1x,
' ];' )
649 9985
FORMAT(
'N=1; NPROCS=2; PNB=3; ANB=4; INTERLEAVE=5; BALANCED=6;',
650 $
' TWOGEMMS=7; TIMEINTERNALS=8;' )
651 9984
FORMAT(
'S1_OVERHEAD = 1; % Should be mainly cost of barrier' )
652 9983
FORMAT(
'S1_BARRIER = 2; % Cost of barrier' )
653 9982
FORMAT(
'S1_UPDCURCOL = 3; % Update the current column' )
654 9981
FORMAT(
'S1_HOUSE = 4; % Compute the householder vector' )
655 9980
FORMAT(
'S1_SPREAD = 5; % Spread across' )
656 9979
FORMAT(
'S1_TRANSPOSE = 6; % Transpose' )
657 9978
FORMAT(
'S2_UPDCURBLK = 1; % Update the current block column' )
658 9977
FORMAT(
'S2_TRMVT = 2; % TRMVT v = A * h; vt = ht * A'' ' )
659 9976
FORMAT(
'S2_UPD_V = 3; % v = v + V * HT * h + H * VT * h ' )
660 9975
FORMAT(
'S2_TRANS_SUM = 4; % v = v + vt''
' )
661 9974 FORMAT( 's2_dot = 5; % c = v
'' * h
' )
662 9973 FORMAT( 's2_r2k = 6; % A = a - v * h
'' - h * v
'' ' )