1 SUBROUTINE pdttrdtester( IAM, NPROCS, CHECK, NOUT, THRESH, NVAL,
2 $ NMAT, MEM, TOTMEM, KPASS, KFAIL, KSKIP )
11 INTEGER IAM, KFAIL, KPASS, KSKIP, NMAT, NOUT, NPROCS,
17 DOUBLE PRECISION MEM( * )
71 INTEGER BLOCK_CYCLIC_2D, , DTYPE_, CTXT_, M_, N_,
72 $ mb_, nb_, rsrc_, csrc_, lld_
73 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_
74 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
75 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
77 DOUBLE PRECISION PADVAL
78 parameter( dblesz = 8, padval = -9923.0d+0 )
80 parameter( timetests = 11 )
82 parameter( tests = 8 )
84 parameter( mintimen = 8 )
90 INTEGER DUMMY, IASEED, ICTXT, IMIDPAD, INFO, IPA, IPD,
91 $ ipe, ipostpad, iprepad, ipt, ipw, itemp, j, k,
92 $ lcm, lwmin, maxtests, memsiz, mycol, myrow, n,
93 $ nb, ndiag, ngrids, nn, noffd, np, npcol, nprow,
94 $ nps, nq, splitstimed, worksiz, worktrd
95 DOUBLE PRECISION ANORM, FRESID, NOPS, TMFLOPS
98 INTEGER ANBTEST( TESTS ), ANBTIME( TIMETESTS ),
99 $ baltest( tests ), baltime( timetests ),
100 $ desca( dlen_ ), descd( dlen_ ), ierr( 1 ),
101 $ intertest( tests ), intertime( timetests ),
102 $ pnbtest( tests ), pnbtime( timetests ),
103 $ twogemmtest( tests ), twogemmtime( timetests )
104 DOUBLE PRECISION CTIME( 100 ), WTIME( 100 )
115 INTEGER ICEIL, ILCM, NUMROC, PJLAENV
117 EXTERNAL lsame, iceil, ilcm, numroc, pjlaenv,
pdlansy
120 INTRINSIC dble, int,
max, sqrt
124 INTEGER ANB, BALANCED, BCKBLOCK, GSTBLOCK, INTERLEAVE,
129 COMMON / blocksizes / gstblock, lltblock, bckblock,
131 COMMON / minsize / minsz
132 COMMON / pjlaenvtiming / timing
133 COMMON / tailoredopts / pnb, anb, interleave,
135 COMMON / timecontrol / timeinternals
138 DATA baltime / 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0 /
140 DATA twogemmtime / 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0 /
141 DATA anbtime / 16, 16, 16, 16, 16, 8, 8, 32, 32, 16,
143 DATA pnbtime / 32, 32, 32, 32, 32, 32, 32, 32, 32,
145 DATA baltest / 0, 0, 0, 0, 1, 1, 1, 1 /
146 DATA intertest / 0, 0, 1, 1, 0, 0, 1, 1 /
147 DATA twogemmtest / 0, 1, 0, 1, 0, 1, 0, 1 /
148 DATA anbtest / 1, 2, 3, 16, 1, 2, 3, 16 /
149 DATA pnbtest / 1, 16, 8, 1, 16, 8, 1, 16 /
153 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
161 memsiz = totmem / dblesz
166 WRITE( nout, fmt = * )
167 WRITE( nout, fmt = 9995 )
168 WRITE( nout, fmt = 9994 )
169 WRITE( nout, fmt = 9993 )
170 WRITE( nout, fmt = * )
175 ngrids = int( sqrt( dble( nprocs ) ) )
185 CALL blacs_get( -1, 0, ictxt )
191 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
203 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
209 CALL igsum2d( ictxt, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
211.GT.
IF( IERR( 1 )0 ) THEN
213 $ WRITE( NOUT, FMT = 9997 )'matrix
'
220.GT.
IF( NMINTIMEN ) THEN
231 MAXTESTS = TIMETESTS + 2
238 DO 10 K = 1, MAXTESTS
241.GE.
IF( KMAXTESTS-1 ) THEN
257 DUMMY = PJLAENV( ICTXT, 3, 'pdsyttrd', 'l
', 0, 0,
264 BALANCED = BALTIME( K )
265 INTERLEAVE = INTERTIME( K )
266 TWOGEMMS = TWOGEMMTIME( K )
273 BALANCED = BALTEST( K )
274 INTERLEAVE = INTERTEST( K )
275 TWOGEMMS = TWOGEMMTEST( K )
283.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
284 CALL IGEBS2D( ICTXT, 'all
', ' ', 1, 1, SPLITSTIMED,
287 CALL IGEBR2D( ICTXT, 'all
', ' ', 1, 1, SPLITSTIMED, 1,
292.EQ..AND..EQ.
IF( SPLITSTIMED0 KMAXTESTS )
304 NP = NUMROC( N, NB, MYROW, 0, NPROW )
305 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
307 IPREPAD = MAX( NB, NP )
309 IPOSTPAD = MAX( NB, NQ )
319 CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
320 $ MAX( 1, NP )+IMIDPAD, IERR( 1 ) )
322 CALL DESCINIT( DESCD, 1, N, NB, NB, 0, 0, ICTXT, 1,
327 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
329.LT.
IF( IERR( 1 )0 ) THEN
331 $ WRITE( NOUT, FMT = 9997 )'descriptor
'
340 IF( LSAME( UPLO, 'u
' ) ) THEN
343 NOFFD = NUMROC( N-1, NB, MYCOL, 0, NPCOL )
347 IPD = IPA + DESCA( LLD_ )*NQ + IPOSTPAD + IPREPAD
348 IPE = IPD + NDIAG + IPOSTPAD + IPREPAD
349 IPT = IPE + NOFFD + IPOSTPAD + IPREPAD
350 IPW = IPT + NQ + IPOSTPAD + IPREPAD
355 NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
356 LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
358 WORKTRD = LWMIN + IPOSTPAD
365.NE.
IF( NPROWNPCOL ) THEN
366 LCM = ILCM( NPROW, NPCOL )
367 ITEMP = NB*ICEIL( ICEIL( NP, NB ), LCM / NPROW ) +
370 ITEMP = MAX( ITEMP, 2*( NB+NP )*NB )
371 WORKSIZ = MAX( LWMIN, ITEMP ) + IPOSTPAD
377.GT.
IF( IPW+WORKSIZMEMSIZ ) THEN
379 $ WRITE( NOUT, FMT = 9996 )'tridiagonal reduction
',
380 $ ( IPW+WORKSIZ )*DBLESZ
386 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
388.GT.
IF( IERR( 1 )0 ) THEN
390 $ WRITE( NOUT, FMT = 9997 )'memory
'
399 CALL PDMATGEN( ICTXT, 'hemm
', 'n
', DESCA( M_ ),
400 $ DESCA( N_ ), DESCA( MB_ ), DESCA( NB_ ),
401 $ MEM( IPA ), DESCA( LLD_ ), DESCA( RSRC_ ),
402 $ DESCA( CSRC_ ), IASEED, 0, NP, 0, NQ,
403 $ MYROW, MYCOL, NPROW, NPCOL )
409 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
410 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
412 CALL PDFILLPAD( ICTXT, NDIAG, 1, MEM( IPD-IPREPAD ),
413 $ NDIAG, IPREPAD, IPOSTPAD, PADVAL )
414 CALL PDFILLPAD( ICTXT, NOFFD, 1, MEM( IPE-IPREPAD ),
415 $ NOFFD, IPREPAD, IPOSTPAD, PADVAL )
416 CALL PDFILLPAD( ICTXT, NQ, 1, MEM( IPT-IPREPAD ), NQ,
417 $ IPREPAD, IPOSTPAD, PADVAL )
418 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
419 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
420 $ IPREPAD, IPOSTPAD, PADVAL )
421 ANORM = PDLANSY( 'i
', UPLO, N, MEM( IPA ), 1, 1,
422 $ DESCA, MEM( IPW ) )
423 CALL PDCHEKPAD( ICTXT, 'pdlansy', NP, NQ,
424 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
425 $ IPREPAD, IPOSTPAD, PADVAL )
426 CALL PDCHEKPAD( ICTXT, 'pdlansy', WORKSIZ-IPOSTPAD, 1,
427 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
428 $ IPREPAD, IPOSTPAD, PADVAL )
429 CALL PDFILLPAD( ICTXT, WORKTRD-IPOSTPAD, 1,
430 $ MEM( IPW-IPREPAD ), WORKTRD-IPOSTPAD,
431 $ IPREPAD, IPOSTPAD, PADVAL )
435 CALL BLACS_BARRIER( ICTXT, 'all
' )
440 CALL PDSYTTRD( UPLO, N, MEM( IPA ), 1, 1, DESCA,
441 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
442 $ MEM( IPW ), LWMIN, INFO )
450 CALL PDCHEKPAD( ICTXT, 'pdsyttrd', NP, NQ,
451 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
452 $ IPREPAD, IPOSTPAD, PADVAL )
453 CALL PDCHEKPAD( ICTXT, 'pdsyttrd', NDIAG, 1,
454 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
457 CALL PDCHEKPAD( ICTXT, 'pdsyttrdc
', NOFFD, 1,
458 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
460 CALL PDCHEKPAD( ICTXT, 'pdsyttrdd
', NQ, 1,
461 $ MEM( IPT-IPREPAD ), NQ, IPREPAD,
463 CALL PDCHEKPAD( ICTXT, 'pdsyttrde
', WORKTRD-IPOSTPAD,
464 $ 1, MEM( IPW-IPREPAD ),
465 $ WORKTRD-IPOSTPAD, IPREPAD, IPOSTPAD,
467 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
468 $ MEM( IPW-IPREPAD ), WORKSIZ-IPOSTPAD,
469 $ IPREPAD, IPOSTPAD, PADVAL )
473 CALL PDSYTDRV( UPLO, N, MEM( IPA ), 1, 1, DESCA,
474 $ MEM( IPD ), MEM( IPE ), MEM( IPT ),
475 $ MEM( IPW ), IERR( 1 ) )
481 CALL PDLATRAN( N, 1, MEM( IPA ), 1, 1, DESCA,
483 CALL PDLAFCHK( 'hemm
', 'no
', N, N, MEM( IPA ), 1, 1,
484 $ DESCA, IASEED, ANORM, FRESID,
489 CALL PDCHEKPAD( ICTXT, 'pdsytdrvf
', NP, NQ,
490 $ MEM( IPA-IPREPAD ), DESCA( LLD_ ),
491 $ IPREPAD, IPOSTPAD, PADVAL )
492 CALL PDCHEKPAD( ICTXT, 'pdsytdrvg
', NDIAG, 1,
493 $ MEM( IPD-IPREPAD ), NDIAG, IPREPAD,
495 CALL PDCHEKPAD( ICTXT, 'pdsytdrvh
', NOFFD, 1,
496 $ MEM( IPE-IPREPAD ), NOFFD, IPREPAD,
498 CALL PDCHEKPAD( ICTXT, 'pdsytdrvi
', WORKSIZ-IPOSTPAD,
499 $ 1, MEM( IPW-IPREPAD ),
500 $ WORKSIZ-IPOSTPAD, IPREPAD, IPOSTPAD,
505.LE..AND..EQ.
IF( FRESIDTHRESH FRESID-FRESID
506.AND..EQ.
$ 0.0D+0 IERR( 1 )0 ) THEN
510.EQ..AND..EQ.
IF( MYROW0 MYCOL0 )
511 $ WRITE( NOUT, FMT = 9991 )FRESID
519.EQ..AND..EQ..AND..NE.
IF( MYROW0 MYCOL0 IERR( 1 )0 )
520 $ WRITE( NOUT, FMT = * )'d or e copies incorrect ...
'
526 FRESID = FRESID - FRESID
532 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 50, 1, WTIME )
533 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 50, 1, CTIME )
537.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
542 NOPS = ( 16.0D+0 / 3.0D+0 )*NOPS**3
547.GT.
IF( WTIME( 1 )0.0D+0 ) THEN
548 TMFLOPS = NOPS / WTIME( 1 )
552.GE.
IF( WTIME( 1 )0.0D+0 )
553 $ WRITE( NOUT, FMT = 9992 )'wall
', N, INTERLEAVE,
554 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
555 $ WTIME( 1 ), TMFLOPS, FRESID, PASSED
559.GT.
IF( CTIME( 1 )0.0D+0 ) THEN
560 TMFLOPS = NOPS / CTIME( 1 )
564.GE.
IF( CTIME( 1 )0.0D+0 )
565 $ WRITE( NOUT, FMT = 9992 )'cpu
', N, INTERLEAVE,
566 $ TWOGEMMS, BALANCED, ANB, PNB, NPROW*NPCOL,
567 $ CTIME( 1 ), TMFLOPS, FRESID, PASSED
573.GT..OR.
IF( WTIME( 13 )+WTIME( 15 )+WTIME( 16 )0.0D+0
574.GT.
$ CTIME( 13 )+CTIME( 15 )+CTIME( 16 )0.0D+0 )
578.EQ.
IF( SPLITSTIMED1 ) THEN
579 WRITE( NOUT, FMT = 9990 )WTIME( 10 ), WTIME( 11 ),
580 $ WTIME( 12 ), WTIME( 13 ), WTIME( 14 ),
582 WRITE( NOUT, FMT = 9989 )WTIME( 16 ), WTIME( 17 ),
583 $ WTIME( 18 ), WTIME( 19 ), WTIME( 20 ),
586 WRITE( NOUT, FMT = 9988 )CTIME( 10 ), CTIME( 11 ),
587 $ CTIME( 12 ), CTIME( 13 ), CTIME( 14 ),
589 WRITE( NOUT, FMT = 9987 )CTIME( 16 ), CTIME( 17 ),
590 $ CTIME( 18 ), CTIME( 19 ), CTIME( 20 ),
592 WRITE( NOUT, FMT = 9986 )N, NPROW*NPCOL, PNB, ANB,
593 $ INTERLEAVE, BALANCED, TWOGEMMS, TIMEINTERNALS
599.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
600.EQ.
IF( SPLITSTIMED1 ) THEN
601 WRITE( NOUT, FMT = 9985 )
602 WRITE( NOUT, FMT = 9984 )
603 WRITE( NOUT, FMT = 9983 )
604 WRITE( NOUT, FMT = 9982 )
605 WRITE( NOUT, FMT = 9981 )
606 WRITE( NOUT, FMT = 9980 )
607 WRITE( NOUT, FMT = 9979 )
608 WRITE( NOUT, FMT = 9978 )
609 WRITE( NOUT, FMT = 9977 )
610 WRITE( NOUT, FMT = 9976 )
611 WRITE( NOUT, FMT = 9975 )
612 WRITE( NOUT, FMT = 9974 )
613 WRITE( NOUT, FMT = 9973 )
618 CALL BLACS_GRIDEXIT( ICTXT )
622 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
623 $ '; it should be at least 1
' )
624 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
626 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
627 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
630 9995 FORMAT( 'pdsyttrd, tailored reduction to tridiagonal form, test.
'
632 9994 FORMAT( 'time n int 2gm bal anb pnb prcs trd time
',
633 $ ' mflops residual check
' )
634 9993 FORMAT( '---- ---- --- --- --- --- --- ---- --------
',
635 $ '----------- -------- ------
' )
636 9992 FORMAT( A4, 1X, I5, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X, I3, 1X,
637 $ I5, 1X, F9.2, 1X, F11.2, 1X, F8.2, 1X, A6 )
638 9991 FORMAT( '||a - q*t*q
''|| / (||a|| * n * eps) =
', G25.7 )
639 9990 FORMAT( 'wsplit1=[wsplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
640 $ 1X, F9.2, 1X, F9.2, ' ];
' )
641 9989 FORMAT( 'wsplit2=[wsplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
642 $ 1X, F9.2, 1X, F9.2, ' ];
' )
643 9988 FORMAT( 'csplit1=[csplit1;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
644 $ 1X, F9.2, 1X, F9.2, ' ];
' )
645 9987 FORMAT( 'csplit2=[csplit2;
', F9.2, 1X, F9.2, 1X, F9.2, 1X, F9.2,
646 $ 1X, F9.2, 1X, F9.2, ' ];
' )
647 9986 FORMAT( 'size_opts=[size_opts;
', I4, 1X, I4, 1X, I4, 1X, I4, 1X,
648 $ I4, 1X, I4, 1X, I4, 1X, I4, 1X, ' ];
' )
649 9985 FORMAT( 'n=1; nprocs=2; pnb=3; anb=4; interleave=5; balanced=6;
',
650 $ ' twogemms=7; timeinternals=8;
' )
651 9984 FORMAT( 's1_overhead = 1; % Should be mainly cost of barrier
' )
652 9983 FORMAT( 's1_barrier = 2; % Cost of barrier
' )
653 9982 FORMAT( 's1_updcurcol = 3; % Update
the current column
' )
654 9981 FORMAT( 's1_house = 4; % Compute
the householder vector
' )
655 9980 FORMAT( 's1_spread = 5; % Spread across
' )
656 9979 FORMAT( 's1_transpose = 6; % Transpose
' )
657 9978 FORMAT( 's2_updcurblk = 1; % Update
the current block column
' )
658 9977 FORMAT( 's2_trmvt = 2; % TRMVT v = a * h; vt = ht * a
'' ' )
659 9976 FORMAT( 's2_upd_v = 3; % v = v + v * ht * h + h * vt * h
' )
660 9975 FORMAT( 's2_trans_sum = 4; % v = v + vt
'' ' )
661 9974 FORMAT( 's2_dot = 5; % c = v
'' * h
' )
662 9973 FORMAT( 's2_r2k = 6; % A = a - v * h
'' - h * v
'' ' )