74 parameter( totmem = 3000000 )
75 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
76 $ lld_, mb_, m_, nb_, n_, rsrc_
77 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
78 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
79 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
82 INTEGER dblesz, memsiz, ntests
83 DOUBLE PRECISION padval
84 parameter( dblesz = 8,
85 $ memsiz = totmem / dblesz, ntests = 20,
86 $ padval = -9923.0d+0, zero = 0.0d+0 )
88 parameter( int_one = 1 )
95 INTEGER bwl, bwu, bw_num, fillin_size, free_ptr, h, hh,
96 $ i, iam, iaseed, ibseed, ictxt, ictxtb,
97 $ ierr_temp, imidpad, info, ipa, ipb, ipostpad,
98 $ iprepad, ipw, ipw_size, ipw_solve,
99 $ ipw_solve_size, ip_driver_w, ip_fillin, j, k,
100 $ kfail, kpass, kskip, ktests, mycol, myrhs_size,
101 $ myrow, n, nb, nbw, ngrids, nmat, nnb, nnbr,
102 $ nnr, nout, np, npcol, nprocs, nprocs_real,
103 $ nprow, nq, nrhs, n_first, n_last, worksiz
105 DOUBLE PRECISION anorm, nops, nops2, sresid, tmflops,
109 INTEGER bwlval( ntests ), bwuval( ntests ), desca( 7 ),
110 $ desca2d( dlen_ ), descb( 7 ), descb2d( dlen_ ),
111 $ ierr( 1 ), nbrval( ntests ), nbval( ntests ),
112 $ nrval( ntests ), nval( ntests ),
113 $ pval( ntests ), qval( ntests )
114 DOUBLE PRECISION ctime( 2 ), mem( memsiz ), wtime( 2 )
117 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
131 INTRINSIC dble,
max,
min, mod
134 DATA kfail, kpass, kskip, ktests / 4*0 /
143 CALL blacs_pinfo( iam, nprocs )
147 CALL pddbinfo( outfile, nout, trans, nmat, nval, ntests, nbw,
148 $ bwlval, bwuval, ntests, nnb, nbval, ntests, nnr,
149 $ nrval, ntests, nnbr, nbrval, ntests, ngrids, pval,
150 $ ntests, qval, ntests, thresh, mem, iam, nprocs )
152 check = ( thresh.GE.0.0d+0 )
157 WRITE( nout, fmt = * )
158 WRITE( nout, fmt = 9995 )
159 WRITE( nout, fmt = 9994 )
160 WRITE( nout, fmt = * )
173 IF( nprow.LT.1 )
THEN
175 $
WRITE( nout, fmt = 9999 )
'GRID',
'nprow', nprow
177 ELSE IF( npcol.LT.1 )
THEN
179 $
WRITE( nout, fmt = 9999 )
'GRID',
'npcol', npcol
181 ELSE IF( nprow*npcol.GT.nprocs )
THEN
183 $
WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 IF( ierr( 1 ).GT.0 )
THEN
189 $
WRITE( nout, fmt = 9997 )
'grid'
196 CALL blacs_get( -1, 0, ictxt )
202 CALL blacs_get( -1, 0, ictxtb )
210 IF( myrow.LT.0 .OR. mycol.LT.0 )
THEN
224 $
WRITE( nout, fmt = 9999 )
'MATRIX',
'N', n
230 CALL igsum2d
'All',
' ', 1, 1, ierr, 1,
233 IF( ierr( 1 ).GT.0 )
THEN
235 $
WRITE( nout, fmt = 9997 )
'size'
241 DO 45 bw_num = 1, nbw
245 bwl = bwlval( bw_num )
248 $
WRITE( nout, fmt = 9999 )
'Lower Band',
'bwl', bwl
252 bwu = bwuval( bw_num )
255 $
WRITE( nout, fmt = 9999 )
'Upper Band',
'bwu', bwu
259 IF( bwl.GT.n-1 )
THEN
265 IF( bwu.GT.n-1 )
THEN
273 CALL igsum2d( ictxt,
'All',
' ', 1, 1, ierr, 1,
276 IF( ierr( 1 ).GT.0 )
THEN
287 nb =( (n-(npcol-1)*
max(bwl,bwu)-1)/npcol + 1 )
289 nb =
max( nb, 2*
max(bwl,bwu) )
296 IF( nb.LT.
min( 2*
max(bwl,bwu), n ) )
THEN
302 CALL igsum2d( ictxt, 'all
', ' ', 1, 1, IERR, 1,
305.GT.
IF( IERR( 1 )0 ) THEN
312 NP = NUMROC( (BWL+BWU+1), (BWL+BWU+1),
314 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
317 IPREPAD = ((BWL+BWU+1)+10)
319 IPOSTPAD = ((BWL+BWU+1)+10)
328 CALL DESCINIT( DESCA2D, (BWL+BWU+1), N,
329 $ (BWL+BWU+1), NB, 0, 0,
330 $ ICTXT,((BWL+BWU+1)+10), IERR( 1 ) )
339 DESCA( 6 ) = ((BWL+BWU+1)+10)
342 IERR_TEMP = IERR( 1 )
344 IERR( 1 ) = MIN( IERR( 1 ), IERR_TEMP )
348 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR, 1, -1, 0 )
350.LT.
IF( IERR( 1 )0 ) THEN
352 $ WRITE( NOUT, FMT = 9997 ) 'descriptor
'
364 FREE_PTR = FREE_PTR + IPREPAD
367 FREE_PTR = FREE_PTR + DESCA2D( LLD_ )*
380 $ NB*(BWL+BWU)+6*MAX(BWL,BWU)*MAX(BWL,BWU)
384 FREE_PTR = FREE_PTR + IPREPAD
386 FREE_PTR = FREE_PTR + FILLIN_SIZE
394 IPW_SIZE = MAX(BWL,BWU)*MAX(BWL,BWU)
399 FREE_PTR = FREE_PTR + IPW_SIZE
404.GT.
IF( FREE_PTRMEMSIZ ) THEN
406 $ WRITE( NOUT, FMT = 9996 )
407 $ 'divide and conquer factorization
',
414 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR,
417.GT.
IF( IERR( 1 )0 ) THEN
419 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
425 WORKSIZ = MAX( ((BWL+BWU+1)+10), NB )
433 WORKSIZ = MAX( WORKSIZ, DESCA2D( NB_ ) )
436 WORKSIZ = MAX( WORKSIZ,
437 $ MAX(5,MAX(MAX(BWL,BWU)*(MAX(BWL,BWU)+2),NB))+2*NB )
440 FREE_PTR = FREE_PTR + IPREPAD
441 IP_DRIVER_W = FREE_PTR
442 FREE_PTR = FREE_PTR + WORKSIZ + IPOSTPAD
448.GT.
IF( FREE_PTRMEMSIZ ) THEN
450 $ WRITE( NOUT, FMT = 9996 ) 'factorization
',
451 $ ( FREE_PTR )*DBLESZ
457 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1, IERR,
460.GT.
IF( IERR( 1 )0 ) THEN
462 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
467 CALL PDBMATGEN( ICTXT, 'g
', 'd
', BWL, BWU, N,
468 $ (BWL+BWU+1), NB, MEM( IPA ),
469 $ ((BWL+BWU+1)+10), 0, 0, IASEED, MYROW,
470 $ MYCOL, NPROW, NPCOL )
472 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
473 $ ((BWL+BWU+1)+10), IPREPAD, IPOSTPAD,
476 CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
477 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
478 $ IPREPAD, IPOSTPAD, PADVAL )
484 ANORM = PDLANGE( '1
', (BWL+BWU+1),
485 $ N, MEM( IPA ), 1, 1,
486 $ DESCA2D, MEM( IP_DRIVER_W ) )
487 CALL PDCHEKPAD( ICTXT, 'pdlange', NP, NQ,
488 $ MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
489 $ IPREPAD, IPOSTPAD, PADVAL )
490 CALL PDCHEKPAD( ICTXT, 'pdlange',
492 $ MEM( IP_DRIVER_W-IPREPAD ), WORKSIZ,
493 $ IPREPAD, IPOSTPAD, PADVAL )
498 CALL BLACS_BARRIER( ICTXT, 'all
' )
504 CALL PDDBTRF( N, BWL, BWU, MEM( IPA ), 1, DESCA,
505 $ MEM( IP_FILLIN ), FILLIN_SIZE, MEM( IPW ),
512 WRITE( NOUT, FMT = * ) 'pddbtrf info=
', INFO
522 CALL PDCHEKPAD( ICTXT, 'pddbtrf', NP,
523 $ NQ, MEM( IPA-IPREPAD ), ((BWL+BWU+1)+10),
524 $ IPREPAD, IPOSTPAD, PADVAL )
538 CALL DESCINIT( DESCB2D, N, NRHS, NB, 1, 0, 0,
539 $ ICTXTB, NB+10, IERR( 1 ) )
548 DESCB( 6 ) = DESCB2D( LLD_ )
557 FREE_PTR = FREE_PTR + IPREPAD
559 FREE_PTR = FREE_PTR + NRHS*DESCB2D( LLD_ )
564 IPW_SOLVE_SIZE = (MAX(BWL,BWU)*NRHS)
567 FREE_PTR = FREE_PTR + IPW_SOLVE_SIZE
570.GT.
IF( FREE_PTRMEMSIZ ) THEN
572 $ WRITE( NOUT, FMT = 9996 )'solve
',
573 $ ( FREE_PTR )*DBLESZ
579 CALL IGSUM2D( ICTXT, 'all
', ' ', 1, 1,
582.GT.
IF( IERR( 1 )0 ) THEN
584 $ WRITE( NOUT, FMT = 9997 ) 'memory
'
589 MYRHS_SIZE = NUMROC( N, NB, MYCOL, 0, NPCOL )
593 CALL PDMATGEN(ICTXTB, 'no
', 'no
',
594 $ DESCB2D( M_ ), DESCB2D( N_ ),
595 $ DESCB2D( MB_ ), DESCB2D( NB_ ),
597 $ DESCB2D( LLD_ ), DESCB2D( RSRC_ ),
599 $ IBSEED, 0, MYRHS_SIZE, 0, NRHS, MYCOL,
600 $ MYROW, NPCOL, NPROW )
603 CALL PDFILLPAD( ICTXTB, NB, NRHS,
604 $ MEM( IPB-IPREPAD ),
608 CALL PDFILLPAD( ICTXT, WORKSIZ, 1,
609 $ MEM( IP_DRIVER_W-IPREPAD ),
615 CALL BLACS_BARRIER( ICTXT, 'all
')
620 CALL PDDBTRS( TRANS, N, BWL, BWU, NRHS, MEM( IPA ),
621 $ 1, DESCA, MEM( IPB ), 1, DESCB,
622 $ MEM( IP_FILLIN ), FILLIN_SIZE,
623 $ MEM( IPW_SOLVE ), IPW_SOLVE_SIZE,
630 $ WRITE( NOUT, FMT = * ) 'pddbtrs info=
', INFO
640 CALL PDCHEKPAD( ICTXT, 'pddbtrs-work
',
642 $ MEM( IP_DRIVER_W-IPREPAD ),
650 CALL PDDBLASCHK( 'n
', 'd
', TRANS,
652 $ MEM( IPB ), 1, 1, DESCB2D,
653 $ IASEED, MEM( IPA ), 1, 1, DESCA2D,
654 $ IBSEED, ANORM, SRESID,
655 $ MEM( IP_DRIVER_W ), WORKSIZ )
658.GT.
IF( SRESIDTHRESH )
659 $ WRITE( NOUT, FMT = 9985 ) SRESID
664.LE..AND.
IF( ( SRESIDTHRESH )
665.EQ.
$ ( (SRESID-SRESID)0.0D+0 ) ) THEN
680 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'w
', 2, 1,
682 CALL SLCOMBINE( ICTXT, 'all
', '>
', 'c
', 2, 1,
687.EQ..AND..EQ.
IF( MYROW0 MYCOL0 ) THEN
693 NPROCS_REAL = ( N-1 )/NB + 1
694 N_LAST = MOD( N-1, NB ) + 1
699 NOPS = 2*(DBLE(N)*DBLE(BWL)*
701 $ (DBLE(N)*DBLE(BWL))
706 $ 2 * (DBLE(N)*(DBLE(BWL)+DBLE(BWU))
714 NOPS2 = 2*( (DBLE(N_FIRST)*
715 $ DBLE(BWL)*DBLE(BWU)))
717.GT.
IF ( NPROCS_REAL 1) THEN
723 $ 8*( (DBLE(N_LAST)*DBLE(BWL)
727.GT.
IF ( NPROCS_REAL 2) THEN
731 NOPS2 = NOPS2 + (NPROCS_REAL-2)*
732 $ 8*( (DBLE(NB)*DBLE(BWL)
739 $ 2*( NPROCS_REAL-1 ) *
741.GT.
IF( NPROCS_REAL 1 ) THEN
743 $ 2*( NPROCS_REAL-2 ) *
757 $ ( DBLE(BWL)+DBLE(BWU))
759.GT.
IF ( NPROCS_REAL 1 ) THEN
767 $ (DBLE(N_LAST)*(DBLE(BWL)+
768 $ DBLE(BWU)))*DBLE(NRHS)
771.GT.
IF ( NPROCS_REAL 2 ) THEN
778 $ ( NPROCS_REAL-2)*2*
779 $ ( (DBLE(NB)*(DBLE(BWL)+
780 $ DBLE(BWU)))*DBLE(NRHS) )
786 $ NRHS*( NPROCS_REAL-1)*2*(BWL*BWU )
787.GT.
IF( NPROCS_REAL 1 ) THEN
789 $ NRHS*( NPROCS_REAL-2 ) *
799.GT.
IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
801 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
806.GT.
IF( WTIME( 1 )+WTIME( 2 )0.0D+0 ) THEN
808 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
813.GE.
IF( WTIME( 2 )0.0D+0 )
814 $ WRITE( NOUT, FMT = 9993 ) 'wall
', TRANS,
817 $ NB, NRHS, NPROW, NPCOL,
818 $ WTIME( 1 ), WTIME( 2 ), TMFLOPS,
823.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
825 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
830.GT.
IF( CTIME( 1 )+CTIME( 2 )0.0D+0 ) THEN
832 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
837.GE.
IF( CTIME( 2 )0.0D+0 )
838 $ WRITE( NOUT, FMT = 9993 ) 'cpu
', TRANS,
841 $ NB, NRHS, NPROW, NPCOL,
842 $ CTIME( 1 ), CTIME( 2 ), TMFLOPS,
858 CALL BLACS_GRIDEXIT( ICTXT )
859 CALL BLACS_GRIDEXIT( ICTXTB )
869 KTESTS = KPASS + KFAIL + KSKIP
870 WRITE( NOUT, FMT = * )
871 WRITE( NOUT, FMT = 9992 ) KTESTS
873 WRITE( NOUT, FMT = 9991 ) KPASS
874 WRITE( NOUT, FMT = 9989 ) KFAIL
876 WRITE( NOUT, FMT = 9990 ) KPASS
878 WRITE( NOUT, FMT = 9988 ) KSKIP
879 WRITE( NOUT, FMT = * )
880 WRITE( NOUT, FMT = * )
881 WRITE( NOUT, FMT = 9987 )
882.NE..AND..NE.
IF( NOUT6 NOUT0 )
888 9999 FORMAT( 'illegal
', A6, ':
', A5, ' =
', I3,
889 $ '; it should be at least 1
' )
890 9998 FORMAT( 'illegal grid: nprow*npcol =
', I4, '. it can be at most
',
892 9997 FORMAT( 'bad
', A6, ' parameters: going on to next test case.
' )
893 9996 FORMAT( 'unable to perform
', A, ': need totmem of at least
',
895 9995 FORMAT( 'time tr n bwl bwu nb nrhs p q l*u time
',
896 $ 'slv time mflops mflop2 check
' )
897 9994 FORMAT( '---- -- ------ --- --- ---- ----- ---- ---- --------
',
898 $ '-------- -------- -------- ------
' )
899 9993 FORMAT( A4,1X,A1,2X,I6,1X,I3,1X,I3,1X,I4,1X,I5,
900 $ 1X,I4,1X,I4,1X,F9.3,
901 $ F9.4, F9.2, F9.2, 1X, A6 )
902 9992 FORMAT( 'finished
', I6, ' tests, with
the following results:
' )
903 9991 FORMAT( I5, ' tests completed and passed residual checks.
' )
904 9990 FORMAT( I5, ' tests completed without checking.
' )
905 9989 FORMAT( I5, ' tests completed and failed residual checks.
' )
906 9988 FORMAT( I5, ' tests skipped because of illegal input values.
' )
907 9987 FORMAT( 'END OF TESTS.
' )
908 9986 FORMAT( '||A -
', A4, '|| / (||A|| * N * eps) =
', G25.7 )
909 9985 FORMAT( '||Ax-b||/(||x||*||A||*eps*N)
', F25.7 )
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsame(ca, cb)
LSAME
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
subroutine blacs_gridexit(cntxt)
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
integer function numroc(n, nb, iproc, isrcproc, nprocs)
subroutine pdbmatgen(ictxt, aform, aform2, bwl, bwu, n, mb, nb, a, lda, iarow, iacol, iseed, myrow, mycol, nprow, npcol)
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
subroutine pddbinfo(summry, nout, trans, nmat, nval, ldnval, nbw, bwlval, bwuval, ldbwval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
subroutine pddblaschk(symm, uplo, trans, n, bwl, bwu, nrhs, x, ix, jx, descx, iaseed, a, ia, ja, desca, ibseed, anorm, resid, work, worksiz)
subroutine pddbtrf(n, bwl, bwu, a, ja, desca, af, laf, work, lwork, info)
subroutine pddbtrs(trans, n, bwl, bwu, nrhs, a, ja, desca, b, ib, descb, af, laf, work, lwork, info)
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)