OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdinvdriver.f
Go to the documentation of this file.
1 PROGRAM pdinvdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* Purpose
9* =======
10*
11* PDINVDRIVER is the main test program for the DOUBLE PRECISION
12* SCALAPACK matrix inversion routines. This test driver computes the
13* inverse of different kind of matrix and tests the results.
14*
15* The program must be driven by a short data file. An annotated example
16* of a data file can be obtained by deleting the first 3 characters
17* from the following 14 lines:
18* 'ScaLAPACK Matrix Inversion Testing input file'
19* 'PVM machine.'
20* 'INV.out' output file name (if any)
21* 6 device out
22* 5 number of matrix types (next line)
23* 'GEN' 'UTR' 'LTR' 'UPD' LPD' GEN, UTR, LTR, UPD, LPD
24* 4 number of problems sizes
25* 1000 2000 3000 4000 values of N
26* 3 number of NB's
27* 4 30 35 values of NB
28* 2 number of process grids (ordered P & Q)
29* 4 2 values of P
30* 4 4 values of Q
31* 1.0 threshold
32*
33* Internal Parameters
34* ===================
35*
36* TOTMEM INTEGER, default = 2000000
37* TOTMEM is a machine-specific parameter indicating the
38* maximum amount of available memory in bytes.
39* The user should customize TOTMEM to his platform. Remember
40* to leave room in memory for the operating system, the BLACS
41* buffer, etc. For example, on a system with 8 MB of memory
42* per process (e.g., one processor on an Intel iPSC/860), the
43* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
44* code, BLACS buffer, etc). However, for PVM, we usually set
45* TOTMEM = 2000000. Some experimenting with the maximum value
46* of TOTMEM may be required.
47*
48* INTGSZ INTEGER, default = 4 bytes.
49* DBLESZ INTEGER, default = 8 bytes.
50* INTGSZ and DBLESZ indicate the length in bytes on the
51* given platform for an integer and a double precision real.
52* MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
53*
54* All arrays used by SCALAPACK routines are allocated from
55* this array and referenced by pointers. The integer IPA,
56* for example, is a pointer to the starting element of MEM for
57* the matrix A.
58*
59* =====================================================================
60*
61* .. Parameters ..
62 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
63 $ lld_, mb_, m_, nb_, n_, rsrc_
64 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
65 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
66 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
67 INTEGER DBLESZ, intgsz, memsiz, ntests, totmem
68 DOUBLE PRECISION padval, zero
69 parameter( dblesz = 8, intgsz = 4, totmem = 2000000,
70 $ memsiz = totmem / dblesz, ntests = 20,
71 $ padval = -9923.0d+0, zero = 0.0d+0 )
72* ..
73* .. Local Scalars ..
74 CHARACTER uplo
75 CHARACTER*3 mtyp
76 CHARACTER*6 passed
77 CHARACTER*80 outfile
78 LOGICAL check
79 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
80 $ ippiv, iprepad, ipostpad, ipiw, ipw, itemp, j,
81 $ k, ktests, kpass, kfail, kskip, l, LCM, lipiv,
82 $ liwork, lwork, mycol, myrow, n, nb, ngrids,
83 $ nmat, nmtyp, nnb, nout, NP, npcol, nprocs,
84 $ nprow, nq, workiinv, workinv, worksiz
85 REAL thresh
86 DOUBLE PRECISION anorm, fresid, nops, rcond, tmflops
87* ..
88* .. Local Arrays ..
89 CHARACTER*3 mattyp( ntests )
90 INTEGER desca( dlen_ ), ierr( 1 ), nbval( ntests ),
91 $ nval( ntests ), pval( ntests ),
92 $ qval( ntests )
93 DOUBLE PRECISION mem( memsiz ), ctime( 2 ), wtime( 2 )
94* ..
95* .. External Subroutines ..
96 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
98 $ blacs_pinfo, descinit, igsum2d, pdchekpad,
103* ..
104* .. External Functions ..
105 LOGICAL lsamen
106 INTEGER iceil, ilcm, numroc
107 DOUBLE PRECISION pdlange, pdlansy, pdlantr
108 EXTERNAL iceil, ilcm, lsamen, numroc, pdlange,
110* ..
111* .. Intrinsic Functions ..
112 INTRINSIC dble, max, mod
113* ..
114* .. Data Statements ..
115 DATA ktests, kpass, kfail, kskip /4*0/
116* ..
117* .. Executable Statements ..
118*
119* Get starting information
120*
121 CALL blacs_pinfo( iam, nprocs )
122 iaseed = 100
123 CALL pdinvinfo( outfile, nout, nmtyp, mattyp, ntests, nmat, nval,
124 $ ntests, nnb, nbval, ntests, ngrids, pval, ntests,
125 $ qval, ntests, thresh, mem, iam, nprocs )
126 check = ( thresh.GE.0.0e+0 )
127*
128* Loop over the different matrix types
129*
130 DO 40 i = 1, nmtyp
131*
132 mtyp = mattyp( i )
133*
134* Print headings
135*
136 IF( iam.EQ.0 ) THEN
137 WRITE( nout, fmt = * )
138 IF( lsamen( 3, mtyp, 'GEN' ) ) THEN
139 WRITE( nout, fmt = 9986 )
140 $ 'A is a general matrix.'
141 ELSE IF( lsamen( 3, mtyp, 'UTR' ) ) THEN
142 WRITE( nout, fmt = 9986 )
143 $ 'A is an upper triangular matrix.'
144 ELSE IF( lsamen( 3, mtyp, 'LTR' ) ) THEN
145 WRITE( nout, fmt = 9986 )
146 $ 'A is a lower triangular matrix.'
147 ELSE IF( lsamen( 3, mtyp, 'UPD' ) ) THEN
148 WRITE( nout, fmt = 9986 )
149 $ 'A is a symmetric positive definite matrix.'
150 WRITE( nout, fmt = 9986 )
151 $ 'Only the upper triangular part will be '//
152 $ 'referenced.'
153 ELSE IF( lsamen( 3, mtyp, 'LPD' ) ) THEN
154 WRITE( nout, fmt = 9986 )
155 $ 'A is a symmetric positive definite matrix.'
156 WRITE( nout, fmt = 9986 )
157 $ 'Only the lower triangular part will be '//
158 $ 'referenced.'
159 END IF
160 WRITE( nout, fmt = * )
161 WRITE( nout, fmt = 9995 )
162 WRITE( nout, fmt = 9994 )
163 WRITE( nout, fmt = * )
164 END IF
165*
166* Loop over different process grids
167*
168 DO 30 j = 1, ngrids
169*
170 nprow = pval( j )
171 npcol = qval( j )
172*
173* Make sure grid information is correct
174*
175 ierr( 1 ) = 0
176 IF( nprow.LT.1 ) THEN
177 IF( iam.EQ.0 )
178 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
179 ierr( 1 ) = 1
180 ELSE IF( npcol.LT.1 ) THEN
181 IF( iam.EQ.0 )
182 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
183 ierr( 1 ) = 1
184 ELSE IF( nprow*npcol.GT.nprocs ) THEN
185 IF( iam.EQ.0 )
186 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
187 ierr( 1 ) = 1
188 END IF
189*
190 IF( ierr( 1 ).GT.0 ) THEN
191 IF( iam.EQ.0 )
192 $ WRITE( nout, fmt = 9997 ) 'grid'
193 kskip = kskip + 1
194 GO TO 30
195 END IF
196*
197* Define process grid
198*
199 CALL blacs_get( -1, 0, ictxt )
200 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
201 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
202*
203* Go to bottom of loop if this case doesn't use my process
204*
205 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
206 $ GO TO 30
207*
208 DO 20 k = 1, nmat
209*
210 n = nval( k )
211*
212* Make sure matrix information is correct
213*
214 ierr( 1 ) = 0
215 IF( n.LT.1 ) THEN
216 IF( iam.EQ.0 )
217 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
218 ierr( 1 ) = 1
219 END IF
220*
221* Make sure no one had error
222*
223 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
224*
225 IF( ierr( 1 ).GT.0 ) THEN
226 IF( iam.EQ.0 )
227 $ WRITE( nout, fmt = 9997 ) 'matrix'
228 kskip = kskip + 1
229 GO TO 20
230 END IF
231*
232* Loop over different blocking sizes
233*
234 DO 10 l = 1, nnb
235*
236 nb = nbval( l )
237*
238* Make sure nb is legal
239*
240 ierr( 1 ) = 0
241 IF( nb.LT.1 ) THEN
242 ierr( 1 ) = 1
243 IF( iam.EQ.0 )
244 $ WRITE( nout, fmt = 9999 ) 'nb', 'nb', NB
245 END IF
246*
247* Check all processes for an error
248*
249 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1,
250 $ 0 )
251*
252.GT. IF( IERR( 1 )0 ) THEN
253.EQ. IF( IAM0 )
254 $ WRITE( NOUT, FMT = 9997 ) 'nb'
255 KSKIP = KSKIP + 1
256 GO TO 10
257 END IF
258*
259* Padding constants
260*
261 NP = NUMROC( N, NB, MYROW, 0, NPROW )
262 NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
263 IF( CHECK ) THEN
264 IPREPAD = MAX( NB, NP )
265 IMIDPAD = NB
266 IPOSTPAD = MAX( NB, NQ )
267 ELSE
268 IPREPAD = 0
269 IMIDPAD = 0
270 IPOSTPAD = 0
271 END IF
272*
273* Initialize the array descriptor for the matrix A
274*
275 CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT,
276 $ MAX( 1, NP ) + IMIDPAD, IERR( 1 ) )
277*
278* Check all processes for an error
279*
280 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1,
281 $ 0 )
282*
283.LT. IF( IERR( 1 )0 ) THEN
284.EQ. IF( IAM0 )
285 $ WRITE( NOUT, FMT = 9997 ) 'descriptor'
286 KSKIP = KSKIP + 1
287 GO TO 10
288 END IF
289*
290* Assign pointers into MEM for ScaLAPACK arrays, A is
291* allocated starting at position MEM( IPREPAD+1 )
292*
293 IPA = IPREPAD+1
294*
295 LCM = ILCM( NPROW, NPCOL )
296 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
297*
298* Pivots are needed by LU factorization
299*
300 IPPIV = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD +
301 $ IPREPAD
302 LIPIV = ICEIL( INTGSZ * ( NP + NB ), DBLESZ )
303 IPW = IPPIV + LIPIV + IPOSTPAD + IPREPAD
304*
305 LWORK = MAX( 1, NP * DESCA( NB_ ) )
306 WORKINV = LWORK + IPOSTPAD
307*
308* Figure the amount of workspace required by the
309* general matrix inversion
310*
311.EQ. IF( NPROWNPCOL ) THEN
312 LIWORK = NQ + DESCA( NB_ )
313 ELSE
314*
315* change the integer workspace needed for PDGETRI
316* LIWORK = MAX( DESCA( NB_ ), DESCA( MB_ ) *
317* $ ICEIL( ICEIL( DESCA( LLD_ ),
318* $ DESCA( MB_ ) ), LCM / NPROW ) )
319* $ + NQ
320 LIWORK = NUMROC( DESCA( M_ ) +
321 $ DESCA( MB_ ) * NPROW
322 $ + MOD ( 1 - 1, DESCA( MB_ ) ), DESCA ( NB_ ),
323 $ MYCOL, DESCA( CSRC_ ), NPCOL ) +
324 $ MAX ( DESCA( MB_ ) * ICEIL ( ICEIL(
325 $ NUMROC( DESCA( M_ ) + DESCA( MB_ ) * NPROW,
326 $ DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW ),
327 $ DESCA( MB_ ) ), LCM / NPROW ), DESCA( NB_ ) )
328*
329 END IF
330 WORKIINV = ICEIL( LIWORK*INTGSZ, DBLESZ ) +
331 $ IPOSTPAD
332 IPIW = IPW + WORKINV + IPREPAD
333 WORKSIZ = WORKINV + IPREPAD + WORKIINV
334*
335 ELSE
336*
337* No pivots or workspace needed for triangular or
338* symmetric positive definite matrices.
339*
340 IPW = IPA + DESCA( LLD_ ) * NQ + IPOSTPAD + IPREPAD
341 WORKSIZ = 1 + IPOSTPAD
342*
343 END IF
344*
345 IF( CHECK ) THEN
346*
347* Figure amount of work space for the norm
348* computations
349*
350 IF( LSAMEN( 3, MTYP, 'gen.OR.' )
351 $ LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
352 ITEMP = NQ
353 ELSE
354 ITEMP = 2 * NQ + NP
355.NE. IF( NPROWNPCOL ) THEN
356 ITEMP = ITEMP +
357 $ NB * ICEIL( ICEIL( NP, NB ),
358 $ LCM / NPROW )
359 END IF
360 END IF
361 WORKSIZ = MAX( WORKSIZ-IPOSTPAD, ITEMP )
362*
363* Figure the amount of workspace required by the
364* checking routine
365*
366 WORKSIZ = MAX( WORKSIZ, 2 * NB * MAX( 1, NP ) ) +
367 $ IPOSTPAD
368*
369 END IF
370*
371* Check for adequate memory for problem size
372*
373 IERR( 1 ) = 0
374.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
375.EQ. IF( IAM0 )
376 $ WRITE( NOUT, FMT = 9996 ) 'inversion',
377 $ ( IPW + WORKSIZ ) * DBLESZ
378 IERR( 1 ) = 1
379 END IF
380*
381* Check all processes for an error
382*
383 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR, 1, -1,
384 $ 0 )
385*
386.GT. IF( IERR( 1 )0 ) THEN
387.EQ. IF( IAM0 )
388 $ WRITE( NOUT, FMT = 9997 ) 'memory'
389 KSKIP = KSKIP + 1
390 GO TO 10
391 END IF
392*
393 IF( LSAMEN( 3, MTYP, 'gen.OR.' )
394 $ LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
395*
396* Generate a general diagonally dominant matrix A
397*
398 CALL PDMATGEN( ICTXT, 'n', 'd', DESCA( M_ ),
399 $ DESCA( N_ ), DESCA( MB_ ),
400 $ DESCA( NB_ ), MEM( IPA ),
401 $ DESCA( LLD_ ), DESCA( RSRC_ ),
402 $ DESCA( CSRC_ ), IASEED, 0, NP, 0,
403 $ NQ, MYROW, MYCOL, NPROW, NPCOL )
404*
405 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
406*
407* Generate a symmetric positive definite matrix
408*
409 CALL PDMATGEN( ICTXT, 's', 'd', DESCA( M_ ),
410 $ DESCA( N_ ), DESCA( MB_ ),
411 $ DESCA( NB_ ), MEM( IPA ),
412 $ DESCA( LLD_ ), DESCA( RSRC_ ),
413 $ DESCA( CSRC_ ), IASEED, 0, NP, 0,
414 $ NQ, MYROW, MYCOL, NPROW, NPCOL )
415*
416 END IF
417*
418* Zeros not-referenced part of A, if any.
419*
420 IF( LSAMEN( 1, MTYP, 'u' ) ) THEN
421*
422 UPLO = 'u'
423 CALL PDLASET( 'lower', N-1, N-1, ZERO, ZERO,
424 $ MEM( IPA ), 2, 1, DESCA )
425*
426 ELSE IF( LSAMEN( 1, MTYP, 'l' ) ) THEN
427*
428 UPLO = 'l'
429 CALL PDLASET( 'upper', N-1, N-1, ZERO, ZERO,
430 $ MEM( IPA ), 1, 2, DESCA )
431*
432 ELSE
433*
434 UPLO = 'g'
435*
436 END IF
437*
438* Need 1-norm of A for checking
439*
440 IF( CHECK ) THEN
441*
442 CALL PDFILLPAD( ICTXT, NP, NQ, MEM( IPA-IPREPAD ),
443 $ DESCA( LLD_ ), IPREPAD, IPOSTPAD,
444 $ PADVAL )
445 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
446 $ MEM( IPW-IPREPAD ),
447 $ WORKSIZ-IPOSTPAD, IPREPAD,
448 $ IPOSTPAD, PADVAL )
449*
450 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
451*
452 CALL PDFILLPAD( ICTXT, LIPIV, 1,
453 $ MEM( IPPIV-IPREPAD ), LIPIV,
454 $ IPREPAD, IPOSTPAD, PADVAL )
455 ANORM = PDLANGE( '1', N, N, MEM( IPA ), 1, 1,
456 $ DESCA, MEM( IPW ) )
457 CALL PDCHEKPAD( ICTXT, 'pdlange', NP, NQ,
458 $ MEM( IPA-IPREPAD ),
459 $ DESCA( LLD_ ),
460 $ IPREPAD, IPOSTPAD, PADVAL )
461 CALL PDCHEKPAD( ICTXT, 'pdlange',
462 $ WORKSIZ-IPOSTPAD, 1,
463 $ MEM( IPW-IPREPAD ),
464 $ WORKSIZ-IPOSTPAD,
465 $ IPREPAD, IPOSTPAD, PADVAL )
466 CALL PDFILLPAD( ICTXT, WORKINV-IPOSTPAD, 1,
467 $ MEM( IPW-IPREPAD ),
468 $ WORKINV-IPOSTPAD,
469 $ IPREPAD, IPOSTPAD, PADVAL )
470 CALL PDFILLPAD( ICTXT, WORKIINV-IPOSTPAD, 1,
471 $ MEM( IPIW-IPREPAD ),
472 $ WORKIINV-IPOSTPAD, IPREPAD,
473 $ IPOSTPAD, PADVAL )
474 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
475*
476 ANORM = PDLANTR( '1', UPLO, 'non unit', N, N,
477 $ MEM( IPA ), 1, 1, DESCA,
478 $ MEM( IPW ) )
479 CALL PDCHEKPAD( ICTXT, 'pdlantr', NP, NQ,
480 $ MEM( IPA-IPREPAD ),
481 $ DESCA( LLD_ ),
482 $ IPREPAD, IPOSTPAD, PADVAL )
483 CALL PDCHEKPAD( ICTXT, 'pdlantr',
484 $ WORKSIZ-IPOSTPAD, 1,
485 $ MEM( IPW-IPREPAD ),
486 $ WORKSIZ-IPOSTPAD,
487 $ IPREPAD, IPOSTPAD, PADVAL )
488*
489 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
490*
491 ANORM = PDLANSY( '1', UPLO, N, MEM( IPA ), 1, 1,
492 $ DESCA, MEM( IPW ) )
493 CALL PDCHEKPAD( ICTXT, 'pdlansy', NP, NQ,
494 $ MEM( IPA-IPREPAD ),
495 $ DESCA( LLD_ ),
496 $ IPREPAD, IPOSTPAD, PADVAL )
497 CALL PDCHEKPAD( ICTXT, 'pdlansy',
498 $ WORKSIZ-IPOSTPAD, 1,
499 $ MEM( IPW-IPREPAD ),
500 $ WORKSIZ-IPOSTPAD,
501 $ IPREPAD, IPOSTPAD, PADVAL )
502*
503 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'sy' ) ) THEN
504*
505 CALL PDFILLPAD( ICTXT, LIPIV, 1,
506 $ MEM( IPPIV-IPREPAD ), LIPIV,
507 $ IPREPAD, IPOSTPAD, PADVAL )
508 ANORM = PDLANSY( '1', UPLO, N, MEM( IPA ), 1, 1,
509 $ DESCA, MEM( IPW ) )
510 CALL PDCHEKPAD( ICTXT, 'pdlansy', NP, NQ,
511 $ MEM( IPA-IPREPAD ),
512 $ DESCA( LLD_ ),
513 $ IPREPAD, IPOSTPAD, PADVAL )
514 CALL PDCHEKPAD( ICTXT, 'pdlansy',
515 $ WORKSIZ-IPOSTPAD, 1,
516 $ MEM( IPW-IPREPAD ),
517 $ WORKSIZ-IPOSTPAD,
518 $ IPREPAD,IPOSTPAD, PADVAL )
519*
520 END IF
521*
522 END IF
523*
524 CALL SLBOOT()
525 CALL BLACS_BARRIER( ICTXT, 'all' )
526*
527 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
528*
529* Perform LU factorization
530*
531 CALL SLTIMER( 1 )
532 CALL PDGETRF( N, N, MEM( IPA ), 1, 1, DESCA,
533 $ MEM( IPPIV ), INFO )
534 CALL SLTIMER( 1 )
535*
536 IF( CHECK ) THEN
537*
538* Check for memory overwrite
539*
540 CALL PDCHEKPAD( ICTXT, 'pdgetrf', NP, NQ,
541 $ MEM( IPA-IPREPAD ),
542 $ DESCA( LLD_ ),
543 $ IPREPAD, IPOSTPAD, PADVAL )
544 CALL PDCHEKPAD( ICTXT, 'pdgetrf', LIPIV, 1,
545 $ MEM( IPPIV-IPREPAD ), LIPIV,
546 $ IPREPAD, IPOSTPAD, PADVAL )
547 END IF
548*
549* Perform the general matrix inversion
550*
551 CALL SLTIMER( 2 )
552 CALL PDGETRI( N, MEM( IPA ), 1, 1, DESCA,
553 $ MEM( IPPIV ), MEM( IPW ), LWORK,
554 $ MEM( IPIW ), LIWORK, INFO )
555 CALL SLTIMER( 2 )
556*
557 IF( CHECK ) THEN
558*
559* Check for memory overwrite
560*
561 CALL PDCHEKPAD( ICTXT, 'pdgetri', NP, NQ,
562 $ MEM( IPA-IPREPAD ),
563 $ DESCA( LLD_ ),
564 $ IPREPAD, IPOSTPAD, PADVAL )
565 CALL PDCHEKPAD( ICTXT, 'pdgetri', LIPIV, 1,
566 $ MEM( IPPIV-IPREPAD ), LIPIV,
567 $ IPREPAD, IPOSTPAD, PADVAL )
568 CALL PDCHEKPAD( ICTXT, 'pdgetri',
569 $ WORKIINV-IPOSTPAD, 1,
570 $ MEM( IPIW-IPREPAD ),
571 $ WORKIINV-IPOSTPAD,
572 $ IPREPAD, IPOSTPAD, PADVAL )
573 CALL PDCHEKPAD( ICTXT, 'pdgetri',
574 $ WORKINV-IPOSTPAD, 1,
575 $ MEM( IPW-IPREPAD ),
576 $ WORKINV-IPOSTPAD,
577 $ IPREPAD, IPOSTPAD, PADVAL )
578 END IF
579*
580 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
581*
582* Perform the general matrix inversion
583*
584 CALL SLTIMER( 2 )
585 CALL PDTRTRI( UPLO, 'non unit', N, MEM( IPA ), 1,
586 $ 1, DESCA, INFO )
587 CALL SLTIMER( 2 )
588*
589 IF( CHECK ) THEN
590*
591* Check for memory overwrite
592*
593 CALL PDCHEKPAD( ICTXT, 'pdtrtri', NP, NQ,
594 $ MEM( IPA-IPREPAD ),
595 $ DESCA( LLD_ ),
596 $ IPREPAD, IPOSTPAD, PADVAL )
597 END IF
598*
599 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
600*
601* Perform Cholesky factorization
602*
603 CALL SLTIMER( 1 )
604 CALL PDPOTRF( UPLO, N, MEM( IPA ), 1, 1, DESCA,
605 $ INFO )
606 CALL SLTIMER( 1 )
607*
608 IF( CHECK ) THEN
609*
610* Check for memory overwrite
611*
612 CALL PDCHEKPAD( ICTXT, 'pdpotrf', NP, NQ,
613 $ MEM( IPA-IPREPAD ),
614 $ DESCA( LLD_ ),
615 $ IPREPAD, IPOSTPAD, PADVAL )
616 END IF
617*
618* Perform the symmetric positive definite matrix
619* inversion
620*
621 CALL SLTIMER( 2 )
622 CALL PDPOTRI( UPLO, N, MEM( IPA ), 1, 1, DESCA,
623 $ INFO )
624 CALL SLTIMER( 2 )
625*
626 IF( CHECK ) THEN
627*
628* Check for memory overwrite
629*
630 CALL PDCHEKPAD( ICTXT, 'pdpotri', NP, NQ,
631 $ MEM( IPA-IPREPAD ),
632 $ DESCA( LLD_ ),
633 $ IPREPAD, IPOSTPAD, PADVAL )
634 END IF
635*
636 END IF
637*
638 IF( CHECK ) THEN
639*
640 CALL PDFILLPAD( ICTXT, WORKSIZ-IPOSTPAD, 1,
641 $ MEM( IPW-IPREPAD ),
642 $ WORKSIZ-IPOSTPAD, IPREPAD,
643 $ IPOSTPAD, PADVAL )
644*
645* Compute fresid = || inv(A)*A-I ||
646*
647 CALL PDINVCHK( MTYP, N, MEM( IPA ), 1, 1, DESCA,
648 $ IASEED, ANORM, FRESID, RCOND,
649 $ MEM( IPW ) )
650*
651* Check for memory overwrite
652*
653 CALL PDCHEKPAD( ICTXT, 'pdinvchk', NP, NQ,
654 $ MEM( IPA-IPREPAD ),
655 $ DESCA( LLD_ ),
656 $ IPREPAD, IPOSTPAD, PADVAL )
657 CALL PDCHEKPAD( ICTXT, 'pdinvchk',
658 $ WORKSIZ-IPOSTPAD, 1,
659 $ MEM( IPW-IPREPAD ),
660 $ WORKSIZ-IPOSTPAD, IPREPAD,
661 $ IPOSTPAD, PADVAL )
662*
663* Test residual and detect NaN result
664*
665.LE..AND..EQ..AND. IF( FRESIDTHRESH INFO0
666.EQ. $ ( (FRESID-FRESID) 0.0D+0 ) ) THEN
667 KPASS = KPASS + 1
668 PASSED = 'passed'
669 ELSE
670 KFAIL = KFAIL + 1
671.GT. IF( INFO0 ) THEN
672 PASSED = 'singul'
673 ELSE
674 PASSED = 'failed'
675 END IF
676 END IF
677*
678 ELSE
679*
680* Don't perform the checking, only the timing
681* operation
682*
683 KPASS = KPASS + 1
684 FRESID = FRESID - FRESID
685 PASSED = 'bypass'
686*
687 END IF
688*
689* Gather maximum of all CPU and WALL clock timings
690*
691 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 2, 1, WTIME )
692 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 2, 1, CTIME )
693*
694* Print results
695*
696.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
697*
698 IF( LSAMEN( 3, MTYP, 'gen' ) ) THEN
699*
700* 2/3 N^3 - 1/2 N^2 flops for LU factorization
701*
702 NOPS = ( 2.0D+0 / 3.0D+0 )*( DBLE( N )**3 ) -
703 $ ( 1.0D+0 / 2.0D+0 )*( DBLE( N )**2 )
704*
705* 4/3 N^3 - N^2 flops for inversion
706*
707 NOPS = NOPS +
708 $ ( 4.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) -
709 $ DBLE( N )**2
710*
711 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'tr' ) ) THEN
712*
713* 1/3 N^3 + 2/3 N flops for triangular inversion
714*
715 CTIME(1) = 0.0D+0
716 WTIME(1) = 0.0D+0
717 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
718 $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N ) )
719*
720 ELSE IF( LSAMEN( 2, MTYP( 2:3 ), 'pd' ) ) THEN
721*
722* 1/3 N^3 + 1/2 N^2 flops for Cholesky
723* factorization
724*
725 NOPS = ( 1.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
726 $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 )
727*
728* 2/3 N^3 + 1/2 N^2 flops for Cholesky inversion
729*
730 NOPS = NOPS +
731 $ ( 2.0D+0 / 3.0D+0 ) * ( DBLE( N )**3 ) +
732 $ ( 1.0D+0 / 2.0D+0 ) * ( DBLE( N )**2 )
733*
734 END IF
735*
736* Figure total megaflops -- factorization and
737* inversion, for WALL and CPU time, and print
738* output.
739*
740* Print WALL time if machine supports it
741*
742.GT. IF( WTIME( 1 ) + WTIME( 2 ) 0.0D+0 ) THEN
743 TMFLOPS = NOPS /
744 $ ( ( WTIME( 1 )+WTIME( 2 ) ) * 1.0D+6 )
745 ELSE
746 TMFLOPS = 0.0D+0
747 END IF
748*
749.GE. IF( WTIME( 2 ) 0.0D+0 )
750 $ WRITE( NOUT, FMT = 9993 ) 'wall', N, NB, NPROW,
751 $ NPCOL, WTIME( 1 ), WTIME( 2 ), TMFLOPS,
752 $ RCOND, FRESID, PASSED
753*
754* Print CPU time if machine supports it
755*
756.GT. IF( CTIME( 1 ) + CTIME( 2 ) 0.0D+0 ) THEN
757 TMFLOPS = NOPS /
758 $ ( ( CTIME( 1 )+CTIME( 2 ) ) * 1.0D+6 )
759 ELSE
760 TMFLOPS = 0.0D+0
761 END IF
762*
763.GE. IF( CTIME( 2 ) 0.0D+0 )
764 $ WRITE( NOUT, FMT = 9993 ) 'cpu ', N, NB, NPROW,
765 $ NPCOL, CTIME( 1 ), CTIME( 2 ), TMFLOPS,
766 $ RCOND, FRESID, PASSED
767 END IF
768*
769 10 CONTINUE
770*
771 20 CONTINUE
772*
773 CALL BLACS_GRIDEXIT( ICTXT )
774*
775 30 CONTINUE
776*
777 40 CONTINUE
778*
779* Print out ending messages and close output file
780*
781.EQ. IF( IAM0 ) THEN
782 KTESTS = KPASS + KFAIL + KSKIP
783 WRITE( NOUT, FMT = * )
784 WRITE( NOUT, FMT = 9992 ) KTESTS
785 IF( CHECK ) THEN
786 WRITE( NOUT, FMT = 9991 ) KPASS
787 WRITE( NOUT, FMT = 9989 ) KFAIL
788 ELSE
789 WRITE( NOUT, FMT = 9990 ) KPASS
790 END IF
791 WRITE( NOUT, FMT = 9988 ) KSKIP
792 WRITE( NOUT, FMT = * )
793 WRITE( NOUT, FMT = * )
794 WRITE( NOUT, FMT = 9987 )
795.NE..AND..NE. IF( NOUT6 NOUT0 )
796 $ CLOSE ( NOUT )
797 END IF
798*
799 CALL BLACS_EXIT( 0 )
800*
801 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
802 $ '; it should be at least 1' )
803 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
804 $ I4 )
805 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
806 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
807 $ I11 )
808 9995 FORMAT( 'time n nb p q fct time inv time ',
809 $ ' mflops cond resid check' )
810 9994 FORMAT( '---- ----- --- ----- ----- -------- -------- ',
811 $ '----------- ------- ------- ------' )
812 9993 FORMAT( A4, 1X, I5, 1X, I3, 1X, I5, 1X, I5, 1X, F8.2, 1X, F8.2,
813 $ 1X, F11.2, 1X, F7.1, 1X, F7.2, 1X, A6 )
814 9992 FORMAT( 'finished ', I6, ' tests, with the following results:' )
815 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
816 9990 FORMAT( I5, ' tests completed without checking.' )
817 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
818 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
819 9987 FORMAT( 'END OF TESTS.' )
820 9986 FORMAT( A )
821*
822 STOP
823*
824* End of PDINVDRIVER
825*
826 END
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsamen(n, ca, cb)
LSAMEN
Definition lsamen.f:74
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine pdgetrf(m, n, a, ia, ja, desca, ipiv, info)
Definition mpi.f:914
subroutine pdpotrf(uplo, n, a, ia, ja, desca, info)
Definition mpi.f:903
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
double precision function pdlange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1311
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine inversion(a, n, np, b, m, mp, iret)
Definition nlsqf.F:304
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pdchekpad.f:3
subroutine pdfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pdfillpad.f:2
subroutine pdgetri(n, a, ia, ja, desca, ipiv, work, lwork, iwork, liwork, info)
Definition pdgetri.f:3
subroutine pdinvchk(mattyp, n, a, ia, ja, desca, iaseed, anorm, fresid, rcond, work)
Definition pdinvchk.f:3
program pdinvdriver
Definition pdinvdriver.f:1
subroutine pdinvinfo(summry, nout, nmtyp, mattyp, ldmtyp, nmat, nval, ldnval, nnb, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pdinvinfo.f:5
double precision function pdlansy(norm, uplo, n, a, ia, ja, desca, work)
Definition pdlansy.f:3
double precision function pdlantr(norm, uplo, diag, m, n, a, ia, ja, desca, work)
Definition pdlantr.f:3
subroutine pdpotri(uplo, n, a, ia, ja, desca, info)
Definition pdpotri.f:2
subroutine pdtrtri(uplo, diag, n, a, ia, ja, desca, info)
Definition pdtrtri.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267