OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pclsdriver.f
Go to the documentation of this file.
1 PROGRAM pclsdriver
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* August 14, 2001
7*
8* Purpose
9* =======
10*
11* PCLSDRIVER is the main test program for the COMPLEX
12* SCALAPACK (full rank) Least Squares routines. This test driver solves
13* full-rank least square problems.
14*
15* The program must be driven by a short data file. An annotated
16* example of a data file can be obtained by deleting the first 3
17* characters from the following 17 lines:
18* 'ScaLapack LS solve input file'
19* 'Intel iPSC/860 hypercube, gamma model.'
20* 'LS.out' output file name (if any)
21* 6 device out
22* 4 number of problems sizes
23* 55 17 31 201 values of M
24* 5 71 31 201 values of N
25* 3 number of NB's
26* 2 3 5 values of NB
27* 3 number of NRHS's
28* 2 3 5 values of NRHS
29* 2 number of NBRHS's
30* 1 2 values of NBRHS
31* 7 number of process grids (ordered P & Q)
32* 1 2 1 4 2 3 8 values of P
33* 7 2 4 1 3 2 1 values of Q
34* 3.0 threshold
35*
36* Internal Parameters
37* ===================
38*
39* TOTMEM INTEGER, default = 6200000.
40* TOTMEM is a machine-specific parameter indicating the
41* maximum amount of available memory in bytes.
42* The user should customize TOTMEM to his platform. Remember
43* to leave room in memory for the operating system, the BLACS
44* buffer, etc. For example, on a system with 8 MB of memory
45* per process (e.g., one processor on an Intel iPSC/860), the
46* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47* code, BLACS buffer, etc). However, for PVM, we usually set
48* TOTMEM = 2000000. Some experimenting with the maximum value
49* of TOTMEM may be required.
50* INTGSZ INTEGER, default = 4 bytes.
51* CPLXSZ INTEGER, default = 8 bytes.
52* INTGSZ and CPLXSZ indicate the length in bytes on the
53* given platform for an integer and a single precision
54* complex.
55* MEM COMPLEX array, dimension ( TOTMEM / CPLXSZ )
56* All arrays used by SCALAPACK routines are allocated from
57* this array and referenced by pointers. The integer IPA,
58* for example, is a pointer to the starting element of MEM for
59* the matrix A.
60*
61* =====================================================================
62*
63* .. Parameters ..
64 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
65 $ lld_, mb_, m_, nb_, n_, rsrc_
66 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
67 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
68 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
69 INTEGER cplxsz, memsiz, ntests, realsz, totmem
70 REAL rzero, rone
71 COMPLEX one, padval, zero
72 parameter( cplxsz = 8, realsz = 8, totmem = 2000000,
73 $ memsiz = totmem / cplxsz, ntests = 20,
74 $ padval = ( -9923.0e+0, -9923.0e+0 ) )
75 parameter( one = ( 1.0e+0, 0.0e+0 ), rzero = 0.0e+0,
76 $ rone = 1.0e+0, zero = ( 0.0e+0, 0.0e+0 ) )
77* ..
78* .. Local Scalars ..
79 LOGICAL check, tpsd
80 CHARACTER trans
81 CHARACTER*6 passed
82 CHARACTER*80 outfile
83 INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
84 $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
85 $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
86 $ kpass, kskip, ktests, lcm, lcmp, LTAU, lwf,
87 $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
88 $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
89 $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
90 $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
91 REAL anorm, bnorm, sresid, thresh
92 DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
93* ..
94* .. Local Arrays ..
95 INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
96 $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
97 $ nbrval( ntests ), nbval( ntests ),
98 $ nrval( ntests ), nval( ntests ),
99 $ pval( ntests ), qval( ntests )
100 REAL result( 2 )
101 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
102 COMPLEX mem( memsiz )
103* ..
104* .. External Subroutines ..
105 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107 $ blacs_pinfo, descinit, igsum2d, pcchekpad,
108 $ pcfillpad, pcgels, pcgemm, pclacpy,
110 $ pcsscal, pcqrt13, pcqrt16, slboot,
112* ..
113* .. External Functions ..
114 LOGICAL lsame
115 INTEGER iceil, ilcm, numroc
116 REAL pclange, pcqrt14, PCQRT17
117 EXTERNAL iceil, ilcm, lsame, numroc, pclange,
119* ..
120* .. Intrinsic Functions ..
121 INTRINSIC max, min
122* ..
123* .. Data Statements ..
124 DATA ktests, kpass, kfail, kskip / 4*0 /
125* ..
126* .. Executable Statements ..
127*
128* Get starting information
129*
130 CALL blacs_pinfo( iam, nprocs )
131*
132 iaseed = 100
133 ibseed = 200
134 CALL pclsinfo( outfile, nout, nmat, mval, ntests, nval,
135 $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
136 $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
137 $ ntests, thresh, mem, iam, nprocs )
138 check = ( thresh.GE.0.0e+0 )
139*
140* Print headings
141*
142 IF( iam.EQ.0 ) THEN
143 WRITE( nout, fmt = * )
144 WRITE( nout, fmt = 9995 )
145 WRITE( nout, fmt = 9994 )
146 WRITE( nout, fmt = * )
147 END IF
148*
149* Loop over different process grids
150*
151 DO 90 i = 1, ngrids
152*
153 nprow = pval( i )
154 npcol = qval( i )
155*
156* Make sure grid information is correct
157*
158 ierr( 1 ) = 0
159 IF( nprow.LT.1 ) THEN
160 IF( iam.EQ.0 )
161 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
162 ierr( 1 ) = 1
163 ELSE IF( npcol.LT.1 ) THEN
164 IF( iam.EQ.0 )
165 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
166 ierr( 1 ) = 1
167 ELSE IF( nprow*npcol.GT.nprocs ) THEN
168 IF( iam.EQ.0 )
169 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
170 ierr( 1 ) = 1
171 END IF
172*
173 IF( ierr( 1 ).GT.0 ) THEN
174 IF( iam.EQ.0 )
175 $ WRITE( nout, fmt = 9997 ) 'grid'
176 kskip = kskip + 1
177 GO TO 90
178 END IF
179*
180* Define process grid
181*
182 CALL blacs_get( -1, 0, ictxt )
183 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
184 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
185*
186* Go to bottom of loop if this case doesn't use my process
187*
188 IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
189 $ GO TO 90
190*
191 DO 80 j = 1, nmat
192*
193 m = mval( j )
194 n = nval( j )
195*
196* Make sure matrix information is correct
197*
198 ierr( 1 ) = 0
199 IF( m.LT.1 ) THEN
200 IF( iam.EQ.0 )
201 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
202 ierr( 1 ) = 1
203 ELSE IF( n.LT.1 ) THEN
204 IF( iam.EQ.0 )
205 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
206 ierr( 1 ) = 1
207 END IF
208*
209* Make sure no one had error
210*
211 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
212*
213 IF( ierr( 1 ).GT.0 ) THEN
214 IF( iam.EQ.0 )
215 $ WRITE( nout, fmt = 9997 ) 'matrix'
216 kskip = kskip + 1
217 GO TO 80
218 END IF
219*
220* Loop over different blocking sizes
221*
222 DO 70 k = 1, nnb
223*
224 nb = nbval( k )
225*
226* Make sure nb is legal
227*
228 ierr( 1 ) = 0
229 IF( nb.LT.1 ) THEN
230 ierr( 1 ) = 1
231 IF( iam.EQ.0 )
232 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
233 END IF
234*
235* Check all processes for an error
236*
237 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
238*
239 IF( ierr( 1 ).GT.0 ) THEN
240 IF( iam.EQ.0 )
241 $ WRITE( nout, fmt = 9997 ) 'NB'
242 kskip = kskip + 1
243 GO TO 70
244 END IF
245*
246* Padding constants
247*
248 mp = numroc( m, nb, myrow, 0, nprow )
249 mq = numroc( m, nb, mycol, 0, npcol )
250 np = numroc( n, nb, myrow, 0, nprow )
251 mnp = max( mp, np )
252 nq = numroc( n, nb, mycol, 0, npcol )
253*
254 IF( check ) THEN
255 iprepad = max( nb, mp )
256 imidpad = nb
257 ipostpad = max( nb, nq )
258 ELSE
259 iprepad = 0
260 imidpad = 0
261 ipostpad = 0
262 END IF
263*
264* Initialize the array descriptor for the matrix A
265*
266 CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
267 $ max( 1, mp ) + imidpad, ierr( 1 ) )
268*
269* Check all processes for an error
270*
271 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
272*
273 IF( ierr( 1 ).LT.0 ) THEN
274 IF( iam.EQ.0 )
275 $ WRITE( nout, fmt = 9997 ) 'descriptor'
276 kskip = kskip + 1
277 GO TO 70
278 END IF
279*
280 DO 60 iscale = 1, 3
281*
282 itype = iscale
283*
284* Assign pointers into MEM for SCALAPACK arrays, A is
285* allocated starting at position MEM( IPREPAD+1 )
286*
287 ipa = iprepad + 1
288 ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
289 ipw = ipx
290*
291 worksiz = nq + ipostpad
292*
293* Check for adequate memory for problem size
294*
295 ierr( 1 ) = 0
296 IF( ( ipw+worksiz ).GT.memsiz ) THEN
297 IF( iam.EQ.0 )
298 $ WRITE( nout, fmt = 9996 ) 'MEMORY',
299 $ ( ipx+worksiz )*cplxsz
300 ierr( 1 ) = 1
301 END IF
302*
303* Check all processes for an error
304*
305 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
306 $ 0 )
307*
308 IF( ierr( 1 ).GT.0 ) THEN
309 IF( iam.EQ.0 )
310 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
311 kskip = kskip + 1
312 GO TO 70
313 END IF
314*
315 IF( check ) THEN
316 CALL pcfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
317 $ desca( lld_ ), iprepad, ipostpad,
318 $ padval )
319 CALL pcfillpad( ictxt, worksiz-ipostpad, 1,
320 $ mem( ipw-iprepad ),
321 $ worksiz-ipostpad, iprepad,
322 $ ipostpad, padval )
323 END IF
324*
325* Generate the matrix A and calculate its 1-norm
326*
327 CALL pcqrt13( iscale, m, n, mem( ipa ), 1, 1,
328 $ desca, anorm, iaseed, mem( ipw ) )
329*
330 IF( check ) THEN
331 CALL pcchekpad( ictxt, 'PCQRT13', mp, nq,
332 $ mem( ipa-iprepad ), desca( lld_ ),
333 $ iprepad, ipostpad, padval )
334 CALL pcchekpad( ictxt, 'PCQRT13',
335 $ worksiz-ipostpad, 1,
336 $ mem( ipw-iprepad ),
337 $ worksiz-ipostpad, iprepad,
338 $ ipostpad, padval )
339 END IF
340*
341 DO 50 itran = 1, 2
342*
343 IF( itran.EQ.1 ) THEN
344 nrows = m
345 ncols = n
346 trans = 'N'
347 tpsd = .false.
348 ELSE
349 nrows = n
350 ncols = m
351 trans = 'C'
352 tpsd = .true.
353 END IF
354*
355* Loop over the different values for NRHS
356*
357 DO 40 hh = 1, nnr
358*
359 nrhs = nrval( hh )
360*
361 DO 30 kk = 1, nnbr
362*
363 nbrhs = nbrval( kk )
364*
365 nrhsp = numroc( nrhs, nbrhs, myrow, 0,
366 $ nprow )
367 nrhsq = numroc( nrhs, nbrhs, mycol, 0,
368 $ npcol )
369*
370* Define Array descriptor for rhs MAX(M,N)xNRHS
371*
372 CALL descinit( descx, max( m, n ), nrhs, nb,
373 $ nbrhs, 0, 0, ictxt,
374 $ max( 1, mnp ) + imidpad,
375 $ ierr( 1 ) )
376 IF( tpsd ) THEN
377 CALL descinit( descw, m, nrhs, nb, nbrhs,
378 $ 0, 0, ictxt, max( 1, mp ) +
379 $ imidpad, ierr( 2 ) )
380 ELSE
381 CALL descinit( descw, n, nrhs, nb, nbrhs,
382 $ 0, 0, ictxt, max( 1, np ) +
383 $ imidpad, ierr( 2 ) )
384 END IF
385*
386* Check all processes for an error
387*
388 CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
389 $ 2, -1, 0 )
390*
391 IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
392 IF( iam.EQ.0 )
393 $ WRITE( nout, fmt = 9997 ) 'descriptor'
394 kskip = kskip + 1
395 GO TO 30
396 END IF
397*
398* Check for enough memory
399*
400 ipx = ipa + desca( lld_ )*nq + ipostpad +
401 $ iprepad
402 ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
403 $ iprepad
404 worksiz = descw( lld_ )*nrhsq + ipostpad
405*
406 ierr( 1 ) = 0
407 IF( ipw+worksiz.GT.memsiz ) THEN
408 IF( iam.EQ.0 )
409 $ WRITE( nout, fmt = 9996 ) 'generation',
410 $ ( IPW+WORKSIZ )*CPLXSZ
411 IERR( 1 ) = 1
412 END IF
413*
414* Check all processes for an error
415*
416 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
417 $ 1, -1, 0 )
418*
419.GT. IF( IERR( 1 )0 ) THEN
420.EQ. IF( IAM0 )
421 $ WRITE( NOUT, FMT = 9997 ) 'memory'
422 KSKIP = KSKIP + 1
423 GO TO 30
424 END IF
425*
426* Generate RHS
427*
428 IF( TPSD ) THEN
429 CALL PCMATGEN( ICTXT, 'no', 'no',
430 $ DESCW( M_ ), DESCW( N_ ),
431 $ DESCW( MB_ ), DESCW( NB_ ),
432 $ MEM( IPW ), DESCW( LLD_ ),
433 $ DESCW( RSRC_ ),
434 $ DESCW( CSRC_ ), IBSEED, 0,
435 $ MP, 0, NRHSQ, MYROW, MYCOL,
436 $ NPROW, NPCOL )
437 ELSE
438 CALL PCMATGEN( ICTXT, 'no', 'no',
439 $ DESCW( M_ ), DESCW( N_ ),
440 $ DESCW( MB_ ), DESCW( NB_ ),
441 $ MEM( IPW ), DESCW( LLD_ ),
442 $ DESCW( RSRC_ ),
443 $ DESCW( CSRC_ ), IBSEED, 0,
444 $ NP, 0, NRHSQ, MYROW, MYCOL,
445 $ NPROW, NPCOL )
446 END IF
447*
448 IF( CHECK ) THEN
449 CALL PCFILLPAD( ICTXT, MNP, NRHSQ,
450 $ MEM( IPX-IPREPAD ),
451 $ DESCX( LLD_ ), IPREPAD,
452 $ IPOSTPAD, PADVAL )
453 IF( TPSD ) THEN
454 CALL PCFILLPAD( ICTXT, MP, NRHSQ,
455 $ MEM( IPW-IPREPAD ),
456 $ DESCW( LLD_ ), IPREPAD,
457 $ IPOSTPAD, PADVAL )
458 ELSE
459 CALL PCFILLPAD( ICTXT, NP, NRHSQ,
460 $ MEM( IPW-IPREPAD ),
461 $ DESCW( LLD_ ), IPREPAD,
462 $ IPOSTPAD, PADVAL )
463 END IF
464 END IF
465*
466 DO 10 JJ = 1, NRHS
467 CALL PSCNRM2( NCOLS, BNORM, MEM( IPW ),
468 $ 1, JJ, DESCW, 1 )
469.GT. IF( BNORMRZERO )
470 $ CALL PCSSCAL( NCOLS, RONE / BNORM,
471 $ MEM( IPW ), 1, JJ, DESCW,
472 $ 1 )
473 10 CONTINUE
474*
475 CALL PCGEMM( TRANS, 'n', NROWS, NRHS, NCOLS,
476 $ ONE, MEM( IPA ), 1, 1, DESCA,
477 $ MEM( IPW ), 1, 1, DESCW, ZERO,
478 $ MEM( IPX ), 1, 1, DESCX )
479*
480 IF( CHECK ) THEN
481*
482* check for memory overwrite
483*
484 CALL PCCHEKPAD( ICTXT, 'generation', MP,
485 $ NQ, MEM( IPA-IPREPAD ),
486 $ DESCA( LLD_ ), IPREPAD,
487 $ IPOSTPAD, PADVAL )
488 CALL PCCHEKPAD( ICTXT, 'generation', MNP,
489 $ NRHSQ, MEM( IPX-IPREPAD ),
490 $ DESCX( LLD_ ), IPREPAD,
491 $ IPOSTPAD, PADVAL )
492 IF( TPSD ) THEN
493 CALL PCCHEKPAD( ICTXT, 'generation',
494 $ MP, NRHSQ,
495 $ MEM( IPW-IPREPAD ),
496 $ DESCW( LLD_ ), IPREPAD,
497 $ IPOSTPAD, PADVAL )
498 ELSE
499 CALL PCCHEKPAD( ICTXT, 'generation',
500 $ NP, NRHSQ,
501 $ MEM( IPW-IPREPAD ),
502 $ DESCW( LLD_ ), IPREPAD,
503 $ IPOSTPAD, PADVAL )
504 END IF
505*
506* Allocate space for copy of rhs
507*
508 IPB = IPW
509*
510 IF( TPSD ) THEN
511 CALL DESCINIT( DESCB, N, NRHS, NB,
512 $ NBRHS, 0, 0, ICTXT,
513 $ MAX( 1, NP ) + IMIDPAD,
514 $ IERR( 1 ) )
515 ELSE
516 CALL DESCINIT( DESCB, M, NRHS, NB,
517 $ NBRHS, 0, 0, ICTXT,
518 $ MAX( 1, MP ) + IMIDPAD,
519 $ IERR( 1 ) )
520 END IF
521*
522* Check all processes for an error
523*
524 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
525 $ IERR, 1, -1, 0 )
526*
527.LT. IF( IERR( 1 )0 ) THEN
528.EQ. IF( IAM0 )
529 $ WRITE( NOUT, FMT = 9997 )
530 $ 'descriptor'
531 KSKIP = KSKIP + 1
532 GO TO 30
533 END IF
534*
535 IPW = IPB + DESCB( LLD_ )*NRHSQ +
536 $ IPOSTPAD + IPREPAD
537*
538 END IF
539*
540* Calculate the amount of workspace for PCGELS
541*
542.GE. IF( MN ) THEN
543 LTAU = NUMROC( MIN(M,N), NB, MYCOL, 0,
544 $ NPCOL )
545 LWF = NB * ( MP + NQ + NB )
546 LWS = MAX( ( NB*( NB - 1 ) ) / 2,
547 $ ( MP + NRHSQ ) * NB ) + NB*NB
548 ELSE
549 LCM = ILCM( NPROW, NPCOL )
550 LCMP = LCM / NPROW
551 LTAU = NUMROC( MIN(M,N), NB, MYROW, 0,
552 $ NPROW )
553 LWF = NB * ( MP + NQ + NB )
554 LWS = MAX( ( NB*( NB - 1 ) ) / 2, ( NP +
555 $ MAX( NQ + NUMROC( NUMROC( N, NB, 0,
556 $ 0, NPROW ), NB, 0, 0, LCMP ),
557 $ NRHSQ ) ) * NB ) + NB*NB
558 END IF
559*
560 LWORK = LTAU + MAX( LWF, LWS )
561 WORKSIZ = LWORK + IPOSTPAD
562*
563* Check for adequate memory for problem size
564*
565 IERR( 1 ) = 0
566.GT. IF( IPW+WORKSIZMEMSIZ ) THEN
567.EQ. IF( IAM0 )
568 $ WRITE( NOUT, FMT = 9996 ) 'solve',
569 $ ( IPW+WORKSIZ )*CPLXSZ
570 IERR( 1 ) = 1
571 END IF
572*
573* Check all processes for an error
574*
575 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1, IERR,
576 $ 1, -1, 0 )
577*
578.GT. IF( IERR( 1 )0 ) THEN
579.EQ. IF( IAM0 )
580 $ WRITE( NOUT, FMT = 9997 ) 'memory'
581 KSKIP = KSKIP + 1
582 GO TO 30
583 END IF
584*
585 IF( CHECK ) THEN
586*
587* Make the copy of the right hand side
588*
589 CALL PCLACPY( 'all', NROWS, NRHS,
590 $ MEM( IPX ), 1, 1, DESCX,
591 $ MEM( IPB ), 1, 1, DESCB )
592*
593 IF( TPSD ) THEN
594 CALL PCFILLPAD( ICTXT, NP, NRHSQ,
595 $ MEM( IPB-IPREPAD ),
596 $ DESCB( LLD_ ), IPREPAD,
597 $ IPOSTPAD, PADVAL )
598 ELSE
599 CALL PCFILLPAD( ICTXT, MP, NRHSQ,
600 $ MEM( IPB-IPREPAD ),
601 $ DESCB( LLD_ ), IPREPAD,
602 $ IPOSTPAD, PADVAL )
603 END IF
604 CALL PCFILLPAD( ICTXT, LWORK, 1,
605 $ MEM( IPW-IPREPAD ),
606 $ LWORK, IPREPAD,
607 $ IPOSTPAD, PADVAL )
608 END IF
609*
610 CALL SLBOOT( )
611 CALL BLACS_BARRIER( ICTXT, 'all' )
612 CALL SLTIMER( 1 )
613*
614* Solve the LS or overdetermined system
615*
616 CALL PCGELS( TRANS, M, N, NRHS, MEM( IPA ),
617 $ 1, 1, DESCA, MEM( IPX ), 1, 1,
618 $ DESCX, MEM( IPW ), LWORK, INFO )
619*
620 CALL SLTIMER( 1 )
621*
622 IF( CHECK ) THEN
623*
624* check for memory overwrite
625*
626 CALL PCCHEKPAD( ICTXT, 'pcgels', MP,
627 $ NQ, MEM( IPA-IPREPAD ),
628 $ DESCA( LLD_ ), IPREPAD,
629 $ IPOSTPAD, PADVAL )
630 CALL PCCHEKPAD( ICTXT, 'pcgels', MNP,
631 $ NRHSQ, MEM( IPX-IPREPAD ),
632 $ DESCX( LLD_ ), IPREPAD,
633 $ IPOSTPAD, PADVAL )
634 CALL PCCHEKPAD( ICTXT, 'pcgels', LWORK,
635 $ 1, MEM( IPW-IPREPAD ),
636 $ LWORK, IPREPAD,
637 $ IPOSTPAD, PADVAL )
638 END IF
639*
640* Regenerate A in place for testing and next
641* iteration
642*
643 CALL PCQRT13( ISCALE, M, N, MEM( IPA ), 1, 1,
644 $ DESCA, ANORM, IASEED,
645 $ MEM( IPW ) )
646*
647* check the solution to rhs
648*
649 IF( CHECK ) THEN
650*
651* Am I going to call PCQRT17 ?
652*
653.GE..AND..NOT..OR. IF( ( MN ( TPSD ) )
654.LT..AND. $ ( MN TPSD ) ) THEN
655*
656* Call PCQRT17 first, A, X, and B remain
657* unchanged. Solving LS system
658*
659* Check amount of memory for PCQRT17
660*
661 IF( TPSD ) THEN
662 WORKSIZ = NP*NRHSQ + NRHSP*MQ
663 IPW2 = IPW + WORKSIZ
664 WORKSIZ = WORKSIZ +
665 $ ICEIL( REALSZ*MAX( NQ, MAX(
666 $ MQ, NRHSQ ) ), CPLXSZ ) +
667 $ IPOSTPAD
668 ELSE
669 WORKSIZ = MP*NRHSQ + NRHSP*NQ
670 IPW2 = IPW + WORKSIZ
671 WORKSIZ = WORKSIZ +
672 $ ICEIL( REALSZ*MAX( NQ,
673 $ NRHSQ ), CPLXSZ ) +
674 $ IPOSTPAD
675 END IF
676*
677* Check for adequate memory for problem
678* size
679*
680 IERR( 1 ) = 0
681.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
682.EQ. IF( IAM0 )
683 $ WRITE( NOUT, FMT = 9996 )
684 $ 'memory', ( IPW+WORKSIZ )*CPLXSZ
685 IERR( 1 ) = 1
686 END IF
687*
688* Check all processes for an error
689*
690 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
691 $ IERR, 1, -1, 0 )
692*
693.GT. IF( IERR( 1 )0 ) THEN
694.EQ. IF( IAM0 )
695 $ WRITE( NOUT, FMT = 9997 )
696 $ 'memory'
697 KSKIP = KSKIP + 1
698 GO TO 30
699 END IF
700*
701 CALL PCFILLPAD( ICTXT,
702 $ WORKSIZ-IPOSTPAD, 1,
703 $ MEM( IPW-IPREPAD ),
704 $ WORKSIZ-IPOSTPAD,
705 $ IPREPAD, IPOSTPAD,
706 $ PADVAL )
707*
708 RESULT( 2 ) = PCQRT17( TRANS, 1, M, N,
709 $ NRHS,
710 $ MEM( IPA ),
711 $ 1, 1, DESCA,
712 $ MEM( IPX ), 1,
713 $ 1, DESCX,
714 $ MEM( IPB ),
715 $ 1, 1, DESCB,
716 $ MEM( IPW ),
717 $ MEM( IPW2 ) )
718 SRESID = RESULT( 2 )
719*
720 CALL PCCHEKPAD( ICTXT, 'pcqrt17',
721 $ MP, NQ,
722 $ MEM( IPA-IPREPAD ),
723 $ DESCA( LLD_ ),
724 $ IPREPAD, IPOSTPAD,
725 $ PADVAL )
726 CALL PCCHEKPAD( ICTXT, 'pcqrt17',
727 $ MNP, NRHSQ,
728 $ MEM( IPX-IPREPAD ),
729 $ DESCX( LLD_ ), IPREPAD,
730 $ IPOSTPAD, PADVAL )
731 IF( TPSD ) THEN
732 CALL PCCHEKPAD( ICTXT, 'pcqrt17',
733 $ NP, NRHSQ,
734 $ MEM( IPB-IPREPAD ),
735 $ DESCB( LLD_ ),
736 $ IPREPAD, IPOSTPAD,
737 $ PADVAL )
738 ELSE
739 CALL PCCHEKPAD( ICTXT, 'pcqrt17',
740 $ MP, NRHSQ,
741 $ MEM( IPB-IPREPAD ),
742 $ DESCB( LLD_ ),
743 $ IPREPAD, IPOSTPAD,
744 $ PADVAL )
745 END IF
746 CALL PCCHEKPAD( ICTXT, 'pcqrt17',
747 $ WORKSIZ-IPOSTPAD, 1,
748 $ MEM( IPW-IPREPAD ),
749 $ WORKSIZ-IPOSTPAD,
750 $ IPREPAD, IPOSTPAD,
751 $ PADVAL )
752 END IF
753*
754* Call PCQRT16, B will be destroyed.
755*
756 IF( TPSD ) THEN
757 WORKSIZ = MP + IPOSTPAD
758 ELSE
759 WORKSIZ = NQ + IPOSTPAD
760 END IF
761*
762* Check for adequate memory for problem size
763*
764 IERR( 1 ) = 0
765.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
766.EQ. IF( IAM0 )
767 $ WRITE( NOUT, FMT = 9996 ) 'memory',
768 $ ( IPW+WORKSIZ )*CPLXSZ
769 IERR( 1 ) = 1
770 END IF
771*
772* Check all processes for an error
773*
774 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
775 $ IERR, 1, -1, 0 )
776*
777.GT. IF( IERR( 1 )0 ) THEN
778.EQ. IF( IAM0 )
779 $ WRITE( NOUT, FMT = 9997 ) 'memory'
780 KSKIP = KSKIP + 1
781 GO TO 30
782 END IF
783*
784 CALL PCFILLPAD( ICTXT,
785 $ WORKSIZ-IPOSTPAD, 1,
786 $ MEM( IPW-IPREPAD ),
787 $ WORKSIZ-IPOSTPAD,
788 $ IPREPAD, IPOSTPAD,
789 $ PADVAL )
790*
791 CALL PCQRT16( TRANS, M, N, NRHS,
792 $ MEM( IPA ), 1, 1, DESCA,
793 $ MEM( IPX ), 1, 1, DESCX,
794 $ MEM( IPB ), 1, 1, DESCB,
795 $ MEM( IPW ), RESULT( 1 ) )
796*
797 CALL PCCHEKPAD( ICTXT, 'pcqrt16',
798 $ MP, NQ,
799 $ MEM( IPA-IPREPAD ),
800 $ DESCA( LLD_ ),
801 $ IPREPAD, IPOSTPAD,
802 $ PADVAL )
803 CALL PCCHEKPAD( ICTXT, 'pcqrt16',
804 $ MNP, NRHSQ,
805 $ MEM( IPX-IPREPAD ),
806 $ DESCX( LLD_ ), IPREPAD,
807 $ IPOSTPAD, PADVAL )
808 IF( TPSD ) THEN
809 CALL PCCHEKPAD( ICTXT, 'pcqrt16',
810 $ NP, NRHSQ,
811 $ MEM( IPB-IPREPAD ),
812 $ DESCB( LLD_ ),
813 $ IPREPAD, IPOSTPAD,
814 $ PADVAL )
815 ELSE
816 CALL PCCHEKPAD( ICTXT, 'pcqrt16',
817 $ MP, NRHSQ,
818 $ MEM( IPB-IPREPAD ),
819 $ DESCB( LLD_ ),
820 $ IPREPAD, IPOSTPAD,
821 $ PADVAL )
822 END IF
823 CALL PCCHEKPAD( ICTXT, 'pcqrt16',
824 $ WORKSIZ-IPOSTPAD, 1,
825 $ MEM( IPW-IPREPAD ),
826 $ WORKSIZ-IPOSTPAD,
827 $ IPREPAD, IPOSTPAD,
828 $ PADVAL )
829*
830* Call PCQRT14
831*
832.GE..AND..OR. IF( ( MN TPSD )
833.LT..AND..NOT. $ ( MN ( TPSD ) ) ) THEN
834*
835 IPW = IPB
836*
837 IF( TPSD ) THEN
838*
839 NNRHSQ = NUMROC( N+NRHS, NB, MYCOL,
840 $ 0, NPCOL )
841 LTAU = NUMROC( MIN( M, N+NRHS ), NB,
842 $ MYCOL, 0, NPCOL )
843 LWF = NB * ( NB + MP + NNRHSQ )
844 WORKSIZ = MP * NNRHSQ + LTAU + LWF +
845 $ IPOSTPAD
846*
847 ELSE
848*
849 MNRHSP = NUMROC( M+NRHS, NB, MYROW,
850 $ 0, NPROW )
851 LTAU = NUMROC( MIN( M+NRHS, N ), NB,
852 $ MYROW, 0, NPROW )
853 LWF = NB * ( NB + MNRHSP + NQ )
854 WORKSIZ = MNRHSP * NQ + LTAU + LWF +
855 $ IPOSTPAD
856*
857 END IF
858*
859* Check for adequate memory for problem
860* size
861*
862 IERR( 1 ) = 0
863.GT. IF( ( IPW+WORKSIZ )MEMSIZ ) THEN
864.EQ. IF( IAM0 )
865 $ WRITE( NOUT, FMT = 9996 )
866 $ 'memory', ( IPW+WORKSIZ )*CPLXSZ
867 IERR( 1 ) = 1
868 END IF
869*
870* Check all processes for an error
871*
872 CALL IGSUM2D( ICTXT, 'all', ' ', 1, 1,
873 $ IERR, 1, -1, 0 )
874*
875.GT. IF( IERR( 1 )0 ) THEN
876.EQ. IF( IAM0 )
877 $ WRITE( NOUT, FMT = 9997 )
878 $ 'memory'
879 KSKIP = KSKIP + 1
880 GO TO 30
881 END IF
882*
883 CALL PCFILLPAD( ICTXT,
884 $ WORKSIZ-IPOSTPAD, 1,
885 $ MEM( IPW-IPREPAD ),
886 $ WORKSIZ-IPOSTPAD,
887 $ IPREPAD, IPOSTPAD,
888 $ PADVAL )
889*
890* Solve underdetermined system
891*
892 RESULT( 2 ) = PCQRT14( TRANS, M, N,
893 $ NRHS,
894 $ MEM( IPA ), 1,
895 $ 1, DESCA,
896 $ MEM( IPX ),
897 $ 1, 1, DESCX,
898 $ MEM( IPW ) )
899 SRESID = RESULT( 2 )
900*
901 CALL PCCHEKPAD( ICTXT, 'pcqrt14',
902 $ MP, NQ,
903 $ MEM( IPA-IPREPAD ),
904 $ DESCA( LLD_ ),
905 $ IPREPAD, IPOSTPAD,
906 $ PADVAL )
907 CALL PCCHEKPAD( ICTXT, 'pcqrt14',
908 $ MNP, NRHSQ,
909 $ MEM( IPX-IPREPAD ),
910 $ DESCX( LLD_ ), IPREPAD,
911 $ IPOSTPAD, PADVAL )
912 CALL PCCHEKPAD( ICTXT, 'pcqrt14',
913 $ WORKSIZ-IPOSTPAD, 1,
914 $ MEM( IPW-IPREPAD ),
915 $ WORKSIZ-IPOSTPAD,
916 $ IPREPAD, IPOSTPAD,
917 $ PADVAL )
918 END IF
919*
920* Print information about the tests that
921* did not pass the threshold.
922*
923 PASSED = 'passed'
924 DO 20 II = 1, 2
925.GE..AND. IF( ( RESULT( II )THRESH )
926.EQ. $ ( RESULT( II )-RESULT( II )0.0E+0
927 $ ) ) THEN
928.EQ. IF( IAM0 )
929 $ WRITE( NOUT, FMT = 9986 )TRANS,
930 $ M, N, NRHS, NB, ITYPE, II,
931 $ RESULT( II )
932 KFAIL = KFAIL + 1
933 PASSED = 'failed'
934 ELSE
935 KPASS = KPASS + 1
936 END IF
937 20 CONTINUE
938*
939 ELSE
940*
941* By-pass the solve check
942*
943 KPASS = KPASS + 1
944 SRESID = SRESID - SRESID
945 PASSED = 'bypass'
946*
947 END IF
948*
949* Gather maximum of all CPU and WALL clock
950* timings
951*
952 CALL SLCOMBINE( ICTXT, 'all', '>', 'w', 1, 1,
953 $ WTIME )
954 CALL SLCOMBINE( ICTXT, 'all', '>', 'c', 1, 1,
955 $ CTIME )
956*
957* Print results
958*
959.EQ..AND..EQ. IF( MYROW0 MYCOL0 ) THEN
960 ADDFAC = 2
961 MULFAC = 6
962.GE. IF( MN ) THEN
963*
964* NOPS = SOPLA( 'CGEQRF', M, N, 0, 0,
965* NB ) + SOPLA( 'CUNMQR', M, NRHS, N,
966* 0, NB )
967*
968 MULTS = N*( ( ( 23.D0 / 6.D0 )+M+N /
969 $ 2.D0 )+ N*( M-N / 3.D0 ) ) +
970 $ N*NRHS*( 2.D0*M+2.D0-N )
971 ADDS = N*( ( 5.D0 / 6.D0 )+N*
972 $ ( 1.D0 / 2.D0+( M-N / 3.D0 ) ) )
973 $ + N*NRHS*( 2.D0*M+1.D0-N )
974 ELSE
975*
976* NOPS = SOPLA( 'CGELQF', M, N, 0, 0,
977* NB ) + SOPLA( 'CUNMLQ', M,
978* NRHS, N, 0, NB )
979*
980 MULTS = M*( ( ( 29.D0 / 6.D0 )+2.D0*N-M
981 $ / 2.D0 )+M*( N-M / 3.D0 ) )
982 $ + N*NRHS*( 2.D0*M+2.D0-N )
983 ADDS = M*( ( 5.D0 / 6.D0 )+M / 2.D0+M*
984 $ ( N-M / 3.D0 ) )
985 $ + N*NRHS*( 2.D0*M+1.D0-N )
986 END IF
987 NOPS = ADDFAC*ADDS + MULFAC*MULTS
988*
989* Calculate total megaflops, for WALL and
990* CPU time, and print output
991*
992* Print WALL time if machine supports it
993*
994.GT. IF( WTIME( 1 )0.0D+0 ) THEN
995 TMFLOPS = NOPS / ( WTIME( 1 )*1.0D+6 )
996 ELSE
997 TMFLOPS = 0.0D+0
998 END IF
999*
1000.GE. IF( WTIME( 1 )0.0D+0 )
1001 $ WRITE( NOUT, FMT = 9993 )
1002 $ 'wall', TRANS, M, N, NB, NRHS,
1003 $ NBRHS, NPROW, NPCOL, WTIME( 1 ),
1004 $ TMFLOPS, PASSED
1005*
1006* Print CPU time if machine supports it
1007*
1008.GT. IF( CTIME( 1 )0.0D+0 ) THEN
1009 TMFLOPS = NOPS / ( CTIME( 1 )*1.0D+6 )
1010 ELSE
1011 TMFLOPS = 0.0D+0
1012 END IF
1013*
1014.GE. IF( CTIME( 1 )0.0D+0 )
1015 $ WRITE( NOUT, FMT = 9993 )
1016 $ 'cpu ', TRANS, M, N, NB, NRHS,
1017 $ NBRHS, NPROW, NPCOL, CTIME( 1 ),
1018 $ TMFLOPS, PASSED
1019 END IF
1020 30 CONTINUE
1021 40 CONTINUE
1022 50 CONTINUE
1023 60 CONTINUE
1024 70 CONTINUE
1025 80 CONTINUE
1026 CALL BLACS_GRIDEXIT( ICTXT )
1027 90 CONTINUE
1028*
1029* Print out ending messages and close output file
1030*
1031.EQ. IF( IAM0 ) THEN
1032 KTESTS = KPASS + KFAIL + KSKIP
1033 WRITE( NOUT, FMT = * )
1034 WRITE( NOUT, FMT = 9992 ) KTESTS
1035 IF( CHECK ) THEN
1036 WRITE( NOUT, FMT = 9991 ) KPASS
1037 WRITE( NOUT, FMT = 9989 ) KFAIL
1038 ELSE
1039 WRITE( NOUT, FMT = 9990 ) KPASS
1040 END IF
1041 WRITE( NOUT, FMT = 9988 ) KSKIP
1042 WRITE( NOUT, FMT = * )
1043 WRITE( NOUT, FMT = * )
1044 WRITE( NOUT, FMT = 9987 )
1045.NE..AND..NE. IF( NOUT6 NOUT0 )
1046 $ CLOSE ( NOUT )
1047 END IF
1048*
1049 CALL BLACS_EXIT( 0 )
1050*
1051 9999 FORMAT( 'illegal ', A6, ': ', A5, ' = ', I3,
1052 $ '; it should be at least 1' )
1053 9998 FORMAT( 'illegal grid: nprow*npcol = ', I4, '. it can be at most',
1054 $ I4 )
1055 9997 FORMAT( 'bad ', A6, ' parameters: going on to next test case.' )
1056 9996 FORMAT( 'unable to perform ', A, ': need totmem of at least',
1057 $ I11 )
1058 9995 FORMAT( 'time trans m n nb nrhs nbrhs p q ',
1059 $ 'ls time mflops check' )
1060 9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1061 $ '--------- -------- ------' )
1062 9993 FORMAT( A4, 3X, A1, 3X, I6, 1X, I6, 1X, I3, 1X, I5, 1X, I5, 1X,
1063 $ I5, 1X, I5, 1X, F9.2, 1X, F8.2, 1X, A6 )
1064 9992 FORMAT( 'finished', I6, ' tests, with the following results:' )
1065 9991 FORMAT( I5, ' tests completed and passed residual checks.' )
1066 9990 FORMAT( I5, ' tests completed without checking.' )
1067 9989 FORMAT( I5, ' tests completed and failed residual checks.' )
1068 9988 FORMAT( I5, ' tests skipped because of illegal input values.' )
1069 9987 FORMAT( 'END OF TESTS.' )
1070 9986 FORMAT( ' TRANS=''', A1, ''', M=', I5, ', N=', I5, ', NRHS=', I4,
1071 $ ', NB=', I4, ', type', I2, ', test(', I2, ')=', G12.5 )
1072*
1073 STOP
1074*
1075* End of PCLSDRIVER
1076*
1077 END
subroutine pcmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pcmatgen.f:4
end diagonal values have been computed in the(sparse) matrix id.SOL
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
integer function ilcm(m, n)
Definition ilcm.f:2
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine blacs_gridinit(cntxt, c, nprow, npcol)
Definition mpi.f:745
subroutine pscnrm2(n, norm2, x, ix, jx, descx, incx)
Definition mpi.f:1231
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition mpi.f:777
real function pclange(norm, m, n, a, ia, ja, desca, work)
Definition mpi.f:1275
subroutine blacs_gridexit(cntxt)
Definition mpi.f:762
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pcchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pcchekpad.f:3
subroutine pcfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pcfillpad.f:2
subroutine pcgels(trans, m, n, nrhs, a, ia, ja, desca, b, ib, jb, descb, work, lwork, info)
Definition pcgels.f:3
subroutine pclacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pclacpy.f:3
program pclsdriver
Definition pclsdriver.f:1
subroutine pclsinfo(summry, nout, nmat, mval, ldmval, nval, ldnval, nnb, nbval, ldnbval, nnr, nrval, ldnrval, nnbr, nbrval, ldnbrval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pclsinfo.f:6
subroutine pcqrt13(scale, m, n, a, ia, ja, desca, norma, iseed, work)
Definition pcqrt13.f:3
real function pcqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition pcqrt14.f:3
subroutine pcqrt16(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, rwork, resid)
Definition pcqrt16.f:3
real function pcqrt17(trans, iresid, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, b, ib, jb, descb, work, rwork)
Definition pcqrt17.f:5
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267