OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
iparam2stage.F File Reference

Go to the source code of this file.

Functions/Subroutines

integer function iparam2stage (ispec, name, opts, ni, nbi, ibi, nxi)
 IPARAM2STAGE

Function/Subroutine Documentation

◆ iparam2stage()

integer function iparam2stage ( integer ispec,
character*( * ) name,
character*( * ) opts,
integer ni,
integer nbi,
integer ibi,
integer nxi )

IPARAM2STAGE

Download IPARAM2STAGE + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!>      This program sets problem and machine dependent parameters
!>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
!>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
!>      and related subroutines for eigenvalue problems. 
!>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
!>      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
!>      with a direct conversion ISPEC + 16.
!> 
Parameters
[in]ISPEC
!>          ISPEC is integer scalar
!>              ISPEC specifies which tunable parameter IPARAM2STAGE should
!>              return.
!>
!>              ISPEC=17: the optimal blocksize nb for the reduction to
!>                        BAND
!>
!>              ISPEC=18: the optimal blocksize ib for the eigenvectors
!>                        singular vectors update routine
!>
!>              ISPEC=19: The length of the array that store the Housholder 
!>                        representation for the second stage 
!>                        Band to Tridiagonal or Bidiagonal
!>
!>              ISPEC=20: The workspace needed for the routine in input.
!>
!>              ISPEC=21: For future release.
!> 
[in]NAME
!>          NAME is character string
!>               Name of the calling subroutine
!> 
[in]OPTS
!>          OPTS is CHARACTER*(*)
!>          The character options to the subroutine NAME, concatenated
!>          into a single character string.  For example, UPLO = 'U',
!>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
!>          be specified as OPTS = 'UTN'.
!> 
[in]NI
!>          NI is INTEGER which is the size of the matrix
!> 
[in]NBI
!>          NBI is INTEGER which is the used in the reduciton, 
!>          (e.g., the size of the band), needed to compute workspace
!>          and LHOUS2.
!> 
[in]IBI
!>          IBI is INTEGER which represent the IB of the reduciton,
!>          needed to compute workspace and LHOUS2.
!> 
[in]NXI
!>          NXI is INTEGER needed in the future release.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All detail are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation 
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196 
!>
!> 

Definition at line 153 of file iparam2stage.F.

155#if defined(_OPENMP)
156 use omp_lib
157#endif
158 IMPLICIT NONE
159*
160* -- LAPACK auxiliary routine --
161* -- LAPACK is a software package provided by Univ. of Tennessee, --
162* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163*
164* .. Scalar Arguments ..
165 CHARACTER*( * ) NAME, OPTS
166 INTEGER ISPEC, NI, NBI, IBI, NXI
167*
168* ================================================================
169* ..
170* .. Local Scalars ..
171 INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
172 $ FACTOPTNB, QROPTNB, LQOPTNB
173 LOGICAL RPREC, CPREC
174 CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
175* ..
176* .. Intrinsic Functions ..
177 INTRINSIC char, ichar, max
178* ..
179* .. External Functions ..
180 INTEGER ILAENV
181 EXTERNAL ilaenv
182* ..
183* .. Executable Statements ..
184*
185* Invalid value for ISPEC
186*
187 IF( (ispec.LT.17).OR.(ispec.GT.21) ) THEN
188 iparam2stage = -1
189 RETURN
190 ENDIF
191*
192* Get the number of threads
193*
194 nthreads = 1
195#if defined(_OPENMP)
196!$OMP PARALLEL
197 nthreads = omp_get_num_threads()
198!$OMP END PARALLEL
199#endif
200* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
201*
202 IF( ispec .NE. 19 ) THEN
203*
204* Convert NAME to upper case if the first character is lower case.
205*
206 iparam2stage = -1
207 subnam = name
208 ic = ichar( subnam( 1: 1 ) )
209 iz = ichar( 'Z' )
210 IF( iz.EQ.90 .OR. iz.EQ.122 ) THEN
211*
212* ASCII character set
213*
214 IF( ic.GE.97 .AND. ic.LE.122 ) THEN
215 subnam( 1: 1 ) = char( ic-32 )
216 DO 100 i = 2, 12
217 ic = ichar( subnam( i: i ) )
218 IF( ic.GE.97 .AND. ic.LE.122 )
219 $ subnam( i: i ) = char( ic-32 )
220 100 CONTINUE
221 END IF
222*
223 ELSE IF( iz.EQ.233 .OR. iz.EQ.169 ) THEN
224*
225* EBCDIC character set
226*
227 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
228 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
229 $ ( ic.GE.162 .AND. ic.LE.169 ) ) THEN
230 subnam( 1: 1 ) = char( ic+64 )
231 DO 110 i = 2, 12
232 ic = ichar( subnam( i: i ) )
233 IF( ( ic.GE.129 .AND. ic.LE.137 ) .OR.
234 $ ( ic.GE.145 .AND. ic.LE.153 ) .OR.
235 $ ( ic.GE.162 .AND. ic.LE.169 ) )subnam( i:
236 $ i ) = char( ic+64 )
237 110 CONTINUE
238 END IF
239*
240 ELSE IF( iz.EQ.218 .OR. iz.EQ.250 ) THEN
241*
242* Prime machines: ASCII+128
243*
244 IF( ic.GE.225 .AND. ic.LE.250 ) THEN
245 subnam( 1: 1 ) = char( ic-32 )
246 DO 120 i = 2, 12
247 ic = ichar( subnam( i: i ) )
248 IF( ic.GE.225 .AND. ic.LE.250 )
249 $ subnam( i: i ) = char( ic-32 )
250 120 CONTINUE
251 END IF
252 END IF
253*
254 prec = subnam( 1: 1 )
255 algo = subnam( 4: 6 )
256 stag = subnam( 8:12 )
257 rprec = prec.EQ.'S' .OR. prec.EQ.'D'
258 cprec = prec.EQ.'C' .OR. prec.EQ.'Z'
259*
260* Invalid value for PRECISION
261*
262 IF( .NOT.( rprec .OR. cprec ) ) THEN
263 iparam2stage = -1
264 RETURN
265 ENDIF
266 ENDIF
267* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
268* $ ' ALGO ',ALGO,' STAGE ',STAG
269*
270*
271 IF (( ispec .EQ. 17 ) .OR. ( ispec .EQ. 18 )) THEN
272*
273* ISPEC = 17, 18: block size KD, IB
274* Could be also dependent from N but for now it
275* depend only on sequential or parallel
276*
277 IF( nthreads.GT.4 ) THEN
278 IF( cprec ) THEN
279 kd = 128
280 ib = 32
281 ELSE
282 kd = 160
283 ib = 40
284 ENDIF
285 ELSE IF( nthreads.GT.1 ) THEN
286 IF( cprec ) THEN
287 kd = 64
288 ib = 32
289 ELSE
290 kd = 64
291 ib = 32
292 ENDIF
293 ELSE
294 IF( cprec ) THEN
295 kd = 16
296 ib = 16
297 ELSE
298 kd = 32
299 ib = 16
300 ENDIF
301 ENDIF
302 IF( ispec.EQ.17 ) iparam2stage = kd
303 IF( ispec.EQ.18 ) iparam2stage = ib
304*
305 ELSE IF ( ispec .EQ. 19 ) THEN
306*
307* ISPEC = 19:
308* LHOUS length of the Houselholder representation
309* matrix (V,T) of the second stage. should be >= 1.
310*
311* Will add the VECT OPTION HERE next release
312 vect = opts(1:1)
313 IF( vect.EQ.'N' ) THEN
314 lhous = max( 1, 4*ni )
315 ELSE
316* This is not correct, it need to call the ALGO and the stage2
317 lhous = max( 1, 4*ni ) + ibi
318 ENDIF
319 IF( lhous.GE.0 ) THEN
320 iparam2stage = lhous
321 ELSE
322 iparam2stage = -1
323 ENDIF
324*
325 ELSE IF ( ispec .EQ. 20 ) THEN
326*
327* ISPEC = 20: (21 for future use)
328* LWORK length of the workspace for
329* either or both stages for TRD and BRD. should be >= 1.
330* TRD:
331* TRD_stage 1: = LT + LW + LS1 + LS2
332* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
333* where LDT=LDS2=KD
334* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
335* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
336* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
337* = N*KD + N*max(KD+1,FACTOPTNB)
338* + max(2*KD*KD, KD*NTHREADS)
339* + (KD+1)*N
340 lwork = -1
341 subnam(1:1) = prec
342 subnam(2:6) = 'GEQRF'
343 qroptnb = ilaenv( 1, subnam, ' ', ni, nbi, -1, -1 )
344 subnam(2:6) = 'GELQF'
345 lqoptnb = ilaenv( 1, subnam, ' ', nbi, ni, -1, -1 )
346* Could be QR or LQ for TRD and the max for BRD
347 factoptnb = max(qroptnb, lqoptnb)
348 IF( algo.EQ.'TRD' ) THEN
349 IF( stag.EQ.'2STAG' ) THEN
350 lwork = ni*nbi + ni*max(nbi+1,factoptnb)
351 $ + max(2*nbi*nbi, nbi*nthreads)
352 $ + (nbi+1)*ni
353 ELSE IF( (stag.EQ.'HE2HB').OR.(stag.EQ.'SY2SB') ) THEN
354 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
355 ELSE IF( (stag.EQ.'HB2ST').OR.(stag.EQ.'SB2ST') ) THEN
356 lwork = (2*nbi+1)*ni + nbi*nthreads
357 ENDIF
358 ELSE IF( algo.EQ.'BRD' ) THEN
359 IF( stag.EQ.'2STAG' ) THEN
360 lwork = 2*ni*nbi + ni*max(nbi+1,factoptnb)
361 $ + max(2*nbi*nbi, nbi*nthreads)
362 $ + (nbi+1)*ni
363 ELSE IF( stag.EQ.'GE2GB' ) THEN
364 lwork = ni*nbi + ni*max(nbi,factoptnb) + 2*nbi*nbi
365 ELSE IF( stag.EQ.'GB2BD' ) THEN
366 lwork = (3*nbi+1)*ni + nbi*nthreads
367 ENDIF
368 ENDIF
369 lwork = max( 1, lwork )
370
371 IF( lwork.GT.0 ) THEN
372 iparam2stage = lwork
373 ELSE
374 iparam2stage = -1
375 ENDIF
376*
377 ELSE IF ( ispec .EQ. 21 ) THEN
378*
379* ISPEC = 21 for future use
380 iparam2stage = nxi
381 ENDIF
382*
383* ==== End of IPARAM2STAGE ====
384*
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
integer function iparam2stage(ispec, name, opts, ni, nbi, ibi, nxi)
IPARAM2STAGE
#define max(a, b)
Definition macros.h:21