OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
sfac_driver.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
14 SUBROUTINE smumps_fac_driver( id)
15 USE smumps_buf
16 USE smumps_load
17 USE smumps_ooc
26#if ! defined(NO_FDM_DESCBAND)
28#endif
29#if ! defined(NO_FDM_MAPROW)
31#endif
32!$ USE OMP_LIB
33C Derived datatype to pass pointers with implicit interfaces
35 IMPLICIT NONE
36C
37C Purpose
38C =======
39C
40C Performs scaling, sorting in arrowhead, then
41C distributes the matrix, and perform
42C factorization.
43C
44C
45 INTERFACE
46 SUBROUTINE smumps_anorminf(id, ANORMINF, LSCAL, EFF_SIZE_SCHUR)
48 TYPE (SMUMPS_STRUC), TARGET :: id
49 REAL, INTENT(OUT) :: ANORMINF
50 LOGICAL, INTENT(IN) :: LSCAL
51 INTEGER, INTENT(IN) :: EFF_SIZE_SCHUR
52 END SUBROUTINE smumps_anorminf
53 SUBROUTINE smumps_free_id_data_modules(id_FDM_F_ENCODING,
54 & id_BLRARRAY_ENCODING, KEEP8, K34)
55# if defined(MUMPS_F2003)
56 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
57 & id_blrarray_encoding
58 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
59 & id_fdm_f_encoding
60# else
61 CHARACTER, DIMENSION(:), POINTER :: id_BLRARRAY_ENCODING
62 CHARACTER, DIMENSION(:), POINTER :: id_FDM_F_ENCODING
63# endif
64 INTEGER(8), intent(inout) :: KEEP8(150)
65 INTEGER, intent(in) :: K34
66 END SUBROUTINE smumps_free_id_data_modules
67 END INTERFACE
68C
69C Parameters
70C ==========
71C
72 TYPE(smumps_struc), TARGET :: id
73C
74C MPI
75C ===
76C
77 include 'mpif.h'
78 include 'mumps_tags.h'
79 INTEGER :: STATUS(MPI_STATUS_SIZE)
80 INTEGER :: IERR
81 INTEGER, PARAMETER :: MASTER = 0
82C
83C Local variables
84C ===============
85C
86 include 'mumps_headers.h'
87 INTEGER(8) :: NSEND8, NSEND_TOT8
88 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8
89 INTEGER(4) :: I4
90 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS
91 INTEGER :: ITMP, JTMP
92 INTEGER :: KEEP464COPY, KEEP465COPY
93 REAL :: RATIOK465
94 INTEGER(8) :: KEEP826_SAVE
95 INTEGER(8) :: K67, K68, K70, K74, K75
96 INTEGER(8) ITMP8
97 INTEGER MUMPS_PROCNODE
98 EXTERNAL mumps_procnode
99 INTEGER MP, LP, MPG, allocok
100 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF
101C Reception buffer
102 INTEGER :: SMUMPS_LBUFR, SMUMPS_LBUFR_BYTES
103 INTEGER(8) :: SMUMPS_LBUFR_BYTES8 ! for intermediate computation
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: BUFR
105C Size of send buffers (in bytes)
106 INTEGER :: SMUMPS_LBUF, SMUMPS_LBUF_INT
107 INTEGER(8) :: SMUMPS_LBUF8 ! for intermediate computation
108C
109 INTEGER PTRIST, PTRWB, MAXELT_SIZE,
110 & itloc, ipool, k28, lpool
111 INTEGER IRANK, ID_ROOT
112 INTEGER KKKK
113 INTEGER(8) :: NZ_locMAX8
114 INTEGER(8) MEMORY_MD_ARG
115 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8
116 REAL CNTL4, AVG_FLOPS
117 INTEGER MIN_PERLU, MAXIS_ESTIM
118 INTEGER SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE
119C
120 TYPE (S_IS_POINTERS_T) :: S_IS_POINTERS
121 INTEGER MAXIS
122 INTEGER(8) :: MAXS
123C For S argument to arrowhead routines:
124 INTEGER(8) :: MAXS_ARG
125 REAL, TARGET :: S_DUMMY_ARG(1)
126 REAL, POINTER, DIMENSION(:) :: S_PTR_ARG
127 INTEGER NB_THREADS, NOMP
128 DOUBLE PRECISION TIMEAVG, TIMEMAX,
129 & flopavg, flopmax
130 REAL TMPTIME, TMPFLOP
131 INTEGER NPIV_CRITICAL_PATH, EFF_SIZE_SCHUR
132 DOUBLE PRECISION TIME, TIMEET
133 REAL ZERO, ONE, MONE
134 parameter( zero = 0.0e0, one = 1.0e0, mone = -1.0e0)
135 REAL CZERO
136 parameter( czero = 0.0e0 )
137 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233, BLR_STRAT
138 INTEGER, PARAMETER :: IDUMMY = -9999
139 LOGICAL, PARAMETER :: BDUMMY =.false.
140 INTEGER, PARAMETER :: PANEL_TABSIZE = 20
141 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling
142 INTEGER LIWK, LWK_REAL
143 INTEGER(8) :: LWK
144C I_AM_SLAVE: used to determine if proc has the role of a slave
145C WK_USER_PROVIDED is set to true when WK_USER is provided by user
146 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED, EARLYT3ROOTINS
147 LOGICAL PRINT_MAXAVG, PRINT_NODEINFO
148 REAL :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2, Thresh_Seuil
149 REAL :: CNTL1, CNTL3, CNTL5, CNTL6, EPS
150 INTEGER N, LPN_LIST,POSBUF
151 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2
152 INTEGER I,K
153 INTEGER(8) :: ITEMP8
154 INTEGER :: PARPIV_T1
155 INTEGER FRONTWISE
156C temporary variables for collecting stats from all processors
157 DOUBLE PRECISION :: TMP_MRY_LU_FR
158 DOUBLE PRECISION :: TMP_MRY_LU_LRGAIN
159 DOUBLE PRECISION :: TMP_MRY_CB_FR
160 DOUBLE PRECISION :: TMP_MRY_CB_LRGAIN
161 DOUBLE PRECISION :: TMP_FLOP_LRGAIN
162 DOUBLE PRECISION :: TMP_FLOP_TRSM
163 DOUBLE PRECISION :: TMP_FLOP_PANEL
164 DOUBLE PRECISION :: TMP_FLOP_FRFRONTS
165 DOUBLE PRECISION :: TMP_FLOP_TRSM_FR
166 DOUBLE PRECISION :: TMP_FLOP_TRSM_LR
167 DOUBLE PRECISION :: TMP_FLOP_UPDATE_FR
168 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LR
169 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LRLR3
170 DOUBLE PRECISION :: TMP_FLOP_COMPRESS
171 DOUBLE PRECISION :: TMP_FLOP_DECOMPRESS
172 DOUBLE PRECISION :: TMP_FLOP_MIDBLK_COMPRESS
173 DOUBLE PRECISION :: TMP_FLOP_FRSWAP_COMPRESS
174 DOUBLE PRECISION :: TMP_FLOP_ACCUM_COMPRESS
175 DOUBLE PRECISION :: TMP_FLOP_CB_COMPRESS
176 DOUBLE PRECISION :: TMP_FLOP_CB_DECOMPRESS
177 DOUBLE PRECISION :: TMP_FLOP_FACTO_FR
178 INTEGER :: TMP_CNT_NODES
179 DOUBLE PRECISION :: TMP_TIME_UPDATE
180 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR1
181 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR2
182 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR3
183 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRLR
184 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRFR
185 DOUBLE PRECISION :: TMP_TIME_COMPRESS
186 DOUBLE PRECISION :: TMP_TIME_MIDBLK_COMPRESS
187 DOUBLE PRECISION :: TMP_TIME_FRSWAP_COMPRESS
188 DOUBLE PRECISION :: TMP_TIME_CB_COMPRESS
189 DOUBLE PRECISION :: TMP_TIME_PANEL
190 DOUBLE PRECISION :: TMP_TIME_FAC_I
191 DOUBLE PRECISION :: TMP_TIME_FAC_MQ
192 DOUBLE PRECISION :: TMP_TIME_FAC_SQ
193 DOUBLE PRECISION :: TMP_TIME_LRTRSM
194 DOUBLE PRECISION :: TMP_TIME_FRTRSM
195 DOUBLE PRECISION :: TMP_TIME_FRFRONTS
196 DOUBLE PRECISION :: TMP_TIME_LR_MODULE
197 DOUBLE PRECISION :: TMP_TIME_DIAGCOPY
198 DOUBLE PRECISION :: TMP_TIME_DECOMP
199 DOUBLE PRECISION :: TMP_TIME_DECOMP_UCFS
200 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASM1
201 DOUBLE PRECISION :: TMP_TIME_DECOMP_LOCASM2
202 DOUBLE PRECISION :: TMP_TIME_DECOMP_MAPLIG1
203 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2S
204 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2M
205C
206C Workspace.
207C
208 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK
209 REAL, DIMENSION(:), ALLOCATABLE :: WK
210 REAL, DIMENSION(:), ALLOCATABLE :: WK_REAL
211 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8
212 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP
213 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP
214 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS
215 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS
216 INTEGER BUREGISTRE(12)
217 INTEGER BUINTSZ, BURESZ, BUJOB
218 INTEGER BUMAXMN, M, SCMYID, SCNPROCS
219 REAL SCONEERR, SCINFERR
220C
221C Parameters arising from the structure
222C =====================================
223C
224 INTEGER, POINTER :: JOB
225* Control parameters: see description in SMUMPSID
226 REAL,DIMENSION(:),POINTER::RINFO, RINFOG
227 REAL,DIMENSION(:),POINTER:: CNTL
228 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP
229 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc
230 REAL, DIMENSION(:), POINTER :: MYA_loc
231 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1)
232 REAL, TARGET :: DUMMYA_loc(1)
233 INTEGER,DIMENSION(:),POINTER::ICNTL
234 EXTERNAL mumps_get_pool_length
235 INTEGER MUMPS_GET_POOL_LENGTH
236 INTEGER(8) :: TOTAL_BYTES
237 INTEGER(8) :: I8TMP, LWK_USER_SUM8
238C
239C External references
240C ===================
241 INTEGER numroc
242 EXTERNAL numroc
243 INTEGER:: NWORKING
244 LOGICAL:: MEM_EFF_ALLOCATED
245 INTEGER :: TOTAL_MBYTES_UNDER_L0
246 INTEGER(8):: TOTAL_BYTES_UNDER_L0
247C Fwd in facto:
248 REAL, DIMENSION(:), POINTER :: RHS_MUMPS
249 LOGICAL :: RHS_MUMPS_ALLOCATED
250 INTEGER :: NB_ACTIVE_FRONTS_ESTIM
251 INTEGER :: NB_FRONTS_F_ESTIM
252C
253C
254 job=>id%JOB
255 rinfo=>id%RINFO
256 rinfog=>id%RINFOG
257 cntl=>id%CNTL
258 infog=>id%INFOG
259 keep=>id%KEEP
260 icntl=>id%ICNTL
261 IF (id%KEEP8(29) .NE. 0) THEN
262 myirn_loc=>id%IRN_loc
263 myjcn_loc=>id%JCN_loc
264 mya_loc=>id%A_loc
265 ELSE
266 myirn_loc=>dummyirn_loc
267 myjcn_loc=>dummyjcn_loc
268 mya_loc=>dummya_loc
269 ENDIF
270 n = id%N
271 eps = epsilon( zero )
272C TIMINGS: reset to 0
273 id%DKEEP(92)=0.0e0
274 id%DKEEP(93)=0.0e0
275 id%DKEEP(94)=0.0e0
276 id%DKEEP(97)=0.0e0
277 id%DKEEP(98)=0.0e0
278 id%DKEEP(56)=0.0e0
279C Count of MPI messages: reset to 0
280 id%KEEP(266)=0
281 id%KEEP(267)=0
282C MIN/MAX pivots reset to 0
283 id%DKEEP(19)=huge(0.0e0)
284 id%DKEEP(20)=huge(0.0e0)
285 id%DKEEP(21)=0.0e0
286C Number of symmetric swaps
287 id%KEEP8(80)=0_8
288C Largest increase of internal panel size
289 id%KEEP(425) =0
290C
291 print_maxavg = .NOT.(id%NSLAVES.EQ.1 .AND. keep(46).EQ.1)
292C Print per node informtation only in case ther are several
293C compute nodes (id%KEEP(412): #MPI procs on comupte node)
294 print_nodeinfo = print_maxavg .AND. id%NPROCS .NE. id%KEEP(412)
295C
296C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
297C Data from factorization is now freed asap
298C id%S, id%IS
299 IF (id%KEEP8(24).EQ.0_8) THEN
300C -- deallocate only when not provided/allocated by the user
301 IF (associated(id%S)) THEN
302 DEALLOCATE(id%S)
303 id%KEEP8(23)=0_8
304 NULLIFY(id%S)
305 ENDIF
306 ENDIF
307 IF (associated(id%IS)) THEN
308 DEALLOCATE(id%IS)
309 NULLIFY(id%IS)
310 ENDIF
311C Free BLR factors, if any
312 CALL smumps_free_id_data_modules(id%FDM_F_ENCODING,
313 & id%BLRARRAY_ENCODING, id%KEEP8(1), id%KEEP(34))
314 IF (associated(id%root%RG2L_ROW))THEN
315 DEALLOCATE(id%root%RG2L_ROW)
316 NULLIFY(id%root%RG2L_ROW)
317 ENDIF
318 IF (associated(id%root%RG2L_COL))THEN
319 DEALLOCATE(id%root%RG2L_COL)
320 NULLIFY(id%root%RG2L_COL)
321 ENDIF
322 IF (associated( id%PTLUST_S )) THEN
323 DEALLOCATE(id%PTLUST_S)
324 NULLIFY(id%PTLUST_S)
325 ENDIF
326 IF (associated(id%PTRFAC)) THEN
327 DEALLOCATE(id%PTRFAC)
328 NULLIFY(id%PTRFAC)
329 END IF
330 IF (associated(id%RHSCOMP)) THEN
331 DEALLOCATE(id%RHSCOMP)
332 NULLIFY(id%RHSCOMP)
333 id%KEEP8(25)=0_8
334 ENDIF
335 IF (associated(id%POSINRHSCOMP_ROW)) THEN
336 DEALLOCATE(id%POSINRHSCOMP_ROW)
337 NULLIFY(id%POSINRHSCOMP_ROW)
338 ENDIF
339 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
340 DEALLOCATE(id%POSINRHSCOMP_COL)
341 NULLIFY(id%POSINRHSCOMP_COL)
342 id%POSINRHSCOMP_COL_ALLOC = .false.
343 ENDIF
344C
345C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
346C
347C Related to forward in facto functionality (referred to as "Fwd in facto")
348 NULLIFY(rhs_mumps)
349 rhs_mumps_allocated = .false.
350C -----------------------------------------------------------------------
351C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user
352C We can accept WK_USER to be provided on only one proc and
353C different values of WK_USER per processor
354C
355 IF (id%KEEP8(24).GT.0_8) THEN
356C We nullify S so that later when we test
357C if (associated(S) we can free space and reallocate it).
358 NULLIFY(id%S)
359 ENDIF
360C
361C -- KEEP8(24) can now then be reset safely
362 wk_user_provided = (id%LWK_USER.NE.0)
363 IF (wk_user_provided) THEN
364 IF (id%LWK_USER.GT.0) THEN
365 id%KEEP8(24) = int(id%LWK_USER,8)
366 ELSE
367 id%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8
368 ENDIF
369 ELSE
370 id%KEEP8(24) = 0_8
371 ENDIF
372C Compute sum of LWK_USER provided by user
373 lwk_user_sum8 = 0_8
374 CALL mpi_reduce ( id%KEEP8(24), lwk_user_sum8, 1, mpi_integer8,
375 & mpi_sum, master, id%COMM, ierr )
376C
377C KEEP8(26) might be modified
378C (element entry format)
379C but need be restore for
380C future factorisation
381C with different scaling option
382C
383 keep826_save = id%KEEP8(26)
384C In case of loop on factorization with
385C different scaling options, initialize
386C DKEEP(4:5) to 0.
387 id%DKEEP(4)=-1.0e0
388 id%DKEEP(5)=-1.0e0
389C Mapping information used during solve. In case of several facto+solve
390C it has to be recomputed. In case of several solves with the same
391C facto, it is not recomputed.
392 IF (associated(id%IPTR_WORKING)) THEN
393 DEALLOCATE(id%IPTR_WORKING)
394 NULLIFY(id%IPTR_WORKING)
395 END IF
396 IF (associated(id%WORKING)) THEN
397 DEALLOCATE(id%WORKING)
398 NULLIFY(id%WORKING)
399 END IF
400C
401C Units for printing
402C MP: diagnostics
403C LP: errors
404C
405 lp = icntl( 1 )
406 mp = icntl( 2 )
407 mpg = icntl( 3 )
408 lpok = ((lp.GT.0).AND.(id%ICNTL(4).GE.1))
409 prok = ((mp.GT.0).AND.(id%ICNTL(4).GE.2))
410 prokg = ( mpg .GT. 0 .and. id%MYID .eq. master )
411 prokg = (prokg.AND.(id%ICNTL(4).GE.2))
412 IF ( prok ) WRITE( mp, 130 )
413 IF ( prokg ) WRITE( mpg, 130 )
414C -------------------------------------
415C Depending on the type of parallelism,
416C the master can now (soon) potentially
417C have the role of a slave
418C -------------------------------------
419 i_am_slave = ( id%MYID .ne. master .OR.
420 & ( id%MYID .eq. master .AND.
421 & keep(46) .eq. 1 ) )
422C
423C Prepare work for out-of-core
424C
425 IF (id%MYID .EQ. master .AND. keep(201) .NE. -1) THEN
426C Note that if KEEP(201)=-1, then we have decided
427C at analysis phase that factors will not be stored
428C (neither in memory nor on disk). In that case,
429C ICNTL(22) is ignored.
430C -- ICNTL(22) must be set before facto phase
431C (=1 OOC on; =0 OOC off)
432C and cannot be changed for subsequent solve phases.
433 keep(201)=id%ICNTL(22)
434 IF (keep(201) .NE. 0) THEN
435# if defined(OLD_OOC_NOPANEL)
436 keep(201)=2
437# else
438 keep(201)=1
439# endif
440 ENDIF
441 ENDIF
442C ----------------------
443C Broadcast KEEP options
444C defined for facto:
445C ----------------------
446 CALL mpi_bcast( keep(12), 1, mpi_integer,
447 & master, id%COMM, ierr )
448 CALL mpi_bcast( keep(19), 1, mpi_integer,
449 & master, id%COMM, ierr )
450 CALL mpi_bcast( keep(21), 1, mpi_integer,
451 & master, id%COMM, ierr )
452 CALL mpi_bcast( keep(201), 1, mpi_integer,
453 & master, id%COMM, ierr )
454 CALL mpi_bcast( keep(459), 1, mpi_integer,
455 & master, id%COMM, ierr )
456 CALL mpi_bcast( keep(460), 1, mpi_integer,
457 & master, id%COMM, ierr )
458 IF ( keep(459) .GE. panel_tabsize ) THEN
459 IF ( lpok ) THEN
460 WRITE(lp,'(A,I4,A,I3)') " ** WARNING ** KEEP(459)=",keep(459),
461 & " too large, resetting to",panel_tabsize-1
462 ENDIF
463 keep(459) = panel_tabsize - 1
464 ENDIF
465 perlu = keep(12)
466 IF (id%MYID.EQ.master) THEN
467C KEEP(50) case
468C ==============
469C
470C KEEP(50) = 0 : matrix is unsymmetric
471C KEEP(50) /= 0 : matrix is symmetric
472C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD.
473C KEEP(50) = 2 : Ask for L U on the root
474C KEEP(50) = 3 ... L D L^T ??
475C
476 cntl1 = id%CNTL(1)
477C ---------------------------------------
478C For symmetric (non general) matrices
479C set (directly) CNTL1 = 0.0
480C ---------------------------------------
481 keep(17)=0
482 IF ( keep(50) .eq. 1 ) THEN
483 IF (cntl1 .ne. zero ) THEN
484 IF ( prokg ) THEN
485 WRITE(mpg,'(A)')
486 & '** Warning : SPD solver called, resetting CNTL(1) to 0.0E0'
487 END IF
488 END IF
489 cntl1 = zero
490 END IF
491C CNTL1 threshold value must be between
492C 0.0 and 1.0 (for SYM=0) and 0.5 (for SYM=1,2)
493 IF (cntl1.GT.one) cntl1=one
494 IF (cntl1.LT.zero) cntl1=zero
495 IF (keep(50).NE.0.AND.cntl1.GT.0.5e0) THEN
496 cntl1 = 0.5e0
497 ENDIF
498 parpiv_t1 = id%KEEP(268)
499 IF (parpiv_t1.EQ.77) THEN
500 parpiv_t1 = 0
501#if defined(__ve__)
502 parpiv_t1 = -2
503#endif
504 ENDIF
505 IF (parpiv_t1.EQ.-3) THEN
506 parpiv_t1 = 0
507 ENDIF
508 IF ((parpiv_t1.LT.-3).OR.(parpiv_t1.GT.1)) THEN
509C out of range values
510 parpiv_t1 =0
511 ENDIF
512C note that KEEP(50).EQ.1 => CNTL1=0.0
513 IF (cntl1.EQ.0.0.OR.(keep(50).eq.1)) parpiv_t1 = 0
514C
515 IF (parpiv_t1.EQ.-2) THEN
516 IF (keep(19).NE.0) THEN
517C switch off PARPIV_T1 if RR activated
518C but do NOT switch off PARPIV_1 with null pivot detection
519 parpiv_t1 = 0
520 ENDIF
521 ENDIF
522 id%KEEP(269) = parpiv_t1
523 ENDIF
524 CALL mpi_bcast(cntl1, 1, mpi_real,
525 & master, id%COMM, ierr)
526 CALL mpi_bcast( keep(269), 1, mpi_integer,
527 & master, id%COMM, ierr )
528 IF (id%MYID.EQ.master) THEN
529C -----------------------------------------------------
530C Decoding of ICNTL(35) for factorization: same as
531C at analysis except that we store a copy of ICNTL(35)
532C in KEEP(486) instead of KEEP(494) and need to check
533C compatibility of KEEP(486) and KEEP(494): If LR was
534C not activated during analysis, it cannot be activated
535C at factorization.
536C ------------------------------------------------------
537 id%KEEP(486) = id%ICNTL(35)
538 IF (id%KEEP(486).EQ.1) THEN
539C -- Automatic BLR option setting
540 id%KEEP(486)= 2
541 ENDIF
542 IF ( id%KEEP(486).EQ.4) id%KEEP(486)=0
543 IF ((id%KEEP(486).LT.0).OR.(id%KEEP(486).GT.4)) THEN
544C Out of range values treated as 0
545 id%KEEP(486) = 0
546 ENDIF
547 IF ((keep(486).NE.0).AND.(keep(494).EQ.0)) THEN
548C To activate BLR during factorization,
549C ICNTL(35) must have been set at analysis.
550 IF (lpok) THEN
551 WRITE(lp,'(A)')
552 & " *** Error with BLR setting "
553 WRITE(lp,'(A)') " *** BLR was not activated during ",
554 & " analysis but is requested during factorization."
555 ENDIF
556 id%INFO(1)=-54
557 id%INFO(2)=0
558 GOTO 105
559 ENDIF
560 keep464copy = id%ICNTL(38)
561 IF (keep464copy.LT.0.OR.keep464copy.GT.1000) THEN
562C Out of range values treated as 1000
563 keep464copy = 1000
564 ENDIF
565 IF (id%KEEP(461).LT.1) THEN
566 id%KEEP(461) = 10
567 ENDIF
568 keep465copy=0
569 IF (id%ICNTL(36).EQ.1.OR.id%ICNTL(36).EQ.3) THEN
570 IF (cntl1.EQ.zero .OR. keep(468).LE.1) THEN
571 keep(475) = 3
572 ELSE IF ( (keep(269).GT.0).OR. (keep(269).EQ.-2)) THEN
573 keep(475) = 2
574 ELSE IF (keep(468).EQ.2) THEN
575 keep(475) = 2
576 ELSE
577 keep(475) = 1
578 ENDIF
579 ELSE
580 keep(475) = 0
581 ENDIF
582 keep(481)=0
583 IF (id%ICNTL(36).LT.0 .OR. id%ICNTL(36).GE.2) THEN
584C Only options 1 and 2 are allowed
585 keep(475) = 0
586 ENDIF
587C K489 is set according to ICNTL(37)
588 IF (id%ICNTL(37).EQ.0.OR.id%ICNTL(37).EQ.1) THEN
589 keep(489) = id%ICNTL(37)
590 ELSE
591C Other values treated as zero
592 keep(489) = 0
593 ENDIF
594 IF (keep(79).GE.1) THEN
595C CompressCB incompatible with type4,5,6 nodes
596 keep(489)=0
597 ENDIF
598 keep(489)=0
599C id%KEEP(476) \in [1,100]
600 IF ((id%KEEP(476).GT.100).OR.(id%KEEP(476).LT.1)) THEN
601 id%KEEP(476)= 50
602 ENDIF
603C id%KEEP(477) \in [1,100]
604 IF ((id%KEEP(477).GT.100).OR.(id%KEEP(477).LT.1)) THEN
605 id%KEEP(477)= 100
606 ENDIF
607C id%KEEP(483) \in [1,100]
608 IF ((id%KEEP(483).GT.100).OR.(id%KEEP(483).LT.1)) THEN
609 id%KEEP(483)= 50
610 ENDIF
611C id%KEEP(484) \in [1,100]
612 IF ((id%KEEP(484).GT.100).OR.(id%KEEP(484).LT.1)) THEN
613 id%KEEP(484)= 50
614 ENDIF
615C id%KEEP(480)=0,2,3,4,5,6
616 IF ((id%KEEP(480).GT.6).OR.(id%KEEP(480).LT.0)
617 & .OR.(id%KEEP(480).EQ.1)) THEN
618 id%KEEP(480)=0
619 ENDIF
620C id%KEEP(473)=0 or 1
621 IF ((id%KEEP(473).NE.0).AND.(id%KEEP(473).NE.1)) THEN
622 id%KEEP(473)=0
623 ENDIF
624C id%KEEP(474)=0,1,2,3
625 IF ((id%KEEP(474).GT.3).OR.(id%KEEP(474).LT.0)) THEN
626 id%KEEP(474)=0
627 ENDIF
628C id%KEEP(479)>0
629 IF (id%KEEP(479).LE.0) THEN
630 id%KEEP(479)=1
631 ENDIF
632 IF (id%KEEP(474).NE.0.AND.id%KEEP(480).EQ.0) THEN
633 id%KEEP(474) = 0
634 ENDIF
635 IF (id%KEEP(478).NE.0.AND.id%KEEP(480).LT.4) THEN
636 id%KEEP(478) = 0
637 ENDIF
638 IF (id%KEEP(480).GE.5 .OR.
639 & (id%KEEP(480).NE.0.AND.id%KEEP(474).EQ.3)) THEN
640 IF (id%KEEP(475).LT.2) THEN
641C Reset to 3 if 5 or to 4 if 6
642 id%KEEP(480) = id%KEEP(480) - 2
643 write(*,*) ' Resetting KEEP(480) to ', id%KEEP(480)
644 ENDIF
645 ENDIF
646 105 CONTINUE
647 ENDIF ! id%MYID .EQ. MASTER
648 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
649 & id%COMM, id%MYID )
650C
651 IF (id%INFO(1).LT.0) GOTO 530
652 CALL mpi_bcast( keep(473), 14, mpi_integer,
653 & master, id%COMM, ierr )
654 IF (keep(486).NE.0) THEN
655 CALL mpi_bcast( keep(489), 1, mpi_integer,
656 & master, id%COMM, ierr )
657 CALL mpi_bcast( keep464copy, 1, mpi_integer,
658 & master, id%COMM, ierr )
659 CALL mpi_bcast( keep465copy, 1, mpi_integer,
660 & master, id%COMM, ierr )
661 ENDIF
662 IF (id%MYID.EQ.master) THEN
663 IF (keep(217).GT.2.OR.keep(217).LT.0) THEN
664 keep(217)=0
665 ENDIF
666 keep(214)=keep(217)
667 IF (keep(214).EQ.0) THEN
668 IF (keep(201).NE.0) THEN ! OOC or no factors
669 keep(214)=1
670 ELSE
671 keep(214)=2
672 ENDIF
673 IF (keep(486).EQ.2) THEN
674 keep(214)=1
675 ENDIF
676 ENDIF
677 ENDIF
678 CALL mpi_bcast( keep(214), 1, mpi_integer,
679 & master, id%COMM, ierr )
680 IF (keep(201).NE.0) THEN
681C -- Low Level I/O strategy
682 CALL mpi_bcast( keep(99), 1, mpi_integer,
683 & master, id%COMM, ierr )
684 CALL mpi_bcast( keep(205), 1, mpi_integer,
685 & master, id%COMM, ierr )
686 CALL mpi_bcast( keep(211), 1, mpi_integer,
687 & master, id%COMM, ierr )
688 ENDIF
689C Fwd in facto: explicitly forbid
690C sparse RHS and A-1 computation
691 IF (id%KEEP(252).EQ.1 .AND. id%MYID.EQ.master) THEN
692 IF (id%ICNTL(20).EQ.1) THEN ! out-of-range => 0
693C NB: in doc ICNTL(20) only accessed during solve
694C In practice, will have failed earlier if RHS not allocated.
695C Still it looks safer to keep this test.
696 id%INFO(1)=-43
697 id%INFO(2)=20
698 IF (lpok) WRITE(lp,'(A)')
699 & ' ERROR: Sparse RHS is incompatible with forward',
700 & ' performed during factorization (ICNTL(32)=1)'
701 ELSE IF (id%ICNTL(30).NE.0) THEN ! out-of-range => 1
702 id%INFO(1)=-43
703 id%INFO(2)=30
704 IF (lpok) WRITE(lp,'(A)')
705 & ' ERROR: A-1 functionality incompatible with forward',
706 & ' performed during factorization (ICNTL(32)=1)'
707 ELSE IF (id%ICNTL(9) .NE. 1) THEN
708 id%INFO(1)=-43
709 id%INFO(2)=9
710 IF (lpok) WRITE(lp,'(A)')
711 & .NE.' ERROR: Transpose system (ICNTL(9)0) not ',
712 & ' compatible with forward performed during',
713 & ' factorization (ICNTL(32)=1)'
714 ENDIF
715 ENDIF
716 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
717 & id%COMM, id%MYID )
718C
719 IF (id%INFO(1).LT.0) GOTO 530
720C
721C The memory allowed is given by ICNTL(23) in Mbytes
722C 0 means that nothing is provided.
723C Save memory available, ICNTL(23) in KEEP8(4)
724C
725 IF ( icntl(23) .GT. 0 ) THEN
726 itmp = 1
727 ELSE
728 itmp = 0
729 ENDIF
730 CALL mpi_allreduce( itmp, jtmp, 1, mpi_integer,
731 & mpi_sum, id%COMM, ierr)
732 IF ( id%MYID.EQ.master ) THEN
733C Negative values considered 0
734 itmp = max(icntl(23),0)
735 END IF
736 CALL mpi_bcast( itmp, 1, mpi_integer,
737 & master, id%COMM, ierr )
738C JTMP: nb of procs with nonzero ICNTL(23)
739C ITMP: value of ICNTL(23) on master
740 IF ( itmp .GT. 0 .AND. jtmp .EQ. 1 ) THEN
741C ICNTL(23)>0 only on master
742 ELSE
743C Local values of ICNTL(23) are used, note that
744C they could all be zeros
745 itmp = icntl(23)
746 ENDIF
747C
748 itmp8 = int(itmp, 8)
749 id%KEEP8(4) = itmp8 * 1000000_8 ! convert to nb of bytes
750C Compute \sum of memories allowed
751 CALL mpi_reduce( id%KEEP8(4), itmp8, 1, mpi_integer8,
752 & mpi_sum, master, id%COMM, ierr )
753 itmp8 = itmp8 / 1000000_8 ! Use to print \sum_{ICNTL(23)}
754 IF ( prokg ) THEN
755 nworking = id%NSLAVES
756 WRITE( mpg, 172 ) nworking, id%ICNTL(22), keep(486),
757 & keep(12),
758 & id%KEEP8(111), keep(126), keep(127), keep(28),
759 & id%KEEP8(4)/1000000_8, itmp8, lwk_user_sum8, cntl1
760 IF (keep(252).GT.0)
761 & WRITE(mpg,173) keep(253)
762 IF (keep(269).NE.0)
763 & WRITE(mpg,174) keep(269)
764 ENDIF
765 IF (keep(201).LE.0) THEN
766C In-core version or no factors
767 keep(ixsz)=xsize_ic
768 ELSE IF (keep(201).EQ.2) THEN
769C OOC version, no panels
770 keep(ixsz)=xsize_ooc_nopanel
771 ELSE IF (keep(201).EQ.1) THEN
772C Panel versions:
773 IF (keep(50).EQ.0) THEN
774 keep(ixsz)=xsize_ooc_unsym
775 ELSE
776 keep(ixsz)=xsize_ooc_sym
777 ENDIF
778 ENDIF
779 IF ( keep(486) .NE. 0 ) THEN !LR is activated
780C Stats initialization for LR
781 CALL init_stats_global(id)
782 END IF
783C
784* **********************************
785* Begin intializations regarding the
786* computation of the determinant
787* **********************************
788 IF (id%MYID.EQ.master) keep(258)=icntl(33)
789 CALL mpi_bcast(keep(258), 1, mpi_integer,
790 & master, id%COMM, ierr)
791 IF (keep(258) .NE. 0) THEN
792 keep(259) = 0 ! Initial exponent of the local determinant
793 keep(260) = 1 ! Number of permutations
794 id%DKEEP(6) = 1.0e0 ! real part of the local determinant
795 ENDIF
796* ********************************
797* End intializations regarding the
798* computation of the determinant
799* ********************************
800C
801* **********************
802* Begin of Scaling phase
803* **********************
804C
805C SCALING MANAGEMENT
806C * Options 1, 3, 4 centralized only
807C
808C * Options 7, 8 : also works for distributed matrix
809C
810C At this point, we have the scaling arrays allocated
811C on the master. They have been allocated on the master
812C inside the main MUMPS driver.
813C
814 CALL mpi_bcast(keep(52), 1, mpi_integer,
815 & master, id%COMM, ierr)
816 lscal = ((keep(52) .GT. 0) .AND. (keep(52) .LE. 8))
817 IF (lscal) THEN
818C
819 IF ( id%MYID.EQ.master ) THEN
820 CALL mumps_secdeb(timeet)
821 ENDIF
822C -----------------------
823C Retrieve parameters for
824C simultaneous scaling
825C -----------------------
826 IF (keep(52) .EQ. 7) THEN
827C -- Cheap setting of SIMSCALING (it is the default in 4.8.4)
828 k231= keep(231)
829 k232= keep(232)
830 k233= keep(233)
831 ELSEIF (keep(52) .EQ. 8) THEN
832C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3)
833 k231= keep(239)
834 k232= keep(240)
835 k233= keep(241)
836 ENDIF
837 CALL mpi_bcast(id%DKEEP(3),1,mpi_real,master,
838 & id%COMM,ierr)
839C
840 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
841 & keep(54).NE.0 ) THEN
842C ------------------------------
843C Scaling for distributed matrix
844C We need to allocate scaling
845C arrays on all processors, not
846C only the master.
847C ------------------------------
848 IF ( id%MYID .NE. master ) THEN
849 IF ( associated(id%COLSCA))
850 & DEALLOCATE( id%COLSCA )
851 IF ( associated(id%ROWSCA))
852 & DEALLOCATE( id%ROWSCA )
853 ALLOCATE( id%COLSCA(n), stat=ierr)
854 IF (ierr .GT.0) THEN
855 id%INFO(1)=-13
856 id%INFO(2)=n
857 ENDIF
858 ALLOCATE( id%ROWSCA(n), stat=ierr)
859 IF (ierr .GT.0) THEN
860 id%INFO(1)=-13
861 id%INFO(2)=n
862 ENDIF
863 ENDIF
864 m = n
865 bumaxmn=m
866 IF(n > bumaxmn) bumaxmn = n
867 liwk = 4*bumaxmn
868 ALLOCATE (iwk(liwk),burp(m),bucp(n),
869 & burs(2* (id%NPROCS)),bucs(2* (id%NPROCS)),
870 & stat=allocok)
871 IF (allocok > 0) THEN
872 id%INFO(1)=-13
873 id%INFO(2)=liwk+m+n+4* (id%NPROCS)
874 ENDIF
875C --- Propagate enventual error
876 CALL mumps_propinfo( icntl(1), id%INFO(1),
877 & id%COMM, id%MYID )
878 IF (id%INFO(1).LT.0) GOTO 517
879C -- estimation of memory and construction of partvecs
880 bujob = 1
881C -- LWK not used
882 lwk_real = 1
883 ALLOCATE(wk_real(lwk_real),
884 & stat=allocok)
885 IF (allocok > 0) THEN
886 id%INFO(1)=-13
887 id%INFO(2)=lwk_real
888 ENDIF
889C --- Propagate enventual error
890 CALL mumps_propinfo( icntl(1), id%INFO(1),
891 & id%COMM, id%MYID )
892 IF (id%INFO(1).LT.0) GOTO 517
894 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
895 & id%KEEP8(29),
896 & m, n, id%NPROCS, id%MYID, id%COMM,
897 & burp, bucp,
898 & burs, bucs, buregistre,
899 & iwk, liwk,
900 & buintsz, buresz, bujob,
901 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
902 & id%KEEP(50),
903 & k231, k232, k233,
904 & id%DKEEP(3),
905 & sconeerr, scinferr)
906 IF(liwk < buintsz) THEN
907 DEALLOCATE(iwk)
908 liwk = buintsz
909 ALLOCATE(iwk(liwk), stat=allocok)
910 IF (allocok > 0) THEN
911 id%INFO(1)=-13
912 id%INFO(2)=liwk
913 ENDIF
914 ENDIF
915 lwk_real = buresz
916 DEALLOCATE(wk_real)
917 ALLOCATE (wk_real(lwk_real), stat=allocok)
918 IF (allocok > 0) THEN
919 id%INFO(1)=-13
920 id%INFO(2)=lwk_real
921 ENDIF
922C --- Propagate enventual error
923 CALL mumps_propinfo( icntl(1), id%INFO(1),
924 & id%COMM, id%MYID )
925 IF (id%INFO(1).LT.0) GOTO 517
926C -- estimation of memory and construction of partvecs
927 bujob = 2
929 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
930 & id%KEEP8(29),
931 & m, n, id%NPROCS, id%MYID, id%COMM,
932 & burp, bucp,
933 & burs, bucs, buregistre,
934 & iwk, liwk,
935 & buintsz, buresz, bujob,
936 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
937 & id%KEEP(50),
938 & k231, k232, k233,
939 & id%DKEEP(3),
940 & sconeerr, scinferr)
941 id%DKEEP(4) = sconeerr
942 id%DKEEP(5) = scinferr
943CXXXX
944 DEALLOCATE(iwk, wk_real,burp,bucp,burs, bucs)
945 ELSE IF ( keep(54) .EQ. 0 ) THEN
946C ------------------
947C Centralized matrix
948C ------------------
949 IF ((keep(52).EQ.7).OR.(keep(52).EQ.8)) THEN
950C -------------------------------
951C Create a communicator of size 1
952C -------------------------------
953 IF (id%MYID.EQ.master) THEN
954 colour = 0
955 ELSE
956 colour = mpi_undefined
957 ENDIF
958 CALL mpi_comm_split( id%COMM, colour, 0,
959 & comm_for_scaling, ierr )
960 IF (id%MYID.EQ.master) THEN
961 m = n
962 bumaxmn=n
963 IF(n > bumaxmn) bumaxmn = n
964 liwk = 1
965 ALLOCATE(iwk(liwk),burp(1),bucp(1),
966 & burs(1),bucs(1),
967 & stat=allocok)
968 IF (allocok > 0) THEN
969 id%INFO(1)=-13
970 id%INFO(2)=liwk+1+1+1+1
971 GOTO 400
972 ENDIF
973 lwk_real = m + n
974 ALLOCATE (wk_real(lwk_real), stat=allocok)
975 IF (allocok > 0) THEN
976 id%INFO(1)=-13
977 id%INFO(2)=lwk_real
978 GOTO 400
979 ENDIF
980 CALL mpi_comm_rank(comm_for_scaling, scmyid, ierr)
981 CALL mpi_comm_size(comm_for_scaling, scnprocs, ierr)
982 bujob = 1
984 & id%IRN(1), id%JCN(1), id%A(1),
985 & id%KEEP8(28),
986 & m, n, scnprocs, scmyid, comm_for_scaling,
987 & burp, bucp,
988 & burs, bucs, buregistre,
989 & iwk, liwk,
990 & buintsz, buresz, bujob,
991 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
992 & id%KEEP(50),
993 & k231, k232, k233,
994 & id%DKEEP(3),
995 & sconeerr, scinferr)
996 IF(lwk_real < buresz) THEN
997 id%INFO(1) = -136
998 GOTO 400
999 ENDIF
1000 bujob = 2
1001 CALL smumps_simscaleabs(id%IRN(1),
1002 & id%JCN(1), id%A(1),
1003 & id%KEEP8(28),
1004 & m, n, scnprocs, scmyid, comm_for_scaling,
1005 & burp, bucp,
1006 & burs, bucs, buregistre,
1007 & iwk, liwk,
1008 & buintsz, buresz, bujob,
1009 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
1010 & id%KEEP(50),
1011 & k231, k232, k233,
1012 & id%DKEEP(3),
1013 & sconeerr, scinferr)
1014 id%DKEEP(4) = sconeerr
1015 id%DKEEP(5) = scinferr
1016 400 CONTINUE
1017 IF (allocated(wk_real)) DEALLOCATE(wk_real)
1018 IF (allocated(iwk)) DEALLOCATE(iwk)
1019 IF (allocated(burp)) DEALLOCATE(burp)
1020 IF (allocated(bucp)) DEALLOCATE(bucp)
1021 IF (allocated(burs)) DEALLOCATE(burs)
1022 IF (allocated(bucs)) DEALLOCATE(bucs)
1023 ENDIF
1024C Centralized matrix: make DKEEP(4:5) available to all processors
1025 CALL mpi_bcast( id%DKEEP(4),2,mpi_real,
1026 & master, id%COMM, ierr )
1027 IF (id%MYID.EQ.master) THEN
1028C Communicator should only be
1029C freed on the master process
1030 CALL mpi_comm_free(comm_for_scaling, ierr)
1031 ENDIF
1032 CALL mumps_propinfo(icntl(1), id%INFO(1),
1033 & id%COMM, id%MYID)
1034 IF (id%INFO(1).LT.0) GOTO 517
1035 ELSE IF (id%MYID.EQ.master) THEN
1036C ----------------------------------
1037C Centralized scaling, options 1 to 6
1038C ----------------------------------
1039 IF (keep(52).GT.0 .AND. keep(52).LE.6) THEN
1040C ---------------------
1041C Allocate temporary
1042C workspace for scaling
1043C ---------------------
1044 IF ( keep(52) .eq. 5 .or.
1045 & keep(52) .eq. 6 ) THEN
1046C We have an explicit copy of the original
1047C matrix in complex format which should probably
1048C be avoided (but do we want to keep all
1049C those old scaling options ?)
1050 lwk = id%KEEP8(28)
1051 ELSE
1052 lwk = 1_8
1053 END IF
1054 lwk_real = 5 * n
1055 ALLOCATE( wk_real( lwk_real ), stat = ierr )
1056 IF ( ierr .GT. 0 ) THEN
1057 id%INFO(1) = -13
1058 id%INFO(2) = lwk_real
1059 GOTO 137
1060 END IF
1061 ALLOCATE( wk( lwk ), stat = ierr )
1062 IF ( ierr .GT. 0 ) THEN
1063 id%INFO(1) = -13
1064 CALL mumps_set_ierror(lwk, id%INFO(2))
1065 GOTO 137
1066 END IF
1067 CALL smumps_fac_a(n, id%KEEP8(28), keep(52), id%A(1),
1068 & id%IRN(1), id%JCN(1),
1069 & id%COLSCA(1), id%ROWSCA(1),
1070 & wk, lwk, wk_real, lwk_real, icntl(1), id%INFO(1) )
1071 DEALLOCATE( wk_real )
1072 DEALLOCATE( wk )
1073 ENDIF
1074 ENDIF
1075 ENDIF ! Scaling distributed matrices or centralized
1076 IF (keep(125).NE.0) THEN
1077C ------------------------
1078C If we enable the scaling of the |A11 A12| block
1079C we et to 1 the scaling corresponding to the Schur
1080C complement matrix A22
1081C ------------------------
1082 IF ((keep(60).GT.0) .and. (keep(116).GT.0)) THEN
1083C Schur is active, reset Schur entries to ONE
1084 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
1085 & keep(54).NE.0 ) THEN
1086C Scaling available on all procs
1087 DO i=1, n
1088 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1089 id%COLSCA(i) = one
1090 id%ROWSCA(i) = one
1091 ENDIF
1092 ENDDO
1093 ELSE IF ( id%MYID .EQ. master) THEN
1094C Scaling available on master
1095 DO i=1, n
1096 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1097 id%COLSCA(i) = one
1098 id%ROWSCA(i) = one
1099 ENDIF
1100 ENDDO
1101 ENDIF
1102 ENDIF
1103 ENDIF
1104 IF (id%MYID.EQ.master) THEN
1105 CALL mumps_secfin(timeet)
1106 id%DKEEP(92)=real(timeet)
1107C Print inf-norm after last KEEP(233) iterations of
1108C scaling option KEEP(52)=7 or 8 (SimScale)
1109C
1110 IF (prokg.AND.(keep(52).EQ.7.OR.keep(52).EQ.8)
1111 & .AND. (k233+k231+k232).GT.0) THEN
1112 IF (k232.GT.0) WRITE(mpg, 166) id%DKEEP(4)
1113 ENDIF
1114 ENDIF
1115 ENDIF ! LSCAL
1116C
1117C scaling might also be provided by the user
1118 lscal = (lscal .OR. (keep(52) .EQ. -1) .OR. keep(52) .EQ. -2)
1119 IF (lscal .AND. keep(258).NE.0 .AND. id%MYID .EQ. master) THEN
1120 DO i = 1, id%N
1121 CALL smumps_updatedeter_scaling(id%ROWSCA(i),
1122 & id%DKEEP(6), ! determinant
1123 & keep(259)) ! exponent of the determinant
1124 ENDDO
1125 IF (keep(50) .EQ. 0) THEN ! unsymmetric
1126 DO i = 1, id%N
1127 CALL smumps_updatedeter_scaling(id%COLSCA(i),
1128 & id%DKEEP(6), ! determinant
1129 & keep(259)) ! exponent of the determinant
1130 ENDDO
1131 ELSE
1132C -----------------------------------------
1133C In this case COLSCA = ROWSCA
1134C Since determinant was initialized to 1,
1135C compute square of the current determinant
1136C rather than going through COLSCA.
1137C -----------------------------------------
1138 CALL smumps_deter_square(id%DKEEP(6), keep(259))
1139 ENDIF
1140C Now we should have taken the
1141C inverse of the scaling vectors
1142 CALL smumps_deter_scaling_inverse(id%DKEEP(6), keep(259))
1143 ENDIF
1144C
1145C ********************
1146C End of Scaling phase
1147C At this point: either (matrix is distributed and KEEP(52)=7 or 8)
1148C in which case scaling arrays are allocated on all processors,
1149C or scaling arrays are only on the host processor.
1150C In case of distributed matrix input, we will free the scaling
1151C arrays on procs with MYID .NE. 0 after the all-to-all distribution
1152C of the original matrix.
1153C ********************
1154C
1155 137 CONTINUE
1156C Fwd in facto: in case of repeated factorizations
1157C with different Schur options we prefer to free
1158C systematically this array now than waiting for
1159C the root node. We rely on the fact that it is
1160C allocated or not during the solve phase so if
1161C it was allocated in a 1st call to facto and not
1162C in a second, we don't want the solve to think
1163C it was allocated in the second call.
1164 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN
1165 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT)
1166 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
1167 ENDIF
1168C Fwd in facto: check that id%NRHS has not changed
1169 IF ( id%MYID.EQ.master.AND. keep(252).EQ.1 .AND.
1170 & id%NRHS .NE. id%KEEP(253) ) THEN
1171C Error: NRHS should not have
1172C changed since the analysis
1173 id%INFO(1)=-42
1174 id%INFO(2)=id%KEEP(253)
1175 ENDIF
1176C Fwd in facto: allocate and broadcast RHS_MUMPS
1177C to make it available on all processors.
1178 IF (id%KEEP(252) .EQ. 1) THEN
1179 IF ( id%MYID.NE.master ) THEN
1180 id%KEEP(254) = n ! Leading dimension
1181 id%KEEP(255) = n*id%KEEP(253) ! Tot size
1182 ALLOCATE(rhs_mumps(id%KEEP(255)),stat=ierr)
1183 IF (ierr > 0) THEN
1184 id%INFO(1)=-13
1185 id%INFO(2)=id%KEEP(255)
1186 IF (lpok)
1187 & WRITE(lp,*) 'ERROR while allocating RHS on a slave'
1188 NULLIFY(rhs_mumps)
1189 ENDIF
1190 rhs_mumps_allocated = .true.
1191 ELSE
1192C Case of non working master
1193 id%KEEP(254)=id%LRHS ! Leading dimension
1194 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! tot size
1195 rhs_mumps=>id%RHS
1196 rhs_mumps_allocated = .false.
1197 IF (lscal) THEN
1198C Scale before broadcast: apply row
1199C scaling (remark that we assume no
1200C transpose).
1201 DO k=1, id%KEEP(253)
1202 DO i=1, n
1203 rhs_mumps( id%KEEP(254) * (k-1) + i )
1204 & = rhs_mumps( id%KEEP(254) * (k-1) + i )
1205 & * id%ROWSCA(i)
1206 ENDDO
1207 ENDDO
1208 ENDIF
1209 ENDIF
1210 ELSE
1211 id%KEEP(255)=1
1212 ALLOCATE(rhs_mumps(1),stat=ierr)
1213 IF (ierr > 0) THEN
1214 id%INFO(1)=-13
1215 id%INFO(2)=1
1216 IF (lpok)
1217 & WRITE(lp,*) 'ERREUR while allocating RHS on a slave'
1218 NULLIFY(rhs_mumps)
1219 ENDIF
1220 rhs_mumps_allocated = .true.
1221 ENDIF
1222 CALL mumps_propinfo( icntl(1), id%INFO(1),
1223 & id%COMM, id%MYID )
1224 IF ( id%INFO(1).lt.0 ) GOTO 517
1225 IF (keep(252) .EQ. 1) THEN
1226C
1227C Broadcast the columns of the right-hand side
1228C one by one. Leading dimension is keep(254)=N
1229C on procs with MYID > 0 but may be larger on
1230C the master processor.
1231 DO i= 1, id%KEEP(253)
1232 CALL mpi_bcast(rhs_mumps((i-1)*id%KEEP(254)+1), n,
1233 & mpi_real, master,id%COMM,ierr)
1234 END DO
1235 ENDIF
1236C Keep a copy of ICNTL(24) and make it
1237C available on all working processors.
1238 keep(110)=id%ICNTL(24)
1239 CALL mpi_bcast(keep(110), 1, mpi_integer,
1240 & master, id%COMM, ierr)
1241C KEEP(110) defaults to 0 for out of range values
1242 IF (keep(110).NE.1) keep(110)=0
1243 IF (keep(219).NE.0) THEN
1244 CALL smumps_buf_max_array_minsize(max(keep(108),1),ierr)
1245 IF (ierr .NE. 0) THEN
1246C ------------------------
1247C Error allocating SMUMPS_BUF
1248C ------------------------
1249 id%INFO(1) = -13
1250 id%INFO(2) = max(keep(108),1)
1251 END IF
1252 ENDIF
1253C -----------------------------------------------
1254C Depending on the option used for
1255C -detecting null pivots (ICNTL(24)/KEEP(110))
1256C CNTL(3) is used to set DKEEP(1)
1257C ( A row is considered as null if ||row|| < DKEEP(1) )
1258C CNTL(5) is then used to define if a large
1259C value is set on the diagonal or if a 1 is set
1260C and other values in the row are reset to zeros.
1261C SEUIL* corresponds to the minimum required
1262C absolute value of pivot.
1263C SEUIL_LDLT_NIV2 is used only in the
1264C case of SYM=2 within a niv2 node for which
1265C we have only a partial view of the fully summed rows.
1266 IF (id%MYID .EQ. master) cntl3 = id%CNTL(3)
1267 CALL mpi_bcast(cntl3, 1, mpi_real,
1268 & master, id%COMM, ierr)
1269 IF (id%MYID .EQ. master) cntl5 = id%CNTL(5)
1270 CALL mpi_bcast(cntl5, 1, mpi_real,
1271 & master, id%COMM, ierr)
1272 IF (id%MYID .EQ. master) cntl6 = id%CNTL(6)
1273 CALL mpi_bcast(cntl6, 1, mpi_real,
1274 & master, id%COMM, ierr)
1275 IF (id%MYID .EQ. master) id%DKEEP(8) = id%CNTL(7)
1276 CALL mpi_bcast(id%DKEEP(8), 1, mpi_real,
1277 & master, id%COMM, ierr)
1278 id%DKEEP(11) = id%DKEEP(8)/id%KEEP(461)
1279 id%DKEEP(12) = id%DKEEP(8)/id%KEEP(462)
1280 IF (keep(486).EQ.0) id%DKEEP(8) = zero
1281 compute_anorminf = .false.
1282 IF ( (keep(486) .NE. 0).AND. (id%DKEEP(8).LT.zero)) THEN
1283 compute_anorminf = .true.
1284 ENDIF
1285 IF (keep(19).NE.0) THEN
1286C Rank revealing factorisation
1287 compute_anorminf = .true.
1288 ENDIF
1289 IF (keep(110).NE.0) THEN
1290C Null pivot detection
1291 compute_anorminf = .true.
1292 ENDIF
1293 IF (id%DKEEP(8).LT.zero) THEN
1294C Experimental setting of CNTL(7)
1295 IF (compute_anorminf) THEN
1296 eff_size_schur = 0
1297 CALL smumps_anorminf( id , anorminf, lscal, eff_size_schur )
1298C If no schur ANORMINF fine for other cases
1299 ELSE
1300 anorminf = zero
1301 ENDIF
1302 id%DKEEP(8) = abs(id%DKEEP(8))*anorminf
1303C ANORMINF need be recomputed in case of schur
1304 IF ((keep(60).GT.0).AND.keep(116).GT.0) anorminf=zero
1305 ENDIF
1306C -------------------------------------------------------
1307C We compute ANORMINF, when needed, based on
1308C the infinite norm of Rowsca *A*Colsca
1309C and make it available on all working processes.
1310 IF (compute_anorminf) THEN
1311 eff_size_schur = 0
1312 IF (keep(60).GT.0) eff_size_schur = keep(116)
1313 CALL smumps_anorminf( id , anorminf, lscal, eff_size_schur )
1314 ELSE
1315 anorminf = zero
1316 ENDIF
1317C
1318 IF ((keep(19).NE.0).OR.(keep(110).NE.0)) THEN
1319 IF (prokg) THEN
1320 IF (keep(19).NE.0) THEN
1321 WRITE(mpg,'(A,1PD16.4)')
1322 & ' CNTL(3) for null pivot rows/singularities =',cntl3
1323 ELSE
1324 WRITE(mpg,'(A,1PD16.4)')
1325 & ' CNTL(3) for null pivot row detection =',cntl3
1326 ENDIF
1327 ENDIF
1328 ENDIF
1329 IF (keep(19).EQ.0) THEN
1330C -- RR is off
1331 seuil = zero
1332 id%DKEEP(9) = zero
1333 ELSE
1334C -- RR is on
1335C
1336C CNTL(3) is the threshold used in the following to compute
1337C DKEEP(9) the threshold under which the sing val. are considered
1338C as null and from which we start to look for a gap between two
1339C sing val.
1340 IF (cntl3 .LT. zero) THEN
1341 id%DKEEP(9) = abs(cntl(3))
1342 ELSE IF (cntl3 .GT. zero) THEN
1343 id%DKEEP(9) = cntl3*anorminf
1344 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1345 ENDIF
1346 IF (prokg) THEN
1347 WRITE(mpg, '(A,I16)')
1348 & ' ICNTL(56) rank revealing effective value =',keep(19)
1349 WRITE(mpg,'(A,1PD16.4)')
1350 & ' ...Threshold for singularities on the root =',id%DKEEP(9)
1351 ENDIF
1352C RR postponing considers that pivot rows with norm smaller
1353C than SEUIL should be postponed.
1354C SEUIL should be bigger than DKEEP(9), this means that
1355C DKEEP(13) should be bigger than 1.
1356 thresh_seuil = id%DKEEP(13)
1357 IF (id%DKEEP(13).LT.1) thresh_seuil = 10
1358 seuil = id%DKEEP(9)*thresh_seuil
1359 IF (prokg) WRITE(mpg,'(A,1PD16.4)')
1360 & ' ...Threshold for postponing =',seuil
1361 ENDIF !end KEEP(19)
1362 seuil_ldlt_niv2 = seuil
1363C -------------------------------
1364C -- Null pivot row detection
1365C -------------------------------
1366 IF (keep(110).EQ.0) THEN
1367C -- Null pivot is off
1368C Initialize DKEEP(1) to a negative value
1369C in order to avoid detection of null pivots
1370C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL
1371C in SMUMPS_FAC_I, where PIVNUL=DKEEP(1))
1372 id%DKEEP(1) = -1.0e0
1373 id%DKEEP(2) = zero
1374 ELSE
1375C -- Null pivot is on
1376 IF (keep(19).NE.0) THEN
1377C -- RR is on
1378C RR postponing considers that pivot rows of norm smaller that SEUIL
1379C should be postponed, but pivot rows smaller than DKEEP(1) are
1380C directly added to null space and thus considered as null pivot rows.
1381 IF ((id%DKEEP(10).LE.0).OR.(id%DKEEP(10).GT.1)) THEN
1382C DKEEP(10) is out of range, set to the default value 10-1
1383 id%DKEEP(1) = id%DKEEP(9)*1e-1
1384 ELSE
1385 id%DKEEP(1) = id%DKEEP(9)*id%DKEEP(10)
1386 ENDIF
1387 ELSE
1388C -- RR is off
1389C -- only Null pivot detection
1390C We keep strategy currently used in MUMPS 4.10.0
1391 IF (cntl3 .LT. zero) THEN
1392 id%DKEEP(1) = abs(cntl(3))
1393 ELSE IF (cntl3 .GT. zero) THEN
1394 id%DKEEP(1) = cntl3*anorminf
1395 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1396c id%DKEEP(1) = NPIV_CRITICAL_PATH*EPS*ANORMINF
1398 & n, keep(28), id%STEP(1), id%FRERE_STEPS(1), id%FILS(1),
1399 & id%NA(1), id%LNA, id%NE_STEPS(1), npiv_critical_path )
1400 id%DKEEP(1) = sqrt(real(npiv_critical_path))*eps*anorminf
1401 ENDIF
1402 ENDIF ! fin rank revealing
1403 IF ((keep(110).NE.0).AND.(prokg)) THEN
1404 WRITE(mpg, '(A,I16)')
1405 & ' ICNTL(24) null pivot rows detection =',keep(110)
1406 WRITE(mpg,'(A,1PD16.4)')
1407 & ' ...Zero pivot detection threshold =',id%DKEEP(1)
1408 ENDIF
1409 IF (cntl5.GT.zero) THEN
1410 id%DKEEP(2) = cntl5 * anorminf
1411 IF (prokg) WRITE(mpg,'(A,1PD10.3)')
1412 & ' ...Fixation for null pivots =',id%DKEEP(2)
1413 ELSE
1414 IF (prokg) WRITE(mpg,*) '...Infinite fixation '
1415 IF (id%KEEP(50).EQ.0) THEN
1416C Unsym
1417 ! the user let us choose a fixation. set in NEGATIVE
1418 ! to detect during facto when to set row to zero !
1419 id%DKEEP(2) = -max(1.0e10*anorminf,
1420 & sqrt(huge(anorminf))/1.0e8)
1421 ELSE
1422C Sym
1423 id%DKEEP(2) = zero
1424 ENDIF
1425 ENDIF
1426 ENDIF ! fin null pivot detection.
1427C Find id of root node if RR is on
1428 IF (keep(53).NE.0) THEN
1429 id_root =mumps_procnode(id%PROCNODE_STEPS(id%STEP(keep(20))),
1430 & id%KEEP(199))
1431 IF ( keep( 46 ) .NE. 1 ) THEN
1432 id_root = id_root + 1
1433 END IF
1434 ENDIF
1435C Second pass: set parameters for null pivot detection
1436C Allocate PIVNUL_LIST in case of null pivot detection
1437 lpn_list = 1
1438 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST)
1439 IF(keep(110) .EQ. 1) THEN
1440 lpn_list = n
1441 ENDIF
1442 IF (keep(19).NE.0 .AND.
1443 & (id_root.EQ.id%MYID .OR. id%MYID.EQ.master)) THEN
1444 lpn_list = n
1445 ENDIF
1446 ALLOCATE( id%PIVNUL_LIST(lpn_list),stat = ierr )
1447 IF ( ierr .GT. 0 ) THEN
1448 id%INFO(1)=-13
1449 id%INFO(2)=lpn_list
1450 END IF
1451 id%PIVNUL_LIST(1:lpn_list) = 0
1452 keep(109) = 0
1453C end set parameter for null pivot detection
1454 CALL mumps_propinfo( icntl(1), id%INFO(1),
1455 & id%COMM, id%MYID )
1456 IF ( id%INFO(1).lt.0 ) GOTO 517
1457C --------------------------------------------------------------
1458C STATIC PIVOTING
1459C -- Static pivoting only when RR and Null pivot detection OFF
1460C --------------------------------------------------------------
1461 keep(97) = 0
1462 IF ((keep(19).EQ.0).AND.(keep(110).EQ.0)) THEN
1463 IF (id%MYID .EQ. master) cntl4 = id%CNTL(4)
1464 CALL mpi_bcast( cntl4, 1, mpi_real,
1465 & master, id%COMM, ierr )
1466C
1467 IF ( cntl4 .GE. zero ) THEN
1468 keep(97) = 1
1469 IF ( cntl4 .EQ. zero ) THEN
1470C -- set seuil to sqrt(eps)*||A||
1471 IF(anorminf .EQ. zero) THEN
1472 eff_size_schur = 0
1473 IF (keep(60).GT.0) eff_size_schur = keep(116)
1474 CALL smumps_anorminf( id , anorminf, lscal,
1475 & eff_size_schur )
1476 ENDIF
1477 seuil = sqrt(eps) * anorminf
1478 ELSE
1479 seuil = cntl4
1480 ENDIF
1481 seuil_ldlt_niv2 = seuil
1482 ELSE
1483 seuil = zero
1484 ENDIF
1485 ENDIF
1486C set number of tiny pivots / 2x2 pivots in types 1 /
1487C 2x2 pivots in types 2, to zero. This is because the
1488C user can call the factorization step several times.
1489 keep(98) = 0
1490 keep(103) = 0
1491 keep(105) = 0
1492 maxs = 1_8
1493*
1494* Start allocations
1495* *****************
1496*
1497C
1498C The slaves can now perform the factorization
1499C
1500C
1501C Allocate id%S on all nodes
1502C or point to user provided data WK_USER when LWK_USER>0
1503C =======================
1504C
1505C Compute BLR_STRAT and a first estimation
1506C of MAXS, the size of id%S
1508 & maxs_base8, maxs_base_relaxed8,
1509 & blr_strat,
1510 & id%KEEP(1), id%KEEP8(1))
1511C
1512 maxs = maxs_base_relaxed8
1513 IF (wk_user_provided) THEN
1514C -- Set MAXS to size of WK_USER_
1515 maxs = id%KEEP8(24)
1516 ENDIF
1517 CALL mumps_propinfo( icntl(1), id%INFO(1),
1518 & id%COMM, id%MYID )
1519 IF (id%INFO(1) .LT. 0) THEN
1520 GOTO 517
1521 ENDIF
1522C
1523 id%KEEP8(75) = huge(id%KEEP8(75))
1524 id%KEEP8(76) = huge(id%KEEP8(76))
1525 IF (i_am_slave) THEN
1526C
1527 IF (id%KEEP8(4) .NE. 0_8) THEN
1528C
1529 IF ( .NOT. wk_user_provided ) THEN
1530C Set MAXS given BLR_STRAT, KEEP(201) and MAXS_BASE_RELAXED8
1532 & maxs,
1533 & blr_strat, id%KEEP(201), maxs_base_relaxed8,
1534 & id%KEEP(1), id%KEEP8(1), id%MYID, id%N, id%NELT,
1535 & id%NA(1), id%LNA, id%NSLAVES,
1536 & keep464copy, keep465copy,
1537 & id%INFO(1), id%INFO(2)
1538 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1539 & size(id%I8_L0_OMP,2)
1540 & )
1541C Given MAXS and max memory allowed KEEP8(4)
1542C compute in KEEP8(75) the number of real/complex
1543C available for dynamic allocations
1545 & maxs, id%MYID,
1546 & .false., ! UNDER_L0_OMP
1547 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1548 & blr_strat, id%KEEP(201),
1549 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1550 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1551 & size(id%I8_L0_OMP,2)
1552 & )
1553 ELSE
1554C KEEP8(75) dow not include MAXS, since WK_USER is provided
1556 & 0_8, id%MYID,
1557 & .false., ! UNDER_L0_OMP
1558 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1559 & blr_strat, id%KEEP(201),
1560 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1561 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1562 & size(id%I8_L0_OMP,2)
1563 & )
1564 ENDIF
1565 IF (keep(400) .GT.0) THEN
1566C ------------------------------
1567C compute KEEP8(75) under L0_OMP
1568C ------------------------------
1569C Save KEEP8(75) above L0_OMP to reset KEEP8(75)
1570C when starting FAC_PAR_M
1571 id%KEEP8(76) = id%KEEP8(75)
1573 & 0_8, ! MAXS=0_8
1574 & id%MYID,
1575 & .true., ! UNDER_L0_OMP
1576 & id%N, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1577 & blr_strat, id%KEEP(201),
1578 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1579 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1580 & size(id%I8_L0_OMP,2)
1581 & )
1582C KEEP8(75) holds the number of entries that
1583C can be allocated underL0.
1584C It will be used during SMUMPS_FAC_L0_OMP to adjust the
1585C the size of MUMPS_TPS_ARR(ITH)%LA
1586 ENDIF
1587 ENDIF ! MEM_ALLOWED
1588C
1589 ENDIF ! I_AM_SLAVE THEN
1590C
1591 IF (i_am_slave) THEN
1592 IF ( (keep(400).GT.0) .AND. (keep(406).EQ.2) ) THEN
1593C Compute KEEP8(77) the peak authorized used by
1594C SMUMPS_PERFORM_COPIES
1596 & id%MYID, id%N,
1597 & id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1598 & blr_strat, id%KEEP(201),
1599 & id%KEEP(1), id%KEEP8(1), id%INFO(1), id%INFO(2)
1600 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1601 & size(id%I8_L0_OMP,2)
1602 & )
1603 ENDIF
1604 ENDIF ! I_AM_SLAVE)
1605C
1606 CALL mumps_propinfo( icntl(1), id%INFO(1),
1607 & id%COMM, id%MYID )
1608 IF (id%INFO(1) .LT. 0) THEN
1609 GOTO 517
1610 ENDIF
1611 CALL mumps_seti8toi4(maxs, id%INFO(39))
1612 CALL smumps_avgmax_stat8(prokg, mpg, maxs, id%NSLAVES,
1613 & print_maxavg,
1614 & id%COMM, " Effective size of S (based on INFO(39))= ")
1615C
1616 IF ( i_am_slave ) THEN
1617C ------------------
1618C Dynamic scheduling
1619C ------------------
1620 CALL smumps_load_set_inicost( dble(id%COST_SUBTREES),
1621 & keep(64), id%DKEEP(15), keep(375), maxs )
1622 k28=keep(28)
1623 memory_md_arg = min(int(perlu,8) * ( maxs_base8 / 100_8 + 1_8 ),
1624C Restrict freedom from dynamic scheduler when
1625C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-MAXS_BASE8
1626C is negative after call to SMUMPS_MAX_MEM)
1627 & max(0_8, maxs-maxs_base8))
1628 CALL smumps_load_init( id, memory_md_arg, maxs )
1629C
1630C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC
1631C and the second one is in-core: we try to free OOC
1632C related data from previous factorization.
1633C
1634 CALL smumps_clean_ooc_data(id, ierr)
1635 IF (ierr < 0) THEN
1636 id%INFO(1) = -90
1637 id%INFO(2) = 0
1638 GOTO 112
1639 ENDIF
1640 IF (keep(201) .GT. 0) THEN
1641C -------------------
1642C OOC initializations
1643C -------------------
1644 IF (keep(201).EQ.1 !PANEL Version
1645 & .AND.keep(50).EQ.0 ! Unsymmetric
1646 & .AND.keep(251).NE.2 ! Store L to disk
1647 & ) THEN
1648 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON
1649 ELSE
1650 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON
1651 ENDIF
1652C ------------------------------
1653C Dimension IO buffer, KEEP(100)
1654C ------------------------------
1655 IF (keep(205) .GT. 0) THEN
1656 keep(100) = keep(205)
1657 ELSE
1658 IF (keep(201).EQ.1) THEN ! PANEL version
1659 i8tmp = int(id%OOC_NB_FILE_TYPE,8) *
1660 & 2_8 * int(keep(226),8)
1661 ELSE
1662 i8tmp = 2_8 * id%KEEP8(119)
1663 ENDIF
1664 i8tmp = i8tmp + int(max(keep(12),0),8) *
1665 & (i8tmp/100_8+1_8)
1666C we want to avoid too large IO buffers.
1667C 12M corresponds to 100Mbytes given to buffers.
1668 i8tmp = min(i8tmp, 12000000_8)
1669 keep(100)=int(i8tmp)
1670 ENDIF
1671 IF (keep(201).EQ.1) THEN
1672C Panel version. Force the use of a buffer.
1673 IF ( keep(99) < 3 ) THEN
1674 keep(99) = keep(99) + 3
1675 ENDIF
1676 ENDIF
1677C --------------------------
1678C Reset KEEP(100) to 0 if no
1679C buffer is used for OOC.
1680C --------------------------
1681 IF (keep(99) .LT.3) keep(100)=0
1682 IF((dble(keep(100))*dble(keep(35))/dble(2)).GT.
1683 & (dble(1999999999)))THEN
1684 IF (prokg) THEN
1685 WRITE(mpg,*)id%MYID,': Warning: DIM_BUF_IO might be
1686 & too big for Filesystem'
1687 ENDIF
1688 ENDIF
1689 ALLOCATE (id%OOC_INODE_SEQUENCE(keep(28),
1690 & id%OOC_NB_FILE_TYPE),
1691 & stat=ierr)
1692 IF ( ierr .GT. 0 ) THEN
1693 id%INFO(1) = -13
1694 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1695 NULLIFY(id%OOC_INODE_SEQUENCE)
1696 GOTO 112
1697 ENDIF
1698 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE),
1699 & stat=ierr)
1700 IF ( ierr .GT. 0 ) THEN
1701 id%INFO(1) = -13
1702 id%INFO(2) = id%OOC_NB_FILE_TYPE
1703 NULLIFY(id%OOC_TOTAL_NB_NODES)
1704 GOTO 112
1705 ENDIF
1706 ALLOCATE (id%OOC_SIZE_OF_BLOCK(keep(28),
1707 & id%OOC_NB_FILE_TYPE),
1708 & stat=ierr)
1709 IF ( ierr .GT. 0 ) THEN
1710 id%INFO(1) = -13
1711 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1712 NULLIFY(id%OOC_SIZE_OF_BLOCK)
1713 GOTO 112
1714 ENDIF
1715 ALLOCATE (id%OOC_VADDR(keep(28),id%OOC_NB_FILE_TYPE),
1716 & stat=ierr)
1717 IF ( ierr .GT. 0 ) THEN
1718 id%INFO(1) = -13
1719 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1720 NULLIFY(id%OOC_VADDR)
1721 GOTO 112
1722 ENDIF
1723 ENDIF
1724 ENDIF
1725 112 CALL mumps_propinfo( icntl(1), id%INFO(1),
1726 & id%COMM, id%MYID )
1727 IF (id%INFO(1) < 0) THEN
1728C LOAD_END must be done but not OOC_END_FACTO
1729 GOTO 513
1730 ENDIF
1731 IF (i_am_slave) THEN
1732 IF (keep(201) .GT. 0) THEN
1733 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
1734 CALL smumps_ooc_init_facto(id,maxs)
1735 ELSE
1736 WRITE(*,*) "Internal error in SMUMPS_FAC_DRIVER"
1737 CALL mumps_abort()
1738 ENDIF
1739 IF(id%INFO(1).LT.0)THEN
1740 GOTO 111
1741 ENDIF
1742 ENDIF
1743C First increment corresponds to the number of
1744C floating-point operations for subtrees allocated
1745C to the local processor.
1746 CALL smumps_load_update(0,.false.,dble(id%COST_SUBTREES),
1747 & id%KEEP(1),id%KEEP8(1))
1748 IF (id%INFO(1).LT.0) GOTO 111
1749 END IF
1750C -----------------------
1751C Manage main workarray S
1752C -----------------------
1753 earlyt3rootins = keep(200) .EQ.0
1754 & .OR. ( keep(200) .LT. 0 .AND. keep(400) .EQ. 0 )
1755#if defined (LARGEMATRICES)
1756 IF ( id%MYID .ne. master ) THEN
1757#endif
1758 IF (.NOT.wk_user_provided) THEN
1759 IF ( earlyt3rootins ) THEN
1760C Standard allocation strategy
1761 ALLOCATE (id%S(maxs),stat=ierr)
1762 id%KEEP8(23) = maxs
1763 IF ( ierr .GT. 0 ) THEN
1764 id%INFO(1) = -13
1765 CALL mumps_set_ierror(maxs, id%INFO(2))
1766C On some platforms (IBM for example), an
1767C allocation failure returns a non-null pointer.
1768C Therefore we nullify S
1769 NULLIFY(id%S)
1770 id%KEEP8(23)=0_8
1771 ENDIF
1772 ENDIF
1773 ELSE
1774 id%S => id%WK_USER(1:id%KEEP8(24))
1775 id%KEEP8(23) = 0_8
1776 ENDIF
1777#if defined (LARGEMATRICES)
1778 END IF
1779#endif
1780C
1781 111 CALL mumps_propinfo( icntl(1), id%INFO(1),
1782 & id%COMM, id%MYID )
1783 IF ( id%INFO(1).LT.0 ) GOTO 514
1784C --------------------------
1785C Initialization of modules
1786C related to data management
1787C --------------------------
1788 nb_active_fronts_estim = 3
1789 nb_threads = 1
1790!$ NB_THREADS = OMP_GET_MAX_THREADS()
1791C
1792 nb_active_fronts_estim = 3*nb_threads
1793 IF (i_am_slave) THEN
1794C
1795 CALL mumps_fdm_init('A',nb_active_fronts_estim, id%INFO)
1796C
1797 IF ( (keep(486).EQ.2)
1798 & .OR. ((keep(489).NE.0).AND.(keep(400).GT.1))
1799 & ) THEN
1800C In case of LRSOLVE or CompressCB,
1801C initialize nb of handlers to nb of BLR
1802C nodes estimated at analysis
1803 nb_fronts_f_estim = keep(470)
1804 ELSE
1805 IF (keep(489).NE.0) THEN
1806C Compress CB and no L0 OMP (or 1 thread under L0):
1807C NB_ACTIVE_FRONTS_ESTIM is too small,
1808C to limit nb of reallocations make it twice larger
1809 nb_fronts_f_estim = 2*nb_active_fronts_estim
1810 ELSE
1811 nb_fronts_f_estim = nb_active_fronts_estim
1812 ENDIF
1813 ENDIF
1814 CALL mumps_fdm_init('F',nb_fronts_f_estim, id%INFO )
1815 IF (id%INFO(1) .LT. 0 ) GOTO 114
1816#if ! defined(NO_FDM_DESCBAND)
1817C Storage of DESCBAND information
1818 CALL mumps_fdbd_init( nb_active_fronts_estim, id%INFO )
1819#endif
1820#if ! defined(NO_FDM_MAPROW)
1821C Storage of MAPROW and ROOT2SON information
1822 CALL mumps_fmrd_init( nb_active_fronts_estim, id%INFO )
1823#endif
1824 CALL smumps_blr_init_module( nb_fronts_f_estim, id%INFO )
1825 114 CONTINUE
1826 ENDIF
1827 CALL mumps_propinfo( icntl(1), id%INFO(1),
1828 & id%COMM, id%MYID )
1829C GOTO 500: one of the above module initializations failed
1830 IF ( id%INFO(1).LT.0 ) GOTO 500
1831C
1832C
1833C Allocate space for matrix in arrowhead
1834C ======================================
1835C
1836C CASE 1 : Matrix is assembled
1837C CASE 2 : Matrix is elemental
1838C
1839 IF ( keep(55) .eq. 0 ) THEN
1840C ------------------------------------
1841C Space has been allocated already for
1842C the integer part during analysis
1843C Only slaves need the arrowheads.
1844C ------------------------------------
1845 IF (associated( id%DBLARR)) THEN
1846 DEALLOCATE(id%DBLARR)
1847 NULLIFY(id%DBLARR)
1848 ENDIF
1849 IF ( i_am_slave .and. id%KEEP8(26) .ne. 0_8 ) THEN
1850 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = ierr )
1851 ELSE
1852 ALLOCATE( id%DBLARR( 1 ), stat =ierr )
1853 END IF
1854 IF ( ierr .NE. 0 ) THEN
1855 IF (lpok) THEN
1856 WRITE(lp,*) id%MYID,
1857 & ': Allocation error for DBLARR(',id%KEEP8(26),')'
1858 ENDIF
1859 id%INFO(1)=-13
1860 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1861 NULLIFY(id%DBLARR)
1862 GOTO 100
1863 END IF
1864 ELSE
1865C ----------------------------------------
1866C Allocate variable lists. Systematically.
1867C ----------------------------------------
1868 IF ( associated( id%INTARR ) ) THEN
1869 DEALLOCATE( id%INTARR )
1870 NULLIFY( id%INTARR )
1871 END IF
1872 IF ( i_am_slave .and. id%KEEP8(27) .ne. 0_8 ) THEN
1873 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok )
1874 IF ( allocok .GT. 0 ) THEN
1875 id%INFO(1) = -13
1876 CALL mumps_set_ierror(id%KEEP8(27), id%INFO(2))
1877 NULLIFY(id%INTARR)
1878 GOTO 100
1879 END IF
1880 ELSE
1881 ALLOCATE( id%INTARR(1),stat=allocok )
1882 IF ( allocok .GT. 0 ) THEN
1883 id%INFO(1) = -13
1884 id%INFO(2) = 1
1885 NULLIFY(id%INTARR)
1886 GOTO 100
1887 END IF
1888 END IF
1889C -----------------------------
1890C Allocate real values.
1891C On master, if hybrid host and
1892C no scaling, avoid the copy.
1893C -----------------------------
1894 IF (associated( id%DBLARR)) THEN
1895 DEALLOCATE(id%DBLARR)
1896 NULLIFY(id%DBLARR)
1897 ENDIF
1898 IF ( i_am_slave ) THEN
1899 IF ( id%MYID_NODES .eq. master
1900 & .AND. keep(46) .eq. 1
1901 & .AND. keep(52) .eq. 0 ) THEN
1902C --------------------------
1903C Simple pointer association
1904C --------------------------
1905 id%DBLARR => id%A_ELT
1906 ELSE
1907C ----------
1908C Allocation
1909C ----------
1910 IF ( id%KEEP8(26) .ne. 0_8 ) THEN
1911 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok )
1912 IF ( allocok .GT. 0 ) THEN
1913 id%INFO(1) = -13
1914 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1915 NULLIFY(id%DBLARR)
1916 GOTO 100
1917 END IF
1918 ELSE
1919 ALLOCATE( id%DBLARR(1), stat = allocok )
1920 IF ( allocok .GT. 0 ) THEN
1921 id%INFO(1) = -13
1922 id%INFO(2) = 1
1923 NULLIFY(id%DBLARR)
1924 GOTO 100
1925 END IF
1926 END IF
1927 END IF
1928 ELSE
1929 ALLOCATE( id%DBLARR(1), stat = allocok )
1930 IF ( allocok .GT. 0 ) THEN
1931 id%INFO(1) = -13
1932 id%INFO(2) = 1
1933 NULLIFY(id%DBLARR)
1934 GOTO 100
1935 END IF
1936 END IF
1937 END IF
1938C -----------------
1939C Also prepare some
1940C data for the root
1941C -----------------
1942 IF ( keep(38).NE.0 .AND. i_am_slave ) THEN
1943 CALL smumps_init_root_fac( id%N,
1944 & id%root, id%FILS(1), keep(38), id%KEEP(1), id%INFO(1) )
1945 END IF
1946C
1947C
1948 100 CONTINUE
1949C ----------------
1950C Check for errors
1951C ----------------
1952 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
1953 & id%COMM, id%MYID )
1954 IF ( id%INFO(1).LT.0 ) GOTO 500
1955C
1956C -----------------------------------
1957C
1958C DISTRIBUTION OF THE ORIGINAL MATRIX
1959C
1960C -----------------------------------
1961C
1962C TIMINGS: computed (and printed) on the host
1963C Next line: global time for distrib(arrowheads,elts)
1964C on the host. Synchronization has been performed.
1965 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
1966C -------------------------------------------
1967C S_PTR_ARG / MAXS_ARG will be used for id%S
1968C argument to arrowhead/element distribution
1969C routines: if id%S is not allocated, we pass
1970C S_DUMMY_ARG instead, which is not accessed.
1971C -------------------------------------------
1972 IF (earlyt3rootins) THEN
1973 s_ptr_arg => id%S
1974 maxs_arg = maxs
1975 ELSE
1976 s_ptr_arg => s_dummy_arg
1977 maxs_arg = 1
1978 ENDIF
1979C
1980 IF ( keep( 55 ) .eq. 0 ) THEN
1981C ----------------------------
1982C Original matrix is assembled
1983C Arrowhead format to be used.
1984C ----------------------------
1985C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer
1986C for the matrix in arrowhead format. They have been set by the
1987C analysis phase (SMUMPS_ANA_F and SMUMPS_ANA_G)
1988C
1989C ------------------------------------------------------------------
1990C Blocking is used for sending arrowhead records (I,J,VAL)
1991C buffer(1) is used to store number of bytes already packed
1992C buffer(2) number of records already packed
1993C KEEP(39) : Number of records (blocking factor)
1994C ------------------------------------------------------------------
1995C
1996C ---------------------------------------------
1997C In case of parallel root compute minimum
1998C size of workspace to receive arrowheads
1999C of root node. Will be used to check that
2000C MAXS is large enough for arrowheads (case
2001C of EARLYT3ROOTINS (KEEP(200)=0); if .NOT.
2002C EARLYT3ROOTINS (KEEP(200)=1), root will
2003C be assembled into id%S later and size of
2004C id%S will be checked later)
2005C ---------------------------------------------
2006 IF (earlyt3rootins .AND. keep(38).NE.0 .AND.
2007 & keep(60) .EQ.0 .AND. i_am_slave) THEN
2008 lwk = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK,
2009 & id%root%MYROW, 0, id%root%NPROW ),8)
2010 lwk = max( 1_8, lwk )
2011 lwk = lwk*
2012 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK,
2013 & id%root%MYCOL, 0, id%root%NPCOL ),8)
2014 lwk = max( 1_8, lwk )
2015 ELSE
2016 lwk = 1_8
2017 ENDIF
2018C MAXS must be at least 1, and in case of
2019C parallel root, large enough to receive
2020C arrowheads of root.
2021 IF (maxs .LT. int(lwk,8)) THEN
2022 id%INFO(1) = -9
2023 CALL mumps_set_ierror(lwk, id%INFO(2))
2024 ENDIF
2025 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2026 & id%COMM, id%MYID )
2027 IF ( id%INFO(1).LT.0 ) GOTO 500
2028C
2029 IF ( keep(54) .eq. 0 ) THEN
2030C ================================================
2031C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED
2032C ================================================
2033C A small integer workspace is needed to
2034C send the arrowheads.
2035 IF ( id%MYID .eq. master ) THEN
2036 ALLOCATE(iwk(id%N), stat=allocok)
2037 IF ( allocok .NE. 0 ) THEN
2038 id%INFO(1)=-13
2039 id%INFO(2)=id%N
2040 END IF
2041#if defined(largematrices)
2042 ALLOCATE (wk(lwk),stat=ierr)
2043 IF ( ierr .GT. 0 ) THEN
2044 id%INFO(1) = -13
2045 CALL mumps_set_ierror(lwk, id%INFO(2))
2046 write(6,*) ' PB1 ALLOC LARGEMAT'
2047 ENDIF
2048#endif
2049 ENDIF
2050 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2051 & id%COMM, id%MYID )
2052 IF ( id%INFO(1).LT.0 ) GOTO 500
2053 IF ( id%MYID .eq. master ) THEN
2054C
2055C --------------------------------
2056C MASTER sends arowheads using the
2057C global communicator with ranks
2058C also in global communicator
2059C IWK is used as temporary
2060C workspace of size N.
2061C --------------------------------
2062 IF ( .not. associated( id%INTARR ) ) THEN
2063 ALLOCATE( id%INTARR( 1 ),stat=ierr)
2064 IF ( ierr .GT. 0 ) THEN
2065 id%INFO(1) = -13
2066 id%INFO(2) = 1
2067 NULLIFY(id%INTARR)
2068 write(6,*) ' PB2 ALLOC INTARR'
2069 CALL mumps_abort()
2070 ENDIF
2071 ENDIF
2072 nbrecords = keep(39)
2073 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2074 nbrecords = int(id%KEEP8(28))
2075 ENDIF
2076#if defined(LARGEMATRICES)
2077 CALL smumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2078 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2079 & lscal, id%COLSCA(1), id%ROWSCA(1),
2080 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2081 & nbrecords,
2082 & lp, id%COMM, id%root, keep,id%KEEP8,
2083 & id%FILS(1), iwk(1), ! workspace of size N
2084 &
2085 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2086 & id%PTRAR(1), id%PTRAR(id%N+1),
2087 & id%FRERE_STEPS(1), id%STEP(1), wk(1), lwk,
2088 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2089 & id%CANDIDATES(1,1))
2090C write(6,*) '!!! A,IRN,JCN are freed during factorization '
2091 DEALLOCATE (id%A)
2092 NULLIFY(id%A)
2093 DEALLOCATE (id%IRN)
2094 NULLIFY (id%IRN)
2095 DEALLOCATE (id%JCN)
2096 NULLIFY (id%JCN)
2097 IF (.NOT.wk_user_provided) THEN
2098 IF (earlyt3rootins) THEN
2099 ALLOCATE (id%S(maxs),stat=ierr)
2100 id%KEEP8(23) = maxs
2101 IF ( ierr .GT. 0 ) THEN
2102 id%INFO(1) = -13
2103 id%INFO(2) = maxs
2104 NULLIFY(id%S)
2105 id%KEEP8(23)=0_8
2106 write(6,*) ' PB2 ALLOC LARGEMAT',maxs
2107 CALL mumps_abort()
2108 ENDIF
2109 ENDIF
2110 ENDIF
2111 ELSE
2112 id%S => id%WK_USER(1:id%KEEP8(24))
2113 ENDIF
2114 IF (earlyt3rootins) THEN
2115 id%S(maxs-lwk+1_8:maxs) = wk(1_8:lwk)
2116 ENDIF
2117 DEALLOCATE (wk)
2118#else
2119 CALL smumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2120 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2121 & lscal, id%COLSCA(1), id%ROWSCA(1),
2122 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2123 & nbrecords,
2124 & lp, id%COMM, id%root, keep(1),id%KEEP8(1),
2125 & id%FILS(1), iwk(1),
2126 &
2127 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2128 & id%PTRAR(1), id%PTRAR(id%N+1),
2129 & id%FRERE_STEPS(1), id%STEP(1), s_ptr_arg(1), maxs_arg,
2130 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2131 & id%CANDIDATES(1,1) )
2132#endif
2133 DEALLOCATE(iwk)
2134 ELSE
2135 nbrecords = keep(39)
2136 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2137 nbrecords = int(id%KEEP8(28))
2138 ENDIF
2139 CALL smumps_facto_recv_arrowhd2( id%N,
2140 & id%DBLARR(1), id%KEEP8(26),
2141 & id%INTARR(1), id%KEEP8(27),
2142 & id%PTRAR( 1 ),
2143 & id%PTRAR(id%N+1),
2144 & keep( 1 ), id%KEEP8(1), id%MYID, id%COMM,
2145 & nbrecords,
2146 &
2147 & s_ptr_arg(1), maxs_arg,
2148 & id%root,
2149 & id%PROCNODE_STEPS(1), id%NSLAVES,
2150 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1),
2151 & id%INFO(1), id%INFO(2) )
2152 ENDIF
2153 ELSE
2154C
2155C =============================================
2156C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED
2157C =============================================
2158C Timing on master.
2159 IF (id%MYID.EQ.master) THEN
2160 CALL mumps_secdeb(time)
2161 END IF
2162 IF ( i_am_slave ) THEN
2163C ---------------------------------------------------
2164C In order to have possibly IRN_loc/JCN_loc/A_loc
2165C of size 0, avoid to pass them inside REDISTRIBUTION
2166C and pass id instead
2167C NZ_locMAX8 gives as a maximum buffer size (send/recv) used
2168C an upper bound to limit buffers on small matrices
2169C ---------------------------------------------------
2170 CALL mpi_allreduce(id%KEEP8(29), nz_locmax8, 1, mpi_integer8,
2171 & mpi_max, id%COMM_NODES, ierr)
2172 nbrecords = keep(39)
2173 IF (nz_locmax8 .LT. int(nbrecords,8)) THEN
2174 nbrecords = int(nz_locmax8)
2175 ENDIF
2176 CALL smumps_redistribution( id%N,
2177 & id%KEEP8(29),
2178 & id,
2179 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1),
2180 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1),
2181 & keep(1), id%KEEP8(1), id%MYID_NODES,
2182 & id%COMM_NODES, nbrecords,
2183 & s_ptr_arg(1), maxs_arg, id%root, id%PROCNODE_STEPS(1),
2184 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1),
2185 & id%ICNTL(1), id%INFO(1), nsend8, nlocal8,
2186 & id%ISTEP_TO_INIV2(1),
2187 & id%CANDIDATES(1,1) )
2188 IF ( ( keep(52).EQ.7 ).OR. (keep(52).EQ.8) ) THEN
2189C -------------------------------------------------
2190C In that case, scaling arrays have been allocated
2191C on all processors. They were useful for matrix
2192C distribution. But we now really only need them
2193C on the host. In case of distributed solution, we
2194C will have to broadcast either ROWSCA or COLSCA
2195C (depending on MTYPE) but this is done later.
2196C
2197C In other words, on exit from the factorization,
2198C we want to have scaling arrays available only
2199C on the host.
2200C -------------------------------------------------
2201 IF ( id%MYID > 0 ) THEN
2202 IF (associated(id%ROWSCA)) THEN
2203 DEALLOCATE(id%ROWSCA)
2204 NULLIFY(id%ROWSCA)
2205 ENDIF
2206 IF (associated(id%COLSCA)) THEN
2207 DEALLOCATE(id%COLSCA)
2208 NULLIFY(id%COLSCA)
2209 ENDIF
2210 ENDIF
2211 ENDIF
2212#if defined(LARGEMATRICES)
2213C deallocate id%IRN_loc, id%JCN(loc) to free extra space
2214C Note that in this case IRN_loc cannot be used
2215C anymore during the solve phase for IR and Error analysis.
2216 IF (associated(id%IRN_loc)) THEN
2217 DEALLOCATE(id%IRN_loc)
2218 NULLIFY(id%IRN_loc)
2219 ENDIF
2220 IF (associated(id%JCN_loc)) THEN
2221 DEALLOCATE(id%JCN_loc)
2222 NULLIFY(id%JCN_loc)
2223 ENDIF
2224 IF (associated(id%A_loc)) THEN
2225 DEALLOCATE(id%A_loc)
2226 NULLIFY(id%A_loc)
2227 ENDIF
2228 write(6,*) ' Warning :',
2229 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! '
2230#endif
2231 IF (prok) THEN
2232 WRITE(mp,120) nlocal8, nsend8
2233 END IF
2234 END IF
2235 IF ( keep(46) .eq. 0 .AND. id%MYID.eq.master ) THEN
2236C ------------------------------
2237C The host is not working -> had
2238C no data from initial matrix
2239C ------------------------------
2240 nsend8 = 0_8
2241 nlocal8 = 0_8
2242 END IF
2243C --------------------------
2244C Put into some info/infog ?
2245C --------------------------
2246 CALL mpi_reduce( nsend8, nsend_tot8, 1, mpi_integer8,
2247 & mpi_sum, master, id%COMM, ierr )
2248 CALL mpi_reduce( nlocal8, nlocal_tot8, 1, mpi_integer8,
2249 & mpi_sum, master, id%COMM, ierr )
2250 IF ( prokg ) THEN
2251 WRITE(mpg,125) nlocal_tot8, nsend_tot8
2252 END IF
2253C
2254C -------------------------
2255C Check for possible errors
2256C -------------------------
2257 CALL mumps_propinfo( icntl(1), id%INFO(1),
2258 & id%COMM, id%MYID )
2259 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2260C
2261 ENDIF
2262 ELSE
2263C -------------------
2264C Matrix is elemental,
2265C provided on the
2266C master only
2267C -------------------
2268 IF ( id%MYID.eq.master)
2269 & CALL smumps_maxelt_size( id%ELTPTR(1),
2270 & id%NELT,
2271 & maxelt_size )
2272C
2273C Perform the distribution of the elements.
2274C A this point,
2275C PTRAIW/PTRARW have been computed.
2276C INTARR/DBLARR have been allocated
2277C ELTPROC gives the mapping of elements
2278C
2279 CALL smumps_elt_distrib( id%N, id%NELT, id%KEEP8(30),
2280 & id%COMM, id%MYID,
2281 & id%NSLAVES, id%PTRAR(1),
2282 & id%PTRAR(id%NELT+2),
2283 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26),
2284 & id%KEEP(1), id%KEEP8(1), maxelt_size,
2285 & id%FRTPTR(1), id%FRTELT(1),
2286 & s_ptr_arg(1), maxs_arg, id%FILS(1),
2287 & id, id%root )
2288C ----------------
2289C Broadcast errors
2290C ----------------
2291 CALL mumps_propinfo( icntl(1), id%INFO(1),
2292 & id%COMM, id%MYID )
2293 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2294 END IF ! Element entry
2295C ------------------------
2296C Time the redistribution:
2297C ------------------------
2298 IF ( id%MYID.EQ.master) THEN
2299 CALL mumps_secfin(time)
2300 id%DKEEP(93) = real(time)
2301 IF (prokg) WRITE(mpg,160) id%DKEEP(93)
2302 END IF
2303 IF ( keep(400) .GT. 0 ) THEN
2304C L0-OMP was active at analysis and
2305C thus will be active at factorization
2306C We check the number of threads.
2307 nomp=1
2308!$ NOMP = omp_get_max_threads()
2309 IF ( nomp .NE. keep(400) ) THEN
2310 id%INFO(1)=-58
2311 id%INFO(2)=keep(400)
2312 IF (lpok) WRITE(lp,'(A,A,I5,A,I5)')
2313 &" FAILURE DETECTED IN FACTORIZATION: #threads for KEEP(401)",
2314 &" changed from",keep(400)," at analysis to", nomp
2315 ENDIF
2316C error check
2317 CALL mumps_propinfo( icntl(1), id%INFO(1),
2318 & id%COMM, id%MYID )
2319 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2320 ENDIF
2321C
2322C TIMINGS:
2323C Next line: elapsed time for factorization
2324 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
2325C
2326C Allocate buffers on the workers
2327C ===============================
2328C
2329 IF ( i_am_slave ) THEN
2330 CALL smumps_buf_ini_myid(id%MYID_NODES)
2331C
2332C Some buffers are required to pack/unpack data and for
2333C receiving MPI messages.
2334C For packing/unpacking : the buffer must be large
2335C enough to send several messages while receives might not
2336C be posted yet.
2337C It is assumed that the size of an integer is held in KEEP(34)
2338C while the size of a complex is held in KEEP(35).
2339C BUFR and LBUFR are declared of type integer, since byte is not
2340C a standard datatype.
2341C We now use KEEP(43) or KEEP(379) and KEEP(44) or KEEP(380)
2342C as estimated at analysis to allocate appropriate buffer sizes
2343C
2344C Reception buffer
2345C ----------------
2346 IF (keep(486).NE.0) THEN
2347 smumps_lbufr_bytes8 = int(keep( 380 ),8) * int(keep( 35 ),8)
2348 ELSE
2349 smumps_lbufr_bytes8 = int(keep( 44 ),8) * int(keep( 35 ),8)
2350 ENDIF
2351C ---------------------------------------
2352C Ensure a reasonable minimal buffer size
2353C ---------------------------------------
2354 smumps_lbufr_bytes8 = max( smumps_lbufr_bytes8,
2355 & 100000_8 )
2356 IF ((keep(50).NE.0).AND.(keep(489).GT.0).AND.
2357 & (id%NSLAVES.GE.2)) THEN
2358C ----------------------------------------------------------
2359C Ensure large enough receive buffer in case of BLR with
2360C CompressCB for symmetric matrices.
2361C -----------------------------------------------------------
2362 ratiok465 = real(keep465copy)/real(1000)
2363 smumps_lbufr_bytes8 = max(smumps_lbufr_bytes8,
2364 & int(
2365 & ratiok465*
2366 & real(
2367 & int(keep(2)+1,8)*int(keep(142),8)*int(keep(35),8)
2368 & )
2369 & ,8)
2370 & )
2371 ENDIF
2372C
2373C If there is pivoting, size of the message might still increase.
2374C We use a relaxation (so called PERLU) to increase the estimate.
2375C
2376C Note: PERLU is a global estimate for pivoting.
2377C It may happen that one large contribution block size is increased
2378C by more than that.
2379C This is why we use an extra factor 2 relaxation coefficient for
2380C the relaxation of
2381C the reception buffer in the case where pivoting is allowed.
2382C A more dynamic strategy could be applied: if message to
2383C be received is larger than expected, reallocate a larger
2384C buffer. (But this won't work with IRECV.)
2385C Finally, one may want (as we are currently doing it for
2386C most messages)
2387C to cut large messages into a series of smaller ones.
2388C
2389 IF (keep(48).EQ.5) THEN
2390 min_perlu = 2
2391 ELSE
2392 min_perlu = 0
2393 ENDIF
2394C
2395 smumps_lbufr_bytes8 = smumps_lbufr_bytes8
2396 & + int( 2.0e0 * real(max(perlu,min_perlu))*
2397 & real(smumps_lbufr_bytes8)/100e0, 8)
2398 smumps_lbufr_bytes8 = min(smumps_lbufr_bytes8,
2399 & int(huge(i4)-100,8))
2400 smumps_lbufr_bytes = int( smumps_lbufr_bytes8 )
2401 IF (keep(48)==5) THEN
2402C Since the buffer is going to be allocated, use
2403C it as the constraint for memory/granularity
2404C in hybrid scheduler
2405C
2406 id%KEEP8(21) = id%KEEP8(22) +
2407 & int( real(max(perlu,min_perlu))*
2408 & real(id%KEEP8(22))/100e0,8)
2409 ENDIF
2410C
2411C Now estimate the size for the buffer for asynchronous
2412C sends of contribution blocks (so called CB). We want to be able to send at
2413C least KEEP(213)/100 (two in general) messages at the
2414C same time.
2415C
2416C Send buffer
2417C -----------
2418 IF (keep(486).NE.0) THEN
2419 smumps_lbuf8 = int( real(keep(213)) / 100.0e0 *
2420 & real(keep(379)) * real(keep(35)), 8 )
2421 ELSE
2422 smumps_lbuf8 = int( real(keep(213)) / 100.0e0 *
2423 & real(keep(43)) * real(keep(35)), 8 )
2424 ENDIF
2425 smumps_lbuf8 = max( smumps_lbuf8, 100000_8 )
2426 smumps_lbuf8 = smumps_lbuf8
2427 & + int( 2.0e0 * real(max(perlu,min_perlu))*
2428 & real(smumps_lbuf8)/100e0, 8)
2429C Make SMUMPS_LBUF8 small enough to be stored in a standard integer
2430 smumps_lbuf8 = min(smumps_lbuf8, int(huge(i4)-100,8))
2431C
2432C No reason to have send buffer smaller than receive buffer.
2433C This should never occur with the formulas above but just
2434C in case:
2435 smumps_lbuf8 = max(smumps_lbuf8, smumps_lbufr_bytes8+3*keep(34))
2436 smumps_lbuf = int(smumps_lbuf8)
2437 IF(id%KEEP(48).EQ.4)THEN
2438 smumps_lbufr_bytes=smumps_lbufr_bytes*5
2439 smumps_lbuf=smumps_lbuf*5
2440 ENDIF
2441C
2442C Estimate size of buffer for small messages
2443C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes
2444C
2445C KEEP(56) is the number of nodes of level II.
2446C Messages will be sent for the symmetric case
2447C for synchronisation issues.
2448C
2449C We take an upperbound
2450C
2451 smumps_lbuf_int = ( keep(56) + id%NSLAVES * id%NSLAVES ) * 5
2452 & * keep(34)
2453 IF ( keep( 38 ) .NE. 0 ) THEN
2454C
2455C
2456 kkkk = mumps_procnode( id%PROCNODE_STEPS(id%STEP(keep(38))),
2457 & id%KEEP(199) )
2458 IF ( kkkk .EQ. id%MYID_NODES ) THEN
2459 smumps_lbuf_int = smumps_lbuf_int + 4 * keep(34) *
2460 & ( id%NSLAVES + id%NE_STEPS(id%STEP(keep(38)))
2461 & + min(keep(56), id%NE_STEPS(id%STEP(keep(38)))) * id%NSLAVES
2462 & )
2463 END IF
2464 END IF
2465C At this point, SMUMPS_LBUFR_BYTES, SMUMPS_LBUF
2466C and SMUMPS_LBUF_INT have been computed (all
2467C are in numbers of bytes).
2468 IF ( prok ) THEN
2469 WRITE( mp, 9999 ) smumps_lbufr_bytes,
2470 & smumps_lbuf, smumps_lbuf_int
2471 END IF
2472 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/,
2473 & ' Size of reception buffer in bytes ...... = ', i10,
2474 & /,
2475 & ' Size of async. emission buffer (bytes).. = ', i10,/,
2476 & ' Small emission buffer (bytes) .......... = ', i10)
2477C --------------------------
2478C Allocate small send buffer
2479C required for SMUMPS_FAC_B
2480C --------------------------
2481 CALL smumps_buf_alloc_small_buf( smumps_lbuf_int, ierr )
2482 IF ( ierr .NE. 0 ) THEN
2483 id%INFO(1)= -13
2484C convert to size in integer id%INFO(2)= SMUMPS_LBUF_INT
2485 id%INFO(2)= (smumps_lbuf_int+keep(34)-1)/keep(34)
2486 IF (lpok) THEN
2487 WRITE(lp,*) id%MYID,
2488 & ':Allocation error in SMUMPS_BUF_ALLOC_SMALL_BUF'
2489 & ,id%INFO(2)
2490 ENDIF
2491 GO TO 110
2492 END IF
2493C
2494C --------------------------------------
2495C Allocate reception buffer on all procs
2496C This is done now.
2497C --------------------------------------
2498 smumps_lbufr = (smumps_lbufr_bytes+keep(34)-1)/keep(34)
2499 ALLOCATE( bufr( smumps_lbufr ),stat=ierr )
2500 IF ( ierr .NE. 0 ) THEN
2501 id%INFO(1) = -13
2502 id%INFO(2) = smumps_lbufr
2503 IF (lpok) THEN
2504 WRITE(lp,*)
2505 & ': Allocation error for BUFR(', smumps_lbufr,
2506 & ') on MPI process',id%MYID
2507 ENDIF
2508 GO TO 110
2509 END IF
2510C -----------------------------------------
2511C Estimate MAXIS. IS will be allocated in
2512C SMUMPS_FAC_B. It will contain factors and
2513C contribution blocks integer information
2514C -----------------------------------------
2515C Relax integer workspace based on PERLU
2516 perlu = keep( 12 )
2517 IF (keep(201).GT.0) THEN
2518C OOC panel or non panel (note that
2519C KEEP(15)=KEEP(225) if non panel)
2520 maxis_estim = keep(225)
2521 ELSE
2522C In-core or reals for factors not stored
2523 maxis_estim = keep(15)
2524 ENDIF
2525 maxis = max( 1, int( min( int(huge(maxis),8),
2526 & int(maxis_estim,8) + 3_8 * max(int(perlu,8),10_8) *
2527 & ( int(maxis_estim,8) / 100_8 + 1_8 )
2528 & ) ! min
2529 & ) ! int
2530 & ) !max
2531C ----------------------------
2532C Allocate PTLUST_S and PTRFAC
2533C They will be used to access
2534C factors in the solve phase.
2535C They are also needed for
2536C SMUMPS_FAC_L0_OMP.
2537C ----------------------------
2538 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = ierr )
2539 IF ( ierr .NE. 0 ) THEN
2540 id%INFO(1)=-13
2541 id%INFO(2)=id%KEEP(28)
2542 IF (lpok) THEN
2543 WRITE(lp,*) id%MYID,
2544 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')'
2545 ENDIF
2546 NULLIFY(id%PTLUST_S)
2547 GOTO 110
2548 END IF
2549 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = ierr )
2550 IF ( ierr .NE. 0 ) THEN
2551 id%INFO(1)=-13
2552 id%INFO(2)=id%KEEP(28)
2553 NULLIFY(id%PTRFAC)
2554 IF (lpok) THEN
2555 WRITE(lp,*) id%MYID,
2556 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')'
2557 ENDIF
2558 GOTO 110
2559 END IF
2560C -----------------------------
2561C Reserve temporary workspace :
2562C IPOOL, PTRWB, ITLOC, PTRIST
2563C PTRWB will be subdivided again
2564C in routine SMUMPS_FAC_B
2565C -----------------------------
2566 ptrist = 1
2567 ptrwb = ptrist + id%KEEP(28)
2568 itloc = ptrwb + 2 * id%KEEP(28)
2569C Fwd in facto: ITLOC of size id%N + id%KEEP(253)
2570 ipool = itloc + id%N + id%KEEP(253)
2571C
2572C --------------------------------
2573C NA(1) is an upperbound for LPOOL
2574C --------------------------------
2575C Structure of the pool:
2576C ____________________________________________________
2577C | Subtrees | | Top nodes | 1 2 3 |
2578C ----------------------------------------------------
2579 lpool = mumps_get_pool_length(id%NA(1), id%KEEP(1),id%KEEP8(1))
2580 ALLOCATE( iwk( ipool + lpool - 1 ), stat = ierr )
2581 IF ( ierr .NE. 0 ) THEN
2582 id%INFO(1)=-13
2583 id%INFO(2)=ipool + lpool - 1
2584 IF (lpok) THEN
2585 WRITE(lp,*) id%MYID,
2586 & ': Allocation error for IWK(',ipool+lpool-1,')'
2587 ENDIF
2588 GOTO 110
2589 END IF
2590 ALLOCATE(iwk8( 2 * id%KEEP(28)), stat = ierr)
2591 IF ( ierr .NE. 0 ) THEN
2592 id%INFO(1)=-13
2593 id%INFO(2)=2 * id%KEEP(28)
2594 IF (lpok) THEN
2595 WRITE(lp,*) id%MYID,
2596 & ': Allocation error for IWKB(', 2*id%KEEP(28),')'
2597 ENDIF
2598 GOTO 110
2599 END IF
2600C
2601C Return to SPMD
2602C
2603 ENDIF
2604C
2605 110 CONTINUE
2606C ----------------
2607C Broadcast errors
2608C ----------------
2609 CALL mumps_propinfo( icntl(1), id%INFO(1),
2610 & id%COMM, id%MYID )
2611 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2612C
2613 IF ( i_am_slave ) THEN
2614C Store size of receive buffers in SMUMPS_LBUF module
2615 CALL smumps_buf_dist_irecv_size( smumps_lbufr_bytes )
2616 IF (prok) THEN
2617 WRITE( mp, 170 ) maxs, maxis, id%KEEP8(12), keep(15),
2618 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), keep(26), keep(27)
2619 ENDIF
2620 END IF
2621C ===============================================================
2622C Before calling the main driver, SMUMPS_FAC_B,
2623C some statistics should be initialized to 0,
2624C even on the host node because they will be
2625C used in REDUCE operations afterwards.
2626C --------------------------------------------
2627C Size of factors written. It will be set to POSFAC in
2628C IC, otherwise we accumulate written factors in it.
2629 id%KEEP8(31)= 0_8
2630C Size of factors under L0 will be returned
2631C in id%KEEP8(64), not included in KEEP8(31))
2632C Number of entries in factors
2633 id%KEEP8(10) = 0_8
2634C KEEP8(8) will hold the volume of extra copies due to
2635C in-place stacking in fac_mem_stack.F
2636 id%KEEP8(8)=0_8
2637 id%INFO(9:14)=0
2638 rinfo(2:3)=zero
2639 IF ( i_am_slave ) THEN
2640C ------------------------------------
2641C Call effective factorization routine
2642C ------------------------------------
2643 IF ( keep(55) .eq. 0 ) THEN
2644 ldptrar = id%N
2645 ELSE
2646 ldptrar = id%NELT + 1
2647 END IF
2648 IF ( id%KEEP(55) .NE. 0 ) THEN
2649 nelt_arg = id%NELT
2650 ELSE
2651C ------------------------------
2652C Use size 1 to avoid complaints
2653C when using check bound options
2654C ------------------------------
2655 nelt_arg = 1
2656 END IF
2657 ENDIF
2658 IF (i_am_slave) THEN
2659 IF (associated(id%L0_OMP_MAPPING))
2660 & DEALLOCATE(id%L0_OMP_MAPPING)
2661 IF (keep(400) .GT. 0) THEN
2662 id%LL0_OMP_MAPPING = keep(28)
2663 ELSE
2664 id%LL0_OMP_MAPPING = 1
2665 ENDIF
2666 ALLOCATE(id%L0_OMP_MAPPING(id%LL0_OMP_MAPPING), stat=allocok)
2667 IF ( allocok > 0) THEN
2668 write(*,*) "Problem allocating L0_OMP_MAPPING",
2669 & ierr, keep(28)
2670 GOTO 115
2671 ENDIF
2672 IF (associated(id%L0_OMP_FACTORS)) THEN
2673 CALL smumps_free_l0_omp_factors(id%L0_OMP_FACTORS)
2674 ENDIF
2675 IF (keep(400) .GT. 0) THEN
2676 id%LL0_OMP_FACTORS = keep(400)
2677 ELSE
2678 id%LL0_OMP_FACTORS = 1
2679 ENDIF
2680 ALLOCATE(id%L0_OMP_FACTORS(id%LL0_OMP_FACTORS),stat = allocok)
2681 IF (allocok > 0) THEN
2682 id%INFO(1)=-7
2683 id%INFO(2)=nb_threads
2684 GOTO 111
2685 ENDIF
2686 CALL smumps_init_l0_omp_factors(id%L0_OMP_FACTORS)
2687 ENDIF
2688 115 CONTINUE
2689 CALL mumps_propinfo( icntl(1), id%INFO(1),
2690 & id%COMM, id%MYID )
2691 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2692C Compute DKEEP(17)
2693 avg_flops = rinfog(1)/(real(id%NSLAVES))
2694 id%DKEEP(17) = max( id%DKEEP(18), avg_flops/real(50) )
2695 &
2696 IF (prok.AND.id%MYID.EQ.master) THEN
2697 IF (id%NSLAVES.LE.1) THEN
2698 WRITE(mp,'(/A,A,1PD10.3)')
2699 &' Start factorization with total',
2700 &' estimated flops (RINFOG(1)) = ',
2701 & rinfog(1)
2702 ELSE
2703 WRITE(mp,'(/A,A,1PD10.3,A,1PD10.3)')
2704 &' Start factorization with total',
2705 &' estimated flops RINFOG(1) / Average per MPI proc = ',
2706 & rinfog(1), ' / ', avg_flops
2707 ENDIF
2708 ENDIF
2709 IF (i_am_slave) THEN
2710C IS/S pointers passed to SMUMPS_FAC_B with
2711C implicit interface through intermediate
2712C structure S_IS_POINTERS. IS will be allocated
2713C during SMUMPS_FAC_B.
2714C In case of L0OMP, id%IS and id%S are allocated during
2715C SMUMPS_FAC_B, and only after L0OMP nodes are processed,
2716C in order to limit the global memory peak.
2717 s_is_pointers%IW => id%IS; NULLIFY(id%IS)
2718 s_is_pointers%A => id%S ; NULLIFY(id%S)
2719 CALL smumps_fac_b(id%N,s_is_pointers,maxs,maxis,id%SYM_PERM(1),
2720 & id%NA(1),id%LNA,id%NE_STEPS(1),id%ND_STEPS(1), id%FILS(1),
2721 & id%STEP(1),id%FRERE_STEPS(1),id%DAD_STEPS(1),id%CANDIDATES(1,1),
2722 & id%ISTEP_TO_INIV2(1),id%TAB_POS_IN_PERE(1,1), id%PTRAR(1),
2723 & ldptrar,iwk(ptrist),id%PTLUST_S(1),id%PTRFAC(1),iwk(ptrwb),iwk8,
2724 & iwk(itloc),rhs_mumps(1),iwk(ipool),lpool,cntl1,icntl(1),
2725 & id%INFO(1), rinfo(1),keep(1),id%KEEP8(1),id%PROCNODE_STEPS(1),
2726 & id%NSLAVES,id%COMM_NODES,id%MYID,id%MYID_NODES,bufr,smumps_lbufr
2727 & , smumps_lbufr_bytes, smumps_lbuf, id%INTARR(1),id%DBLARR(1),
2728 & id%root, nelt_arg, id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD,
2729 & id%ASS_IRECV, seuil, seuil_ldlt_niv2, id%MEM_DIST(0),
2730 & id%DKEEP(1), id%PIVNUL_LIST(1), lpn_list, id%LRGROUPS(1)
2731 & ,id%IPOOL_B_L0_OMP(1),id%LPOOL_B_L0_OMP,
2732 & id%IPOOL_A_L0_OMP(1),id%LPOOL_A_L0_OMP,id%L_VIRT_L0_OMP,
2733 & id%VIRT_L0_OMP(1), id%VIRT_L0_OMP_MAPPING(1),id%L_PHYS_L0_OMP,
2734 & id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1), id%PTR_LEAFS_L0_OMP(1),
2735 & id%L0_OMP_MAPPING(1),id%LL0_OMP_MAPPING,
2736 & id%THREAD_LA, id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS,
2737 & id%I4_L0_OMP(1,1), size(id%I4_L0_OMP,1), size(id%I4_L0_OMP,2),
2738 & id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1), size(id%I8_L0_OMP,2)
2739 & )
2740 id%IS => s_is_pointers%IW; NULLIFY(s_is_pointers%IW)
2741 id%S => s_is_pointers%A ; NULLIFY(s_is_pointers%A)
2742C
2743C ------------------------------
2744C Deallocate temporary workspace
2745C ------------------------------
2746 DEALLOCATE( iwk )
2747 DEALLOCATE( iwk8 )
2748 ENDIF
2749C ---------------------------------
2750C Free some workspace corresponding
2751C to the original matrix in
2752C arrowhead or elemental format.
2753C -----
2754C Note : INTARR was not allocated
2755C during factorization in the case
2756C of an assembled matrix.
2757C ---------------------------------
2758 IF ( keep(55) .eq. 0 ) THEN
2759C
2760C ----------------
2761C Assembled matrix
2762C ----------------
2763 IF (associated( id%DBLARR)) THEN
2764 DEALLOCATE(id%DBLARR)
2765 NULLIFY(id%DBLARR)
2766 ENDIF
2767C
2768 ELSE
2769C
2770C ----------------
2771C Elemental matrix
2772C ----------------
2773 IF (associated(id%INTARR)) THEN
2774 DEALLOCATE( id%INTARR)
2775 NULLIFY( id%INTARR )
2776 ENDIF
2777C ------------------------------------
2778C For the master from an hybrid host
2779C execution without scaling, then real
2780C values have not been copied !
2781C -------------------------------------
2782 IF ( id%MYID_NODES .eq. master
2783 & .AND. keep(46) .eq. 1
2784 & .AND. keep(52) .eq. 0 ) THEN
2785 NULLIFY( id%DBLARR )
2786 ELSE
2787 IF (associated( id%DBLARR)) THEN
2788 DEALLOCATE(id%DBLARR)
2789 NULLIFY(id%DBLARR)
2790 ENDIF
2791 END IF
2792 END IF
2793C Memroy statistics
2794C -----------------------------------
2795C If QR (Keep(19)) is not zero, and if
2796C the host does not have the information
2797C (ie is not slave), send information
2798C computed on the slaves during facto
2799C to the host.
2800C -----------------------------------
2801 IF ( keep(19) .NE. 0 ) THEN
2802 IF ( keep(46) .NE. 1 ) THEN
2803C Host was not working during facto_root
2804C Send him the information
2805 IF ( id%MYID .eq. master ) THEN
2806 CALL mpi_recv( keep(17), 1, mpi_integer, 1, defic_tag,
2807 & id%COMM, status, ierr )
2808 CALL mpi_recv( keep(143), 1, mpi_integer, 1, defic_tag,
2809 & id%COMM, status, ierr )
2810 ELSE IF ( id%MYID .EQ. 1 ) THEN
2811 CALL mpi_send( keep(17), 1, mpi_integer, 0, defic_tag,
2812 & id%COMM, ierr )
2813 CALL mpi_send( keep(143), 1, mpi_integer, 0, defic_tag,
2814 & id%COMM, ierr )
2815 END IF
2816 END IF
2817 END IF
2818C --------------------------------
2819C Deallocate communication buffers
2820C They will be reallocated
2821C in the solve.
2822C --------------------------------
2823 IF (allocated(bufr)) DEALLOCATE(bufr)
2824 CALL smumps_buf_deall_small_buf( ierr )
2825C//PIV
2826 IF (keep(219).NE.0) THEN
2828 ENDIF
2829C
2830C Check for errors.
2831C After SMUMPS_FAC_B every slave is aware of an error.
2832C If master is included in computations, the call below should
2833C not be necessary.
2834 CALL mumps_propinfo( icntl(1), id%INFO(1),
2835 & id%COMM, id%MYID )
2836C
2838 IF (keep(201) .GT. 0) THEN
2839 IF ((keep(201).EQ.1) .OR. (keep(201).EQ.2)) THEN
2840 IF ( i_am_slave ) THEN
2841 CALL smumps_ooc_clean_pending(ierr)
2842 IF(ierr.LT.0)THEN
2843 id%INFO(1)=ierr
2844 id%INFO(2)=0
2845 ENDIF
2846 ENDIF
2847 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2848 & id%COMM, id%MYID )
2849C We want to collect statistics even in case of
2850C error to understand if it is due to numerical
2851C issues
2852CC IF ( id%INFO(1) < 0 ) GOTO 500
2853 END IF
2854 END IF
2855 IF (id%MYID.EQ.master) THEN
2856 CALL mumps_secfin(time)
2857 id%DKEEP(94)=real(time)
2858 IF (keep(400).GT.0) THEN
2859C Facto time above L0_OMP = total time - facto time under L0_OMP
2860 id%DKEEP(96)=id%DKEEP(94)-id%DKEEP(95)
2861 ENDIF
2862 ENDIF
2863C =====================================================================
2864C COMPUTE MEMORY ALLOCATED BY MUMPS, INFO(16)
2865C ---------------------------------------------
2866 mem_eff_allocated = .true.
2867 CALL smumps_max_mem( id%KEEP(1),id%KEEP8(1),
2868 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2869 & id%KEEP8(30),
2870 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2871 & blr_strat, .true., total_bytes,
2872 & idummy, bdummy, mem_eff_allocated
2873 & , .false. ! UNDER_L0_OMP
2874 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2875 & size(id%I8_L0_OMP,2)
2876 & )
2877 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2878 CALL smumps_max_mem( id%KEEP(1),id%KEEP8(1),
2879 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2880 & id%KEEP8(30),
2881 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2882 & blr_strat, .true., total_bytes_under_l0,
2883 & idummy, bdummy, mem_eff_allocated
2884 & , .true. ! UNDER_L0_OMP
2885 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2886 & size(id%I8_L0_OMP,2)
2887 & )
2888 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2889 total_bytes = max(total_bytes, total_bytes_under_l0)
2890 ENDIF
2891 IF (id%KEEP8(24).NE.0) THEN
2892C WK_USER is not part of memory allocated by MUMPS
2893C and is not counted, id%KEEP8(23) should be zero
2894 id%INFO(16) = total_mbytes
2895 ELSE
2896C Note that even for the case of ICNTL(23)>0
2897C we report here the memory effectively allocated
2898C that can be smaller than ICNTL(23) !
2899 id%INFO(16) = total_mbytes
2900 ENDIF
2901C ----------------------------------------------------
2902C Centralize memory statistics on the host
2903C id%INFOG(18) = size of mem in Mbytes for facto,
2904C for the processor using largest memory
2905C id%INFOG(19) = size of mem in Mbytes for facto,
2906C sum over all processors
2907C ----------------------------------------------------
2908 CALL mumps_mem_centralize( id%MYID, id%COMM,
2909 & id%INFO(16), id%INFOG(18), irank )
2910 CALL smumps_print_allocated_mem( prok, prokg, print_maxavg,
2911 & mp, mpg, id%INFO(16), id%INFOG(18), id%INFOG(19),
2912 & id%NSLAVES, irank,
2913 & id%KEEP(1) )
2914C If WK_USER is provided, this excludes WK_USER
2915 IF (prok ) THEN
2916 WRITE(mp,'(A,I12) ')
2917 & ' ** Eff. min. Space MBYTES for facto (INFO(16)):',
2918 & total_mbytes
2919 ENDIF
2920C ========================(INFO(16) RELATED)======================
2921C ---------------------------------------
2922C COMPUTE EFFECTIVE MEMORY USED INFO(22)
2923C ---------------------------------------
2924 perlu_on = .true.
2925 mem_eff_allocated = .false.
2926 CALL smumps_max_mem( id%KEEP(1),id%KEEP8(1),
2927 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2928 & id%KEEP8(30),
2929 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2930 & blr_strat, perlu_on, total_bytes,
2931 & idummy, bdummy, mem_eff_allocated
2932 & , .false. ! UNDER_L0_OMP
2933 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2934 & size(id%I8_L0_OMP,2)
2935 & )
2936 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2937 CALL smumps_max_mem( id%KEEP(1),id%KEEP8(1),
2938 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2939 & id%KEEP8(30),
2940 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2941 & blr_strat, perlu_on, total_bytes_under_l0,
2942 & idummy, bdummy, mem_eff_allocated
2943 & , .true. ! UNDER_L0_OMP
2944 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2945 & size(id%I8_L0_OMP,2)
2946 & )
2947 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2948 total_bytes = max(total_bytes, total_bytes_under_l0)
2949 ENDIF
2950C -- TOTAL_BYTES and TOTAL_MBYTES includes both static
2951C -- (MAXS) and BLR structures computed as the SUM of the PEAKS
2952C -- (KEEP8(67) + KEEP8(70))
2953 id%KEEP8(7) = total_bytes
2954C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS
2955C -- (it includes part of WK_USER used if provided by user)
2956 id%INFO(22) = total_mbytes
2957C ----------------------------------------------------
2958C Centralize memory statistics on the host
2959C INFOG(21) = size of effective mem (Mbytes) for facto,
2960C for the processor using largest memory
2961C INFOG(22) = size of effective mem (Mbytes) for facto,
2962C sum over all processors
2963C ----------------------------------------------------
2964 CALL mumps_mem_centralize( id%MYID, id%COMM,
2965 & id%INFO(22), id%INFOG(21), irank )
2966 IF ( prokg ) THEN
2967 IF (print_maxavg) THEN
2968 WRITE( mpg,'(A,I12) ')
2969 & ' ** Memory effectively used, max in Mbytes (INFOG(21)):',
2970 & id%INFOG(21)
2971 ENDIF
2972 WRITE( mpg,'(A,I12) ')
2973 & ' ** Memory effectively used, total in Mbytes (INFOG(22)):',
2974 & id%INFOG(22)
2975 END IF
2976 sum_info22_this_node=0
2977 CALL mpi_reduce( id%INFO(22), sum_info22_this_node, 1,
2978 & mpi_integer,
2979 & mpi_sum, 0, id%KEEP(411), ierr )
2980 CALL mpi_reduce( sum_info22_this_node, max_sum_info22_this_node,
2981 & 1, mpi_integer, mpi_max, 0, id%COMM, ierr )
2982 IF (prokg .AND. print_nodeinfo) THEN
2983 WRITE(mpg,'(A,I12)')
2984 & ' ** Max. effective space per compute node, in MBytes :',
2985 & max_sum_info22_this_node
2986 ENDIF
2987C
2988 IF (i_am_slave) THEN
2989 k67 = id%KEEP8(67)
2990 k68 = id%KEEP8(68)
2991 k70 = id%KEEP8(70)
2992 k74 = id%KEEP8(74)
2993 k75 = id%KEEP8(75)
2994 ELSE
2995 k67 = 0_8
2996 k68 = 0_8
2997 k70 = 0_8
2998 k74 = 0_8
2999 k75 = 0_8
3000 ENDIF
3001C -- Save the number of entries effectively used
3002C in main working array S
3003 CALL mumps_seti8toi4(k67,id%INFO(21))
3004C
3005C
3006 IF (keep(400) .GT.0 ) THEN
3007 IF (.NOT. i_am_slave) THEN
3008 id%DKEEP(95) = 0.0e0
3009 id%DKEEP(16) = 0.0e0
3010 ENDIF
3011 IF (id%NPROCS .GT. 1) THEN
3012C Compute average and max (across MPI's)
3013 CALL mpi_reduce(id%DKEEP(95), tmptime, 1,
3014 & mpi_real, mpi_sum, master, id%COMM, ierr)
3015 IF (id%MYID.EQ.master) timeavg=dble(tmptime)
3016 CALL mpi_reduce(id%DKEEP(16), tmpflop, 1,
3017 & mpi_real, mpi_sum, master, id%COMM, ierr)
3018 IF (id%MYID.EQ.master) flopavg=dble(tmpflop)
3019 IF (id%MYID.EQ.master) THEN
3020 timeavg = timeavg / id%NSLAVES
3021 flopavg = flopavg / id%NSLAVES
3022 ENDIF
3023 CALL mpi_reduce(id%DKEEP(95), tmptime, 1,
3024 & mpi_real, mpi_max, master, id%COMM, ierr)
3025 IF (id%MYID.EQ.master) timemax=dble(tmptime)
3026 CALL mpi_reduce(id%DKEEP(16), tmpflop, 1,
3027 & mpi_real, mpi_max, master, id%COMM, ierr)
3028 IF (id%MYID.EQ.master) flopmax=dble(tmpflop)
3029C (PROKG may only be true on master)
3030 IF ( prokg ) THEN
3031 WRITE(mpg,190) flopavg, flopmax
3032 WRITE(mpg,188) timeavg, timemax
3033 ENDIF
3034 ELSE
3035C Print DKEEP(95) directly without reduction
3036 IF ( prokg ) THEN
3037 WRITE(mpg,189) id%DKEEP(16)
3038 WRITE(mpg,187) id%DKEEP(95)
3039 ENDIF
3040 ENDIF
3041 ENDIF
3042 IF ( prokg ) THEN
3043 IF (id%INFO(1) .GE.0) THEN
3044 WRITE(mpg,180) id%DKEEP(94)
3045 ELSE
3046 WRITE(mpg,185) id%DKEEP(94)
3047 ENDIF
3048 ENDIF
3049C
3050C Sum RINFO(2) : total number of flops for assemblies
3051C Sum RINFO(3) : total number of flops for eliminations
3052C Initialize RINFO(4) in case BLR was not activated
3053 rinfo(4) = rinfo(3)
3054C
3055C Should work even if the master does some work
3056C
3057 CALL mpi_reduce( rinfo(2), rinfog(2), 2,
3058 & mpi_real,
3059 & mpi_sum, master, id%COMM, ierr)
3060C Reduce needed to dimension small working array
3061C on all procs during SMUMPS_GATHER_SOLUTION
3062 keep(247) = 0
3063 CALL mpi_reduce( keep(246), keep(247), 1, mpi_integer,
3064 & mpi_max, master, id%COMM, ierr)
3065C
3066C Reduce compression times: get max compression times
3067 CALL mpi_reduce( id%DKEEP(97), id%DKEEP(98), 1,
3068 & mpi_real,
3069 & mpi_max, master, id%COMM, ierr)
3070C
3071 CALL mpi_reduce( rinfo(2), rinfog(2), 2,
3072 & mpi_real,
3073 & mpi_sum, master, id%COMM, ierr)
3074 CALL mumps_reducei8( id%KEEP8(31)+id%KEEP8(64),id%KEEP8(6),
3075 & mpi_sum, master, id%COMM )
3076C
3077 IF (id%MYID.EQ.0) THEN
3078C In MegaBytes
3079 rinfog(16) = real(id%KEEP8(6)*int(keep(35),8))/real(1e6)
3080 IF (keep(201).LE.0) THEN
3081 rinfog(16) = zero
3082 ENDIF
3083 ENDIF
3084 CALL mumps_reducei8( id%KEEP8(48),id%KEEP8(148), mpi_sum,
3085 & master, id%COMM )
3086 CALL mumps_seti8toi4(id%KEEP8(148), infog(9))
3087C
3088 CALL mpi_reduce( int(id%INFO(10),8), id%KEEP8(128),
3089 & 1, mpi_integer8,
3090 & mpi_sum, master, id%COMM, ierr)
3091 IF (id%MYID.EQ.master) THEN
3092 CALL mumps_seti8toi4(id%KEEP8(128), id%INFOG(10))
3093 ENDIF
3094C Use MPI_MAX for this one to get largest front size
3095 CALL mpi_allreduce( id%INFO(11), infog(11), 1, mpi_integer,
3096 & mpi_max, id%COMM, ierr)
3097C make maximum effective frontal size available on all procs
3098C for solve phase
3099C (Note that INFO(11) includes root size on root master)
3100 keep(133) = infog(11)
3101 CALL mpi_reduce( id%INFO(12), infog(12), 3, mpi_integer,
3102 & mpi_sum, master, id%COMM, ierr)
3103 CALL mpi_reduce( keep(103), infog(25), 1, mpi_integer,
3104 & mpi_sum, master, id%COMM, ierr)
3105 keep(229) = infog(25)
3106 CALL mpi_reduce( keep(105), infog(25), 1, mpi_integer,
3107 & mpi_sum, master, id%COMM, ierr)
3108 keep(230) = infog(25)
3109C
3110 id%INFO(25) = keep(98)
3111 CALL mpi_allreduce( id%INFO(25), infog(25), 1, mpi_integer,
3112 & mpi_sum, id%COMM, ierr)
3113C Extra copies due to in-place stacking
3114 CALL mumps_reducei8( id%KEEP8(8), id%KEEP8(108), mpi_sum,
3115 & master, id%COMM )
3116C Entries in factors
3117 CALL mumps_seti8toi4(id%KEEP8(10), id%INFO(27))
3118 CALL mumps_reducei8( id%KEEP8(10),id%KEEP8(110), mpi_sum,
3119 & master, id%COMM )
3120 CALL mumps_seti8toi4(id%KEEP8(110), infog(29))
3121C Initialize INFO(28)/INFOG(35) in case BLR not activated
3122 id%INFO(28) = id%INFO(27)
3123 infog(35) = infog(29)
3124C ==============================
3125C LOW-RANK
3126C ==============================
3127 IF ( keep(486) .NE. 0 ) THEN !LR is activated
3128C Compute and Save local amount of flops in case of BLR
3129 rinfo(4) = real(flop_frfronts + flop_facto_fr - flop_lrgain
3131C
3132C Compute and Save local number of entries in compressed factors
3133C
3134 itmp8 = id%KEEP8(10) - int(mry_lu_lrgain,8)
3135 CALL mumps_seti8toi4( itmp8, id%INFO(28))
3136C
3137 CALL mpi_reduce( mry_lu_lrgain, tmp_mry_lu_lrgain
3138 & , 1, mpi_double_precision,
3139 & mpi_sum, master, id%COMM, ierr)
3140 CALL mpi_reduce( mry_lu_fr, tmp_mry_lu_fr
3141 & , 1, mpi_double_precision,
3142 & mpi_sum, master, id%COMM, ierr)
3143 CALL mpi_reduce( mry_cb_fr, tmp_mry_cb_fr
3144 & , 1, mpi_double_precision,
3145 & mpi_sum, master, id%COMM, ierr)
3146 CALL mpi_reduce( mry_cb_lrgain, tmp_mry_cb_lrgain
3147 & , 1, mpi_double_precision,
3148 & mpi_sum, master, id%COMM, ierr)
3149 CALL mpi_reduce( flop_lrgain, tmp_flop_lrgain
3150 & , 1, mpi_double_precision,
3151 & mpi_sum, master, id%COMM, ierr)
3152 CALL mpi_reduce( flop_trsm_fr, tmp_flop_trsm_fr
3153 & , 1, mpi_double_precision,
3154 & mpi_sum, master, id%COMM, ierr)
3155 CALL mpi_reduce( flop_trsm_lr, tmp_flop_trsm_lr
3156 & , 1, mpi_double_precision,
3157 & mpi_sum, master, id%COMM, ierr)
3158 CALL mpi_reduce( flop_update_fr, tmp_flop_update_fr
3159 & , 1, mpi_double_precision,
3160 & mpi_sum, master, id%COMM, ierr)
3161 CALL mpi_reduce( flop_update_lr, tmp_flop_update_lr
3162 & , 1, mpi_double_precision,
3163 & mpi_sum, master, id%COMM, ierr)
3165 & tmp_flop_frswap_compress
3166 & , 1, mpi_double_precision,
3167 & mpi_sum, master, id%COMM, ierr)
3169 & tmp_flop_midblk_compress
3170 & , 1, mpi_double_precision,
3171 & mpi_sum, master, id%COMM, ierr)
3172 CALL mpi_reduce( flop_update_lrlr3, tmp_flop_update_lrlr3
3173 & , 1, mpi_double_precision,
3174 & mpi_sum, master, id%COMM, ierr)
3175 CALL mpi_reduce(flop_accum_compress, tmp_flop_accum_compress
3176 & , 1, mpi_double_precision,
3177 & mpi_sum, master, id%COMM, ierr)
3178 CALL mpi_reduce( flop_trsm, tmp_flop_trsm
3179 & , 1, mpi_double_precision,
3180 & mpi_sum, master, id%COMM, ierr)
3181 CALL mpi_reduce( flop_panel, tmp_flop_panel
3182 & , 1, mpi_double_precision,
3183 & mpi_sum, master, id%COMM, ierr)
3184 CALL mpi_reduce( flop_frfronts, tmp_flop_frfronts
3185 & , 1, mpi_double_precision,
3186 & mpi_sum, master, id%COMM, ierr)
3187 CALL mpi_reduce( flop_compress, tmp_flop_compress
3188 & , 1, mpi_double_precision,
3189 & mpi_sum, master, id%COMM, ierr)
3190 CALL mpi_reduce( flop_decompress, tmp_flop_decompress
3191 & , 1, mpi_double_precision,
3192 & mpi_sum, master, id%COMM, ierr)
3193 CALL mpi_reduce( flop_cb_compress, tmp_flop_cb_compress
3194 & , 1, mpi_double_precision,
3195 & mpi_sum, master, id%COMM, ierr)
3196 CALL mpi_reduce( flop_cb_decompress,tmp_flop_cb_decompress
3197 & , 1, mpi_double_precision,
3198 & mpi_sum, master, id%COMM, ierr)
3199 CALL mpi_reduce( flop_facto_fr, tmp_flop_facto_fr
3200 & , 1, mpi_double_precision,
3201 & mpi_sum, master, id%COMM, ierr)
3202 CALL mpi_reduce( cnt_nodes,tmp_cnt_nodes
3203 & , 1, mpi_integer,
3204 & mpi_sum, master, id%COMM, ierr)
3205 IF (id%NPROCS.GT.1) THEN
3209 & , 1, mpi_double_precision,
3210 & mpi_sum, master, id%COMM, ierr)
3211 IF (id%MYID.EQ.master) THEN
3213 ENDIF
3215 & , 1, mpi_double_precision,
3216 & mpi_min, master, id%COMM, ierr)
3218 & , 1, mpi_double_precision,
3219 & mpi_max, master, id%COMM, ierr)
3220 ENDIF ! NPROCS > 1
3221 CALL mpi_reduce( time_update, tmp_time_update
3222 & , 1, mpi_double_precision,
3223 & mpi_sum, master, id%COMM, ierr)
3224 CALL mpi_reduce( time_update_lrlr1, tmp_time_update_lrlr1
3225 & , 1, mpi_double_precision,
3226 & mpi_sum, master, id%COMM, ierr)
3227 CALL mpi_reduce( time_update_lrlr2, tmp_time_update_lrlr2
3228 & , 1, mpi_double_precision,
3229 & mpi_sum, master, id%COMM, ierr)
3230 CALL mpi_reduce( time_update_lrlr3, tmp_time_update_lrlr3
3231 & , 1, mpi_double_precision,
3232 & mpi_sum, master, id%COMM, ierr)
3233 CALL mpi_reduce( time_update_frlr, tmp_time_update_frlr
3234 & , 1, mpi_double_precision,
3235 & mpi_sum, master, id%COMM, ierr)
3236 CALL mpi_reduce( time_update_frfr, tmp_time_update_frfr
3237 & , 1, mpi_double_precision,
3238 & mpi_sum, master, id%COMM, ierr)
3239 CALL mpi_reduce( time_diagcopy, tmp_time_diagcopy
3240 & , 1, mpi_double_precision,
3241 & mpi_sum, master, id%COMM, ierr)
3242 CALL mpi_reduce( time_compress,tmp_time_compress
3243 & , 1, mpi_double_precision,
3244 & mpi_sum, master, id%COMM, ierr)
3246 & tmp_time_midblk_compress
3247 & , 1, mpi_double_precision,
3248 & mpi_sum, master, id%COMM, ierr)
3250 & tmp_time_frswap_compress
3251 & , 1, mpi_double_precision,
3252 & mpi_sum, master, id%COMM, ierr)
3253 CALL mpi_reduce( time_cb_compress, tmp_time_cb_compress
3254 & , 1, mpi_double_precision,
3255 & mpi_sum, master, id%COMM, ierr)
3256 CALL mpi_reduce( time_decomp, tmp_time_decomp
3257 & , 1, mpi_double_precision,
3258 & mpi_sum, master, id%COMM, ierr)
3259 CALL mpi_reduce( time_decomp_ucfs, tmp_time_decomp_ucfs
3260 & , 1, mpi_double_precision,
3261 & mpi_sum, master, id%COMM, ierr)
3262 CALL mpi_reduce( time_decomp_asm1, tmp_time_decomp_asm1
3263 & , 1, mpi_double_precision,
3264 & mpi_sum, master, id%COMM, ierr)
3265 CALL mpi_reduce(time_decomp_locasm2, tmp_time_decomp_locasm2
3266 & , 1, mpi_double_precision,
3267 & mpi_sum, master, id%COMM, ierr)
3268 CALL mpi_reduce(time_decomp_maplig1, tmp_time_decomp_maplig1
3269 & , 1, mpi_double_precision,
3270 & mpi_sum, master, id%COMM, ierr)
3271 CALL mpi_reduce( time_decomp_asms2s, tmp_time_decomp_asms2s
3272 & , 1, mpi_double_precision,
3273 & mpi_sum, master, id%COMM, ierr)
3274 CALL mpi_reduce( time_decomp_asms2m, tmp_time_decomp_asms2m
3275 & , 1, mpi_double_precision,
3276 & mpi_sum, master, id%COMM, ierr)
3277 CALL mpi_reduce( time_panel, tmp_time_panel
3278 & , 1, mpi_double_precision,
3279 & mpi_sum, master, id%COMM, ierr)
3280 CALL mpi_reduce( time_fac_i, tmp_time_fac_i
3281 & , 1, mpi_double_precision,
3282 & mpi_sum, master, id%COMM, ierr)
3283 CALL mpi_reduce( time_fac_mq, tmp_time_fac_mq
3284 & , 1, mpi_double_precision,
3285 & mpi_sum, master, id%COMM, ierr)
3286 CALL mpi_reduce( time_fac_sq, tmp_time_fac_sq
3287 & , 1, mpi_double_precision,
3288 & mpi_sum, master, id%COMM, ierr)
3289 CALL mpi_reduce( time_lrtrsm, tmp_time_lrtrsm
3290 & , 1, mpi_double_precision,
3291 & mpi_sum, master, id%COMM, ierr)
3292 CALL mpi_reduce( time_frtrsm, tmp_time_frtrsm
3293 & , 1, mpi_double_precision,
3294 & mpi_sum, master, id%COMM, ierr)
3295 CALL mpi_reduce( time_frfronts, tmp_time_frfronts
3296 & , 1, mpi_double_precision,
3297 & mpi_sum, master, id%COMM, ierr)
3298 CALL mpi_reduce( time_lr_module, tmp_time_lr_module
3299 & , 1, mpi_double_precision,
3300 & mpi_sum, master, id%COMM, ierr)
3301 IF (id%MYID.EQ.master) THEN
3302 IF (id%NPROCS.GT.1) THEN
3303C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any
3304C number of procs
3305 mry_lu_fr = tmp_mry_lu_fr
3306 mry_lu_lrgain = tmp_mry_lu_lrgain
3307 mry_cb_fr = tmp_mry_cb_fr
3308 mry_cb_lrgain = tmp_mry_cb_lrgain
3309 flop_lrgain = tmp_flop_lrgain
3310 flop_panel = tmp_flop_panel
3311 flop_trsm = tmp_flop_trsm
3312 flop_trsm_fr = tmp_flop_trsm_fr
3313 flop_trsm_lr = tmp_flop_trsm_lr
3314 flop_update_fr = tmp_flop_update_fr
3315 flop_update_lr = tmp_flop_update_lr
3316 flop_update_lrlr3 = tmp_flop_update_lrlr3
3317 flop_compress = tmp_flop_compress
3318 flop_midblk_compress = tmp_flop_midblk_compress
3319 flop_frswap_compress = tmp_flop_frswap_compress
3320 flop_accum_compress = tmp_flop_accum_compress
3321 flop_cb_compress = tmp_flop_cb_compress
3322 flop_decompress = tmp_flop_decompress
3323 flop_cb_decompress = tmp_flop_cb_decompress
3324 flop_frfronts = tmp_flop_frfronts
3325 flop_facto_fr = tmp_flop_facto_fr
3326 cnt_nodes = tmp_cnt_nodes
3327 time_update = tmp_time_update /id%NPROCS
3328 time_update_lrlr1 = tmp_time_update_lrlr1 /id%NPROCS
3329 time_update_lrlr2 = tmp_time_update_lrlr2 /id%NPROCS
3330 time_update_lrlr3 = tmp_time_update_lrlr3 /id%NPROCS
3331 time_update_frlr = tmp_time_update_frlr /id%NPROCS
3332 time_update_frfr = tmp_time_update_frfr /id%NPROCS
3333 time_compress = tmp_time_compress /id%NPROCS
3334 time_midblk_compress = tmp_time_midblk_compress/id%NPROCS
3335 time_frswap_compress = tmp_time_frswap_compress/id%NPROCS
3336 time_diagcopy = tmp_time_diagcopy /id%NPROCS
3337 time_cb_compress = tmp_time_cb_compress /id%NPROCS
3338 time_panel = tmp_time_panel /id%NPROCS
3339 time_fac_i = tmp_time_fac_i /id%NPROCS
3340 time_fac_mq = tmp_time_fac_mq /id%NPROCS
3341 time_fac_sq = tmp_time_fac_sq /id%NPROCS
3342 time_lrtrsm = tmp_time_lrtrsm /id%NPROCS
3343 time_frtrsm = tmp_time_frtrsm /id%NPROCS
3344 time_frfronts = tmp_time_frfronts /id%NPROCS
3345 time_lr_module = tmp_time_lr_module /id%NPROCS
3346 time_decomp = tmp_time_decomp /id%NPROCS
3347 time_decomp_ucfs = tmp_time_decomp_ucfs /id%NPROCS
3348 time_decomp_asm1 = tmp_time_decomp_asm1 /id%NPROCS
3349 time_decomp_locasm2 = tmp_time_decomp_locasm2 /id%NPROCS
3350 time_decomp_maplig1 = tmp_time_decomp_maplig1 /id%NPROCS
3351 time_decomp_asms2s = tmp_time_decomp_asms2s /id%NPROCS
3352 time_decomp_asms2m = tmp_time_decomp_asms2m /id%NPROCS
3353 ENDIF
3354 CALL compute_global_gains(id%KEEP8(110),id%RINFOG(3),
3355 & id%KEEP8(49), prokg, mpg)
3356C Number of entries in factor INFOG(35) in
3357C compressed form is updated as long as
3358C BLR is activated, this independently of the
3359C fact that factors are saved in LR.
3360 CALL mumps_seti8toi4(id%KEEP8(49), id%INFOG(35))
3361 frontwise = 0
3362C WRITE gains also compute stats stored in DKEEP array
3363 IF (lpok) THEN
3364 IF (cntl(7) < 0.0e0) THEN
3365C Warning : using negative values is an experimental and
3366C non recommended setting.
3367 WRITE(lp,'(/A/,A/,A/,A,A)')
3368 & ' WARNING in BLR input setting',
3369 & ' CNTL(7) < 0 is experimental: ',
3370 & ' RRQR precision = |CNTL(7| x ||A_pre||, ',
3371 & ' where A_pre is the preprocessed matrix as defined',
3372 & ' in the Users guide '
3373 ENDIF
3374 ENDIF
3375 CALL saveandwrite_gains(frontwise,
3376 & keep(489), id%DKEEP, n, id%ICNTL(36),
3377 & keep(487), keep(488), keep(490),
3378 & keep(491), keep(50), keep(486),
3379 & keep(472), keep(475), keep(478), keep(480),
3380 & keep(481),
3381 & keep(483), keep(484),
3382 & id%KEEP8(110), id%KEEP8(49),
3383 & keep(28), id%NPROCS, mpg, prokg)
3384C flops when BLR activated
3385 rinfog(14) = id%DKEEP(56)
3386 ELSE
3387 rinfog(14) = 0.0e00
3388 ENDIF
3389 ENDIF
3390C ==============================
3391C NULL PIVOTS AND RANK-REVEALING
3392C ==============================
3393 IF(keep(110) .EQ. 1) THEN
3394C -- make available to users the local number of null pivots detected
3395C -- with ICNTL(24) = 1.
3396 id%INFO(18) = keep(109)
3397 CALL mpi_allreduce( keep(109), keep(112), 1, mpi_integer,
3398 & mpi_sum, id%COMM, ierr)
3399 ELSE
3400 id%INFO(18) = 0
3401 keep(109) = 0
3402 keep(112) = 0
3403 ENDIF
3404 IF (id%MYID.EQ.master) THEN
3405C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(56).
3406 infog(28)=keep(112)+keep(17)
3407 ENDIF
3408C ========================================
3409C We now provide to the host the part of
3410C PIVNUL_LIST resulting from the processing
3411C of the root node and we update id%INFO(18)
3412C on the processor holding the root to
3413C include null pivots relative to the root
3414C ========================================
3415 IF (keep(17) .NE. 0) THEN
3416 IF (id%MYID .EQ. id_root) THEN
3417C Include in id%INFO(18) null pivots resulting
3418C from deficiency on the root. In this way,
3419C the sum of all id%INFO(18) is equal to INFOG(28).
3420 id%INFO(18)=id%INFO(18)+keep(17)
3421 ENDIF
3422 IF (id_root .EQ. master) THEN
3423 IF (id%MYID.EQ.master) THEN
3424C --------------------------------------------------
3425C Null pivots of root have been stored in
3426C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17).
3427C Shift them at the end of the list because:
3428C * this is what we need to build the null space
3429C * we would otherwise overwrite them on the host
3430C when gathering null pivots from other processors
3431C --------------------------------------------------
3432 DO i= keep(17), 1, -1
3433c DO I=1, KEEP(17) % incorrect
3434C when KEEP(112) < KEEP(109)+ KEEP(17)
3435 id%PIVNUL_LIST(keep(112)+i)=id%PIVNUL_LIST(keep(109)+i)
3436 ENDDO
3437 ENDIF
3438 ELSE
3439C ---------------------------------
3440C Null pivots of root must be sent
3441C from the processor responsible of
3442C the root to the host (or MASTER).
3443C ---------------------------------
3444 IF (id%MYID .EQ. id_root) THEN
3445 CALL mpi_send(id%PIVNUL_LIST(keep(109)+1), keep(17),
3446 & mpi_integer, master, zero_piv,
3447 & id%COMM, ierr)
3448 ELSE IF (id%MYID .EQ. master) THEN
3449 CALL mpi_recv(id%PIVNUL_LIST(keep(112)+1), keep(17),
3450 & mpi_integer, id_root, zero_piv,
3451 & id%COMM, status, ierr )
3452 ENDIF
3453 ENDIF
3454 ENDIF
3455C ===========================
3456C gather zero pivots indices
3457C on the host node
3458C ===========================
3459C In case of non working host, the following code also
3460C works considering that KEEP(109) is equal to 0 on
3461C the non-working host
3462 IF(keep(110) .EQ. 1) THEN
3463 ALLOCATE(itmp2(id%NPROCS),stat = ierr ) ! deallocated in 490
3464 IF ( ierr .GT. 0 ) THEN
3465 id%INFO(1)=-13
3466 id%INFO(2)=id%NPROCS
3467 END IF
3468 CALL mumps_propinfo( icntl(1), id%INFO(1),
3469 & id%COMM, id%MYID )
3470 IF (id%INFO(1).LT.0) GOTO 490
3471 CALL mpi_gather ( keep(109),1, mpi_integer,
3472 & itmp2(1), 1, mpi_integer,
3473 & master, id%COMM, ierr)
3474 IF(id%MYID .EQ. master) THEN
3475 posbuf = itmp2(1)+1
3476C First null pivot of master is in
3477C position 1 of global list
3478 keep(220)=1
3479 DO i = 1,id%NPROCS-1
3480 CALL mpi_recv(id%PIVNUL_LIST(posbuf), itmp2(i+1),
3481 & mpi_integer,i,
3482 & zero_piv, id%COMM, status, ierr)
3483C Send position POSBUF of first null pivot of proc I
3484C in global list. Will allow to quickly identify during
3485C the solve step if one is concerned by a global position
3486C K, 0 <= K <= INFOG(28).
3487 CALL mpi_send(posbuf, 1, mpi_integer, i, zero_piv,
3488 & id%COMM, ierr)
3489 posbuf = posbuf + itmp2(i+1)
3490 ENDDO
3491 ELSE
3492 CALL mpi_send( id%PIVNUL_LIST(1), keep(109), mpi_integer,
3493 & master,zero_piv, id%COMM, ierr)
3494 CALL mpi_recv( keep(220), 1, mpi_integer, master, zero_piv,
3495 & id%COMM, status, ierr )
3496 ENDIF
3497 ENDIF
3498C =====================================
3499C Statistics relative to min/max pivots
3500C =====================================
3501 CALL mpi_reduce( id%DKEEP(19), rinfog(19), 1,
3502 & mpi_real,
3503 & mpi_min, master, id%COMM, ierr )
3504 CALL mpi_reduce( id%DKEEP(20), rinfog(20), 1,
3505 & mpi_real,
3506 & mpi_min, master, id%COMM, ierr )
3507 CALL mpi_reduce( id%DKEEP(21), rinfog(21), 1,
3508 & mpi_real,
3509 & mpi_max, master, id%COMM, ierr )
3510C =========================================
3511C Centralized number of swaps for pivoting
3512C =========================================
3513 CALL mpi_reduce( id%KEEP8(80), itemp8, 1, mpi_integer8,
3514 & mpi_sum, master, id%COMM, ierr )
3515 IF (id%MYID .EQ. master) THEN
3516 CALL mumps_seti8toi4(itemp8,id%INFOG(48))
3517 ENDIF
3518C ==========================================
3519C Centralized largest increase of panel size
3520C ==========================================
3521 CALL mpi_reduce( id%KEEP(425), id%INFOG(49), 1, mpi_integer,
3522 & mpi_max, master, id%COMM, ierr )
3523C =====================================
3524C Statistics concerning the determinant
3525C =====================================
3526C
3527C 1/ on the host better take into account null pivots if scaling:
3528C
3529C Since null pivots are excluded from the computation
3530C of the determinant, we also exclude the corresponding
3531C scaling entries. Since those entries have already been
3532C taken into account before the factorization, we multiply
3533C the determinant on the host by the scaling values corresponding
3534C to pivots in PIVNUL_LIST.
3535 IF (id%MYID.EQ.master .AND. lscal. and. keep(258).NE.0) THEN
3536 k = min(keep(143), keep(17))
3537 k = max(k, 0)
3538 DO i = 1, keep(112)+ k
3539c DO I = 1, id%INFOG(28) ! all null pivots + singular values
3541 & id%ROWSCA(id%PIVNUL_LIST(i)),
3542 & id%DKEEP(6), keep(259))
3544 & id%COLSCA(id%PIVNUL_LIST(i)),
3545 & id%DKEEP(6), keep(259))
3546 ENDDO
3547 ENDIF
3548C
3549C 2/ Swap signs depending on pivoting on each proc
3550C
3551 IF (keep(258).NE.0) THEN
3552C Return the determinant in INFOG(34) and RINFOG(12/13)
3553C In case of real arithmetic, initialize
3554C RINFOG(13) to 0 (no imaginary part and
3555C not touched by SMUMPS_DETER_REDUCTION)
3556 rinfog(13)=0.0e0
3557 IF (keep(260).EQ.-1) THEN ! Local to each processor
3558 id%DKEEP(6)=-id%DKEEP(6)
3559 ENDIF
3560C
3561C 3/ Perform a reduction
3562C
3564 & id%COMM, id%DKEEP(6), keep(259),
3565 & rinfog(12), infog(34), id%NPROCS)
3566C
3567C 4/ Swap sign if needed
3568C
3569 IF (id%KEEP(50).EQ.0 .AND. id%MYID.EQ. master) THEN
3570C Modify sign of determinant according
3571C to unsymmetric permutation (max-trans
3572C of max-weighted matching)
3573 IF (id%KEEP(23).NE.0) THEN
3575 & rinfog(12), id%N,
3576C id%STEP: used as workspace of size N still
3577C allocated on master; restored on exit
3578 & id%STEP(1),
3579 & id%UNS_PERM(1) )
3580C Remark that RINFOG(12/13) are modified only
3581C on the host but will be broadcast on exit
3582C from MUMPS (see SMUMPS_DRIVER)
3583 ENDIF
3584 ENDIF
3585 ENDIF
3586 490 IF (allocated(itmp2)) DEALLOCATE(itmp2)
3587 IF ( prokg ) THEN
3588C -----------------------------
3589C PRINT STATISTICS (on master)
3590C -----------------------------
3591 WRITE(mpg,99984) rinfog(2),rinfog(3),keep(52),
3592 & id%KEEP8(148),
3593 & id%KEEP8(128), infog(11), id%KEEP8(110)
3594 IF (id%KEEP(50) == 1 .OR. id%KEEP(50) == 2) THEN
3595 ! negative pivots
3596 WRITE(mpg, 99987) infog(12)
3597 END IF
3598 IF (id%KEEP(50) == 0) THEN
3599 ! off diag pivots
3600 WRITE(mpg, 99985) infog(12)
3601 END IF
3602 IF (id%KEEP(50) .NE. 1) THEN
3603 ! delayed pivots
3604 WRITE(mpg, 99982) infog(13)
3605 END IF
3606 IF (keep(97) .NE. 0) THEN
3607 ! tiny pivots
3608 WRITE(mpg, '(A,D16.4)')
3609 & ' Effective static pivoting thresh., CNTL(4) =', seuil
3610 WRITE(mpg, 99986) infog(25)
3611 ENDIF
3612 IF (id%KEEP(50) == 2) THEN
3613 !number of 2x2 pivots in type 1 nodes
3614 WRITE(mpg, 99988) keep(229)
3615 !number of 2x2 pivots in type 2 nodes
3616 WRITE(mpg, 99989) keep(230)
3617 ENDIF
3618 !number of zero pivots
3619 IF (keep(110) .NE.0) THEN
3620 WRITE(mpg, 99991) keep(112)
3621 ENDIF
3622 !Deficiency on root
3623 IF ( keep(19) .ne. 0 )
3624c IF ( KEEP(17) .ne. 0 )
3625 & WRITE(mpg, 99983) keep(17)
3626 !Total deficiency
3627 IF (keep(110).NE.0.OR.keep(19).NE.0)
3628c IF (KEEP(110).NE.0.OR.KEEP(17).NE.0)
3629 & WRITE(mpg, 99992) keep(17)+keep(112)
3630 ! Memory compress
3631 WRITE(mpg, 99981) infog(14)
3632 ! Extra copies due to ip stack in unsym case
3633 ! in core case (or OLD_OOC_PANEL)
3634 IF (id%KEEP8(108) .GT. 0_8) THEN
3635 WRITE(mpg, 99980) id%KEEP8(108)
3636 ENDIF
3637 IF ((keep(60).NE.0) .AND. infog(25).GT.0) THEN
3638 ! Schur on and tiny pivots set in last level
3639 ! before the Schur if KEEP(114)=0
3640 WRITE(mpg, '(A)')
3641 & " ** Warning Static pivoting was necessary"
3642 WRITE(mpg, '(A)')
3643 & " ** to factor interior variables with Schur ON"
3644 ENDIF
3645 IF (keep(258).NE.0) THEN
3646 WRITE(mpg,99978) rinfog(12)
3647 WRITE(mpg,99977) infog(34)
3648 ENDIF
3649 END IF
3650* ==========================================
3651*
3652* End of Factorization Phase
3653*
3654* ==========================================
3655C
3656C Goto 500 is done when
3657C LOAD_INIT
3658C OOC_INIT_FACTO
3659C MUMPS_FDM_INIT
3660#if ! defined(NO_FDM_DESCBAND)
3661C MUMPS_FDBD_INIT
3662#endif
3663#if ! defined(NO_FDM_MAPROW)
3664C MUMPS_FMRD_INIT
3665#endif
3666C are all called.
3667C
3668 500 CONTINUE
3669C Redo free DBLARR (as in end_driver.F)
3670C in case an error occurred after allocating
3671C DBLARR and before freeing it above.
3672 IF (id%KEEP(46).EQ.1 .AND.
3673 & id%KEEP(55).NE.0 .AND.
3674 & id%MYID.EQ.master .AND.
3675 & id%KEEP(52) .EQ. 0) THEN
3676 NULLIFY(id%DBLARR)
3677 ELSE
3678 IF (associated(id%DBLARR)) THEN
3679 DEALLOCATE(id%DBLARR)
3680 NULLIFY(id%DBLARR)
3681 ENDIF
3682 ENDIF
3683#if ! defined(NO_FDM_DESCBAND)
3684 IF (i_am_slave) THEN
3685 CALL mumps_fdbd_end(id%INFO(1)) ! INFO(1): input only
3686 ENDIF
3687#endif
3688#if ! defined(NO_FDM_MAPROW)
3689 IF (i_am_slave) THEN
3690 CALL mumps_fmrd_end(id%INFO(1)) ! INFO(1): input only
3691 ENDIF
3692#endif
3693 IF (i_am_slave) THEN
3694C Terminate BLR module except if it is still needed for solve
3695 IF (
3696 & (
3697 & (keep(486).EQ.2)
3698 & )
3699 & .AND. id%INFO(1).GE.0
3700 & ) THEN
3701C Store pointer to BLR_ARRAY in MUMPS structure
3702C (requires successful factorization otherwise module is freed)
3703 CALL smumps_blr_mod_to_struc(id%BLRARRAY_ENCODING)
3704 ELSE
3705C INFO(1) positive or negative
3706 CALL smumps_blr_end_module(id%INFO(1), id%KEEP8, id%KEEP(34))
3707 ENDIF
3708 ENDIF
3709 IF (i_am_slave) THEN
3710 CALL mumps_fdm_end('A')
3711C Terminate BLR module except if it is still needed for solve
3712 IF (
3713 & (
3714 & (keep(486).EQ.2)
3715 & )
3716 & .AND. id%INFO(1).GE.0
3717 & ) THEN
3718 CALL mumps_fdm_mod_to_struc('F', id%FDM_F_ENCODING,
3719 & id%INFO(1))
3720 IF (.NOT. associated(id%FDM_F_ENCODING)) THEN
3721 WRITE(*,*) "Internal error 2 in SMUMPS_FAC_DRIVER"
3722 ENDIF
3723 ELSE
3724 CALL mumps_fdm_end('F')
3725 ENDIF
3726 ENDIF
3727C
3728C Goto 514 is done when an
3729C error occurred in MUMPS_FDM_INIT
3730C or (after FDM_INIT but before
3731C OOC_INIT)
3732C
3733 514 CONTINUE
3734 IF ( i_am_slave ) THEN
3735 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
3736 CALL smumps_ooc_end_facto(id,ierr)
3737 IF (id%ASSOCIATED_OOC_FILES) THEN
3738 id%ASSOCIATED_OOC_FILES = .false.
3739 ENDIF
3740 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3741 ENDIF
3742 IF (wk_user_provided) THEN
3743C at the end of a phase S is always freed when WK_USER provided
3744 NULLIFY(id%S)
3745 ELSE IF (keep(201).NE.0) THEN
3746C ----------------------------------------
3747C In OOC or if KEEP(201).EQ.-1 we always
3748C free S at end of factorization. As id%S
3749C may be unassociated in case of error
3750C during or before the allocation of id%S,
3751C we only free S when it was associated.
3752C ----------------------------------------
3753 IF (associated(id%S)) DEALLOCATE(id%S)
3754 NULLIFY(id%S) ! in all cases
3755 id%KEEP8(23)=0_8
3756 ENDIF
3757 ELSE ! host not working
3758 IF (wk_user_provided) THEN
3759C at the end of a phase S is always freed when WK_USER provided
3760 NULLIFY(id%S)
3761 ELSE
3762 IF (associated(id%S)) DEALLOCATE(id%S)
3763 NULLIFY(id%S) ! in all cases
3764 id%KEEP8(23)=0_8
3765 END IF
3766 END IF
3767C
3768C Goto 513 is done in case of error where LOAD_INIT was
3769C called but not OOC_INIT_FACTO.
3770 513 CONTINUE
3771 IF ( i_am_slave ) THEN
3772 CALL smumps_load_end( id%INFO(1), id%NSLAVES, ierr )
3773 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3774 ENDIF
3775 CALL mumps_propinfo( icntl(1), id%INFO(1),
3776 & id%COMM, id%MYID )
3777C
3778C Goto 517 is done when an error occurs when GPU initialization
3779C has been performed but not LOAD_INIT or OOC_INIT_FACTO
3780C
3781 517 CONTINUE
3782C
3783C Goto 530 is done when an error occurs before
3784C the calls to GPU_INIT, LOAD_INIT and OOC_INIT_FACTO
3785 530 CONTINUE
3786C Fwd in facto: free RHS_MUMPS in case
3787C it was allocated.
3788 IF (rhs_mumps_allocated) DEALLOCATE(rhs_mumps)
3789 NULLIFY(rhs_mumps)
3790C
3791 id%KEEP8(26) = keep826_save
3792 RETURN
3793 120 FORMAT(/' Local redistrib: data local/sent =',i16,i16)
3794 125 FORMAT(/' Redistrib: total data local/sent =',i16,i16)
3795 130 FORMAT(//'****** FACTORIZATION STEP ********'/)
3796 160 FORMAT(
3797 & /' Elapsed time to reformat/distribute matrix =',f12.4)
3798 166 FORMAT(' Max difference from 1 after scaling the entries',
3799 & ' for ONE-NORM (option 7/8) =',d9.2)
3800 170 FORMAT(' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3801 & ' Size of internal working array S =',i16/
3802 & ' Size of internal working array IS =',i16/
3803 & ' Minimum (ICNTL(14)=0) size of S =',i16/
3804 & ' Minimum (ICNTL(14)=0) size of IS =',i16/
3805 & ' Real space for original matrix =',i16/
3806 & ' Integer space for original matrix =',i16/
3807 & ' INFO(3) Real space for factors (estimated) =',i16/
3808 & ' INFO(4) Integer space for factors (estim.) =',i16/
3809 & ' Maximum frontal size (estimated) =',i16)
3810 172 FORMAT(' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3811 & ' Number of working processes =',i16/
3812 & ' ICNTL(22) Out-of-core option =',i16/
3813 & ' ICNTL(35) BLR activation (eff. choice) =',i16/
3814 & ' ICNTL(14) Memory relaxation =',i16/
3815 & ' INFOG(3) Real space for factors (estimated)=',i16/
3816 & ' INFOG(4) Integer space for factors (estim.)=',i16/
3817 & ' Maximum frontal size (estimated) =',i16/
3818 & ' Number of nodes in the tree =',i16/
3819 & ' ICNTL(23) Memory allowed (value on host) =',i16/
3820 & ' Sum over all procs =',i16/
3821 & ' Memory provided by user, sum of LWK_USER =',i16/
3822 & ' Effective threshold for pivoting, CNTL(1) =',d16.4)
3823 173 FORMAT( ' Perform forward during facto, NRHS =',i16)
3824 174 FORMAT( ' KEEP(268) Relaxed pivoting effective value =',i16)
3825 180 FORMAT(/' Elapsed time for factorization =',f12.4)
3826 185 FORMAT(/' Elapsed time for (failed) factorization =',f12.4)
3827 187 FORMAT( ' Elapsed time under L0 =',f12.4)
3828 188 FORMAT( ' Elapsed time under L0 (avg/max across MPI) =',
3829 & f12.4,f12.4)
3830 189 FORMAT(/' Flops under L0 layer =',1pd12.3)
3831 190 FORMAT(/' Flops under L0 Layer (avg/max across MPI) =',
3832 & 1pd12.3,1pd12.3)
383399977 FORMAT( ' INFOG(34) Determinant (base 2 exponent) =',i16)
383499978 FORMAT( ' RINFOG(12) Determinant (real part) =',f16.8)
383599980 FORMAT( ' Extra copies due to In-Place stacking =',i16)
383699981 FORMAT( ' INFOG(14) Number of memory compress =',i16)
383799982 FORMAT( ' INFOG(13) Number of delayed pivots =',i16)
383899983 FORMAT( ' Nb of singularities detected by ICNTL(56) =',i16)
383999991 FORMAT( ' Nb of null pivots detected by ICNTL(24) =',i16)
384099992 FORMAT( ' INFOG(28) Estimated deficiency =',i16)
384199984 FORMAT(/'Leaving factorization with ...'/
3842 & ' RINFOG(2) Operations in node assembly =',1pd10.3/
3843 & ' ------(3) Operations in node elimination =',1pd10.3/
3844 & ' ICNTL (8) Scaling effectively used =',i16/
3845 & ' INFOG (9) Real space for factors =',i16/
3846 & ' infog(10) Integer space for factors =',I16/
3847 & ' infog(11) maximum front size =',I16/
3848 & ' infog(29) number of entries in factors =',I16)
384999985 FORMAT( ' infog(12) number of off diagonal pivots =',I16)
385099986 FORMAT( ' infog(25) number of tiny pivots(static) =',I16)
385199987 FORMAT( ' infog(12) number of negative pivots =',I16)
385299988 FORMAT( ' number of 2x2 pivots in type 1 nodes =',I16)
385399989 FORMAT( ' number of 2x2 pivots in type 2 nodes =',I16)
3854 END SUBROUTINE SMUMPS_FAC_DRIVER
3855C
3856 SUBROUTINE SMUMPS_PRINT_ALLOCATED_MEM( PROK, PROKG, PRINT_MAXAVG,
3857 & MP, MPG, INFO16, INFOG18, INFOG19, NSLAVES, IRANK, KEEP )
3858 IMPLICIT NONE
3859C
3860C Purpose:
3861C =======
3862C Print memory allocated during factorization
3863C - called at beginning of factorization in full-rank
3864C - called at end of factorization in low-rank (because
3865C of dynamic allocations)
3866C
3867 LOGICAL, INTENT(IN) :: PROK, PROKG, PRINT_MAXAVG
3868 INTEGER, INTENT(IN) :: MP, MPG, INFO16, INFOG18, INFOG19
3869 INTEGER, INTENT(IN) :: IRANK, NSLAVES
3870 INTEGER, INTENT(IN) :: KEEP(500)
3871C
3872 IF ( PROKG ) THEN
3873 IF (PRINT_MAXAVG) THEN
3874 WRITE( MPG,'(a,i12) ')
3875 & ' ** memory allocated, max in mbytes(infog(18)):',
3876 & INFOG18
3877 ENDIF
3878 WRITE( MPG,'(/a,i12) ')
3879 & ' ** memory allocated, total in mbytes(infog(19)):',
3880 & INFOG19
3881 END IF
3882 RETURN
3883 END SUBROUTINE SMUMPS_PRINT_ALLOCATED_MEM
3884 SUBROUTINE SMUMPS_AVGMAX_STAT8(PROKG, MPG, VAL, NSLAVES,
3885 & PRINT_MAXAVG, COMM, MSG)
3886 IMPLICIT NONE
3887 INCLUDE 'mpif.h'
3888 LOGICAL, intent(in) :: PROKG
3889 INTEGER, intent(in) :: MPG
3890 INTEGER(8), intent(in) :: VAL
3891 INTEGER, intent(in) :: NSLAVES
3892 LOGICAL, intent(in) :: PRINT_MAXAVG
3893 INTEGER, intent(in) :: COMM
3894 CHARACTER*48 MSG
3895C Local
3896 INTEGER(8) MAX_VAL
3897 INTEGER IERR, MASTER
3898 REAL LOC_VAL, AVG_VAL
3899 PARAMETER(MASTER=0)
3900C
3901 CALL MUMPS_REDUCEI8( VAL, MAX_VAL, MPI_MAX, MASTER, COMM)
3902 LOC_VAL = real(VAL)/real(NSLAVES)
3903 CALL MPI_REDUCE( LOC_VAL, AVG_VAL, 1, MPI_REAL,
3904 & MPI_SUM, MASTER, COMM, IERR )
3905 IF (PROKG) THEN
3906 IF (PRINT_MAXAVG) THEN
3907 WRITE(MPG,100) " Average", MSG, int(AVG_VAL,8)
3908 ELSE
3909 WRITE(MPG,110) MSG, MAX_VAL
3910 ENDIF
3911 ENDIF
3912 RETURN
3913 100 FORMAT(A8,A48,I18)
3914 110 FORMAT(A48,I18)
3915 END SUBROUTINE SMUMPS_AVGMAX_STAT8
3916C
3917 SUBROUTINE SMUMPS_EXTRACT_SCHUR_REDRHS(id)
3918 USE SMUMPS_STRUC_DEF
3919 IMPLICIT NONE
3920C
3921C Purpose
3922C =======
3923C
3924C Extract the Schur and possibly also the reduced right-hand side
3925C (if Fwd in facto) from the processor working on Schur and copy
3926C it into the user datastructures id%SCHUR and id%REDRHS on the host.
3927C This routine assumes that the integer list of the Schur has not
3928C been permuted and still corresponds to LISTVAR_SCHUR.
3929C
3930C If the Schur is centralized, the master of the Schur holds the
3931C Schur and possibly also the reduced right-hand side.
3932C If the Schur is distribued (already built in user's datastructure),
3933C then the master of the Schur may hold the reduced right-hand side,
3934C in which case it is available in root%RHS_CNTR_MASTER_ROOT.
3935C
3936 TYPE(SMUMPS_STRUC) :: id
3937C
3938C Local variables
3939C ===============
3940C
3941 INCLUDE 'mpif.h'
3942 INCLUDE 'mumps_tags.h'
3943 INCLUDE 'mumps_headers.h'
3944 INTEGER :: STATUS(MPI_STATUS_SIZE)
3945 INTEGER :: IERR
3946 INTEGER, PARAMETER :: MASTER = 0
3947 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4
3948 INTEGER(4) :: I4 ! 32-bit even in 64-bit version
3949 INTEGER :: ROW_LENGTH, I
3950 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8
3951 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS
3952C
3953C External functions
3954C ==================
3955C
3956 INTEGER MUMPS_PROCNODE
3957 EXTERNAL MUMPS_PROCNODE
3958C Quick return in case factorization did not terminate correctly
3959.LT. IF (id%INFO(1) 0) RETURN
3960C Quick return if Schur option off
3961.EQ. IF (id%KEEP(60) 0) RETURN
3962C Get Schur id
3963 ID_SCHUR =MUMPS_PROCNODE(
3964 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))),
3965 & id%KEEP(199))
3966.NE. IF ( id%KEEP( 46 ) 1 ) THEN
3967 ID_SCHUR = ID_SCHUR + 1
3968 END IF
3969C Get size of Schur
3970.EQ. IF (id%MYIDID_SCHUR) THEN
3971.EQ. IF (id%KEEP(60)1) THEN
3972C Sequential Schur
3973 LD_SCHUR =
3974 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(IXSZ))
3975 SIZE_SCHUR = LD_SCHUR - id%KEEP(253)
3976 ELSE
3977C Parallel Schur
3978 LD_SCHUR = -999999 ! not used
3979 SIZE_SCHUR = id%root%TOT_ROOT_SIZE
3980 ENDIF
3981.EQ. ELSE IF (id%MYID MASTER) THEN
3982 SIZE_SCHUR = id%KEEP(116)
3983 LD_SCHUR = -44444 ! Not used
3984 ELSE
3985C Proc is not concerned with Schur, return
3986 RETURN
3987 ENDIF
3988 SURFSCHUR8 = int(SIZE_SCHUR,8)*int(SIZE_SCHUR,8)
3989C =================================
3990C Case of parallel Schur: if REDRHS
3991C was requested, obtain it directly
3992C from id%root%RHS_CNTR_MASTER_ROOT
3993C =================================
3994.GT. IF (id%KEEP(60) 1) THEN
3995.EQ..AND..GT. IF (id%KEEP(221)1 id%KEEP(252)0) THEN
3996 DO I = 1, id%KEEP(253)
3997.EQ. IF (ID_SCHURMASTER) THEN ! Necessarily = id%MYID
3998 CALL scopy(SIZE_SCHUR,
3999 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1), 1,
4000 & id%REDRHS((I-1)*id%LREDRHS+1), 1)
4001 ELSE
4002.EQ. IF (id%MYIDID_SCHUR) THEN
4003C Send
4004 CALL MPI_SEND(
4005 & id%root%RHS_CNTR_MASTER_ROOT((I-1)*SIZE_SCHUR+1),
4006 & SIZE_SCHUR,
4007 & MPI_REAL,
4008 & MASTER, TAG_SCHUR,
4009 & id%COMM, IERR )
4010.EQ. ELSE ! MYIDMASTER
4011C Receive
4012 CALL MPI_RECV( id%REDRHS((I-1)*id%LREDRHS+1),
4013 & SIZE_SCHUR,
4014 & MPI_REAL, ID_SCHUR, TAG_SCHUR,
4015 & id%COMM, STATUS, IERR )
4016 ENDIF
4017 ENDIF
4018 ENDDO
4019C ------------------------------
4020C In case of parallel Schur, we
4021C free root%RHS_CNTR_MASTER_ROOT
4022C ------------------------------
4023.EQ. IF (id%MYIDID_SCHUR) THEN
4024 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT)
4025 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
4026 ENDIF
4027 ENDIF
4028C return because this is all we need to do
4029C in case of parallel Schur complement
4030 RETURN
4031 ENDIF
4032C ============================
4033C Centralized Schur complement
4034C ============================
4035C PTRAST has been freed at the moment of calling this
4036C routine. Schur is available through
4037C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) ))
4038.EQ. IF (id%KEEP(252)0) THEN
4039C CASE 1 (ORIGINAL CODE):
4040C Schur is contiguous on ID_SCHUR
4041.EQ. IF ( ID_SCHUR MASTER ) THEN ! Necessarily equals id%MYID
4042C ---------------------
4043C Copy Schur complement
4044C ---------------------
4045 CALL SMUMPS_COPYI8SIZE( SURFSCHUR8,
4046 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))),
4047 & id%SCHUR(1) )
4048 ELSE
4049C -----------------------------------------
4050C The processor responsible of the Schur
4051C complement sends it to the host processor
4052C Use blocks to avoid too large messages.
4053C -----------------------------------------
4054 BL8=int(huge(I4)/id%KEEP(35)/10,8)
4055 DO IB=1, int((SURFSCHUR8+BL8-1_8) / BL8)
4056 SHIFT8 = int(IB-1,8) * BL8 ! Where to send
4057 BL4 = int(min(BL8,SURFSCHUR8-SHIFT8)) ! Size of block
4058.eq. IF ( id%MYID ID_SCHUR ) THEN
4059C Send Schur complement
4060 CALL MPI_SEND( id%S( SHIFT8 +
4061 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4062 & +4+id%KEEP(IXSZ)))),
4063 & BL4,
4064 & MPI_REAL,
4065 & MASTER, TAG_SCHUR,
4066 & id%COMM, IERR )
4067.eq. ELSE IF ( id%MYID MASTER ) THEN
4068C Receive Schur complement
4069 CALL MPI_RECV( id%SCHUR(1_8 + SHIFT8),
4070 & BL4,
4071 & MPI_REAL, ID_SCHUR, TAG_SCHUR,
4072 & id%COMM, STATUS, IERR )
4073 END IF
4074 ENDDO
4075 END IF
4076 ELSE
4077C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR,
4078C process it row by row.
4079C
4080C 2.1: We first centralize Schur complement into id%SCHUR
4081 ISCHUR_SRC = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4082 & +4+id%KEEP(IXSZ)))
4083 ISCHUR_DEST= 1_8
4084 DO I=1, SIZE_SCHUR
4085 ROW_LENGTH = SIZE_SCHUR
4086.EQ. IF (ID_SCHURMASTER) THEN ! Necessarily = id%MYID
4087 CALL scopy(ROW_LENGTH, id%S(ISCHUR_SRC), 1,
4088 & id%SCHUR(ISCHUR_DEST),1)
4089 ELSE
4090.EQ. IF (id%MYIDID_SCHUR) THEN
4091C Send
4092 CALL MPI_SEND( id%S(ISCHUR_SRC), ROW_LENGTH,
4093 & MPI_REAL,
4094 & MASTER, TAG_SCHUR,
4095 & id%COMM, IERR )
4096 ELSE
4097C Recv
4098 CALL MPI_RECV( id%SCHUR(ISCHUR_DEST),
4099 & ROW_LENGTH,
4100 & MPI_REAL, ID_SCHUR, TAG_SCHUR,
4101 & id%COMM, STATUS, IERR )
4102 ENDIF
4103 ENDIF
4104 ISCHUR_SRC = ISCHUR_SRC+int(LD_SCHUR,8)
4105 ISCHUR_DEST= ISCHUR_DEST+int(SIZE_SCHUR,8)
4106 ENDDO
4107C 2.2: Get REDRHS on host
4108C 2.2.1: Symmetric => REDRHS is available in last KEEP(253)
4109C rows of Schur structure on ID_SCHUR
4110C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253)
4111C columns. However it must be transposed.
4112.EQ. IF (id%KEEP(221)1) THEN ! Implies Fwd in facto
4113 ISCHUR_SYM = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4114 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8) *
4115 & int(LD_SCHUR,8)
4116 ISCHUR_UNS =
4117 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4118 & +4+id%KEEP(IXSZ))) + int(SIZE_SCHUR,8)
4119 ISCHUR_DEST = 1_8
4120 DO I = 1, id%KEEP(253)
4121.EQ. IF (ID_SCHUR MASTER) THEN ! necessarily = id%MYID
4122.EQ. IF (id%KEEP(50) 0) THEN
4123 CALL scopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR,
4124 & id%REDRHS(ISCHUR_DEST), 1)
4125 ELSE
4126 CALL scopy(SIZE_SCHUR, id%S(ISCHUR_SYM), 1,
4127 & id%REDRHS(ISCHUR_DEST), 1)
4128 ENDIF
4129 ELSE
4130.NE. IF (id%MYID MASTER) THEN
4131.EQ. IF (id%KEEP(50) 0) THEN
4132C Use id%S(ISCHUR_SYM) as temporary contig. workspace
4133C of size SIZE_SCHUR.
4134 CALL scopy(SIZE_SCHUR, id%S(ISCHUR_UNS), LD_SCHUR,
4135 & id%S(ISCHUR_SYM), 1)
4136 ENDIF
4137 CALL MPI_SEND(id%S(ISCHUR_SYM), SIZE_SCHUR,
4138 & MPI_REAL, MASTER, TAG_SCHUR,
4139 & id%COMM, IERR )
4140 ELSE
4141 CALL MPI_RECV(id%REDRHS(ISCHUR_DEST),
4142 & SIZE_SCHUR, MPI_REAL, ID_SCHUR, TAG_SCHUR,
4143 & id%COMM, STATUS, IERR )
4144 ENDIF
4145 ENDIF
4146.EQ. IF (id%KEEP(50)0) THEN
4147 ISCHUR_UNS = ISCHUR_UNS + int(LD_SCHUR,8)
4148 ELSE
4149 ISCHUR_SYM = ISCHUR_SYM + int(LD_SCHUR,8)
4150 ENDIF
4151 ISCHUR_DEST = ISCHUR_DEST + int(id%LREDRHS,8)
4152 ENDDO
4153 ENDIF
4154 ENDIF
4155 RETURN
4156 END SUBROUTINE SMUMPS_EXTRACT_SCHUR_REDRHS
#define mumps_abort
Definition VE_Metis.h:25
subroutine mumps_propinfo(icntl, info, comm, id)
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine mpi_recv(buf, cnt, datatype, source, tag, comm, status, ierr)
Definition mpi.f:461
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
Definition mpi.f:120
subroutine mpi_comm_split(comm, color, key, comm2, ierr)
Definition mpi.f:272
subroutine mpi_send(buf, cnt, datatype, dest, tag, comm, ierr)
Definition mpi.f:480
subroutine mpi_allreduce(sendbuf, recvbuf, cnt, datatype, operation, comm, ierr)
Definition mpi.f:103
subroutine mpi_comm_size(comm, size, ierr)
Definition mpi.f:263
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)
Definition mpi.f:205
subroutine mpi_gather(sendbuf, cnt, datatype, recvbuf, reccnt, rectype, root, comm, ierr)
Definition mpi.f:56
subroutine mpi_comm_free(comm, ierr)
Definition mpi.f:238
subroutine mpi_comm_rank(comm, rank, ierr)
Definition mpi.f:254
subroutine, public mumps_fdbd_init(initial_size, info)
subroutine, public mumps_fdbd_end(info1)
subroutine, public mumps_fmrd_init(initial_size, info)
subroutine, public mumps_fmrd_end(info1)
subroutine, public mumps_fdm_mod_to_struc(what, id_fdm_encoding, info)
subroutine, public mumps_fdm_init(what, initial_size, info)
subroutine, public mumps_fdm_end(what)
subroutine, public smumps_buf_deall_small_buf(ierr)
subroutine, public smumps_buf_dist_irecv_size(smumps_lbufr_bytes)
subroutine, public smumps_buf_ini_myid(myid)
subroutine, public smumps_buf_max_array_minsize(nfs4father, ierr)
subroutine, public smumps_buf_alloc_small_buf(size, ierr)
subroutine, public smumps_buf_deall_max_array()
subroutine, public smumps_init_l0_omp_factors(id_l0_omp_factors)
subroutine, public smumps_free_l0_omp_factors(id_l0_omp_factors)
subroutine, public smumps_load_init(id, memory_md_arg, maxs)
subroutine, public smumps_load_set_inicost(cost_subtree_arg, k64, dk15, k375, maxs)
subroutine, public smumps_load_end(info1, nslaves, ierr)
subroutine, public smumps_load_update(check_flops, process_bande, inc_load, keep, keep8)
subroutine, public smumps_blr_mod_to_struc(id_blrarray_encoding)
subroutine, public smumps_blr_end_module(info1, keep8, k34, lrsolve_act_opt)
subroutine, public smumps_blr_init_module(initial_size, info)
subroutine compute_global_gains(nb_entries_factor, flop_number, nb_entries_factor_withlr, prokg, mpg)
Definition slr_stats.F:535
double precision flop_cb_compress
Definition slr_stats.F:38
double precision time_frtrsm
Definition slr_stats.F:62
double precision time_update_lrlr3
Definition slr_stats.F:52
double precision time_lrtrsm
Definition slr_stats.F:61
double precision time_decomp_asm1
Definition slr_stats.F:71
double precision time_update_frlr
Definition slr_stats.F:53
subroutine init_stats_global(id)
Definition slr_stats.F:344
double precision time_decomp_asms2s
Definition slr_stats.F:74
double precision flop_compress
Definition slr_stats.F:38
double precision time_diagcopy
Definition slr_stats.F:68
double precision flop_cb_decompress
Definition slr_stats.F:38
double precision time_decomp_locasm2
Definition slr_stats.F:72
double precision time_fac_mq
Definition slr_stats.F:65
double precision time_decomp
Definition slr_stats.F:69
double precision flop_frfronts
Definition slr_stats.F:38
double precision time_update
Definition slr_stats.F:49
double precision time_panel
Definition slr_stats.F:63
double precision flop_decompress
Definition slr_stats.F:38
double precision mry_lu_fr
Definition slr_stats.F:17
double precision min_flop_facto_lr
Definition slr_stats.F:82
double precision time_update_frfr
Definition slr_stats.F:54
double precision flop_trsm_lr
Definition slr_stats.F:24
double precision flop_panel
Definition slr_stats.F:24
double precision time_decomp_maplig1
Definition slr_stats.F:73
double precision avg_flop_facto_lr
Definition slr_stats.F:81
double precision max_flop_facto_lr
Definition slr_stats.F:83
double precision flop_update_lrlr3
Definition slr_stats.F:24
subroutine saveandwrite_gains(local, k489, dkeep, n, icntl36, depth, bcksz, nassmin, nfrontmin, sym, k486, k472, k475, k478, k480, k481, k483, k484, k8110, k849, nbtreenodes, nprocs, mpg, prokg)
Definition slr_stats.F:578
integer cnt_nodes
Definition slr_stats.F:23
double precision time_fac_i
Definition slr_stats.F:64
double precision time_frswap_compress
Definition slr_stats.F:57
double precision flop_accum_compress
Definition slr_stats.F:38
double precision flop_update_fr
Definition slr_stats.F:24
double precision flop_trsm
Definition slr_stats.F:24
double precision time_update_lrlr1
Definition slr_stats.F:50
double precision flop_lrgain
Definition slr_stats.F:24
double precision flop_midblk_compress
Definition slr_stats.F:38
double precision time_fac_sq
Definition slr_stats.F:66
double precision time_decomp_asms2m
Definition slr_stats.F:75
double precision time_decomp_ucfs
Definition slr_stats.F:70
double precision time_lr_module
Definition slr_stats.F:59
double precision flop_frswap_compress
Definition slr_stats.F:38
double precision flop_update_lr
Definition slr_stats.F:24
double precision time_cb_compress
Definition slr_stats.F:58
double precision flop_trsm_fr
Definition slr_stats.F:24
double precision time_frfronts
Definition slr_stats.F:67
double precision mry_cb_lrgain
Definition slr_stats.F:17
double precision time_update_lrlr2
Definition slr_stats.F:51
double precision mry_cb_fr
Definition slr_stats.F:17
double precision mry_lu_lrgain
Definition slr_stats.F:17
double precision flop_facto_lr
Definition slr_stats.F:24
double precision time_compress
Definition slr_stats.F:55
double precision time_midblk_compress
Definition slr_stats.F:56
double precision flop_facto_fr
Definition slr_stats.F:24
subroutine smumps_ooc_end_facto(id, ierr)
Definition smumps_ooc.F:459
subroutine smumps_clean_ooc_data(id, ierr)
Definition smumps_ooc.F:568
subroutine, public smumps_ooc_init_facto(id, maxs)
Definition smumps_ooc.F:114
subroutine smumps_ooc_clean_pending(ierr)
Definition smumps_ooc.F:446
subroutine smumps_facto_recv_arrowhd2(n, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords, a, la, root, procnode_steps, slavef, perm, frere_steps, step, info1, info2)
subroutine smumps_facto_send_arrowheads(n, nz, aspk, irn, icn, perm, lscal, colsca, rowsca, myid, slavef, procnode_steps, nbrecords, lp, comm, root, keep, keep8, fils, rg2l, intarr, lintarr, dblarr, ldblarr, ptraiw, ptrarw, frere_steps, step, a, la, istep_to_iniv2, i_am_cand, candidates)
subroutine smumps_free_id_data_modules(id_fdm_f_encoding, id_blrarray_encoding, keep8, k34)
subroutine smumps_fac_b(n, s_is_pointers, la, liw, sym_perm, na, lna, ne_steps, nfsiz, fils, step, frere, dad, cand, istep_to_iniv2, tab_pos_in_pere, ptrar, ldptrar, ptrist, ptlust_s, ptrfac, iw1, iw2, itloc, rhs_mumps, pool, lpool, cntl1, icntl, info, rinfo, keep, keep8, procnode_steps, slavef, comm_nodes, myid, myid_nodes, bufr, lbufr, lbufr_bytes, smumps_lbuf, intarr, dblarr, root, nelt, frtptr, frtelt, comm_load, ass_irecv, seuil, seuil_ldlt_niv2, mem_distrib, dkeep, pivnul_list, lpn_list, lrgroups, ipool_b_l0_omp, lpool_b_l0_omp, ipool_a_l0_omp, lpool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, l0_omp_mapping, ll0_omp_mapping, thread_la, l0_omp_factors, ll0_omp_factors, i4_l0_omp, nbstats_i4, nbcols_i4, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition sfac_b.F:30
subroutine smumps_deter_sign_perm(deter, n, visited, perm)
subroutine smumps_deter_square(deter, nexp)
subroutine smumps_deter_scaling_inverse(deter, nexp)
subroutine smumps_deter_reduction(comm, deter_in, nexp_in, deter_out, nexp_out, nprocs)
subroutine smumps_updatedeter_scaling(piv, deter, nexp)
subroutine smumps_maxelt_size(eltptr, nelt, maxelt_size)
subroutine smumps_elt_distrib(n, nelt, na_elt8, comm, myid, slavef, ielptr_loc8, relptr_loc8, eltvar_loc, eltval_loc, lintarr, ldblarr, keep, keep8, maxelt_size, frtptr, frtelt, a, la, fils, id, root)
subroutine smumps_redistribution(n, nz_loc8, id, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords a, la, root, procnode_steps, slavef, perm, step, icntl, info, nsend8, nlocal8, istep_to_iniv2, candidates)
subroutine smumps_avgmax_stat8(prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine smumps_fac_driver(id)
Definition sfac_driver.F:15
subroutine smumps_extract_schur_redrhs(id)
subroutine smumps_print_allocated_mem(prok, prokg, print_maxavg, mp, mpg, info16, infog18, infog19, nslaves, irank, keep)
subroutine smumps_anorminf(id, anorminf, lscal, eff_size_schur)
subroutine smumps_fac_a(n, nz8, nsca, aspk, irn, icn, colsca, rowsca, wk, lwk8, wk_real, lwk_real, icntl, info)
subroutine smumps_simscaleabs(irn_loc, jcn_loc, a_loc, nz_loc, m, n, numprocs, myid, comm, rpartvec, cpartvec, rsndrcvsz, csndrcvsz, registre, iwrk, iwrksz, intsz, resz, op, rowsca, colsca, wrkrc, iszwrkrc, sym, nb1, nb2, nb3, eps, onenormerr, infnormerr)
subroutine static(v, vr, a, ar, ms, in, igrnod, weight_md, wfext)
Definition static.F:33
subroutine smumps_max_mem(keep, keep8, myid, n, nelt, na, lna, nnz8, na_elt8, nslaves, memory_mbytes, eff, ooc_strat, blr_strat, perlu_on, memory_bytes, blr_case, sum_of_peaks, mem_eff_allocated, under_l0_omp, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition stools.F:638
subroutine smumps_mem_allowed_set_k75(maxs, myid, under_l0_omp, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition stools.F:1432
subroutine smumps_set_blrstrat_and_maxs_k8(maxs_base8, maxs_base_relaxed8, blr_strat, keep, keep8)
Definition stools.F:1165
subroutine smumps_l0_compute_peak_allowed(myid, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition stools.F:1479
subroutine smumps_mem_allowed_set_maxs(maxs, blr_strat, ooc_strat, maxs_estim_relaxed8, keep, keep8, myid, n, nelt, na, lna, nslaves, icntl38, icntl39, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition stools.F:1250
subroutine smumps_init_root_fac(n, root, fils, iroot, keep, info)
subroutine mumps_secfin(t)
subroutine mumps_seti8toi4(i8, i)
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)
subroutine mumps_set_ierror(size8, ierror)
subroutine mumps_reducei8(in, out, mpi_op, root, comm)
subroutine mumps_mem_centralize(myid, comm, info, infog, irank)
subroutine mumps_secdeb(t)
subroutine mumps_npiv_critical_path(n, nsteps, step, frere, fils, na, lna, ne, maxnpivtree)