OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
dfac_driver.F File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine dmumps_fac_driver (id)
subroutine dmumps_print_allocated_mem (prok, prokg, print_maxavg, mp, mpg, info16, infog18, infog19, nslaves, irank, keep)
subroutine dmumps_avgmax_stat8 (prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine dmumps_extract_schur_redrhs (id)

Function/Subroutine Documentation

◆ dmumps_avgmax_stat8()

subroutine dmumps_avgmax_stat8 ( logical, intent(in) prokg,
integer, intent(in) mpg,
integer(8), intent(in) val,
integer, intent(in) nslaves,
logical, intent(in) print_maxavg,
integer, intent(in) comm,
character*48 msg )

Definition at line 3882 of file dfac_driver.F.

3884 IMPLICIT NONE
3885 include 'mpif.h'
3886 LOGICAL, intent(in) :: PROKG
3887 INTEGER, intent(in) :: MPG
3888 INTEGER(8), intent(in) :: VAL
3889 INTEGER, intent(in) :: NSLAVES
3890 LOGICAL, intent(in) :: PRINT_MAXAVG
3891 INTEGER, intent(in) :: COMM
3892 CHARACTER*48 MSG
3893C Local
3894 INTEGER(8) MAX_VAL
3895 INTEGER IERR, MASTER
3896 DOUBLE PRECISION LOC_VAL, AVG_VAL
3897 parameter(master=0)
3898C
3899 CALL mumps_reducei8( val, max_val, mpi_max, master, comm)
3900 loc_val = dble(val)/dble(nslaves)
3901 CALL mpi_reduce( loc_val, avg_val, 1, mpi_double_precision,
3902 & mpi_sum, master, comm, ierr )
3903 IF (prokg) THEN
3904 IF (print_maxavg) THEN
3905 WRITE(mpg,100) " Average", msg, int(avg_val,8)
3906 ELSE
3907 WRITE(mpg,110) msg, max_val
3908 ENDIF
3909 ENDIF
3910 RETURN
3911 100 FORMAT(a8,a48,i18)
3912 110 FORMAT(a48,i18)
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
Definition mpi.f:120
subroutine mumps_reducei8(in, out, mpi_op, root, comm)

◆ dmumps_extract_schur_redrhs()

subroutine dmumps_extract_schur_redrhs ( type(dmumps_struc) id)

Definition at line 3915 of file dfac_driver.F.

3917 IMPLICIT NONE
3918C
3919C Purpose
3920C =======
3921C
3922C Extract the Schur and possibly also the reduced right-hand side
3923C (if Fwd in facto) from the processor working on Schur and copy
3924C it into the user datastructures id%SCHUR and id%REDRHS on the host.
3925C This routine assumes that the integer list of the Schur has not
3926C been permuted and still corresponds to LISTVAR_SCHUR.
3927C
3928C If the Schur is centralized, the master of the Schur holds the
3929C Schur and possibly also the reduced right-hand side.
3930C If the Schur is distribued (already built in user's datastructure),
3931C then the master of the Schur may hold the reduced right-hand side,
3932C in which case it is available in root%RHS_CNTR_MASTER_ROOT.
3933C
3934 TYPE(DMUMPS_STRUC) :: id
3935C
3936C Local variables
3937C ===============
3938C
3939 include 'mpif.h'
3940 include 'mumps_tags.h'
3941 include 'mumps_headers.h'
3942 INTEGER :: STATUS(MPI_STATUS_SIZE)
3943 INTEGER :: IERR
3944 INTEGER, PARAMETER :: MASTER = 0
3945 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4
3946 INTEGER(4) :: I4 ! 32-bit even in 64-bit version
3947 INTEGER :: ROW_LENGTH, I
3948 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8
3949 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS
3950C
3951C External functions
3952C ==================
3953C
3954 INTEGER MUMPS_PROCNODE
3955 EXTERNAL mumps_procnode
3956C Quick return in case factorization did not terminate correctly
3957 IF (id%INFO(1) .LT. 0) RETURN
3958C Quick return if Schur option off
3959 IF (id%KEEP(60) .EQ. 0) RETURN
3960C Get Schur id
3961 id_schur =mumps_procnode(
3962 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))),
3963 & id%KEEP(199))
3964 IF ( id%KEEP( 46 ) .NE. 1 ) THEN
3965 id_schur = id_schur + 1
3966 END IF
3967C Get size of Schur
3968 IF (id%MYID.EQ.id_schur) THEN
3969 IF (id%KEEP(60).EQ.1) THEN
3970C Sequential Schur
3971 ld_schur =
3972 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(ixsz))
3973 size_schur = ld_schur - id%KEEP(253)
3974 ELSE
3975C Parallel Schur
3976 ld_schur = -999999 ! not used
3977 size_schur = id%root%TOT_ROOT_SIZE
3978 ENDIF
3979 ELSE IF (id%MYID .EQ. master) THEN
3980 size_schur = id%KEEP(116)
3981 ld_schur = -44444 ! Not used
3982 ELSE
3983C Proc is not concerned with Schur, return
3984 RETURN
3985 ENDIF
3986 surfschur8 = int(size_schur,8)*int(size_schur,8)
3987C =================================
3988C Case of parallel Schur: if REDRHS
3989C was requested, obtain it directly
3990C from id%root%RHS_CNTR_MASTER_ROOT
3991C =================================
3992 IF (id%KEEP(60) .GT. 1) THEN
3993 IF (id%KEEP(221).EQ.1 .AND. id%KEEP(252).GT.0) THEN
3994 DO i = 1, id%KEEP(253)
3995 IF (id_schur.EQ.master) THEN ! Necessarily = id%MYID
3996 CALL dcopy(size_schur,
3997 & id%root%RHS_CNTR_MASTER_ROOT((i-1)*size_schur+1), 1,
3998 & id%REDRHS((i-1)*id%LREDRHS+1), 1)
3999 ELSE
4000 IF (id%MYID.EQ.id_schur) THEN
4001C Send
4002 CALL mpi_send(
4003 & id%root%RHS_CNTR_MASTER_ROOT((i-1)*size_schur+1),
4004 & size_schur,
4005 & mpi_double_precision,
4006 & master, tag_schur,
4007 & id%COMM, ierr )
4008 ELSE ! MYID.EQ.MASTER
4009C Receive
4010 CALL mpi_recv( id%REDRHS((i-1)*id%LREDRHS+1),
4011 & size_schur,
4012 & mpi_double_precision, id_schur, tag_schur,
4013 & id%COMM, status, ierr )
4014 ENDIF
4015 ENDIF
4016 ENDDO
4017C ------------------------------
4018C In case of parallel Schur, we
4019C free root%RHS_CNTR_MASTER_ROOT
4020C ------------------------------
4021 IF (id%MYID.EQ.id_schur) THEN
4022 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT)
4023 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
4024 ENDIF
4025 ENDIF
4026C return because this is all we need to do
4027C in case of parallel Schur complement
4028 RETURN
4029 ENDIF
4030C ============================
4031C Centralized Schur complement
4032C ============================
4033C PTRAST has been freed at the moment of calling this
4034C routine. Schur is available through
4035C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) ))
4036 IF (id%KEEP(252).EQ.0) THEN
4037C CASE 1 (ORIGINAL CODE):
4038C Schur is contiguous on ID_SCHUR
4039 IF ( id_schur .EQ. master ) THEN ! Necessarily equals id%MYID
4040C ---------------------
4041C Copy Schur complement
4042C ---------------------
4043 CALL dmumps_copyi8size( surfschur8,
4044 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))),
4045 & id%SCHUR(1) )
4046 ELSE
4047C -----------------------------------------
4048C The processor responsible of the Schur
4049C complement sends it to the host processor
4050C Use blocks to avoid too large messages.
4051C -----------------------------------------
4052 bl8=int(huge(i4)/id%KEEP(35)/10,8)
4053 DO ib=1, int((surfschur8+bl8-1_8) / bl8)
4054 shift8 = int(ib-1,8) * bl8 ! Where to send
4055 bl4 = int(min(bl8,surfschur8-shift8)) ! Size of block
4056 IF ( id%MYID .eq. id_schur ) THEN
4057C Send Schur complement
4058 CALL mpi_send( id%S( shift8 +
4059 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4060 & +4+id%KEEP(ixsz)))),
4061 & bl4,
4062 & mpi_double_precision,
4063 & master, tag_schur,
4064 & id%COMM, ierr )
4065 ELSE IF ( id%MYID .eq. master ) THEN
4066C Receive Schur complement
4067 CALL mpi_recv( id%SCHUR(1_8 + shift8),
4068 & bl4,
4069 & mpi_double_precision, id_schur, tag_schur,
4070 & id%COMM, status, ierr )
4071 END IF
4072 ENDDO
4073 END IF
4074 ELSE
4075C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR,
4076C process it row by row.
4077C
4078C 2.1: We first centralize Schur complement into id%SCHUR
4079 ischur_src = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4080 & +4+id%KEEP(ixsz)))
4081 ischur_dest= 1_8
4082 DO i=1, size_schur
4083 row_length = size_schur
4084 IF (id_schur.EQ.master) THEN ! Necessarily = id%MYID
4085 CALL dcopy(row_length, id%S(ischur_src), 1,
4086 & id%SCHUR(ischur_dest),1)
4087 ELSE
4088 IF (id%MYID.EQ.id_schur) THEN
4089C Send
4090 CALL mpi_send( id%S(ischur_src), row_length,
4091 & mpi_double_precision,
4092 & master, tag_schur,
4093 & id%COMM, ierr )
4094 ELSE
4095C Recv
4096 CALL mpi_recv( id%SCHUR(ischur_dest),
4097 & row_length,
4098 & mpi_double_precision, id_schur, tag_schur,
4099 & id%COMM, status, ierr )
4100 ENDIF
4101 ENDIF
4102 ischur_src = ischur_src+int(ld_schur,8)
4103 ischur_dest= ischur_dest+int(size_schur,8)
4104 ENDDO
4105C 2.2: Get REDRHS on host
4106C 2.2.1: Symmetric => REDRHS is available in last KEEP(253)
4107C rows of Schur structure on ID_SCHUR
4108C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253)
4109C columns. However it must be transposed.
4110 IF (id%KEEP(221).EQ.1) THEN ! Implies Fwd in facto
4111 ischur_sym = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4112 & +4+id%KEEP(ixsz))) + int(size_schur,8) *
4113 & int(ld_schur,8)
4114 ischur_uns =
4115 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4116 & +4+id%KEEP(ixsz))) + int(size_schur,8)
4117 ischur_dest = 1_8
4118 DO i = 1, id%KEEP(253)
4119 IF (id_schur .EQ. master) THEN ! necessarily = id%MYID
4120 IF (id%KEEP(50) .EQ. 0) THEN
4121 CALL dcopy(size_schur, id%S(ischur_uns), ld_schur,
4122 & id%REDRHS(ischur_dest), 1)
4123 ELSE
4124 CALL dcopy(size_schur, id%S(ischur_sym), 1,
4125 & id%REDRHS(ischur_dest), 1)
4126 ENDIF
4127 ELSE
4128 IF (id%MYID .NE. master) THEN
4129 IF (id%KEEP(50) .EQ. 0) THEN
4130C Use id%S(ISCHUR_SYM) as temporary contig. workspace
4131C of size SIZE_SCHUR.
4132 CALL dcopy(size_schur, id%S(ischur_uns), ld_schur,
4133 & id%S(ischur_sym), 1)
4134 ENDIF
4135 CALL mpi_send(id%S(ischur_sym), size_schur,
4136 & mpi_double_precision, master, tag_schur,
4137 & id%COMM, ierr )
4138 ELSE
4139 CALL mpi_recv(id%REDRHS(ischur_dest),
4140 & size_schur, mpi_double_precision, id_schur, tag_schur,
4141 & id%COMM, status, ierr )
4142 ENDIF
4143 ENDIF
4144 IF (id%KEEP(50).EQ.0) THEN
4145 ischur_uns = ischur_uns + int(ld_schur,8)
4146 ELSE
4147 ischur_sym = ischur_sym + int(ld_schur,8)
4148 ENDIF
4149 ischur_dest = ischur_dest + int(id%LREDRHS,8)
4150 ENDDO
4151 ENDIF
4152 ENDIF
4153 RETURN
subroutine dmumps_copyi8size(n8, src, dest)
Definition dtools.F:1823
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine mpi_recv(buf, cnt, datatype, source, tag, comm, status, ierr)
Definition mpi.f:461
subroutine mpi_send(buf, cnt, datatype, dest, tag, comm, ierr)
Definition mpi.f:480
initmumps id
integer function mumps_procnode(procinfo_inode, k199)

◆ dmumps_fac_driver()

subroutine dmumps_fac_driver ( type(dmumps_struc), target id)

Definition at line 14 of file dfac_driver.F.

15 USE dmumps_buf
16 USE dmumps_load
17 USE dmumps_ooc
26#if ! defined(NO_FDM_DESCBAND)
28#endif
29#if ! defined(NO_FDM_MAPROW)
31#endif
32!$ USE OMP_LIB
33C Derived datatype to pass pointers with implicit interfaces
35 IMPLICIT NONE
36C
37C Purpose
38C =======
39C
40C Performs scaling, sorting in arrowhead, then
41C distributes the matrix, and perform
42C factorization.
43C
44C
45 INTERFACE
46 SUBROUTINE dmumps_anorminf(id, ANORMINF, LSCAL, EFF_SIZE_SCHUR)
48 TYPE (DMUMPS_STRUC), TARGET :: id
49 DOUBLE PRECISION, INTENT(OUT) :: ANORMINF
50 LOGICAL, INTENT(IN) :: LSCAL
51 INTEGER, INTENT(IN) :: EFF_SIZE_SCHUR
52 END SUBROUTINE dmumps_anorminf
53 SUBROUTINE dmumps_free_id_data_modules(id_FDM_F_ENCODING,
54 & id_BLRARRAY_ENCODING, KEEP8, K34)
55# if defined(MUMPS_F2003)
56 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
57 & id_blrarray_encoding
58 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
59 & id_fdm_f_encoding
60# else
61 CHARACTER, DIMENSION(:), POINTER :: id_BLRARRAY_ENCODING
62 CHARACTER, DIMENSION(:), POINTER :: id_FDM_F_ENCODING
63# endif
64 INTEGER(8), intent(inout) :: KEEP8(150)
65 INTEGER, intent(in) :: K34
66 END SUBROUTINE dmumps_free_id_data_modules
67 END INTERFACE
68C
69C Parameters
70C ==========
71C
72 TYPE(DMUMPS_STRUC), TARGET :: id
73C
74C MPI
75C ===
76C
77 include 'mpif.h'
78 include 'mumps_tags.h'
79 INTEGER :: STATUS(MPI_STATUS_SIZE)
80 INTEGER :: IERR
81 INTEGER, PARAMETER :: MASTER = 0
82C
83C Local variables
84C ===============
85C
86 include 'mumps_headers.h'
87 INTEGER(8) :: NSEND8, NSEND_TOT8
88 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8
89 INTEGER(4) :: I4
90 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS
91 INTEGER :: ITMP, JTMP
92 INTEGER :: KEEP464COPY, KEEP465COPY
93 DOUBLE PRECISION :: RATIOK465
94 INTEGER(8) :: KEEP826_SAVE
95 INTEGER(8) :: K67, K68, K70, K74, K75
96 INTEGER(8) ITMP8
97 INTEGER MUMPS_PROCNODE
98 EXTERNAL mumps_procnode
99 INTEGER MP, LP, MPG, allocok
100 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF
101C Reception buffer
102 INTEGER :: DMUMPS_LBUFR, DMUMPS_LBUFR_BYTES
103 INTEGER(8) :: DMUMPS_LBUFR_BYTES8 ! for intermediate computation
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: BUFR
105C Size of send buffers (in bytes)
106 INTEGER :: DMUMPS_LBUF, DMUMPS_LBUF_INT
107 INTEGER(8) :: DMUMPS_LBUF8 ! for intermediate computation
108C
109 INTEGER PTRIST, PTRWB, MAXELT_SIZE,
110 & itloc, ipool, k28, lpool
111 INTEGER IRANK, ID_ROOT
112 INTEGER KKKK
113 INTEGER(8) :: NZ_locMAX8
114 INTEGER(8) MEMORY_MD_ARG
115 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8
116 DOUBLE PRECISION CNTL4, AVG_FLOPS
117 INTEGER MIN_PERLU, MAXIS_ESTIM
118 INTEGER SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE
119C
120 TYPE (S_IS_POINTERS_T) :: S_IS_POINTERS
121 INTEGER MAXIS
122 INTEGER(8) :: MAXS
123C For S argument to arrowhead routines:
124 INTEGER(8) :: MAXS_ARG
125 DOUBLE PRECISION, TARGET :: S_DUMMY_ARG(1)
126 DOUBLE PRECISION, POINTER, DIMENSION(:) :: S_PTR_ARG
127 INTEGER NB_THREADS, NOMP
128 DOUBLE PRECISION TIMEAVG, TIMEMAX,
129 & flopavg, flopmax
130 DOUBLE PRECISION TMPTIME, TMPFLOP
131 INTEGER NPIV_CRITICAL_PATH, EFF_SIZE_SCHUR
132 DOUBLE PRECISION TIME, TIMEET
133 DOUBLE PRECISION ZERO, ONE, MONE
134 parameter( zero = 0.0d0, one = 1.0d0, mone = -1.0d0)
135 DOUBLE PRECISION CZERO
136 parameter( czero = 0.0d0 )
137 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233, BLR_STRAT
138 INTEGER, PARAMETER :: IDUMMY = -9999
139 LOGICAL, PARAMETER :: BDUMMY =.false.
140 INTEGER, PARAMETER :: PANEL_TABSIZE = 20
141 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling
142 INTEGER LIWK, LWK_REAL
143 INTEGER(8) :: LWK
144C I_AM_SLAVE: used to determine if proc has the role of a slave
145C WK_USER_PROVIDED is set to true when WK_USER is provided by user
146 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED, EARLYT3ROOTINS
147 LOGICAL PRINT_MAXAVG, PRINT_NODEINFO
148 DOUBLE PRECISION :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2, Thresh_Seuil
149 DOUBLE PRECISION :: CNTL1, CNTL3, CNTL5, CNTL6, EPS
150 INTEGER N, LPN_LIST,POSBUF
151 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2
152 INTEGER I,K
153 INTEGER(8) :: ITEMP8
154 INTEGER :: PARPIV_T1
155 INTEGER FRONTWISE
156C temporary variables for collecting stats from all processors
157 DOUBLE PRECISION :: TMP_MRY_LU_FR
158 DOUBLE PRECISION :: TMP_MRY_LU_LRGAIN
159 DOUBLE PRECISION :: TMP_MRY_CB_FR
160 DOUBLE PRECISION :: TMP_MRY_CB_LRGAIN
161 DOUBLE PRECISION :: TMP_FLOP_LRGAIN
162 DOUBLE PRECISION :: TMP_FLOP_TRSM
163 DOUBLE PRECISION :: TMP_FLOP_PANEL
164 DOUBLE PRECISION :: TMP_FLOP_FRFRONTS
165 DOUBLE PRECISION :: TMP_FLOP_TRSM_FR
166 DOUBLE PRECISION :: TMP_FLOP_TRSM_LR
167 DOUBLE PRECISION :: TMP_FLOP_UPDATE_FR
168 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LR
169 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LRLR3
170 DOUBLE PRECISION :: TMP_FLOP_COMPRESS
171 DOUBLE PRECISION :: TMP_FLOP_DECOMPRESS
172 DOUBLE PRECISION :: TMP_FLOP_MIDBLK_COMPRESS
173 DOUBLE PRECISION :: TMP_FLOP_FRSWAP_COMPRESS
174 DOUBLE PRECISION :: TMP_FLOP_ACCUM_COMPRESS
175 DOUBLE PRECISION :: TMP_FLOP_CB_COMPRESS
176 DOUBLE PRECISION :: TMP_FLOP_CB_DECOMPRESS
177 DOUBLE PRECISION :: TMP_FLOP_FACTO_FR
178 INTEGER :: TMP_CNT_NODES
179 DOUBLE PRECISION :: TMP_TIME_UPDATE
180 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR1
181 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR2
182 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR3
183 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRLR
184 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRFR
185 DOUBLE PRECISION :: TMP_TIME_COMPRESS
186 DOUBLE PRECISION :: TMP_TIME_MIDBLK_COMPRESS
187 DOUBLE PRECISION :: TMP_TIME_FRSWAP_COMPRESS
188 DOUBLE PRECISION :: TMP_TIME_CB_COMPRESS
189 DOUBLE PRECISION :: TMP_TIME_PANEL
190 DOUBLE PRECISION :: TMP_TIME_FAC_I
191 DOUBLE PRECISION :: TMP_TIME_FAC_MQ
192 DOUBLE PRECISION :: TMP_TIME_FAC_SQ
193 DOUBLE PRECISION :: TMP_TIME_LRTRSM
194 DOUBLE PRECISION :: TMP_TIME_FRTRSM
195 DOUBLE PRECISION :: TMP_TIME_FRFRONTS
196 DOUBLE PRECISION :: TMP_TIME_LR_MODULE
197 DOUBLE PRECISION :: TMP_TIME_DIAGCOPY
198 DOUBLE PRECISION :: TMP_TIME_DECOMP
199 DOUBLE PRECISION :: TMP_TIME_DECOMP_UCFS
200 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASM1
201 DOUBLE PRECISION :: TMP_TIME_DECOMP_LOCASM2
202 DOUBLE PRECISION :: TMP_TIME_DECOMP_MAPLIG1
203 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2S
204 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2M
205C
206C Workspace.
207C
208 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK
209 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK
210 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: WK_REAL
211 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8
212 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP
213 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP
214 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS
215 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS
216 INTEGER BUREGISTRE(12)
217 INTEGER BUINTSZ, BURESZ, BUJOB
218 INTEGER BUMAXMN, M, SCMYID, SCNPROCS
219 DOUBLE PRECISION SCONEERR, SCINFERR
220C
221C Parameters arising from the structure
222C =====================================
223C
224 INTEGER, POINTER :: JOB
225* Control parameters: see description in DMUMPSID
226 DOUBLE PRECISION,DIMENSION(:),POINTER::RINFO, RINFOG
227 DOUBLE PRECISION,DIMENSION(:),POINTER:: CNTL
228 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP
229 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc
230 DOUBLE PRECISION, DIMENSION(:), POINTER :: MYA_loc
231 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1)
232 DOUBLE PRECISION, TARGET :: DUMMYA_loc(1)
233 INTEGER,DIMENSION(:),POINTER::ICNTL
234 EXTERNAL mumps_get_pool_length
235 INTEGER MUMPS_GET_POOL_LENGTH
236 INTEGER(8) :: TOTAL_BYTES
237 INTEGER(8) :: I8TMP, LWK_USER_SUM8
238C
239C External references
240C ===================
241 INTEGER numroc
242 EXTERNAL numroc
243 INTEGER:: NWORKING
244 LOGICAL:: MEM_EFF_ALLOCATED
245 INTEGER :: TOTAL_MBYTES_UNDER_L0
246 INTEGER(8):: TOTAL_BYTES_UNDER_L0
247C Fwd in facto:
248 DOUBLE PRECISION, DIMENSION(:), POINTER :: RHS_MUMPS
249 LOGICAL :: RHS_MUMPS_ALLOCATED
250 INTEGER :: NB_ACTIVE_FRONTS_ESTIM
251 INTEGER :: NB_FRONTS_F_ESTIM
252C
253C
254 job=>id%JOB
255 rinfo=>id%RINFO
256 rinfog=>id%RINFOG
257 cntl=>id%CNTL
258 infog=>id%INFOG
259 keep=>id%KEEP
260 icntl=>id%ICNTL
261 IF (id%KEEP8(29) .NE. 0) THEN
262 myirn_loc=>id%IRN_loc
263 myjcn_loc=>id%JCN_loc
264 mya_loc=>id%A_loc
265 ELSE
266 myirn_loc=>dummyirn_loc
267 myjcn_loc=>dummyjcn_loc
268 mya_loc=>dummya_loc
269 ENDIF
270 n = id%N
271 eps = epsilon( zero )
272C TIMINGS: reset to 0
273 id%DKEEP(92)=0.0d0
274 id%DKEEP(93)=0.0d0
275 id%DKEEP(94)=0.0d0
276 id%DKEEP(97)=0.0d0
277 id%DKEEP(98)=0.0d0
278 id%DKEEP(56)=0.0d0
279C Count of MPI messages: reset to 0
280 id%KEEP(266)=0
281 id%KEEP(267)=0
282C MIN/MAX pivots reset to 0
283 id%DKEEP(19)=huge(0.0d0)
284 id%DKEEP(20)=huge(0.0d0)
285 id%DKEEP(21)=0.0d0
286C Number of symmetric swaps
287 id%KEEP8(80)=0_8
288C Largest increase of internal panel size
289 id%KEEP(425) =0
290C
291 print_maxavg = .NOT.(id%NSLAVES.EQ.1 .AND. keep(46).EQ.1)
292C Print per node informtation only in case ther are several
293C compute nodes (id%KEEP(412): #MPI procs on comupte node)
294 print_nodeinfo = print_maxavg .AND. id%NPROCS .NE. id%KEEP(412)
295C
296C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
297C Data from factorization is now freed asap
298C id%S, id%IS
299 IF (id%KEEP8(24).EQ.0_8) THEN
300C -- deallocate only when not provided/allocated by the user
301 IF (associated(id%S)) THEN
302 DEALLOCATE(id%S)
303 id%KEEP8(23)=0_8
304 NULLIFY(id%S)
305 ENDIF
306 ENDIF
307 IF (associated(id%IS)) THEN
308 DEALLOCATE(id%IS)
309 NULLIFY(id%IS)
310 ENDIF
311C Free BLR factors, if any
312 CALL dmumps_free_id_data_modules(id%FDM_F_ENCODING,
313 & id%BLRARRAY_ENCODING, id%KEEP8(1), id%KEEP(34))
314 IF (associated(id%root%RG2L_ROW))THEN
315 DEALLOCATE(id%root%RG2L_ROW)
316 NULLIFY(id%root%RG2L_ROW)
317 ENDIF
318 IF (associated(id%root%RG2L_COL))THEN
319 DEALLOCATE(id%root%RG2L_COL)
320 NULLIFY(id%root%RG2L_COL)
321 ENDIF
322 IF (associated( id%PTLUST_S )) THEN
323 DEALLOCATE(id%PTLUST_S)
324 NULLIFY(id%PTLUST_S)
325 ENDIF
326 IF (associated(id%PTRFAC)) THEN
327 DEALLOCATE(id%PTRFAC)
328 NULLIFY(id%PTRFAC)
329 END IF
330 IF (associated(id%RHSCOMP)) THEN
331 DEALLOCATE(id%RHSCOMP)
332 NULLIFY(id%RHSCOMP)
333 id%KEEP8(25)=0_8
334 ENDIF
335 IF (associated(id%POSINRHSCOMP_ROW)) THEN
336 DEALLOCATE(id%POSINRHSCOMP_ROW)
337 NULLIFY(id%POSINRHSCOMP_ROW)
338 ENDIF
339 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
340 DEALLOCATE(id%POSINRHSCOMP_COL)
341 NULLIFY(id%POSINRHSCOMP_COL)
342 id%POSINRHSCOMP_COL_ALLOC = .false.
343 ENDIF
344C
345C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
346C
347C Related to forward in facto functionality (referred to as "Fwd in facto")
348 NULLIFY(rhs_mumps)
349 rhs_mumps_allocated = .false.
350C -----------------------------------------------------------------------
351C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user
352C We can accept WK_USER to be provided on only one proc and
353C different values of WK_USER per processor
354C
355 IF (id%KEEP8(24).GT.0_8) THEN
356C We nullify S so that later when we test
357C if (associated(S) we can free space and reallocate it).
358 NULLIFY(id%S)
359 ENDIF
360C
361C -- KEEP8(24) can now then be reset safely
362 wk_user_provided = (id%LWK_USER.NE.0)
363 IF (wk_user_provided) THEN
364 IF (id%LWK_USER.GT.0) THEN
365 id%KEEP8(24) = int(id%LWK_USER,8)
366 ELSE
367 id%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8
368 ENDIF
369 ELSE
370 id%KEEP8(24) = 0_8
371 ENDIF
372C Compute sum of LWK_USER provided by user
373 lwk_user_sum8 = 0_8
374 CALL mpi_reduce ( id%KEEP8(24), lwk_user_sum8, 1, mpi_integer8,
375 & mpi_sum, master, id%COMM, ierr )
376C
377C KEEP8(26) might be modified
378C (element entry format)
379C but need be restore for
380C future factorisation
381C with different scaling option
382C
383 keep826_save = id%KEEP8(26)
384C In case of loop on factorization with
385C different scaling options, initialize
386C DKEEP(4:5) to 0.
387 id%DKEEP(4)=-1.0d0
388 id%DKEEP(5)=-1.0d0
389C Mapping information used during solve. In case of several facto+solve
390C it has to be recomputed. In case of several solves with the same
391C facto, it is not recomputed.
392 IF (associated(id%IPTR_WORKING)) THEN
393 DEALLOCATE(id%IPTR_WORKING)
394 NULLIFY(id%IPTR_WORKING)
395 END IF
396 IF (associated(id%WORKING)) THEN
397 DEALLOCATE(id%WORKING)
398 NULLIFY(id%WORKING)
399 END IF
400C
401C Units for printing
402C MP: diagnostics
403C LP: errors
404C
405 lp = icntl( 1 )
406 mp = icntl( 2 )
407 mpg = icntl( 3 )
408 lpok = ((lp.GT.0).AND.(id%ICNTL(4).GE.1))
409 prok = ((mp.GT.0).AND.(id%ICNTL(4).GE.2))
410 prokg = ( mpg .GT. 0 .and. id%MYID .eq. master )
411 prokg = (prokg.AND.(id%ICNTL(4).GE.2))
412 IF ( prok ) WRITE( mp, 130 )
413 IF ( prokg ) WRITE( mpg, 130 )
414C -------------------------------------
415C Depending on the type of parallelism,
416C the master can now (soon) potentially
417C have the role of a slave
418C -------------------------------------
419 i_am_slave = ( id%MYID .ne. master .OR.
420 & ( id%MYID .eq. master .AND.
421 & keep(46) .eq. 1 ) )
422C
423C Prepare work for out-of-core
424C
425 IF (id%MYID .EQ. master .AND. keep(201) .NE. -1) THEN
426C Note that if KEEP(201)=-1, then we have decided
427C at analysis phase that factors will not be stored
428C (neither in memory nor on disk). In that case,
429C ICNTL(22) is ignored.
430C -- ICNTL(22) must be set before facto phase
431C (=1 OOC on; =0 OOC off)
432C and cannot be changed for subsequent solve phases.
433 keep(201)=id%ICNTL(22)
434 IF (keep(201) .NE. 0) THEN
435# if defined(OLD_OOC_NOPANEL)
436 keep(201)=2
437# else
438 keep(201)=1
439# endif
440 ENDIF
441 ENDIF
442C ----------------------
443C Broadcast KEEP options
444C defined for facto:
445C ----------------------
446 CALL mpi_bcast( keep(12), 1, mpi_integer,
447 & master, id%COMM, ierr )
448 CALL mpi_bcast( keep(19), 1, mpi_integer,
449 & master, id%COMM, ierr )
450 CALL mpi_bcast( keep(21), 1, mpi_integer,
451 & master, id%COMM, ierr )
452 CALL mpi_bcast( keep(201), 1, mpi_integer,
453 & master, id%COMM, ierr )
454 CALL mpi_bcast( keep(459), 1, mpi_integer,
455 & master, id%COMM, ierr )
456 CALL mpi_bcast( keep(460), 1, mpi_integer,
457 & master, id%COMM, ierr )
458 IF ( keep(459) .GE. panel_tabsize ) THEN
459 IF ( lpok ) THEN
460 WRITE(lp,'(A,I4,A,I3)') " ** WARNING ** KEEP(459)=",keep(459),
461 & " too large, resetting to",panel_tabsize-1
462 ENDIF
463 keep(459) = panel_tabsize - 1
464 ENDIF
465 perlu = keep(12)
466 IF (id%MYID.EQ.master) THEN
467C KEEP(50) case
468C ==============
469C
470C KEEP(50) = 0 : matrix is unsymmetric
471C KEEP(50) /= 0 : matrix is symmetric
472C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD.
473C KEEP(50) = 2 : Ask for L U on the root
474C KEEP(50) = 3 ... L D L^T ??
475C
476 cntl1 = id%CNTL(1)
477C ---------------------------------------
478C For symmetric (non general) matrices
479C set (directly) CNTL1 = 0.0
480C ---------------------------------------
481 keep(17)=0
482 IF ( keep(50) .eq. 1 ) THEN
483 IF (cntl1 .ne. zero ) THEN
484 IF ( prokg ) THEN
485 WRITE(mpg,'(A)')
486 & '** Warning : SPD solver called, resetting CNTL(1) to 0.0D0'
487 END IF
488 END IF
489 cntl1 = zero
490 END IF
491C CNTL1 threshold value must be between
492C 0.0 and 1.0 (for SYM=0) and 0.5 (for SYM=1,2)
493 IF (cntl1.GT.one) cntl1=one
494 IF (cntl1.LT.zero) cntl1=zero
495 IF (keep(50).NE.0.AND.cntl1.GT.0.5d0) THEN
496 cntl1 = 0.5d0
497 ENDIF
498 parpiv_t1 = id%KEEP(268)
499 IF (parpiv_t1.EQ.77) THEN
500 parpiv_t1 = 0
501#if defined(__ve__)
502 parpiv_t1 = -2
503#endif
504 ENDIF
505 IF (parpiv_t1.EQ.-3) THEN
506 parpiv_t1 = 0
507 ENDIF
508 IF ((parpiv_t1.LT.-3).OR.(parpiv_t1.GT.1)) THEN
509C out of range values
510 parpiv_t1 =0
511 ENDIF
512C note that KEEP(50).EQ.1 => CNTL1=0.0
513 IF (cntl1.EQ.0.0.OR.(keep(50).eq.1)) parpiv_t1 = 0
514C
515 IF (parpiv_t1.EQ.-2) THEN
516 IF (keep(19).NE.0) THEN
517C switch off PARPIV_T1 if RR activated
518C but do NOT switch off PARPIV_1 with null pivot detection
519 parpiv_t1 = 0
520 ENDIF
521 ENDIF
522 id%KEEP(269) = parpiv_t1
523 ENDIF
524 CALL mpi_bcast(cntl1, 1, mpi_double_precision,
525 & master, id%COMM, ierr)
526 CALL mpi_bcast( keep(269), 1, mpi_integer,
527 & master, id%COMM, ierr )
528 IF (id%MYID.EQ.master) THEN
529C -----------------------------------------------------
530C Decoding of ICNTL(35) for factorization: same as
531C at analysis except that we store a copy of ICNTL(35)
532C in KEEP(486) instead of KEEP(494) and need to check
533C compatibility of KEEP(486) and KEEP(494): If LR was
534C not activated during analysis, it cannot be activated
535C at factorization.
536C ------------------------------------------------------
537 id%KEEP(486) = id%ICNTL(35)
538 IF (id%KEEP(486).EQ.1) THEN
539C -- Automatic BLR option setting
540 id%KEEP(486)= 2
541 ENDIF
542 IF ( id%KEEP(486).EQ.4) id%KEEP(486)=0
543 IF ((id%KEEP(486).LT.0).OR.(id%KEEP(486).GT.4)) THEN
544C Out of range values treated as 0
545 id%KEEP(486) = 0
546 ENDIF
547 IF ((keep(486).NE.0).AND.(keep(494).EQ.0)) THEN
548C To activate BLR during factorization,
549C ICNTL(35) must have been set at analysis.
550 IF (lpok) THEN
551 WRITE(lp,'(A)')
552 & " *** Error with BLR setting "
553 WRITE(lp,'(A)') " *** BLR was not activated during ",
554 & " analysis but is requested during factorization."
555 ENDIF
556 id%INFO(1)=-54
557 id%INFO(2)=0
558 GOTO 105
559 ENDIF
560 keep464copy = id%ICNTL(38)
561 IF (keep464copy.LT.0.OR.keep464copy.GT.1000) THEN
562C Out of range values treated as 1000
563 keep464copy = 1000
564 ENDIF
565 IF (id%KEEP(461).LT.1) THEN
566 id%KEEP(461) = 10
567 ENDIF
568 keep465copy=0
569 IF (id%ICNTL(36).EQ.1.OR.id%ICNTL(36).EQ.3) THEN
570 IF (cntl1.EQ.zero .OR. keep(468).LE.1) THEN
571 keep(475) = 3
572 ELSE IF ( (keep(269).GT.0).OR. (keep(269).EQ.-2)) THEN
573 keep(475) = 2
574 ELSE IF (keep(468).EQ.2) THEN
575 keep(475) = 2
576 ELSE
577 keep(475) = 1
578 ENDIF
579 ELSE
580 keep(475) = 0
581 ENDIF
582 keep(481)=0
583 IF (id%ICNTL(36).LT.0 .OR. id%ICNTL(36).GE.2) THEN
584C Only options 1 and 2 are allowed
585 keep(475) = 0
586 ENDIF
587C K489 is set according to ICNTL(37)
588 IF (id%ICNTL(37).EQ.0.OR.id%ICNTL(37).EQ.1) THEN
589 keep(489) = id%ICNTL(37)
590 ELSE
591C Other values treated as zero
592 keep(489) = 0
593 ENDIF
594 IF (keep(79).GE.1) THEN
595C CompressCB incompatible with type4,5,6 nodes
596 keep(489)=0
597 ENDIF
598 keep(489)=0
599C id%KEEP(476) \in [1,100]
600 IF ((id%KEEP(476).GT.100).OR.(id%KEEP(476).LT.1)) THEN
601 id%KEEP(476)= 50
602 ENDIF
603C id%KEEP(477) \in [1,100]
604 IF ((id%KEEP(477).GT.100).OR.(id%KEEP(477).LT.1)) THEN
605 id%KEEP(477)= 100
606 ENDIF
607C id%KEEP(483) \in [1,100]
608 IF ((id%KEEP(483).GT.100).OR.(id%KEEP(483).LT.1)) THEN
609 id%KEEP(483)= 50
610 ENDIF
611C id%KEEP(484) \in [1,100]
612 IF ((id%KEEP(484).GT.100).OR.(id%KEEP(484).LT.1)) THEN
613 id%KEEP(484)= 50
614 ENDIF
615C id%KEEP(480)=0,2,3,4,5,6
616 IF ((id%KEEP(480).GT.6).OR.(id%KEEP(480).LT.0)
617 & .OR.(id%KEEP(480).EQ.1)) THEN
618 id%KEEP(480)=0
619 ENDIF
620C id%KEEP(473)=0 or 1
621 IF ((id%KEEP(473).NE.0).AND.(id%KEEP(473).NE.1)) THEN
622 id%KEEP(473)=0
623 ENDIF
624C id%KEEP(474)=0,1,2,3
625 IF ((id%KEEP(474).GT.3).OR.(id%KEEP(474).LT.0)) THEN
626 id%KEEP(474)=0
627 ENDIF
628C id%KEEP(479)>0
629 IF (id%KEEP(479).LE.0) THEN
630 id%KEEP(479)=1
631 ENDIF
632 IF (id%KEEP(474).NE.0.AND.id%KEEP(480).EQ.0) THEN
633 id%KEEP(474) = 0
634 ENDIF
635 IF (id%KEEP(478).NE.0.AND.id%KEEP(480).LT.4) THEN
636 id%KEEP(478) = 0
637 ENDIF
638 IF (id%KEEP(480).GE.5 .OR.
639 & (id%KEEP(480).NE.0.AND.id%KEEP(474).EQ.3)) THEN
640 IF (id%KEEP(475).LT.2) THEN
641C Reset to 3 if 5 or to 4 if 6
642 id%KEEP(480) = id%KEEP(480) - 2
643 write(*,*) ' Resetting KEEP(480) to ', id%KEEP(480)
644 ENDIF
645 ENDIF
646 105 CONTINUE
647 ENDIF ! id%MYID .EQ. MASTER
648 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
649 & id%COMM, id%MYID )
650C
651 IF (id%INFO(1).LT.0) GOTO 530
652 CALL mpi_bcast( keep(473), 14, mpi_integer,
653 & master, id%COMM, ierr )
654 IF (keep(486).NE.0) THEN
655 CALL mpi_bcast( keep(489), 1, mpi_integer,
656 & master, id%COMM, ierr )
657 CALL mpi_bcast( keep464copy, 1, mpi_integer,
658 & master, id%COMM, ierr )
659 CALL mpi_bcast( keep465copy, 1, mpi_integer,
660 & master, id%COMM, ierr )
661 ENDIF
662 IF (id%MYID.EQ.master) THEN
663 IF (keep(217).GT.2.OR.keep(217).LT.0) THEN
664 keep(217)=0
665 ENDIF
666 keep(214)=keep(217)
667 IF (keep(214).EQ.0) THEN
668 IF (keep(201).NE.0) THEN ! OOC or no factors
669 keep(214)=1
670 ELSE
671 keep(214)=2
672 ENDIF
673 IF (keep(486).EQ.2) THEN
674 keep(214)=1
675 ENDIF
676 ENDIF
677 ENDIF
678 CALL mpi_bcast( keep(214), 1, mpi_integer,
679 & master, id%COMM, ierr )
680 IF (keep(201).NE.0) THEN
681C -- Low Level I/O strategy
682 CALL mpi_bcast( keep(99), 1, mpi_integer,
683 & master, id%COMM, ierr )
684 CALL mpi_bcast( keep(205), 1, mpi_integer,
685 & master, id%COMM, ierr )
686 CALL mpi_bcast( keep(211), 1, mpi_integer,
687 & master, id%COMM, ierr )
688 ENDIF
689C Fwd in facto: explicitly forbid
690C sparse RHS and A-1 computation
691 IF (id%KEEP(252).EQ.1 .AND. id%MYID.EQ.master) THEN
692 IF (id%ICNTL(20).EQ.1) THEN ! out-of-range => 0
693C NB: in doc ICNTL(20) only accessed during solve
694C In practice, will have failed earlier if RHS not allocated.
695C Still it looks safer to keep this test.
696 id%INFO(1)=-43
697 id%INFO(2)=20
698 IF (lpok) WRITE(lp,'(A)')
699 & ' ERROR: Sparse RHS is incompatible with forward',
700 & ' performed during factorization (ICNTL(32)=1)'
701 ELSE IF (id%ICNTL(30).NE.0) THEN ! out-of-range => 1
702 id%INFO(1)=-43
703 id%INFO(2)=30
704 IF (lpok) WRITE(lp,'(A)')
705 & ' ERROR: A-1 functionality incompatible with forward',
706 & ' performed during factorization (ICNTL(32)=1)'
707 ELSE IF (id%ICNTL(9) .NE. 1) THEN
708 id%INFO(1)=-43
709 id%INFO(2)=9
710 IF (lpok) WRITE(lp,'(A)')
711 & .NE.' ERROR: Transpose system (ICNTL(9)0) not ',
712 & ' compatible with forward performed during',
713 & ' factorization (ICNTL(32)=1)'
714 ENDIF
715 ENDIF
716 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
717 & id%COMM, id%MYID )
718C
719 IF (id%INFO(1).LT.0) GOTO 530
720C
721C The memory allowed is given by ICNTL(23) in Mbytes
722C 0 means that nothing is provided.
723C Save memory available, ICNTL(23) in KEEP8(4)
724C
725 IF ( icntl(23) .GT. 0 ) THEN
726 itmp = 1
727 ELSE
728 itmp = 0
729 ENDIF
730 CALL mpi_allreduce( itmp, jtmp, 1, mpi_integer,
731 & mpi_sum, id%COMM, ierr)
732 IF ( id%MYID.EQ.master ) THEN
733C Negative values considered 0
734 itmp = max(icntl(23),0)
735 END IF
736 CALL mpi_bcast( itmp, 1, mpi_integer,
737 & master, id%COMM, ierr )
738C JTMP: nb of procs with nonzero ICNTL(23)
739C ITMP: value of ICNTL(23) on master
740 IF ( itmp .GT. 0 .AND. jtmp .EQ. 1 ) THEN
741C ICNTL(23)>0 only on master
742 ELSE
743C Local values of ICNTL(23) are used, note that
744C they could all be zeros
745 itmp = icntl(23)
746 ENDIF
747C
748 itmp8 = int(itmp, 8)
749 id%KEEP8(4) = itmp8 * 1000000_8 ! convert to nb of bytes
750C Compute \sum of memories allowed
751 CALL mpi_reduce( id%KEEP8(4), itmp8, 1, mpi_integer8,
752 & mpi_sum, master, id%COMM, ierr )
753 itmp8 = itmp8 / 1000000_8 ! Use to print \sum_{ICNTL(23)}
754 IF ( prokg ) THEN
755 nworking = id%NSLAVES
756 WRITE( mpg, 172 ) nworking, id%ICNTL(22), keep(486),
757 & keep(12),
758 & id%KEEP8(111), keep(126), keep(127), keep(28),
759 & id%KEEP8(4)/1000000_8, itmp8, lwk_user_sum8, cntl1
760 IF (keep(252).GT.0)
761 & WRITE(mpg,173) keep(253)
762 IF (keep(269).NE.0)
763 & WRITE(mpg,174) keep(269)
764 ENDIF
765 IF (keep(201).LE.0) THEN
766C In-core version or no factors
767 keep(ixsz)=xsize_ic
768 ELSE IF (keep(201).EQ.2) THEN
769C OOC version, no panels
770 keep(ixsz)=xsize_ooc_nopanel
771 ELSE IF (keep(201).EQ.1) THEN
772C Panel versions:
773 IF (keep(50).EQ.0) THEN
774 keep(ixsz)=xsize_ooc_unsym
775 ELSE
776 keep(ixsz)=xsize_ooc_sym
777 ENDIF
778 ENDIF
779 IF ( keep(486) .NE. 0 ) THEN !LR is activated
780C Stats initialization for LR
782 END IF
783C
784* **********************************
785* Begin intializations regarding the
786* computation of the determinant
787* **********************************
788 IF (id%MYID.EQ.master) keep(258)=icntl(33)
789 CALL mpi_bcast(keep(258), 1, mpi_integer,
790 & master, id%COMM, ierr)
791 IF (keep(258) .NE. 0) THEN
792 keep(259) = 0 ! Initial exponent of the local determinant
793 keep(260) = 1 ! Number of permutations
794 id%DKEEP(6) = 1.0d0 ! real part of the local determinant
795 ENDIF
796* ********************************
797* End intializations regarding the
798* computation of the determinant
799* ********************************
800C
801* **********************
802* Begin of Scaling phase
803* **********************
804C
805C SCALING MANAGEMENT
806C * Options 1, 3, 4 centralized only
807C
808C * Options 7, 8 : also works for distributed matrix
809C
810C At this point, we have the scaling arrays allocated
811C on the master. They have been allocated on the master
812C inside the main MUMPS driver.
813C
814 CALL mpi_bcast(keep(52), 1, mpi_integer,
815 & master, id%COMM, ierr)
816 lscal = ((keep(52) .GT. 0) .AND. (keep(52) .LE. 8))
817 IF (lscal) THEN
818C
819 IF ( id%MYID.EQ.master ) THEN
820 CALL mumps_secdeb(timeet)
821 ENDIF
822C -----------------------
823C Retrieve parameters for
824C simultaneous scaling
825C -----------------------
826 IF (keep(52) .EQ. 7) THEN
827C -- Cheap setting of SIMSCALING (it is the default in 4.8.4)
828 k231= keep(231)
829 k232= keep(232)
830 k233= keep(233)
831 ELSEIF (keep(52) .EQ. 8) THEN
832C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3)
833 k231= keep(239)
834 k232= keep(240)
835 k233= keep(241)
836 ENDIF
837 CALL mpi_bcast(id%DKEEP(3),1,mpi_double_precision,master,
838 & id%COMM,ierr)
839C
840 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
841 & keep(54).NE.0 ) THEN
842C ------------------------------
843C Scaling for distributed matrix
844C We need to allocate scaling
845C arrays on all processors, not
846C only the master.
847C ------------------------------
848 IF ( id%MYID .NE. master ) THEN
849 IF ( associated(id%COLSCA))
850 & DEALLOCATE( id%COLSCA )
851 IF ( associated(id%ROWSCA))
852 & DEALLOCATE( id%ROWSCA )
853 ALLOCATE( id%COLSCA(n), stat=ierr)
854 IF (ierr .GT.0) THEN
855 id%INFO(1)=-13
856 id%INFO(2)=n
857 ENDIF
858 ALLOCATE( id%ROWSCA(n), stat=ierr)
859 IF (ierr .GT.0) THEN
860 id%INFO(1)=-13
861 id%INFO(2)=n
862 ENDIF
863 ENDIF
864 m = n
865 bumaxmn=m
866 IF(n > bumaxmn) bumaxmn = n
867 liwk = 4*bumaxmn
868 ALLOCATE (iwk(liwk),burp(m),bucp(n),
869 & burs(2* (id%NPROCS)),bucs(2* (id%NPROCS)),
870 & stat=allocok)
871 IF (allocok > 0) THEN
872 id%INFO(1)=-13
873 id%INFO(2)=liwk+m+n+4* (id%NPROCS)
874 ENDIF
875C --- Propagate enventual error
876 CALL mumps_propinfo( icntl(1), id%INFO(1),
877 & id%COMM, id%MYID )
878 IF (id%INFO(1).LT.0) GOTO 517
879C -- estimation of memory and construction of partvecs
880 bujob = 1
881C -- LWK not used
882 lwk_real = 1
883 ALLOCATE(wk_real(lwk_real),
884 & stat=allocok)
885 IF (allocok > 0) THEN
886 id%INFO(1)=-13
887 id%INFO(2)=lwk_real
888 ENDIF
889C --- Propagate enventual error
890 CALL mumps_propinfo( icntl(1), id%INFO(1),
891 & id%COMM, id%MYID )
892 IF (id%INFO(1).LT.0) GOTO 517
894 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
895 & id%KEEP8(29),
896 & m, n, id%NPROCS, id%MYID, id%COMM,
897 & burp, bucp,
898 & burs, bucs, buregistre,
899 & iwk, liwk,
900 & buintsz, buresz, bujob,
901 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
902 & id%KEEP(50),
903 & k231, k232, k233,
904 & id%DKEEP(3),
905 & sconeerr, scinferr)
906 IF(liwk < buintsz) THEN
907 DEALLOCATE(iwk)
908 liwk = buintsz
909 ALLOCATE(iwk(liwk), stat=allocok)
910 IF (allocok > 0) THEN
911 id%INFO(1)=-13
912 id%INFO(2)=liwk
913 ENDIF
914 ENDIF
915 lwk_real = buresz
916 DEALLOCATE(wk_real)
917 ALLOCATE (wk_real(lwk_real), stat=allocok)
918 IF (allocok > 0) THEN
919 id%INFO(1)=-13
920 id%INFO(2)=lwk_real
921 ENDIF
922C --- Propagate enventual error
923 CALL mumps_propinfo( icntl(1), id%INFO(1),
924 & id%COMM, id%MYID )
925 IF (id%INFO(1).LT.0) GOTO 517
926C -- estimation of memory and construction of partvecs
927 bujob = 2
929 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
930 & id%KEEP8(29),
931 & m, n, id%NPROCS, id%MYID, id%COMM,
932 & burp, bucp,
933 & burs, bucs, buregistre,
934 & iwk, liwk,
935 & buintsz, buresz, bujob,
936 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
937 & id%KEEP(50),
938 & k231, k232, k233,
939 & id%DKEEP(3),
940 & sconeerr, scinferr)
941 id%DKEEP(4) = sconeerr
942 id%DKEEP(5) = scinferr
943CXXXX
944 DEALLOCATE(iwk, wk_real,burp,bucp,burs, bucs)
945 ELSE IF ( keep(54) .EQ. 0 ) THEN
946C ------------------
947C Centralized matrix
948C ------------------
949 IF ((keep(52).EQ.7).OR.(keep(52).EQ.8)) THEN
950C -------------------------------
951C Create a communicator of size 1
952C -------------------------------
953 IF (id%MYID.EQ.master) THEN
954 colour = 0
955 ELSE
956 colour = mpi_undefined
957 ENDIF
958 CALL mpi_comm_split( id%COMM, colour, 0,
959 & comm_for_scaling, ierr )
960 IF (id%MYID.EQ.master) THEN
961 m = n
962 bumaxmn=n
963 IF(n > bumaxmn) bumaxmn = n
964 liwk = 1
965 ALLOCATE(iwk(liwk),burp(1),bucp(1),
966 & burs(1),bucs(1),
967 & stat=allocok)
968 IF (allocok > 0) THEN
969 id%INFO(1)=-13
970 id%INFO(2)=liwk+1+1+1+1
971 GOTO 400
972 ENDIF
973 lwk_real = m + n
974 ALLOCATE (wk_real(lwk_real), stat=allocok)
975 IF (allocok > 0) THEN
976 id%INFO(1)=-13
977 id%INFO(2)=lwk_real
978 GOTO 400
979 ENDIF
980 CALL mpi_comm_rank(comm_for_scaling, scmyid, ierr)
981 CALL mpi_comm_size(comm_for_scaling, scnprocs, ierr)
982 bujob = 1
984 & id%IRN(1), id%JCN(1), id%A(1),
985 & id%KEEP8(28),
986 & m, n, scnprocs, scmyid, comm_for_scaling,
987 & burp, bucp,
988 & burs, bucs, buregistre,
989 & iwk, liwk,
990 & buintsz, buresz, bujob,
991 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
992 & id%KEEP(50),
993 & k231, k232, k233,
994 & id%DKEEP(3),
995 & sconeerr, scinferr)
996 IF(lwk_real < buresz) THEN
997 id%INFO(1) = -136
998 GOTO 400
999 ENDIF
1000 bujob = 2
1001 CALL dmumps_simscaleabs(id%IRN(1),
1002 & id%JCN(1), id%A(1),
1003 & id%KEEP8(28),
1004 & m, n, scnprocs, scmyid, comm_for_scaling,
1005 & burp, bucp,
1006 & burs, bucs, buregistre,
1007 & iwk, liwk,
1008 & buintsz, buresz, bujob,
1009 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
1010 & id%KEEP(50),
1011 & k231, k232, k233,
1012 & id%DKEEP(3),
1013 & sconeerr, scinferr)
1014 id%DKEEP(4) = sconeerr
1015 id%DKEEP(5) = scinferr
1016 400 CONTINUE
1017 IF (allocated(wk_real)) DEALLOCATE(wk_real)
1018 IF (allocated(iwk)) DEALLOCATE(iwk)
1019 IF (allocated(burp)) DEALLOCATE(burp)
1020 IF (allocated(bucp)) DEALLOCATE(bucp)
1021 IF (allocated(burs)) DEALLOCATE(burs)
1022 IF (allocated(bucs)) DEALLOCATE(bucs)
1023 ENDIF
1024C Centralized matrix: make DKEEP(4:5) available to all processors
1025 CALL mpi_bcast( id%DKEEP(4),2,mpi_double_precision,
1026 & master, id%COMM, ierr )
1027 IF (id%MYID.EQ.master) THEN
1028C Communicator should only be
1029C freed on the master process
1030 CALL mpi_comm_free(comm_for_scaling, ierr)
1031 ENDIF
1032 CALL mumps_propinfo(icntl(1), id%INFO(1),
1033 & id%COMM, id%MYID)
1034 IF (id%INFO(1).LT.0) GOTO 517
1035 ELSE IF (id%MYID.EQ.master) THEN
1036C ----------------------------------
1037C Centralized scaling, options 1 to 6
1038C ----------------------------------
1039 IF (keep(52).GT.0 .AND. keep(52).LE.6) THEN
1040C ---------------------
1041C Allocate temporary
1042C workspace for scaling
1043C ---------------------
1044 IF ( keep(52) .eq. 5 .or.
1045 & keep(52) .eq. 6 ) THEN
1046C We have an explicit copy of the original
1047C matrix in complex format which should probably
1048C be avoided (but do we want to keep all
1049C those old scaling options ?)
1050 lwk = id%KEEP8(28)
1051 ELSE
1052 lwk = 1_8
1053 END IF
1054 lwk_real = 5 * n
1055 ALLOCATE( wk_real( lwk_real ), stat = ierr )
1056 IF ( ierr .GT. 0 ) THEN
1057 id%INFO(1) = -13
1058 id%INFO(2) = lwk_real
1059 GOTO 137
1060 END IF
1061 ALLOCATE( wk( lwk ), stat = ierr )
1062 IF ( ierr .GT. 0 ) THEN
1063 id%INFO(1) = -13
1064 CALL mumps_set_ierror(lwk, id%INFO(2))
1065 GOTO 137
1066 END IF
1067 CALL dmumps_fac_a(n, id%KEEP8(28), keep(52), id%A(1),
1068 & id%IRN(1), id%JCN(1),
1069 & id%COLSCA(1), id%ROWSCA(1),
1070 & wk, lwk, wk_real, lwk_real, icntl(1), id%INFO(1) )
1071 DEALLOCATE( wk_real )
1072 DEALLOCATE( wk )
1073 ENDIF
1074 ENDIF
1075 ENDIF ! Scaling distributed matrices or centralized
1076 IF (keep(125).NE.0) THEN
1077C ------------------------
1078C If we enable the scaling of the |A11 A12| block
1079C we et to 1 the scaling corresponding to the Schur
1080C complement matrix A22
1081C ------------------------
1082 IF ((keep(60).GT.0) .and. (keep(116).GT.0)) THEN
1083C Schur is active, reset Schur entries to ONE
1084 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
1085 & keep(54).NE.0 ) THEN
1086C Scaling available on all procs
1087 DO i=1, n
1088 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1089 id%COLSCA(i) = one
1090 id%ROWSCA(i) = one
1091 ENDIF
1092 ENDDO
1093 ELSE IF ( id%MYID .EQ. master) THEN
1094C Scaling available on master
1095 DO i=1, n
1096 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1097 id%COLSCA(i) = one
1098 id%ROWSCA(i) = one
1099 ENDIF
1100 ENDDO
1101 ENDIF
1102 ENDIF
1103 ENDIF
1104 IF (id%MYID.EQ.master) THEN
1105 CALL mumps_secfin(timeet)
1106 id%DKEEP(92)=timeet
1107C Print inf-norm after last KEEP(233) iterations of
1108C scaling option KEEP(52)=7 or 8 (SimScale)
1109C
1110 IF (prokg.AND.(keep(52).EQ.7.OR.keep(52).EQ.8)
1111 & .AND. (k233+k231+k232).GT.0) THEN
1112 IF (k232.GT.0) WRITE(mpg, 166) id%DKEEP(4)
1113 ENDIF
1114 ENDIF
1115 ENDIF ! LSCAL
1116C
1117C scaling might also be provided by the user
1118 lscal = (lscal .OR. (keep(52) .EQ. -1) .OR. keep(52) .EQ. -2)
1119 IF (lscal .AND. keep(258).NE.0 .AND. id%MYID .EQ. master) THEN
1120 DO i = 1, id%N
1121 CALL dmumps_updatedeter_scaling(id%ROWSCA(i),
1122 & id%DKEEP(6), ! determinant
1123 & keep(259)) ! exponent of the determinant
1124 ENDDO
1125 IF (keep(50) .EQ. 0) THEN ! unsymmetric
1126 DO i = 1, id%N
1127 CALL dmumps_updatedeter_scaling(id%COLSCA(i),
1128 & id%DKEEP(6), ! determinant
1129 & keep(259)) ! exponent of the determinant
1130 ENDDO
1131 ELSE
1132C -----------------------------------------
1133C In this case COLSCA = ROWSCA
1134C Since determinant was initialized to 1,
1135C compute square of the current determinant
1136C rather than going through COLSCA.
1137C -----------------------------------------
1138 CALL dmumps_deter_square(id%DKEEP(6), keep(259))
1139 ENDIF
1140C Now we should have taken the
1141C inverse of the scaling vectors
1142 CALL dmumps_deter_scaling_inverse(id%DKEEP(6), keep(259))
1143 ENDIF
1144C
1145C ********************
1146C End of Scaling phase
1147C At this point: either (matrix is distributed and KEEP(52)=7 or 8)
1148C in which case scaling arrays are allocated on all processors,
1149C or scaling arrays are only on the host processor.
1150C In case of distributed matrix input, we will free the scaling
1151C arrays on procs with MYID .NE. 0 after the all-to-all distribution
1152C of the original matrix.
1153C ********************
1154C
1155 137 CONTINUE
1156C Fwd in facto: in case of repeated factorizations
1157C with different Schur options we prefer to free
1158C systematically this array now than waiting for
1159C the root node. We rely on the fact that it is
1160C allocated or not during the solve phase so if
1161C it was allocated in a 1st call to facto and not
1162C in a second, we don't want the solve to think
1163C it was allocated in the second call.
1164 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN
1165 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT)
1166 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
1167 ENDIF
1168C Fwd in facto: check that id%NRHS has not changed
1169 IF ( id%MYID.EQ.master.AND. keep(252).EQ.1 .AND.
1170 & id%NRHS .NE. id%KEEP(253) ) THEN
1171C Error: NRHS should not have
1172C changed since the analysis
1173 id%INFO(1)=-42
1174 id%INFO(2)=id%KEEP(253)
1175 ENDIF
1176C Fwd in facto: allocate and broadcast RHS_MUMPS
1177C to make it available on all processors.
1178 IF (id%KEEP(252) .EQ. 1) THEN
1179 IF ( id%MYID.NE.master ) THEN
1180 id%KEEP(254) = n ! Leading dimension
1181 id%KEEP(255) = n*id%KEEP(253) ! Tot size
1182 ALLOCATE(rhs_mumps(id%KEEP(255)),stat=ierr)
1183 IF (ierr > 0) THEN
1184 id%INFO(1)=-13
1185 id%INFO(2)=id%KEEP(255)
1186 IF (lpok)
1187 & WRITE(lp,*) 'ERROR while allocating RHS on a slave'
1188 NULLIFY(rhs_mumps)
1189 ENDIF
1190 rhs_mumps_allocated = .true.
1191 ELSE
1192C Case of non working master
1193 id%KEEP(254)=id%LRHS ! Leading dimension
1194 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! Tot size
1195 rhs_mumps=>id%RHS
1196 rhs_mumps_allocated = .false.
1197 IF (lscal) THEN
1198C Scale before broadcast: apply row
1199C scaling (remark that we assume no
1200C transpose).
1201 DO k=1, id%KEEP(253)
1202 DO i=1, n
1203 rhs_mumps( id%KEEP(254) * (k-1) + i )
1204 & = rhs_mumps( id%KEEP(254) * (k-1) + i )
1205 & * id%ROWSCA(i)
1206 ENDDO
1207 ENDDO
1208 ENDIF
1209 ENDIF
1210 ELSE
1211 id%KEEP(255)=1
1212 ALLOCATE(rhs_mumps(1),stat=ierr)
1213 IF (ierr > 0) THEN
1214 id%INFO(1)=-13
1215 id%INFO(2)=1
1216 IF (lpok)
1217 & WRITE(lp,*) 'ERREUR while allocating RHS on a slave'
1218 NULLIFY(rhs_mumps)
1219 ENDIF
1220 rhs_mumps_allocated = .true.
1221 ENDIF
1222 CALL mumps_propinfo( icntl(1), id%INFO(1),
1223 & id%COMM, id%MYID )
1224 IF ( id%INFO(1).lt.0 ) GOTO 517
1225 IF (keep(252) .EQ. 1) THEN
1226C
1227C Broadcast the columns of the right-hand side
1228C one by one. Leading dimension is keep(254)=N
1229C on procs with MYID > 0 but may be larger on
1230C the master processor.
1231 DO i= 1, id%KEEP(253)
1232 CALL mpi_bcast(rhs_mumps((i-1)*id%KEEP(254)+1), n,
1233 & mpi_double_precision, master,id%COMM,ierr)
1234 END DO
1235 ENDIF
1236C Keep a copy of ICNTL(24) and make it
1237C available on all working processors.
1238 keep(110)=id%ICNTL(24)
1239 CALL mpi_bcast(keep(110), 1, mpi_integer,
1240 & master, id%COMM, ierr)
1241C KEEP(110) defaults to 0 for out of range values
1242 IF (keep(110).NE.1) keep(110)=0
1243 IF (keep(219).NE.0) THEN
1244 CALL dmumps_buf_max_array_minsize(max(keep(108),1),ierr)
1245 IF (ierr .NE. 0) THEN
1246C ------------------------
1247C Error allocating DMUMPS_BUF
1248C ------------------------
1249 id%INFO(1) = -13
1250 id%INFO(2) = max(keep(108),1)
1251 END IF
1252 ENDIF
1253C -----------------------------------------------
1254C Depending on the option used for
1255C -detecting null pivots (ICNTL(24)/KEEP(110))
1256C CNTL(3) is used to set DKEEP(1)
1257C ( A row is considered as null if ||row|| < DKEEP(1) )
1258C CNTL(5) is then used to define if a large
1259C value is set on the diagonal or if a 1 is set
1260C and other values in the row are reset to zeros.
1261C SEUIL* corresponds to the minimum required
1262C absolute value of pivot.
1263C SEUIL_LDLT_NIV2 is used only in the
1264C case of SYM=2 within a niv2 node for which
1265C we have only a partial view of the fully summed rows.
1266 IF (id%MYID .EQ. master) cntl3 = id%CNTL(3)
1267 CALL mpi_bcast(cntl3, 1, mpi_double_precision,
1268 & master, id%COMM, ierr)
1269 IF (id%MYID .EQ. master) cntl5 = id%CNTL(5)
1270 CALL mpi_bcast(cntl5, 1, mpi_double_precision,
1271 & master, id%COMM, ierr)
1272 IF (id%MYID .EQ. master) cntl6 = id%CNTL(6)
1273 CALL mpi_bcast(cntl6, 1, mpi_double_precision,
1274 & master, id%COMM, ierr)
1275 IF (id%MYID .EQ. master) id%DKEEP(8) = id%CNTL(7)
1276 CALL mpi_bcast(id%DKEEP(8), 1, mpi_double_precision,
1277 & master, id%COMM, ierr)
1278 id%DKEEP(11) = id%DKEEP(8)/id%KEEP(461)
1279 id%DKEEP(12) = id%DKEEP(8)/id%KEEP(462)
1280 IF (keep(486).EQ.0) id%DKEEP(8) = zero
1281 compute_anorminf = .false.
1282 IF ( (keep(486) .NE. 0).AND. (id%DKEEP(8).LT.zero)) THEN
1283 compute_anorminf = .true.
1284 ENDIF
1285 IF (keep(19).NE.0) THEN
1286C Rank revealing factorisation
1287 compute_anorminf = .true.
1288 ENDIF
1289 IF (keep(110).NE.0) THEN
1290C Null pivot detection
1291 compute_anorminf = .true.
1292 ENDIF
1293 IF (id%DKEEP(8).LT.zero) THEN
1294C Experimental setting of CNTL(7)
1295 IF (compute_anorminf) THEN
1296 eff_size_schur = 0
1297 CALL dmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1298C If no schur ANORMINF fine for other cases
1299 ELSE
1300 anorminf = zero
1301 ENDIF
1302 id%DKEEP(8) = abs(id%DKEEP(8))*anorminf
1303C ANORMINF need be recomputed in case of schur
1304 IF ((keep(60).GT.0).AND.keep(116).GT.0) anorminf=zero
1305 ENDIF
1306C -------------------------------------------------------
1307C We compute ANORMINF, when needed, based on
1308C the infinite norm of Rowsca *A*Colsca
1309C and make it available on all working processes.
1310 IF (compute_anorminf) THEN
1311 eff_size_schur = 0
1312 IF (keep(60).GT.0) eff_size_schur = keep(116)
1313 CALL dmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1314 ELSE
1315 anorminf = zero
1316 ENDIF
1317C
1318 IF ((keep(19).NE.0).OR.(keep(110).NE.0)) THEN
1319 IF (prokg) THEN
1320 IF (keep(19).NE.0) THEN
1321 WRITE(mpg,'(A,1PD16.4)')
1322 & ' CNTL(3) for null pivot rows/singularities =',cntl3
1323 ELSE
1324 WRITE(mpg,'(A,1PD16.4)')
1325 & ' CNTL(3) for null pivot row detection =',cntl3
1326 ENDIF
1327 ENDIF
1328 ENDIF
1329 IF (keep(19).EQ.0) THEN
1330C -- RR is off
1331 seuil = zero
1332 id%DKEEP(9) = zero
1333 ELSE
1334C -- RR is on
1335C
1336C CNTL(3) is the threshold used in the following to compute
1337C DKEEP(9) the threshold under which the sing val. are considered
1338C as null and from which we start to look for a gap between two
1339C sing val.
1340 IF (cntl3 .LT. zero) THEN
1341 id%DKEEP(9) = abs(cntl(3))
1342 ELSE IF (cntl3 .GT. zero) THEN
1343 id%DKEEP(9) = cntl3*anorminf
1344 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1345 ENDIF
1346 IF (prokg) THEN
1347 WRITE(mpg, '(A,I16)')
1348 & ' ICNTL(56) rank revealing effective value =',keep(19)
1349 WRITE(mpg,'(A,1PD16.4)')
1350 & ' ...Threshold for singularities on the root =',id%DKEEP(9)
1351 ENDIF
1352C RR postponing considers that pivot rows with norm smaller
1353C than SEUIL should be postponed.
1354C SEUIL should be bigger than DKEEP(9), this means that
1355C DKEEP(13) should be bigger than 1.
1356 thresh_seuil = id%DKEEP(13)
1357 IF (id%DKEEP(13).LT.1) thresh_seuil = 10
1358 seuil = id%DKEEP(9)*thresh_seuil
1359 IF (prokg) WRITE(mpg,'(A,1PD16.4)')
1360 & ' ...Threshold for postponing =',seuil
1361 ENDIF !end KEEP(19)
1362 seuil_ldlt_niv2 = seuil
1363C -------------------------------
1364C -- Null pivot row detection
1365C -------------------------------
1366 IF (keep(110).EQ.0) THEN
1367C -- Null pivot is off
1368C Initialize DKEEP(1) to a negative value
1369C in order to avoid detection of null pivots
1370C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL
1371C in DMUMPS_FAC_I, where PIVNUL=DKEEP(1))
1372 id%DKEEP(1) = -1.0d0
1373 id%DKEEP(2) = zero
1374 ELSE
1375C -- Null pivot is on
1376 IF (keep(19).NE.0) THEN
1377C -- RR is on
1378C RR postponing considers that pivot rows of norm smaller that SEUIL
1379C should be postponed, but pivot rows smaller than DKEEP(1) are
1380C directly added to null space and thus considered as null pivot rows.
1381 IF ((id%DKEEP(10).LE.0).OR.(id%DKEEP(10).GT.1)) THEN
1382C DKEEP(10) is out of range, set to the default value 10-1
1383 id%DKEEP(1) = id%DKEEP(9)*1d-1
1384 ELSE
1385 id%DKEEP(1) = id%DKEEP(9)*id%DKEEP(10)
1386 ENDIF
1387 ELSE
1388C -- RR is off
1389C -- only Null pivot detection
1390C We keep strategy currently used in MUMPS 4.10.0
1391 IF (cntl3 .LT. zero) THEN
1392 id%DKEEP(1) = abs(cntl(3))
1393 ELSE IF (cntl3 .GT. zero) THEN
1394 id%DKEEP(1) = cntl3*anorminf
1395 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1396c id%DKEEP(1) = NPIV_CRITICAL_PATH*EPS*ANORMINF
1398 & n, keep(28), id%STEP(1), id%FRERE_STEPS(1), id%FILS(1),
1399 & id%NA(1), id%LNA, id%NE_STEPS(1), npiv_critical_path )
1400 id%DKEEP(1) = sqrt(dble(npiv_critical_path))*eps*anorminf
1401 ENDIF
1402 ENDIF ! fin rank revealing
1403 IF ((keep(110).NE.0).AND.(prokg)) THEN
1404 WRITE(mpg, '(A,I16)')
1405 & ' ICNTL(24) null pivot rows detection =',keep(110)
1406 WRITE(mpg,'(A,1PD16.4)')
1407 & ' ...Zero pivot detection threshold =',id%DKEEP(1)
1408 ENDIF
1409 IF (cntl5.GT.zero) THEN
1410 id%DKEEP(2) = cntl5 * anorminf
1411 IF (prokg) WRITE(mpg,'(A,1PD10.3)')
1412 & ' ...Fixation for null pivots =',id%DKEEP(2)
1413 ELSE
1414 IF (prokg) WRITE(mpg,*) '...Infinite fixation '
1415 IF (id%KEEP(50).EQ.0) THEN
1416C Unsym
1417 ! the user let us choose a fixation. set in NEGATIVE
1418 ! to detect during facto when to set row to zero !
1419 id%DKEEP(2) = -max(1.0d10*anorminf,
1420 & sqrt(huge(anorminf))/1.0d8)
1421 ELSE
1422C Sym
1423 id%DKEEP(2) = zero
1424 ENDIF
1425 ENDIF
1426 ENDIF ! fin null pivot detection.
1427C Find id of root node if RR is on
1428 IF (keep(53).NE.0) THEN
1429 id_root =mumps_procnode(id%PROCNODE_STEPS(id%STEP(keep(20))),
1430 & id%KEEP(199))
1431 IF ( keep( 46 ) .NE. 1 ) THEN
1432 id_root = id_root + 1
1433 END IF
1434 ENDIF
1435C Second pass: set parameters for null pivot detection
1436C Allocate PIVNUL_LIST in case of null pivot detection
1437 lpn_list = 1
1438 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST)
1439 IF(keep(110) .EQ. 1) THEN
1440 lpn_list = n
1441 ENDIF
1442 IF (keep(19).NE.0 .AND.
1443 & (id_root.EQ.id%MYID .OR. id%MYID.EQ.master)) THEN
1444 lpn_list = n
1445 ENDIF
1446 ALLOCATE( id%PIVNUL_LIST(lpn_list),stat = ierr )
1447 IF ( ierr .GT. 0 ) THEN
1448 id%INFO(1)=-13
1449 id%INFO(2)=lpn_list
1450 END IF
1451 id%PIVNUL_LIST(1:lpn_list) = 0
1452 keep(109) = 0
1453C end set parameter for null pivot detection
1454 CALL mumps_propinfo( icntl(1), id%INFO(1),
1455 & id%COMM, id%MYID )
1456 IF ( id%INFO(1).lt.0 ) GOTO 517
1457C --------------------------------------------------------------
1458C STATIC PIVOTING
1459C -- Static pivoting only when RR and Null pivot detection OFF
1460C --------------------------------------------------------------
1461 keep(97) = 0
1462 IF ((keep(19).EQ.0).AND.(keep(110).EQ.0)) THEN
1463 IF (id%MYID .EQ. master) cntl4 = id%CNTL(4)
1464 CALL mpi_bcast( cntl4, 1, mpi_double_precision,
1465 & master, id%COMM, ierr )
1466C
1467 IF ( cntl4 .GE. zero ) THEN
1468 keep(97) = 1
1469 IF ( cntl4 .EQ. zero ) THEN
1470C -- set seuil to sqrt(eps)*||A||
1471 IF(anorminf .EQ. zero) THEN
1472 eff_size_schur = 0
1473 IF (keep(60).GT.0) eff_size_schur = keep(116)
1474 CALL dmumps_anorminf( id , anorminf, lscal,
1475 & eff_size_schur )
1476 ENDIF
1477 seuil = sqrt(eps) * anorminf
1478 ELSE
1479 seuil = cntl4
1480 ENDIF
1481 seuil_ldlt_niv2 = seuil
1482 ELSE
1483 seuil = zero
1484 ENDIF
1485 ENDIF
1486C set number of tiny pivots / 2x2 pivots in types 1 /
1487C 2x2 pivots in types 2, to zero. This is because the
1488C user can call the factorization step several times.
1489 keep(98) = 0
1490 keep(103) = 0
1491 keep(105) = 0
1492 maxs = 1_8
1493*
1494* Start allocations
1495* *****************
1496*
1497C
1498C The slaves can now perform the factorization
1499C
1500C
1501C Allocate id%S on all nodes
1502C or point to user provided data WK_USER when LWK_USER>0
1503C =======================
1504C
1505C Compute BLR_STRAT and a first estimation
1506C of MAXS, the size of id%S
1508 & maxs_base8, maxs_base_relaxed8,
1509 & blr_strat,
1510 & id%KEEP(1), id%KEEP8(1))
1511C
1512 maxs = maxs_base_relaxed8
1513 IF (wk_user_provided) THEN
1514C -- Set MAXS to size of WK_USER_
1515 maxs = id%KEEP8(24)
1516 ENDIF
1517 CALL mumps_propinfo( icntl(1), id%INFO(1),
1518 & id%COMM, id%MYID )
1519 IF (id%INFO(1) .LT. 0) THEN
1520 GOTO 517
1521 ENDIF
1522C
1523 id%KEEP8(75) = huge(id%KEEP8(75))
1524 id%KEEP8(76) = huge(id%KEEP8(76))
1525 IF (i_am_slave) THEN
1526C
1527 IF (id%KEEP8(4) .NE. 0_8) THEN
1528C
1529 IF ( .NOT. wk_user_provided ) THEN
1530C Set MAXS given BLR_STRAT, KEEP(201) and MAXS_BASE_RELAXED8
1532 & maxs,
1533 & blr_strat, id%KEEP(201), maxs_base_relaxed8,
1534 & id%KEEP(1), id%KEEP8(1), id%MYID, id%N, id%NELT,
1535 & id%NA(1), id%LNA, id%NSLAVES,
1536 & keep464copy, keep465copy,
1537 & id%INFO(1), id%INFO(2)
1538 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1539 & size(id%I8_L0_OMP,2)
1540 & )
1541C Given MAXS and max memory allowed KEEP8(4)
1542C compute in KEEP8(75) the number of real/complex
1543C available for dynamic allocations
1545 & maxs, id%MYID,
1546 & .false., ! UNDER_L0_OMP
1547 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1548 & blr_strat, id%KEEP(201),
1549 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1550 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1551 & size(id%I8_L0_OMP,2)
1552 & )
1553 ELSE
1554C KEEP8(75) dow not include MAXS, since WK_USER is provided
1556 & 0_8, id%MYID,
1557 & .false., ! UNDER_L0_OMP
1558 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1559 & blr_strat, id%KEEP(201),
1560 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1561 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1562 & size(id%I8_L0_OMP,2)
1563 & )
1564 ENDIF
1565 IF (keep(400) .GT.0) THEN
1566C ------------------------------
1567C compute KEEP8(75) under L0_OMP
1568C ------------------------------
1569C Save KEEP8(75) above L0_OMP to reset KEEP8(75)
1570C when starting FAC_PAR_M
1571 id%KEEP8(76) = id%KEEP8(75)
1573 & 0_8, ! MAXS=0_8
1574 & id%MYID,
1575 & .true., ! UNDER_L0_OMP
1576 & id%N, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1577 & blr_strat, id%KEEP(201),
1578 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1579 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1580 & size(id%I8_L0_OMP,2)
1581 & )
1582C KEEP8(75) holds the number of entries that
1583C can be allocated underL0.
1584C It will be used during DMUMPS_FAC_L0_OMP to adjust the
1585C the size of MUMPS_TPS_ARR(ITH)%LA
1586 ENDIF
1587 ENDIF ! MEM_ALLOWED
1588C
1589 ENDIF ! I_AM_SLAVE THEN
1590C
1591 IF (i_am_slave) THEN
1592 IF ( (keep(400).GT.0) .AND. (keep(406).EQ.2) ) THEN
1593C Compute KEEP8(77) the peak authorized used by
1594C DMUMPS_PERFORM_COPIES
1596 & id%MYID, id%N,
1597 & id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1598 & blr_strat, id%KEEP(201),
1599 & id%KEEP(1), id%KEEP8(1), id%INFO(1), id%INFO(2)
1600 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1601 & size(id%I8_L0_OMP,2)
1602 & )
1603 ENDIF
1604 ENDIF ! I_AM_SLAVE)
1605C
1606 CALL mumps_propinfo( icntl(1), id%INFO(1),
1607 & id%COMM, id%MYID )
1608 IF (id%INFO(1) .LT. 0) THEN
1609 GOTO 517
1610 ENDIF
1611 CALL mumps_seti8toi4(maxs, id%INFO(39))
1612 CALL dmumps_avgmax_stat8(prokg, mpg, maxs, id%NSLAVES,
1613 & print_maxavg,
1614 & id%COMM, " Effective size of S (based on INFO(39))= ")
1615C
1616 IF ( i_am_slave ) THEN
1617C ------------------
1618C Dynamic scheduling
1619C ------------------
1620 CALL dmumps_load_set_inicost( dble(id%COST_SUBTREES),
1621 & keep(64), id%DKEEP(15), keep(375), maxs )
1622 k28=keep(28)
1623 memory_md_arg = min(int(perlu,8) * ( maxs_base8 / 100_8 + 1_8 ),
1624C Restrict freedom from dynamic scheduler when
1625C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-MAXS_BASE8
1626C is negative after call to DMUMPS_MAX_MEM)
1627 & max(0_8, maxs-maxs_base8))
1628 CALL dmumps_load_init( id, memory_md_arg, maxs )
1629C
1630C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC
1631C and the second one is in-core: we try to free OOC
1632C related data from previous factorization.
1633C
1634 CALL dmumps_clean_ooc_data(id, ierr)
1635 IF (ierr < 0) THEN
1636 id%INFO(1) = -90
1637 id%INFO(2) = 0
1638 GOTO 112
1639 ENDIF
1640 IF (keep(201) .GT. 0) THEN
1641C -------------------
1642C OOC initializations
1643C -------------------
1644 IF (keep(201).EQ.1 !PANEL Version
1645 & .AND.keep(50).EQ.0 ! Unsymmetric
1646 & .AND.keep(251).NE.2 ! Store L to disk
1647 & ) THEN
1648 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON
1649 ELSE
1650 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON
1651 ENDIF
1652C ------------------------------
1653C Dimension IO buffer, KEEP(100)
1654C ------------------------------
1655 IF (keep(205) .GT. 0) THEN
1656 keep(100) = keep(205)
1657 ELSE
1658 IF (keep(201).EQ.1) THEN ! PANEL version
1659 i8tmp = int(id%OOC_NB_FILE_TYPE,8) *
1660 & 2_8 * int(keep(226),8)
1661 ELSE
1662 i8tmp = 2_8 * id%KEEP8(119)
1663 ENDIF
1664 i8tmp = i8tmp + int(max(keep(12),0),8) *
1665 & (i8tmp/100_8+1_8)
1666C we want to avoid too large IO buffers.
1667C 12M corresponds to 100Mbytes given to buffers.
1668 i8tmp = min(i8tmp, 12000000_8)
1669 keep(100)=int(i8tmp)
1670 ENDIF
1671 IF (keep(201).EQ.1) THEN
1672C Panel version. Force the use of a buffer.
1673 IF ( keep(99) < 3 ) THEN
1674 keep(99) = keep(99) + 3
1675 ENDIF
1676 ENDIF
1677C --------------------------
1678C Reset KEEP(100) to 0 if no
1679C buffer is used for OOC.
1680C --------------------------
1681 IF (keep(99) .LT.3) keep(100)=0
1682 IF((dble(keep(100))*dble(keep(35))/dble(2)).GT.
1683 & (dble(1999999999)))THEN
1684 IF (prokg) THEN
1685 WRITE(mpg,*)id%MYID,': Warning: DIM_BUF_IO might be
1686 & too big for Filesystem'
1687 ENDIF
1688 ENDIF
1689 ALLOCATE (id%OOC_INODE_SEQUENCE(keep(28),
1690 & id%OOC_NB_FILE_TYPE),
1691 & stat=ierr)
1692 IF ( ierr .GT. 0 ) THEN
1693 id%INFO(1) = -13
1694 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1695 NULLIFY(id%OOC_INODE_SEQUENCE)
1696 GOTO 112
1697 ENDIF
1698 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE),
1699 & stat=ierr)
1700 IF ( ierr .GT. 0 ) THEN
1701 id%INFO(1) = -13
1702 id%INFO(2) = id%OOC_NB_FILE_TYPE
1703 NULLIFY(id%OOC_TOTAL_NB_NODES)
1704 GOTO 112
1705 ENDIF
1706 ALLOCATE (id%OOC_SIZE_OF_BLOCK(keep(28),
1707 & id%OOC_NB_FILE_TYPE),
1708 & stat=ierr)
1709 IF ( ierr .GT. 0 ) THEN
1710 id%INFO(1) = -13
1711 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1712 NULLIFY(id%OOC_SIZE_OF_BLOCK)
1713 GOTO 112
1714 ENDIF
1715 ALLOCATE (id%OOC_VADDR(keep(28),id%OOC_NB_FILE_TYPE),
1716 & stat=ierr)
1717 IF ( ierr .GT. 0 ) THEN
1718 id%INFO(1) = -13
1719 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1720 NULLIFY(id%OOC_VADDR)
1721 GOTO 112
1722 ENDIF
1723 ENDIF
1724 ENDIF
1725 112 CALL mumps_propinfo( icntl(1), id%INFO(1),
1726 & id%COMM, id%MYID )
1727 IF (id%INFO(1) < 0) THEN
1728C LOAD_END must be done but not OOC_END_FACTO
1729 GOTO 513
1730 ENDIF
1731 IF (i_am_slave) THEN
1732 IF (keep(201) .GT. 0) THEN
1733 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
1734 CALL dmumps_ooc_init_facto(id,maxs)
1735 ELSE
1736 WRITE(*,*) "Internal error in DMUMPS_FAC_DRIVER"
1737 CALL mumps_abort()
1738 ENDIF
1739 IF(id%INFO(1).LT.0)THEN
1740 GOTO 111
1741 ENDIF
1742 ENDIF
1743C First increment corresponds to the number of
1744C floating-point operations for subtrees allocated
1745C to the local processor.
1746 CALL dmumps_load_update(0,.false.,dble(id%COST_SUBTREES),
1747 & id%KEEP(1),id%KEEP8(1))
1748 IF (id%INFO(1).LT.0) GOTO 111
1749 END IF
1750C -----------------------
1751C Manage main workarray S
1752C -----------------------
1753 earlyt3rootins = keep(200) .EQ.0
1754 & .OR. ( keep(200) .LT. 0 .AND. keep(400) .EQ. 0 )
1755#if defined (LARGEMATRICES)
1756 IF ( id%MYID .ne. master ) THEN
1757#endif
1758 IF (.NOT.wk_user_provided) THEN
1759 IF ( earlyt3rootins ) THEN
1760C Standard allocation strategy
1761 ALLOCATE (id%S(maxs),stat=ierr)
1762 id%KEEP8(23) = maxs
1763 IF ( ierr .GT. 0 ) THEN
1764 id%INFO(1) = -13
1765 CALL mumps_set_ierror(maxs, id%INFO(2))
1766C On some platforms (IBM for example), an
1767C allocation failure returns a non-null pointer.
1768C Therefore we nullify S
1769 NULLIFY(id%S)
1770 id%KEEP8(23)=0_8
1771 ENDIF
1772 ENDIF
1773 ELSE
1774 id%S => id%WK_USER(1:id%KEEP8(24))
1775 id%KEEP8(23) = 0_8
1776 ENDIF
1777#if defined (LARGEMATRICES)
1778 END IF
1779#endif
1780C
1781 111 CALL mumps_propinfo( icntl(1), id%INFO(1),
1782 & id%COMM, id%MYID )
1783 IF ( id%INFO(1).LT.0 ) GOTO 514
1784C --------------------------
1785C Initialization of modules
1786C related to data management
1787C --------------------------
1788 nb_active_fronts_estim = 3
1789 nb_threads = 1
1790!$ NB_THREADS = OMP_GET_MAX_THREADS()
1791C
1792 nb_active_fronts_estim = 3*nb_threads
1793 IF (i_am_slave) THEN
1794C
1795 CALL mumps_fdm_init('A',nb_active_fronts_estim, id%INFO)
1796C
1797 IF ( (keep(486).EQ.2)
1798 & .OR. ((keep(489).NE.0).AND.(keep(400).GT.1))
1799 & ) THEN
1800C In case of LRSOLVE or CompressCB,
1801C initialize nb of handlers to nb of BLR
1802C nodes estimated at analysis
1803 nb_fronts_f_estim = keep(470)
1804 ELSE
1805 IF (keep(489).NE.0) THEN
1806C Compress CB and no L0 OMP (or 1 thread under L0):
1807C NB_ACTIVE_FRONTS_ESTIM is too small,
1808C to limit nb of reallocations make it twice larger
1809 nb_fronts_f_estim = 2*nb_active_fronts_estim
1810 ELSE
1811 nb_fronts_f_estim = nb_active_fronts_estim
1812 ENDIF
1813 ENDIF
1814 CALL mumps_fdm_init('F',nb_fronts_f_estim, id%INFO )
1815 IF (id%INFO(1) .LT. 0 ) GOTO 114
1816#if ! defined(NO_FDM_DESCBAND)
1817C Storage of DESCBAND information
1818 CALL mumps_fdbd_init( nb_active_fronts_estim, id%INFO )
1819#endif
1820#if ! defined(NO_FDM_MAPROW)
1821C Storage of MAPROW and ROOT2SON information
1822 CALL mumps_fmrd_init( nb_active_fronts_estim, id%INFO )
1823#endif
1824 CALL dmumps_blr_init_module( nb_fronts_f_estim, id%INFO )
1825 114 CONTINUE
1826 ENDIF
1827 CALL mumps_propinfo( icntl(1), id%INFO(1),
1828 & id%COMM, id%MYID )
1829C GOTO 500: one of the above module initializations failed
1830 IF ( id%INFO(1).LT.0 ) GOTO 500
1831C
1832C
1833C Allocate space for matrix in arrowhead
1834C ======================================
1835C
1836C CASE 1 : Matrix is assembled
1837C CASE 2 : Matrix is elemental
1838C
1839 IF ( keep(55) .eq. 0 ) THEN
1840C ------------------------------------
1841C Space has been allocated already for
1842C the integer part during analysis
1843C Only slaves need the arrowheads.
1844C ------------------------------------
1845 IF (associated( id%DBLARR)) THEN
1846 DEALLOCATE(id%DBLARR)
1847 NULLIFY(id%DBLARR)
1848 ENDIF
1849 IF ( i_am_slave .and. id%KEEP8(26) .ne. 0_8 ) THEN
1850 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = ierr )
1851 ELSE
1852 ALLOCATE( id%DBLARR( 1 ), stat =ierr )
1853 END IF
1854 IF ( ierr .NE. 0 ) THEN
1855 IF (lpok) THEN
1856 WRITE(lp,*) id%MYID,
1857 & ': Allocation error for DBLARR(',id%KEEP8(26),')'
1858 ENDIF
1859 id%INFO(1)=-13
1860 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1861 NULLIFY(id%DBLARR)
1862 GOTO 100
1863 END IF
1864 ELSE
1865C ----------------------------------------
1866C Allocate variable lists. Systematically.
1867C ----------------------------------------
1868 IF ( associated( id%INTARR ) ) THEN
1869 DEALLOCATE( id%INTARR )
1870 NULLIFY( id%INTARR )
1871 END IF
1872 IF ( i_am_slave .and. id%KEEP8(27) .ne. 0_8 ) THEN
1873 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok )
1874 IF ( allocok .GT. 0 ) THEN
1875 id%INFO(1) = -13
1876 CALL mumps_set_ierror(id%KEEP8(27), id%INFO(2))
1877 NULLIFY(id%INTARR)
1878 GOTO 100
1879 END IF
1880 ELSE
1881 ALLOCATE( id%INTARR(1),stat=allocok )
1882 IF ( allocok .GT. 0 ) THEN
1883 id%INFO(1) = -13
1884 id%INFO(2) = 1
1885 NULLIFY(id%INTARR)
1886 GOTO 100
1887 END IF
1888 END IF
1889C -----------------------------
1890C Allocate real values.
1891C On master, if hybrid host and
1892C no scaling, avoid the copy.
1893C -----------------------------
1894 IF (associated( id%DBLARR)) THEN
1895 DEALLOCATE(id%DBLARR)
1896 NULLIFY(id%DBLARR)
1897 ENDIF
1898 IF ( i_am_slave ) THEN
1899 IF ( id%MYID_NODES .eq. master
1900 & .AND. keep(46) .eq. 1
1901 & .AND. keep(52) .eq. 0 ) THEN
1902C --------------------------
1903C Simple pointer association
1904C --------------------------
1905 id%DBLARR => id%A_ELT
1906 ELSE
1907C ----------
1908C Allocation
1909C ----------
1910 IF ( id%KEEP8(26) .ne. 0_8 ) THEN
1911 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok )
1912 IF ( allocok .GT. 0 ) THEN
1913 id%INFO(1) = -13
1914 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1915 NULLIFY(id%DBLARR)
1916 GOTO 100
1917 END IF
1918 ELSE
1919 ALLOCATE( id%DBLARR(1), stat = allocok )
1920 IF ( allocok .GT. 0 ) THEN
1921 id%INFO(1) = -13
1922 id%INFO(2) = 1
1923 NULLIFY(id%DBLARR)
1924 GOTO 100
1925 END IF
1926 END IF
1927 END IF
1928 ELSE
1929 ALLOCATE( id%DBLARR(1), stat = allocok )
1930 IF ( allocok .GT. 0 ) THEN
1931 id%INFO(1) = -13
1932 id%INFO(2) = 1
1933 NULLIFY(id%DBLARR)
1934 GOTO 100
1935 END IF
1936 END IF
1937 END IF
1938C -----------------
1939C Also prepare some
1940C data for the root
1941C -----------------
1942 IF ( keep(38).NE.0 .AND. i_am_slave ) THEN
1943 CALL dmumps_init_root_fac( id%N,
1944 & id%root, id%FILS(1), keep(38), id%KEEP(1), id%INFO(1) )
1945 END IF
1946C
1947C
1948 100 CONTINUE
1949C ----------------
1950C Check for errors
1951C ----------------
1952 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
1953 & id%COMM, id%MYID )
1954 IF ( id%INFO(1).LT.0 ) GOTO 500
1955C
1956C -----------------------------------
1957C
1958C DISTRIBUTION OF THE ORIGINAL MATRIX
1959C
1960C -----------------------------------
1961C
1962C TIMINGS: computed (and printed) on the host
1963C Next line: global time for distrib(arrowheads,elts)
1964C on the host. Synchronization has been performed.
1965 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
1966C -------------------------------------------
1967C S_PTR_ARG / MAXS_ARG will be used for id%S
1968C argument to arrowhead/element distribution
1969C routines: if id%S is not allocated, we pass
1970C S_DUMMY_ARG instead, which is not accessed.
1971C -------------------------------------------
1972 IF (earlyt3rootins) THEN
1973 s_ptr_arg => id%S
1974 maxs_arg = maxs
1975 ELSE
1976 s_ptr_arg => s_dummy_arg
1977 maxs_arg = 1
1978 ENDIF
1979C
1980 IF ( keep( 55 ) .eq. 0 ) THEN
1981C ----------------------------
1982C Original matrix is assembled
1983C Arrowhead format to be used.
1984C ----------------------------
1985C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer
1986C for the matrix in arrowhead format. They have been set by the
1987C analysis phase (DMUMPS_ANA_F and DMUMPS_ANA_G)
1988C
1989C ------------------------------------------------------------------
1990C Blocking is used for sending arrowhead records (I,J,VAL)
1991C buffer(1) is used to store number of bytes already packed
1992C buffer(2) number of records already packed
1993C KEEP(39) : Number of records (blocking factor)
1994C ------------------------------------------------------------------
1995C
1996C ---------------------------------------------
1997C In case of parallel root compute minimum
1998C size of workspace to receive arrowheads
1999C of root node. Will be used to check that
2000C MAXS is large enough for arrowheads (case
2001C of EARLYT3ROOTINS (KEEP(200)=0); if .NOT.
2002C EARLYT3ROOTINS (KEEP(200)=1), root will
2003C be assembled into id%S later and size of
2004C id%S will be checked later)
2005C ---------------------------------------------
2006 IF (earlyt3rootins .AND. keep(38).NE.0 .AND.
2007 & keep(60) .EQ.0 .AND. i_am_slave) THEN
2008 lwk = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK,
2009 & id%root%MYROW, 0, id%root%NPROW ),8)
2010 lwk = max( 1_8, lwk )
2011 lwk = lwk*
2012 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK,
2013 & id%root%MYCOL, 0, id%root%NPCOL ),8)
2014 lwk = max( 1_8, lwk )
2015 ELSE
2016 lwk = 1_8
2017 ENDIF
2018C MAXS must be at least 1, and in case of
2019C parallel root, large enough to receive
2020C arrowheads of root.
2021 IF (maxs .LT. int(lwk,8)) THEN
2022 id%INFO(1) = -9
2023 CALL mumps_set_ierror(lwk, id%INFO(2))
2024 ENDIF
2025 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2026 & id%COMM, id%MYID )
2027 IF ( id%INFO(1).LT.0 ) GOTO 500
2028C
2029 IF ( keep(54) .eq. 0 ) THEN
2030C ================================================
2031C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED
2032C ================================================
2033C A small integer workspace is needed to
2034C send the arrowheads.
2035 IF ( id%MYID .eq. master ) THEN
2036 ALLOCATE(iwk(id%N), stat=allocok)
2037 IF ( allocok .NE. 0 ) THEN
2038 id%INFO(1)=-13
2039 id%INFO(2)=id%N
2040 END IF
2041#if defined(LARGEMATRICES)
2042 ALLOCATE (wk(lwk),stat=ierr)
2043 IF ( ierr .GT. 0 ) THEN
2044 id%INFO(1) = -13
2045 CALL mumps_set_ierror(lwk, id%INFO(2))
2046 write(6,*) ' PB1 ALLOC LARGEMAT'
2047 ENDIF
2048#endif
2049 ENDIF
2050 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2051 & id%COMM, id%MYID )
2052 IF ( id%INFO(1).LT.0 ) GOTO 500
2053 IF ( id%MYID .eq. master ) THEN
2054C
2055C --------------------------------
2056C MASTER sends arowheads using the
2057C global communicator with ranks
2058C also in global communicator
2059C IWK is used as temporary
2060C workspace of size N.
2061C --------------------------------
2062 IF ( .not. associated( id%INTARR ) ) THEN
2063 ALLOCATE( id%INTARR( 1 ),stat=ierr)
2064 IF ( ierr .GT. 0 ) THEN
2065 id%INFO(1) = -13
2066 id%INFO(2) = 1
2067 NULLIFY(id%INTARR)
2068 write(6,*) ' PB2 ALLOC INTARR'
2069 CALL mumps_abort()
2070 ENDIF
2071 ENDIF
2072 nbrecords = keep(39)
2073 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2074 nbrecords = int(id%KEEP8(28))
2075 ENDIF
2076#if defined(LARGEMATRICES)
2077 CALL dmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2078 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2079 & lscal, id%COLSCA(1), id%ROWSCA(1),
2080 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2081 & nbrecords,
2082 & lp, id%COMM, id%root, keep,id%KEEP8,
2083 & id%FILS(1), iwk(1), ! workspace of size N
2084 &
2085 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2086 & id%PTRAR(1), id%PTRAR(id%N+1),
2087 & id%FRERE_STEPS(1), id%STEP(1), wk(1), lwk,
2088 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2089 & id%CANDIDATES(1,1))
2090C write(6,*) '!!! A,IRN,JCN are freed during factorization '
2091 DEALLOCATE (id%A)
2092 NULLIFY(id%A)
2093 DEALLOCATE (id%IRN)
2094 NULLIFY (id%IRN)
2095 DEALLOCATE (id%JCN)
2096 NULLIFY (id%JCN)
2097 IF (.NOT.wk_user_provided) THEN
2098 IF (earlyt3rootins) THEN
2099 ALLOCATE (id%S(maxs),stat=ierr)
2100 id%KEEP8(23) = maxs
2101 IF ( ierr .GT. 0 ) THEN
2102 id%INFO(1) = -13
2103 id%INFO(2) = maxs
2104 NULLIFY(id%S)
2105 id%KEEP8(23)=0_8
2106 write(6,*) ' PB2 ALLOC LARGEMAT',maxs
2107 CALL mumps_abort()
2108 ENDIF
2109 ENDIF
2110 ENDIF
2111 ELSE
2112 id%S => id%WK_USER(1:id%KEEP8(24))
2113 ENDIF
2114 IF (earlyt3rootins) THEN
2115 id%S(maxs-lwk+1_8:maxs) = wk(1_8:lwk)
2116 ENDIF
2117 DEALLOCATE (wk)
2118#else
2119 CALL dmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2120 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2121 & lscal, id%COLSCA(1), id%ROWSCA(1),
2122 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2123 & nbrecords,
2124 & lp, id%COMM, id%root, keep(1),id%KEEP8(1),
2125 & id%FILS(1), iwk(1),
2126 &
2127 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2128 & id%PTRAR(1), id%PTRAR(id%N+1),
2129 & id%FRERE_STEPS(1), id%STEP(1), s_ptr_arg(1), maxs_arg,
2130 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2131 & id%CANDIDATES(1,1) )
2132#endif
2133 DEALLOCATE(iwk)
2134 ELSE
2135 nbrecords = keep(39)
2136 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2137 nbrecords = int(id%KEEP8(28))
2138 ENDIF
2140 & id%DBLARR(1), id%KEEP8(26),
2141 & id%INTARR(1), id%KEEP8(27),
2142 & id%PTRAR( 1 ),
2143 & id%PTRAR(id%N+1),
2144 & keep( 1 ), id%KEEP8(1), id%MYID, id%COMM,
2145 & nbrecords,
2146 &
2147 & s_ptr_arg(1), maxs_arg,
2148 & id%root,
2149 & id%PROCNODE_STEPS(1), id%NSLAVES,
2150 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1),
2151 & id%INFO(1), id%INFO(2) )
2152 ENDIF
2153 ELSE
2154C
2155C =============================================
2156C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED
2157C =============================================
2158C Timing on master.
2159 IF (id%MYID.EQ.master) THEN
2160 CALL mumps_secdeb(time)
2161 END IF
2162 IF ( i_am_slave ) THEN
2163C ---------------------------------------------------
2164C In order to have possibly IRN_loc/JCN_loc/A_loc
2165C of size 0, avoid to pass them inside REDISTRIBUTION
2166C and pass id instead
2167C NZ_locMAX8 gives as a maximum buffer size (send/recv) used
2168C an upper bound to limit buffers on small matrices
2169C ---------------------------------------------------
2170 CALL mpi_allreduce(id%KEEP8(29), nz_locmax8, 1, mpi_integer8,
2171 & mpi_max, id%COMM_NODES, ierr)
2172 nbrecords = keep(39)
2173 IF (nz_locmax8 .LT. int(nbrecords,8)) THEN
2174 nbrecords = int(nz_locmax8)
2175 ENDIF
2176 CALL dmumps_redistribution( id%N,
2177 & id%KEEP8(29),
2178 & id,
2179 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1),
2180 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1),
2181 & keep(1), id%KEEP8(1), id%MYID_NODES,
2182 & id%COMM_NODES, nbrecords,
2183 & s_ptr_arg(1), maxs_arg, id%root, id%PROCNODE_STEPS(1),
2184 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1),
2185 & id%ICNTL(1), id%INFO(1), nsend8, nlocal8,
2186 & id%ISTEP_TO_INIV2(1),
2187 & id%CANDIDATES(1,1) )
2188 IF ( ( keep(52).EQ.7 ).OR. (keep(52).EQ.8) ) THEN
2189C -------------------------------------------------
2190C In that case, scaling arrays have been allocated
2191C on all processors. They were useful for matrix
2192C distribution. But we now really only need them
2193C on the host. In case of distributed solution, we
2194C will have to broadcast either ROWSCA or COLSCA
2195C (depending on MTYPE) but this is done later.
2196C
2197C In other words, on exit from the factorization,
2198C we want to have scaling arrays available only
2199C on the host.
2200C -------------------------------------------------
2201 IF ( id%MYID > 0 ) THEN
2202 IF (associated(id%ROWSCA)) THEN
2203 DEALLOCATE(id%ROWSCA)
2204 NULLIFY(id%ROWSCA)
2205 ENDIF
2206 IF (associated(id%COLSCA)) THEN
2207 DEALLOCATE(id%COLSCA)
2208 NULLIFY(id%COLSCA)
2209 ENDIF
2210 ENDIF
2211 ENDIF
2212#if defined(LARGEMATRICES)
2213C deallocate id%IRN_loc, id%JCN(loc) to free extra space
2214C Note that in this case IRN_loc cannot be used
2215C anymore during the solve phase for IR and Error analysis.
2216 IF (associated(id%IRN_loc)) THEN
2217 DEALLOCATE(id%IRN_loc)
2218 NULLIFY(id%IRN_loc)
2219 ENDIF
2220 IF (associated(id%JCN_loc)) THEN
2221 DEALLOCATE(id%JCN_loc)
2222 NULLIFY(id%JCN_loc)
2223 ENDIF
2224 IF (associated(id%A_loc)) THEN
2225 DEALLOCATE(id%A_loc)
2226 NULLIFY(id%A_loc)
2227 ENDIF
2228 write(6,*) ' Warning :',
2229 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! '
2230#endif
2231 IF (prok) THEN
2232 WRITE(mp,120) nlocal8, nsend8
2233 END IF
2234 END IF
2235 IF ( keep(46) .eq. 0 .AND. id%MYID.eq.master ) THEN
2236C ------------------------------
2237C The host is not working -> had
2238C no data from initial matrix
2239C ------------------------------
2240 nsend8 = 0_8
2241 nlocal8 = 0_8
2242 END IF
2243C --------------------------
2244C Put into some info/infog ?
2245C --------------------------
2246 CALL mpi_reduce( nsend8, nsend_tot8, 1, mpi_integer8,
2247 & mpi_sum, master, id%COMM, ierr )
2248 CALL mpi_reduce( nlocal8, nlocal_tot8, 1, mpi_integer8,
2249 & mpi_sum, master, id%COMM, ierr )
2250 IF ( prokg ) THEN
2251 WRITE(mpg,125) nlocal_tot8, nsend_tot8
2252 END IF
2253C
2254C -------------------------
2255C Check for possible errors
2256C -------------------------
2257 CALL mumps_propinfo( icntl(1), id%INFO(1),
2258 & id%COMM, id%MYID )
2259 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2260C
2261 ENDIF
2262 ELSE
2263C -------------------
2264C Matrix is elemental,
2265C provided on the
2266C master only
2267C -------------------
2268 IF ( id%MYID.eq.master)
2269 & CALL dmumps_maxelt_size( id%ELTPTR(1),
2270 & id%NELT,
2271 & maxelt_size )
2272C
2273C Perform the distribution of the elements.
2274C A this point,
2275C PTRAIW/PTRARW have been computed.
2276C INTARR/DBLARR have been allocated
2277C ELTPROC gives the mapping of elements
2278C
2279 CALL dmumps_elt_distrib( id%N, id%NELT, id%KEEP8(30),
2280 & id%COMM, id%MYID,
2281 & id%NSLAVES, id%PTRAR(1),
2282 & id%PTRAR(id%NELT+2),
2283 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26),
2284 & id%KEEP(1), id%KEEP8(1), maxelt_size,
2285 & id%FRTPTR(1), id%FRTELT(1),
2286 & s_ptr_arg(1), maxs_arg, id%FILS(1),
2287 & id, id%root )
2288C ----------------
2289C Broadcast errors
2290C ----------------
2291 CALL mumps_propinfo( icntl(1), id%INFO(1),
2292 & id%COMM, id%MYID )
2293 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2294 END IF ! Element entry
2295C ------------------------
2296C Time the redistribution:
2297C ------------------------
2298 IF ( id%MYID.EQ.master) THEN
2299 CALL mumps_secfin(time)
2300 id%DKEEP(93) = time
2301 IF (prokg) WRITE(mpg,160) id%DKEEP(93)
2302 END IF
2303 IF ( keep(400) .GT. 0 ) THEN
2304C L0-OMP was active at analysis and
2305C thus will be active at factorization
2306C We check the number of threads.
2307 nomp=1
2308!$ NOMP = omp_get_max_threads()
2309 IF ( nomp .NE. keep(400) ) THEN
2310 id%INFO(1)=-58
2311 id%INFO(2)=keep(400)
2312 IF (lpok) WRITE(lp,'(A,A,I5,A,I5)')
2313 &" FAILURE DETECTED IN FACTORIZATION: #threads for KEEP(401)",
2314 &" changed from",keep(400)," at analysis to", nomp
2315 ENDIF
2316C error check
2317 CALL mumps_propinfo( icntl(1), id%INFO(1),
2318 & id%COMM, id%MYID )
2319 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2320 ENDIF
2321C
2322C TIMINGS:
2323C Next line: elapsed time for factorization
2324 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
2325C
2326C Allocate buffers on the workers
2327C ===============================
2328C
2329 IF ( i_am_slave ) THEN
2330 CALL dmumps_buf_ini_myid(id%MYID_NODES)
2331C
2332C Some buffers are required to pack/unpack data and for
2333C receiving MPI messages.
2334C For packing/unpacking : the buffer must be large
2335C enough to send several messages while receives might not
2336C be posted yet.
2337C It is assumed that the size of an integer is held in KEEP(34)
2338C while the size of a complex is held in KEEP(35).
2339C BUFR and LBUFR are declared of type integer, since byte is not
2340C a standard datatype.
2341C We now use KEEP(43) or KEEP(379) and KEEP(44) or KEEP(380)
2342C as estimated at analysis to allocate appropriate buffer sizes
2343C
2344C Reception buffer
2345C ----------------
2346 IF (keep(486).NE.0) THEN
2347 dmumps_lbufr_bytes8 = int(keep( 380 ),8) * int(keep( 35 ),8)
2348 ELSE
2349 dmumps_lbufr_bytes8 = int(keep( 44 ),8) * int(keep( 35 ),8)
2350 ENDIF
2351C ---------------------------------------
2352C Ensure a reasonable minimal buffer size
2353C ---------------------------------------
2354 dmumps_lbufr_bytes8 = max( dmumps_lbufr_bytes8,
2355 & 100000_8 )
2356 IF ((keep(50).NE.0).AND.(keep(489).GT.0).AND.
2357 & (id%NSLAVES.GE.2)) THEN
2358C ----------------------------------------------------------
2359C Ensure large enough receive buffer in case of BLR with
2360C CompressCB for symmetric matrices.
2361C -----------------------------------------------------------
2362 ratiok465 = dble(keep465copy)/dble(1000)
2363 dmumps_lbufr_bytes8 = max(dmumps_lbufr_bytes8,
2364 & int(
2365 & ratiok465*
2366 & dble(
2367 & int(keep(2)+1,8)*int(keep(142),8)*int(keep(35),8)
2368 & )
2369 & ,8)
2370 & )
2371 ENDIF
2372C
2373C If there is pivoting, size of the message might still increase.
2374C We use a relaxation (so called PERLU) to increase the estimate.
2375C
2376C Note: PERLU is a global estimate for pivoting.
2377C It may happen that one large contribution block size is increased
2378C by more than that.
2379C This is why we use an extra factor 2 relaxation coefficient for
2380C the relaxation of
2381C the reception buffer in the case where pivoting is allowed.
2382C A more dynamic strategy could be applied: if message to
2383C be received is larger than expected, reallocate a larger
2384C buffer. (But this won't work with IRECV.)
2385C Finally, one may want (as we are currently doing it for
2386C most messages)
2387C to cut large messages into a series of smaller ones.
2388C
2389 IF (keep(48).EQ.5) THEN
2390 min_perlu = 2
2391 ELSE
2392 min_perlu = 0
2393 ENDIF
2394C
2395 dmumps_lbufr_bytes8 = dmumps_lbufr_bytes8
2396 & + int( 2.0d0 * dble(max(perlu,min_perlu))*
2397 & dble(dmumps_lbufr_bytes8)/100d0, 8)
2398 dmumps_lbufr_bytes8 = min(dmumps_lbufr_bytes8,
2399 & int(huge(i4)-100,8))
2400 dmumps_lbufr_bytes = int( dmumps_lbufr_bytes8 )
2401 IF (keep(48)==5) THEN
2402C Since the buffer is going to be allocated, use
2403C it as the constraint for memory/granularity
2404C in hybrid scheduler
2405C
2406 id%KEEP8(21) = id%KEEP8(22) +
2407 & int( dble(max(perlu,min_perlu))*
2408 & dble(id%KEEP8(22))/100d0,8)
2409 ENDIF
2410C
2411C Now estimate the size for the buffer for asynchronous
2412C sends of contribution blocks (so called CB). We want to be able to send at
2413C least KEEP(213)/100 (two in general) messages at the
2414C same time.
2415C
2416C Send buffer
2417C -----------
2418 IF (keep(486).NE.0) THEN
2419 dmumps_lbuf8 = int( dble(keep(213)) / 100.0d0 *
2420 & dble(keep(379)) * dble(keep(35)), 8 )
2421 ELSE
2422 dmumps_lbuf8 = int( dble(keep(213)) / 100.0d0 *
2423 & dble(keep(43)) * dble(keep(35)), 8 )
2424 ENDIF
2425 dmumps_lbuf8 = max( dmumps_lbuf8, 100000_8 )
2426 dmumps_lbuf8 = dmumps_lbuf8
2427 & + int( 2.0d0 * dble(max(perlu,min_perlu))*
2428 & dble(dmumps_lbuf8)/100d0, 8)
2429C Make DMUMPS_LBUF8 small enough to be stored in a standard integer
2430 dmumps_lbuf8 = min(dmumps_lbuf8, int(huge(i4)-100,8))
2431C
2432C No reason to have send buffer smaller than receive buffer.
2433C This should never occur with the formulas above but just
2434C in case:
2435 dmumps_lbuf8 = max(dmumps_lbuf8, dmumps_lbufr_bytes8+3*keep(34))
2436 dmumps_lbuf = int(dmumps_lbuf8)
2437 IF(id%KEEP(48).EQ.4)THEN
2438 dmumps_lbufr_bytes=dmumps_lbufr_bytes*5
2439 dmumps_lbuf=dmumps_lbuf*5
2440 ENDIF
2441C
2442C Estimate size of buffer for small messages
2443C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes
2444C
2445C KEEP(56) is the number of nodes of level II.
2446C Messages will be sent for the symmetric case
2447C for synchronisation issues.
2448C
2449C We take an upperbound
2450C
2451 dmumps_lbuf_int = ( keep(56) + id%NSLAVES * id%NSLAVES ) * 5
2452 & * keep(34)
2453 IF ( keep( 38 ) .NE. 0 ) THEN
2454C
2455C
2456 kkkk = mumps_procnode( id%PROCNODE_STEPS(id%STEP(keep(38))),
2457 & id%KEEP(199) )
2458 IF ( kkkk .EQ. id%MYID_NODES ) THEN
2459 dmumps_lbuf_int = dmumps_lbuf_int + 4 * keep(34) *
2460 & ( id%NSLAVES + id%NE_STEPS(id%STEP(keep(38)))
2461 & + min(keep(56), id%NE_STEPS(id%STEP(keep(38)))) * id%NSLAVES
2462 & )
2463 END IF
2464 END IF
2465C At this point, DMUMPS_LBUFR_BYTES, DMUMPS_LBUF
2466C and DMUMPS_LBUF_INT have been computed (all
2467C are in numbers of bytes).
2468 IF ( prok ) THEN
2469 WRITE( mp, 9999 ) dmumps_lbufr_bytes,
2470 & dmumps_lbuf, dmumps_lbuf_int
2471 END IF
2472 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/,
2473 & ' Size of reception buffer in bytes ...... = ', i10,
2474 & /,
2475 & ' Size of async. emission buffer (bytes).. = ', i10,/,
2476 & ' Small emission buffer (bytes) .......... = ', i10)
2477C --------------------------
2478C Allocate small send buffer
2479C required for DMUMPS_FAC_B
2480C --------------------------
2481 CALL dmumps_buf_alloc_small_buf( dmumps_lbuf_int, ierr )
2482 IF ( ierr .NE. 0 ) THEN
2483 id%INFO(1)= -13
2484C convert to size in integer id%INFO(2)= DMUMPS_LBUF_INT
2485 id%INFO(2)= (dmumps_lbuf_int+keep(34)-1)/keep(34)
2486 IF (lpok) THEN
2487 WRITE(lp,*) id%MYID,
2488 & ':Allocation error in DMUMPS_BUF_ALLOC_SMALL_BUF'
2489 & ,id%INFO(2)
2490 ENDIF
2491 GO TO 110
2492 END IF
2493C
2494C --------------------------------------
2495C Allocate reception buffer on all procs
2496C This is done now.
2497C --------------------------------------
2498 dmumps_lbufr = (dmumps_lbufr_bytes+keep(34)-1)/keep(34)
2499 ALLOCATE( bufr( dmumps_lbufr ),stat=ierr )
2500 IF ( ierr .NE. 0 ) THEN
2501 id%INFO(1) = -13
2502 id%INFO(2) = dmumps_lbufr
2503 IF (lpok) THEN
2504 WRITE(lp,*)
2505 & ': Allocation error for BUFR(', dmumps_lbufr,
2506 & ') on MPI process',id%MYID
2507 ENDIF
2508 GO TO 110
2509 END IF
2510C -----------------------------------------
2511C Estimate MAXIS. IS will be allocated in
2512C DMUMPS_FAC_B. It will contain factors and
2513C contribution blocks integer information
2514C -----------------------------------------
2515C Relax integer workspace based on PERLU
2516 perlu = keep( 12 )
2517 IF (keep(201).GT.0) THEN
2518C OOC panel or non panel (note that
2519C KEEP(15)=KEEP(225) if non panel)
2520 maxis_estim = keep(225)
2521 ELSE
2522C In-core or reals for factors not stored
2523 maxis_estim = keep(15)
2524 ENDIF
2525 maxis = max( 1, int( min( int(huge(maxis),8),
2526 & int(maxis_estim,8) + 3_8 * max(int(perlu,8),10_8) *
2527 & ( int(maxis_estim,8) / 100_8 + 1_8 )
2528 & ) ! min
2529 & ) ! int
2530 & ) !max
2531C ----------------------------
2532C Allocate PTLUST_S and PTRFAC
2533C They will be used to access
2534C factors in the solve phase.
2535C They are also needed for
2536C DMUMPS_FAC_L0_OMP.
2537C ----------------------------
2538 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = ierr )
2539 IF ( ierr .NE. 0 ) THEN
2540 id%INFO(1)=-13
2541 id%INFO(2)=id%KEEP(28)
2542 IF (lpok) THEN
2543 WRITE(lp,*) id%MYID,
2544 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')'
2545 ENDIF
2546 NULLIFY(id%PTLUST_S)
2547 GOTO 110
2548 END IF
2549 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = ierr )
2550 IF ( ierr .NE. 0 ) THEN
2551 id%INFO(1)=-13
2552 id%INFO(2)=id%KEEP(28)
2553 NULLIFY(id%PTRFAC)
2554 IF (lpok) THEN
2555 WRITE(lp,*) id%MYID,
2556 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')'
2557 ENDIF
2558 GOTO 110
2559 END IF
2560C -----------------------------
2561C Reserve temporary workspace :
2562C IPOOL, PTRWB, ITLOC, PTRIST
2563C PTRWB will be subdivided again
2564C in routine DMUMPS_FAC_B
2565C -----------------------------
2566 ptrist = 1
2567 ptrwb = ptrist + id%KEEP(28)
2568 itloc = ptrwb + 2 * id%KEEP(28)
2569C Fwd in facto: ITLOC of size id%N + id%KEEP(253)
2570 ipool = itloc + id%N + id%KEEP(253)
2571C
2572C --------------------------------
2573C NA(1) is an upperbound for LPOOL
2574C --------------------------------
2575C Structure of the pool:
2576C ____________________________________________________
2577C | Subtrees | | Top nodes | 1 2 3 |
2578C ----------------------------------------------------
2579 lpool = mumps_get_pool_length(id%NA(1), id%KEEP(1),id%KEEP8(1))
2580 ALLOCATE( iwk( ipool + lpool - 1 ), stat = ierr )
2581 IF ( ierr .NE. 0 ) THEN
2582 id%INFO(1)=-13
2583 id%INFO(2)=ipool + lpool - 1
2584 IF (lpok) THEN
2585 WRITE(lp,*) id%MYID,
2586 & ': Allocation error for IWK(',ipool+lpool-1,')'
2587 ENDIF
2588 GOTO 110
2589 END IF
2590 ALLOCATE(iwk8( 2 * id%KEEP(28)), stat = ierr)
2591 IF ( ierr .NE. 0 ) THEN
2592 id%INFO(1)=-13
2593 id%INFO(2)=2 * id%KEEP(28)
2594 IF (lpok) THEN
2595 WRITE(lp,*) id%MYID,
2596 & ': Allocation error for IWKB(', 2*id%KEEP(28),')'
2597 ENDIF
2598 GOTO 110
2599 END IF
2600C
2601C Return to SPMD
2602C
2603 ENDIF
2604C
2605 110 CONTINUE
2606C ----------------
2607C Broadcast errors
2608C ----------------
2609 CALL mumps_propinfo( icntl(1), id%INFO(1),
2610 & id%COMM, id%MYID )
2611 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2612C
2613 IF ( i_am_slave ) THEN
2614C Store size of receive buffers in DMUMPS_LBUF module
2615 CALL dmumps_buf_dist_irecv_size( dmumps_lbufr_bytes )
2616 IF (prok) THEN
2617 WRITE( mp, 170 ) maxs, maxis, id%KEEP8(12), keep(15),
2618 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), keep(26), keep(27)
2619 ENDIF
2620 END IF
2621C ===============================================================
2622C Before calling the main driver, DMUMPS_FAC_B,
2623C some statistics should be initialized to 0,
2624C even on the host node because they will be
2625C used in REDUCE operations afterwards.
2626C --------------------------------------------
2627C Size of factors written. It will be set to POSFAC in
2628C IC, otherwise we accumulate written factors in it.
2629 id%KEEP8(31)= 0_8
2630C Size of factors under L0 will be returned
2631C in id%KEEP8(64), not included in KEEP8(31))
2632C Number of entries in factors
2633 id%KEEP8(10) = 0_8
2634C KEEP8(8) will hold the volume of extra copies due to
2635C in-place stacking in fac_mem_stack.F
2636 id%KEEP8(8)=0_8
2637 id%INFO(9:14)=0
2638 rinfo(2:3)=zero
2639 IF ( i_am_slave ) THEN
2640C ------------------------------------
2641C Call effective factorization routine
2642C ------------------------------------
2643 IF ( keep(55) .eq. 0 ) THEN
2644 ldptrar = id%N
2645 ELSE
2646 ldptrar = id%NELT + 1
2647 END IF
2648 IF ( id%KEEP(55) .NE. 0 ) THEN
2649 nelt_arg = id%NELT
2650 ELSE
2651C ------------------------------
2652C Use size 1 to avoid complaints
2653C when using check bound options
2654C ------------------------------
2655 nelt_arg = 1
2656 END IF
2657 ENDIF
2658 IF (i_am_slave) THEN
2659 IF (associated(id%L0_OMP_MAPPING))
2660 & DEALLOCATE(id%L0_OMP_MAPPING)
2661 IF (keep(400) .GT. 0) THEN
2662 id%LL0_OMP_MAPPING = keep(28)
2663 ELSE
2664 id%LL0_OMP_MAPPING = 1
2665 ENDIF
2666 ALLOCATE(id%L0_OMP_MAPPING(id%LL0_OMP_MAPPING), stat=allocok)
2667 IF ( allocok > 0) THEN
2668 write(*,*) "Problem allocating L0_OMP_MAPPING",
2669 & ierr, keep(28)
2670 GOTO 115
2671 ENDIF
2672 IF (associated(id%L0_OMP_FACTORS)) THEN
2673 CALL dmumps_free_l0_omp_factors(id%L0_OMP_FACTORS)
2674 ENDIF
2675 IF (keep(400) .GT. 0) THEN
2676 id%LL0_OMP_FACTORS = keep(400)
2677 ELSE
2678 id%LL0_OMP_FACTORS = 1
2679 ENDIF
2680 ALLOCATE(id%L0_OMP_FACTORS(id%LL0_OMP_FACTORS),stat = allocok)
2681 IF (allocok > 0) THEN
2682 id%INFO(1)=-7
2683 id%INFO(2)=nb_threads
2684 GOTO 111
2685 ENDIF
2686 CALL dmumps_init_l0_omp_factors(id%L0_OMP_FACTORS)
2687 ENDIF
2688 115 CONTINUE
2689 CALL mumps_propinfo( icntl(1), id%INFO(1),
2690 & id%COMM, id%MYID )
2691 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2692C Compute DKEEP(17)
2693 avg_flops = rinfog(1)/(dble(id%NSLAVES))
2694 id%DKEEP(17) = max( id%DKEEP(18), avg_flops/dble(50) )
2695 &
2696 IF (prok.AND.id%MYID.EQ.master) THEN
2697 IF (id%NSLAVES.LE.1) THEN
2698 WRITE(mp,'(/A,A,1PD10.3)')
2699 &' Start factorization with total',
2700 &' estimated flops (RINFOG(1)) = ',
2701 & rinfog(1)
2702 ELSE
2703 WRITE(mp,'(/A,A,1PD10.3,A,1PD10.3)')
2704 &' Start factorization with total',
2705 &' estimated flops RINFOG(1) / Average per MPI proc = ',
2706 & rinfog(1), ' / ', avg_flops
2707 ENDIF
2708 ENDIF
2709 IF (i_am_slave) THEN
2710C IS/S pointers passed to DMUMPS_FAC_B with
2711C implicit interface through intermediate
2712C structure S_IS_POINTERS. IS will be allocated
2713C during DMUMPS_FAC_B.
2714C In case of L0OMP, id%IS and id%S are allocated during
2715C DMUMPS_FAC_B, and only after L0OMP nodes are processed,
2716C in order to limit the global memory peak.
2717 s_is_pointers%IW => id%IS; NULLIFY(id%IS)
2718 s_is_pointers%A => id%S ; NULLIFY(id%S)
2719 CALL dmumps_fac_b(id%N,s_is_pointers,maxs,maxis,id%SYM_PERM(1),
2720 & id%NA(1),id%LNA,id%NE_STEPS(1),id%ND_STEPS(1), id%FILS(1),
2721 & id%STEP(1),id%FRERE_STEPS(1),id%DAD_STEPS(1),id%CANDIDATES(1,1),
2722 & id%ISTEP_TO_INIV2(1),id%TAB_POS_IN_PERE(1,1), id%PTRAR(1),
2723 & ldptrar,iwk(ptrist),id%PTLUST_S(1),id%PTRFAC(1),iwk(ptrwb),iwk8,
2724 & iwk(itloc),rhs_mumps(1),iwk(ipool),lpool,cntl1,icntl(1),
2725 & id%INFO(1), rinfo(1),keep(1),id%KEEP8(1),id%PROCNODE_STEPS(1),
2726 & id%NSLAVES,id%COMM_NODES,id%MYID,id%MYID_NODES,bufr,dmumps_lbufr
2727 & , dmumps_lbufr_bytes, dmumps_lbuf, id%INTARR(1),id%DBLARR(1),
2728 & id%root, nelt_arg, id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD,
2729 & id%ASS_IRECV, seuil, seuil_ldlt_niv2, id%MEM_DIST(0),
2730 & id%DKEEP(1), id%PIVNUL_LIST(1), lpn_list, id%LRGROUPS(1)
2731 & ,id%IPOOL_B_L0_OMP(1),id%LPOOL_B_L0_OMP,
2732 & id%IPOOL_A_L0_OMP(1),id%LPOOL_A_L0_OMP,id%L_VIRT_L0_OMP,
2733 & id%VIRT_L0_OMP(1), id%VIRT_L0_OMP_MAPPING(1),id%L_PHYS_L0_OMP,
2734 & id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1), id%PTR_LEAFS_L0_OMP(1),
2735 & id%L0_OMP_MAPPING(1),id%LL0_OMP_MAPPING,
2736 & id%THREAD_LA, id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS,
2737 & id%I4_L0_OMP(1,1), size(id%I4_L0_OMP,1), size(id%I4_L0_OMP,2),
2738 & id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1), size(id%I8_L0_OMP,2)
2739 & )
2740 id%IS => s_is_pointers%IW; NULLIFY(s_is_pointers%IW)
2741 id%S => s_is_pointers%A ; NULLIFY(s_is_pointers%A)
2742C
2743C ------------------------------
2744C Deallocate temporary workspace
2745C ------------------------------
2746 DEALLOCATE( iwk )
2747 DEALLOCATE( iwk8 )
2748 ENDIF
2749C ---------------------------------
2750C Free some workspace corresponding
2751C to the original matrix in
2752C arrowhead or elemental format.
2753C -----
2754C Note : INTARR was not allocated
2755C during factorization in the case
2756C of an assembled matrix.
2757C ---------------------------------
2758 IF ( keep(55) .eq. 0 ) THEN
2759C
2760C ----------------
2761C Assembled matrix
2762C ----------------
2763 IF (associated( id%DBLARR)) THEN
2764 DEALLOCATE(id%DBLARR)
2765 NULLIFY(id%DBLARR)
2766 ENDIF
2767C
2768 ELSE
2769C
2770C ----------------
2771C Elemental matrix
2772C ----------------
2773 IF (associated(id%INTARR)) THEN
2774 DEALLOCATE( id%INTARR)
2775 NULLIFY( id%INTARR )
2776 ENDIF
2777C ------------------------------------
2778C For the master from an hybrid host
2779C execution without scaling, then real
2780C values have not been copied !
2781C -------------------------------------
2782 IF ( id%MYID_NODES .eq. master
2783 & .AND. keep(46) .eq. 1
2784 & .AND. keep(52) .eq. 0 ) THEN
2785 NULLIFY( id%DBLARR )
2786 ELSE
2787 IF (associated( id%DBLARR)) THEN
2788 DEALLOCATE(id%DBLARR)
2789 NULLIFY(id%DBLARR)
2790 ENDIF
2791 END IF
2792 END IF
2793C Memroy statistics
2794C -----------------------------------
2795C If QR (Keep(19)) is not zero, and if
2796C the host does not have the information
2797C (ie is not slave), send information
2798C computed on the slaves during facto
2799C to the host.
2800C -----------------------------------
2801 IF ( keep(19) .NE. 0 ) THEN
2802 IF ( keep(46) .NE. 1 ) THEN
2803C Host was not working during facto_root
2804C Send him the information
2805 IF ( id%MYID .eq. master ) THEN
2806 CALL mpi_recv( keep(17), 1, mpi_integer, 1, defic_tag,
2807 & id%COMM, status, ierr )
2808 CALL mpi_recv( keep(143), 1, mpi_integer, 1, defic_tag,
2809 & id%COMM, status, ierr )
2810 ELSE IF ( id%MYID .EQ. 1 ) THEN
2811 CALL mpi_send( keep(17), 1, mpi_integer, 0, defic_tag,
2812 & id%COMM, ierr )
2813 CALL mpi_send( keep(143), 1, mpi_integer, 0, defic_tag,
2814 & id%COMM, ierr )
2815 END IF
2816 END IF
2817 END IF
2818C --------------------------------
2819C Deallocate communication buffers
2820C They will be reallocated
2821C in the solve.
2822C --------------------------------
2823 IF (allocated(bufr)) DEALLOCATE(bufr)
2824 CALL dmumps_buf_deall_small_buf( ierr )
2825C//PIV
2826 IF (keep(219).NE.0) THEN
2828 ENDIF
2829C
2830C Check for errors.
2831C After DMUMPS_FAC_B every slave is aware of an error.
2832C If master is included in computations, the call below should
2833C not be necessary.
2834 CALL mumps_propinfo( icntl(1), id%INFO(1),
2835 & id%COMM, id%MYID )
2836C
2838 IF (keep(201) .GT. 0) THEN
2839 IF ((keep(201).EQ.1) .OR. (keep(201).EQ.2)) THEN
2840 IF ( i_am_slave ) THEN
2841 CALL dmumps_ooc_clean_pending(ierr)
2842 IF(ierr.LT.0)THEN
2843 id%INFO(1)=ierr
2844 id%INFO(2)=0
2845 ENDIF
2846 ENDIF
2847 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2848 & id%COMM, id%MYID )
2849C We want to collect statistics even in case of
2850C error to understand if it is due to numerical
2851C issues
2852CC IF ( id%INFO(1) < 0 ) GOTO 500
2853 END IF
2854 END IF
2855 IF (id%MYID.EQ.master) THEN
2856 CALL mumps_secfin(time)
2857 id%DKEEP(94)=time
2858 IF (keep(400).GT.0) THEN
2859C Facto time above L0_OMP = total time - facto time under L0_OMP
2860 id%DKEEP(96)=id%DKEEP(94)-id%DKEEP(95)
2861 ENDIF
2862 ENDIF
2863C =====================================================================
2864C COMPUTE MEMORY ALLOCATED BY MUMPS, INFO(16)
2865C ---------------------------------------------
2866 mem_eff_allocated = .true.
2867 CALL dmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2868 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2869 & id%KEEP8(30),
2870 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2871 & blr_strat, .true., total_bytes,
2872 & idummy, bdummy, mem_eff_allocated
2873 & , .false. ! UNDER_L0_OMP
2874 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2875 & size(id%I8_L0_OMP,2)
2876 & )
2877 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2878 CALL dmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2879 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2880 & id%KEEP8(30),
2881 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2882 & blr_strat, .true., total_bytes_under_l0,
2883 & idummy, bdummy, mem_eff_allocated
2884 & , .true. ! UNDER_L0_OMP
2885 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2886 & size(id%I8_L0_OMP,2)
2887 & )
2888 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2889 total_bytes = max(total_bytes, total_bytes_under_l0)
2890 ENDIF
2891 IF (id%KEEP8(24).NE.0) THEN
2892C WK_USER is not part of memory allocated by MUMPS
2893C and is not counted, id%KEEP8(23) should be zero
2894 id%INFO(16) = total_mbytes
2895 ELSE
2896C Note that even for the case of ICNTL(23)>0
2897C we report here the memory effectively allocated
2898C that can be smaller than ICNTL(23) !
2899 id%INFO(16) = total_mbytes
2900 ENDIF
2901C ----------------------------------------------------
2902C Centralize memory statistics on the host
2903C id%INFOG(18) = size of mem in Mbytes for facto,
2904C for the processor using largest memory
2905C id%INFOG(19) = size of mem in Mbytes for facto,
2906C sum over all processors
2907C ----------------------------------------------------
2908 CALL mumps_mem_centralize( id%MYID, id%COMM,
2909 & id%INFO(16), id%INFOG(18), irank )
2910 CALL dmumps_print_allocated_mem( prok, prokg, print_maxavg,
2911 & mp, mpg, id%INFO(16), id%INFOG(18), id%INFOG(19),
2912 & id%NSLAVES, irank,
2913 & id%KEEP(1) )
2914C If WK_USER is provided, this excludes WK_USER
2915 IF (prok ) THEN
2916 WRITE(mp,'(A,I12) ')
2917 & ' ** Eff. min. Space MBYTES for facto (INFO(16)):',
2918 & total_mbytes
2919 ENDIF
2920C ========================(INFO(16) RELATED)======================
2921C ---------------------------------------
2922C COMPUTE EFFECTIVE MEMORY USED INFO(22)
2923C ---------------------------------------
2924 perlu_on = .true.
2925 mem_eff_allocated = .false.
2926 CALL dmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2927 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2928 & id%KEEP8(30),
2929 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2930 & blr_strat, perlu_on, total_bytes,
2931 & idummy, bdummy, mem_eff_allocated
2932 & , .false. ! UNDER_L0_OMP
2933 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2934 & size(id%I8_L0_OMP,2)
2935 & )
2936 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2937 CALL dmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2938 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2939 & id%KEEP8(30),
2940 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2941 & blr_strat, perlu_on, total_bytes_under_l0,
2942 & idummy, bdummy, mem_eff_allocated
2943 & , .true. ! UNDER_L0_OMP
2944 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2945 & size(id%I8_L0_OMP,2)
2946 & )
2947 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2948 total_bytes = max(total_bytes, total_bytes_under_l0)
2949 ENDIF
2950C -- TOTAL_BYTES and TOTAL_MBYTES includes both static
2951C -- (MAXS) and BLR structures computed as the SUM of the PEAKS
2952C -- (KEEP8(67) + KEEP8(70))
2953 id%KEEP8(7) = total_bytes
2954C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS
2955C -- (it includes part of WK_USER used if provided by user)
2956 id%INFO(22) = total_mbytes
2957C ----------------------------------------------------
2958C Centralize memory statistics on the host
2959C INFOG(21) = size of effective mem (Mbytes) for facto,
2960C for the processor using largest memory
2961C INFOG(22) = size of effective mem (Mbytes) for facto,
2962C sum over all processors
2963C ----------------------------------------------------
2964 CALL mumps_mem_centralize( id%MYID, id%COMM,
2965 & id%INFO(22), id%INFOG(21), irank )
2966 IF ( prokg ) THEN
2967 IF (print_maxavg) THEN
2968 WRITE( mpg,'(A,I12) ')
2969 & ' ** memory effectively used, max in mbytes(infog(21)):',
2970 & id%INFOG(21)
2971 ENDIF
2972 WRITE( MPG,'(a,i12) ')
2973 & ' ** memory effectively used, total in mbytes(infog(22)):',
2974 & id%INFOG(22)
2975 END IF
2976 SUM_INFO22_THIS_NODE=0
2977 CALL MPI_REDUCE( id%INFO(22), SUM_INFO22_THIS_NODE, 1,
2978 & MPI_INTEGER,
2979 & MPI_SUM, 0, id%KEEP(411), IERR )
2980 CALL MPI_REDUCE( SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE,
2981 & 1, MPI_INTEGER, MPI_MAX, 0, id%COMM, IERR )
2982.AND. IF (PROKG PRINT_NODEINFO) THEN
2983 WRITE(MPG,'(a,i12)')
2984 & ' ** max. effective space per compute node, in mbytes :',
2985 & MAX_SUM_INFO22_THIS_NODE
2986 ENDIF
2987C
2988 IF (I_AM_SLAVE) THEN
2989 K67 = id%KEEP8(67)
2990 K68 = id%KEEP8(68)
2991 K70 = id%KEEP8(70)
2992 K74 = id%KEEP8(74)
2993 K75 = id%KEEP8(75)
2994 ELSE
2995 K67 = 0_8
2996 K68 = 0_8
2997 K70 = 0_8
2998 K74 = 0_8
2999 K75 = 0_8
3000 ENDIF
3001C -- Save the number of entries effectively used
3002C in main working array S
3003 CALL MUMPS_SETI8TOI4(K67,id%INFO(21))
3004C
3005C
3006.GT. IF (KEEP(400) 0 ) THEN
3007.NOT. IF ( I_AM_SLAVE) THEN
3008 id%DKEEP(95) = 0.0D0
3009 id%DKEEP(16) = 0.0D0
3010 ENDIF
3011.GT. IF (id%NPROCS 1) THEN
3012C Compute average and max (across MPI's)
3013 CALL MPI_REDUCE(id%DKEEP(95), TMPTIME, 1,
3014 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, id%COMM, IERR)
3015.EQ. IF (id%MYIDMASTER) TIMEAVG = TMPTIME
3016 CALL MPI_REDUCE(id%DKEEP(16), TMPFLOP, 1,
3017 & MPI_DOUBLE_PRECISION, MPI_SUM, MASTER, id%COMM, IERR)
3018.EQ. IF (id%MYIDMASTER) FLOPAVG = TMPFLOP
3019.EQ. IF (id%MYIDMASTER) THEN
3020 TIMEAVG = TIMEAVG / id%NSLAVES
3021 FLOPAVG = FLOPAVG / id%NSLAVES
3022 ENDIF
3023 CALL MPI_REDUCE(id%DKEEP(95), TIMEMAX, 1,
3024 & MPI_DOUBLE_PRECISION, MPI_MAX, MASTER, id%COMM, IERR)
3025 CALL MPI_REDUCE(id%DKEEP(16), FLOPMAX, 1,
3026 & MPI_DOUBLE_PRECISION, MPI_MAX, MASTER, id%COMM, IERR)
3027C (PROKG may only be true on master)
3028 IF ( PROKG ) THEN
3029 WRITE(MPG,190) FLOPAVG, FLOPMAX
3030 WRITE(MPG,188) TIMEAVG, TIMEMAX
3031 ENDIF
3032 ELSE
3033C Print DKEEP(95) directly without reduction
3034 IF ( PROKG ) THEN
3035 WRITE(MPG,189) id%DKEEP(16)
3036 WRITE(MPG,187) id%DKEEP(95)
3037 ENDIF
3038 ENDIF
3039 ENDIF
3040 IF ( PROKG ) THEN
3041.GE. IF (id%INFO(1) 0) THEN
3042 WRITE(MPG,180) id%DKEEP(94)
3043 ELSE
3044 WRITE(MPG,185) id%DKEEP(94)
3045 ENDIF
3046 ENDIF
3047C
3048C Sum RINFO(2) : total number of flops for assemblies
3049C Sum RINFO(3) : total number of flops for eliminations
3050C Initialize RINFO(4) in case BLR was not activated
3051 RINFO(4) = RINFO(3)
3052C
3053C Should work even if the master does some work
3054C
3055 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2,
3056 & MPI_DOUBLE_PRECISION,
3057 & MPI_SUM, MASTER, id%COMM, IERR)
3058C Reduce needed to dimension small working array
3059C on all procs during DMUMPS_GATHER_SOLUTION
3060 KEEP(247) = 0
3061 CALL MPI_REDUCE( KEEP(246), KEEP(247), 1, MPI_INTEGER,
3062 & MPI_MAX, MASTER, id%COMM, IERR)
3063C
3064C Reduce compression times: get max compression times
3065 CALL MPI_REDUCE( id%DKEEP(97), id%DKEEP(98), 1,
3066 & MPI_DOUBLE_PRECISION,
3067 & MPI_MAX, MASTER, id%COMM, IERR)
3068C
3069 CALL MPI_REDUCE( RINFO(2), RINFOG(2), 2,
3070 & MPI_DOUBLE_PRECISION,
3071 & MPI_SUM, MASTER, id%COMM, IERR)
3072 CALL MUMPS_REDUCEI8( id%KEEP8(31)+id%KEEP8(64),id%KEEP8(6),
3073 & MPI_SUM, MASTER, id%COMM )
3074C
3075.EQ. IF (id%MYID0) THEN
3076C In MegaBytes
3077 RINFOG(16) = dble(id%KEEP8(6)*int(KEEP(35),8))/dble(1D6)
3078.LE. IF (KEEP(201)0) THEN
3079 RINFOG(16) = ZERO
3080 ENDIF
3081 ENDIF
3082 CALL MUMPS_REDUCEI8( id%KEEP8(48),id%KEEP8(148), MPI_SUM,
3083 & MASTER, id%COMM )
3084 CALL MUMPS_SETI8TOI4(id%KEEP8(148), INFOG(9))
3085C
3086 CALL MPI_REDUCE( int(id%INFO(10),8), id%KEEP8(128),
3087 & 1, MPI_INTEGER8,
3088 & MPI_SUM, MASTER, id%COMM, IERR)
3089.EQ. IF (id%MYIDMASTER) THEN
3090 CALL MUMPS_SETI8TOI4(id%KEEP8(128), id%INFOG(10))
3091 ENDIF
3092C Use MPI_MAX for this one to get largest front size
3093 CALL MPI_ALLREDUCE( id%INFO(11), INFOG(11), 1, MPI_INTEGER,
3094 & MPI_MAX, id%COMM, IERR)
3095C make maximum effective frontal size available on all procs
3096C for solve phase
3097C (Note that INFO(11) includes root size on root master)
3098 KEEP(133) = INFOG(11)
3099 CALL MPI_REDUCE( id%INFO(12), INFOG(12), 3, MPI_INTEGER,
3100 & MPI_SUM, MASTER, id%COMM, IERR)
3101 CALL MPI_REDUCE( KEEP(103), INFOG(25), 1, MPI_INTEGER,
3102 & MPI_SUM, MASTER, id%COMM, IERR)
3103 KEEP(229) = INFOG(25)
3104 CALL MPI_REDUCE( KEEP(105), INFOG(25), 1, MPI_INTEGER,
3105 & MPI_SUM, MASTER, id%COMM, IERR)
3106 KEEP(230) = INFOG(25)
3107C
3108 id%INFO(25) = KEEP(98)
3109 CALL MPI_ALLREDUCE( id%INFO(25), INFOG(25), 1, MPI_INTEGER,
3110 & MPI_SUM, id%COMM, IERR)
3111C Extra copies due to in-place stacking
3112 CALL MUMPS_REDUCEI8( id%KEEP8(8), id%KEEP8(108), MPI_SUM,
3113 & MASTER, id%COMM )
3114C Entries in factors
3115 CALL MUMPS_SETI8TOI4(id%KEEP8(10), id%INFO(27))
3116 CALL MUMPS_REDUCEI8( id%KEEP8(10),id%KEEP8(110), MPI_SUM,
3117 & MASTER, id%COMM )
3118 CALL MUMPS_SETI8TOI4(id%KEEP8(110), INFOG(29))
3119C Initialize INFO(28)/INFOG(35) in case BLR not activated
3120 id%INFO(28) = id%INFO(27)
3121 INFOG(35) = INFOG(29)
3122C ==============================
3123C LOW-RANK
3124C ==============================
3125.NE. IF ( KEEP(486) 0 ) THEN !LR is activated
3126C Compute and Save local amount of flops in case of BLR
3127 RINFO(4) = dble(FLOP_FRFRONTS + FLOP_FACTO_FR - FLOP_LRGAIN
3128 & + FLOP_COMPRESS + FLOP_FRFRONTS)
3129C
3130C Compute and Save local number of entries in compressed factors
3131C
3132 ITMP8 = id%KEEP8(10) - int(MRY_LU_LRGAIN,8)
3133 CALL MUMPS_SETI8TOI4( ITMP8, id%INFO(28))
3134C
3135 CALL MPI_REDUCE( MRY_LU_LRGAIN, TMP_MRY_LU_LRGAIN
3136 & , 1, MPI_DOUBLE_PRECISION,
3137 & MPI_SUM, MASTER, id%COMM, IERR)
3138 CALL MPI_REDUCE( MRY_LU_FR, TMP_MRY_LU_FR
3139 & , 1, MPI_DOUBLE_PRECISION,
3140 & MPI_SUM, MASTER, id%COMM, IERR)
3141 CALL MPI_REDUCE( MRY_CB_FR, TMP_MRY_CB_FR
3142 & , 1, MPI_DOUBLE_PRECISION,
3143 & MPI_SUM, MASTER, id%COMM, IERR)
3144 CALL MPI_REDUCE( MRY_CB_LRGAIN, TMP_MRY_CB_LRGAIN
3145 & , 1, MPI_DOUBLE_PRECISION,
3146 & MPI_SUM, MASTER, id%COMM, IERR)
3147 CALL MPI_REDUCE( FLOP_LRGAIN, TMP_FLOP_LRGAIN
3148 & , 1, MPI_DOUBLE_PRECISION,
3149 & MPI_SUM, MASTER, id%COMM, IERR)
3150 CALL MPI_REDUCE( FLOP_TRSM_FR, TMP_FLOP_TRSM_FR
3151 & , 1, MPI_DOUBLE_PRECISION,
3152 & MPI_SUM, MASTER, id%COMM, IERR)
3153 CALL MPI_REDUCE( FLOP_TRSM_LR, TMP_FLOP_TRSM_LR
3154 & , 1, MPI_DOUBLE_PRECISION,
3155 & MPI_SUM, MASTER, id%COMM, IERR)
3156 CALL MPI_REDUCE( FLOP_UPDATE_FR, TMP_FLOP_UPDATE_FR
3157 & , 1, MPI_DOUBLE_PRECISION,
3158 & MPI_SUM, MASTER, id%COMM, IERR)
3159 CALL MPI_REDUCE( FLOP_UPDATE_LR, TMP_FLOP_UPDATE_LR
3160 & , 1, MPI_DOUBLE_PRECISION,
3161 & MPI_SUM, MASTER, id%COMM, IERR)
3162 CALL MPI_REDUCE( FLOP_FRSWAP_COMPRESS,
3163 & TMP_FLOP_FRSWAP_COMPRESS
3164 & , 1, MPI_DOUBLE_PRECISION,
3165 & MPI_SUM, MASTER, id%COMM, IERR)
3166 CALL MPI_REDUCE( FLOP_MIDBLK_COMPRESS,
3167 & TMP_FLOP_MIDBLK_COMPRESS
3168 & , 1, MPI_DOUBLE_PRECISION,
3169 & MPI_SUM, MASTER, id%COMM, IERR)
3170 CALL MPI_REDUCE( FLOP_UPDATE_LRLR3, TMP_FLOP_UPDATE_LRLR3
3171 & , 1, MPI_DOUBLE_PRECISION,
3172 & MPI_SUM, MASTER, id%COMM, IERR)
3173 CALL MPI_REDUCE(FLOP_ACCUM_COMPRESS, TMP_FLOP_ACCUM_COMPRESS
3174 & , 1, MPI_DOUBLE_PRECISION,
3175 & MPI_SUM, MASTER, id%COMM, IERR)
3176 CALL MPI_REDUCE( FLOP_TRSM, TMP_FLOP_TRSM
3177 & , 1, MPI_DOUBLE_PRECISION,
3178 & MPI_SUM, MASTER, id%COMM, IERR)
3179 CALL MPI_REDUCE( FLOP_PANEL, TMP_FLOP_PANEL
3180 & , 1, MPI_DOUBLE_PRECISION,
3181 & MPI_SUM, MASTER, id%COMM, IERR)
3182 CALL MPI_REDUCE( FLOP_FRFRONTS, TMP_FLOP_FRFRONTS
3183 & , 1, MPI_DOUBLE_PRECISION,
3184 & MPI_SUM, MASTER, id%COMM, IERR)
3185 CALL MPI_REDUCE( FLOP_COMPRESS, TMP_FLOP_COMPRESS
3186 & , 1, MPI_DOUBLE_PRECISION,
3187 & MPI_SUM, MASTER, id%COMM, IERR)
3188 CALL MPI_REDUCE( FLOP_DECOMPRESS, TMP_FLOP_DECOMPRESS
3189 & , 1, MPI_DOUBLE_PRECISION,
3190 & MPI_SUM, MASTER, id%COMM, IERR)
3191 CALL MPI_REDUCE( FLOP_CB_COMPRESS, TMP_FLOP_CB_COMPRESS
3192 & , 1, MPI_DOUBLE_PRECISION,
3193 & MPI_SUM, MASTER, id%COMM, IERR)
3194 CALL MPI_REDUCE( FLOP_CB_DECOMPRESS,TMP_FLOP_CB_DECOMPRESS
3195 & , 1, MPI_DOUBLE_PRECISION,
3196 & MPI_SUM, MASTER, id%COMM, IERR)
3197 CALL MPI_REDUCE( FLOP_FACTO_FR, TMP_FLOP_FACTO_FR
3198 & , 1, MPI_DOUBLE_PRECISION,
3199 & MPI_SUM, MASTER, id%COMM, IERR)
3200 CALL MPI_REDUCE( CNT_NODES,TMP_CNT_NODES
3201 & , 1, MPI_INTEGER,
3202 & MPI_SUM, MASTER, id%COMM, IERR)
3203.GT. IF (id%NPROCS1) THEN
3204 FLOP_FACTO_LR = FLOP_FACTO_FR - FLOP_LRGAIN
3205 & + FLOP_COMPRESS + FLOP_FRFRONTS
3206 CALL MPI_REDUCE( FLOP_FACTO_LR, AVG_FLOP_FACTO_LR
3207 & , 1, MPI_DOUBLE_PRECISION,
3208 & MPI_SUM, MASTER, id%COMM, IERR)
3209.EQ. IF (id%MYIDMASTER) THEN
3210 AVG_FLOP_FACTO_LR = AVG_FLOP_FACTO_LR/id%NPROCS
3211 ENDIF
3212 CALL MPI_REDUCE( FLOP_FACTO_LR, MIN_FLOP_FACTO_LR
3213 & , 1, MPI_DOUBLE_PRECISION,
3214 & MPI_MIN, MASTER, id%COMM, IERR)
3215 CALL MPI_REDUCE( FLOP_FACTO_LR, MAX_FLOP_FACTO_LR
3216 & , 1, MPI_DOUBLE_PRECISION,
3217 & MPI_MAX, MASTER, id%COMM, IERR)
3218 ENDIF ! NPROCS > 1
3219 CALL MPI_REDUCE( TIME_UPDATE, TMP_TIME_UPDATE
3220 & , 1, MPI_DOUBLE_PRECISION,
3221 & MPI_SUM, MASTER, id%COMM, IERR)
3222 CALL MPI_REDUCE( TIME_UPDATE_LRLR1, TMP_TIME_UPDATE_LRLR1
3223 & , 1, MPI_DOUBLE_PRECISION,
3224 & MPI_SUM, MASTER, id%COMM, IERR)
3225 CALL MPI_REDUCE( TIME_UPDATE_LRLR2, TMP_TIME_UPDATE_LRLR2
3226 & , 1, MPI_DOUBLE_PRECISION,
3227 & MPI_SUM, MASTER, id%COMM, IERR)
3228 CALL MPI_REDUCE( TIME_UPDATE_LRLR3, TMP_TIME_UPDATE_LRLR3
3229 & , 1, MPI_DOUBLE_PRECISION,
3230 & MPI_SUM, MASTER, id%COMM, IERR)
3231 CALL MPI_REDUCE( TIME_UPDATE_FRLR, TMP_TIME_UPDATE_FRLR
3232 & , 1, MPI_DOUBLE_PRECISION,
3233 & MPI_SUM, MASTER, id%COMM, IERR)
3234 CALL MPI_REDUCE( TIME_UPDATE_FRFR, TMP_TIME_UPDATE_FRFR
3235 & , 1, MPI_DOUBLE_PRECISION,
3236 & MPI_SUM, MASTER, id%COMM, IERR)
3237 CALL MPI_REDUCE( TIME_DIAGCOPY, TMP_TIME_DIAGCOPY
3238 & , 1, MPI_DOUBLE_PRECISION,
3239 & MPI_SUM, MASTER, id%COMM, IERR)
3240 CALL MPI_REDUCE( TIME_COMPRESS,TMP_TIME_COMPRESS
3241 & , 1, MPI_DOUBLE_PRECISION,
3242 & MPI_SUM, MASTER, id%COMM, IERR)
3243 CALL MPI_REDUCE( TIME_MIDBLK_COMPRESS,
3244 & TMP_TIME_MIDBLK_COMPRESS
3245 & , 1, MPI_DOUBLE_PRECISION,
3246 & MPI_SUM, MASTER, id%COMM, IERR)
3247 CALL MPI_REDUCE( TIME_FRSWAP_COMPRESS,
3248 & TMP_TIME_FRSWAP_COMPRESS
3249 & , 1, MPI_DOUBLE_PRECISION,
3250 & MPI_SUM, MASTER, id%COMM, IERR)
3251 CALL MPI_REDUCE( TIME_CB_COMPRESS, TMP_TIME_CB_COMPRESS
3252 & , 1, MPI_DOUBLE_PRECISION,
3253 & MPI_SUM, MASTER, id%COMM, IERR)
3254 CALL MPI_REDUCE( TIME_DECOMP, TMP_TIME_DECOMP
3255 & , 1, MPI_DOUBLE_PRECISION,
3256 & MPI_SUM, MASTER, id%COMM, IERR)
3257 CALL MPI_REDUCE( TIME_DECOMP_UCFS, TMP_TIME_DECOMP_UCFS
3258 & , 1, MPI_DOUBLE_PRECISION,
3259 & MPI_SUM, MASTER, id%COMM, IERR)
3260 CALL MPI_REDUCE( TIME_DECOMP_ASM1, TMP_TIME_DECOMP_ASM1
3261 & , 1, MPI_DOUBLE_PRECISION,
3262 & MPI_SUM, MASTER, id%COMM, IERR)
3263 CALL MPI_REDUCE(TIME_DECOMP_LOCASM2, TMP_TIME_DECOMP_LOCASM2
3264 & , 1, MPI_DOUBLE_PRECISION,
3265 & MPI_SUM, MASTER, id%COMM, IERR)
3266 CALL MPI_REDUCE(TIME_DECOMP_MAPLIG1, TMP_TIME_DECOMP_MAPLIG1
3267 & , 1, MPI_DOUBLE_PRECISION,
3268 & MPI_SUM, MASTER, id%COMM, IERR)
3269 CALL MPI_REDUCE( TIME_DECOMP_ASMS2S, TMP_TIME_DECOMP_ASMS2S
3270 & , 1, MPI_DOUBLE_PRECISION,
3271 & MPI_SUM, MASTER, id%COMM, IERR)
3272 CALL MPI_REDUCE( TIME_DECOMP_ASMS2M, TMP_TIME_DECOMP_ASMS2M
3273 & , 1, MPI_DOUBLE_PRECISION,
3274 & MPI_SUM, MASTER, id%COMM, IERR)
3275 CALL MPI_REDUCE( TIME_PANEL, TMP_TIME_PANEL
3276 & , 1, MPI_DOUBLE_PRECISION,
3277 & MPI_SUM, MASTER, id%COMM, IERR)
3278 CALL MPI_REDUCE( TIME_FAC_I, TMP_TIME_FAC_I
3279 & , 1, MPI_DOUBLE_PRECISION,
3280 & MPI_SUM, MASTER, id%COMM, IERR)
3281 CALL MPI_REDUCE( TIME_FAC_MQ, TMP_TIME_FAC_MQ
3282 & , 1, MPI_DOUBLE_PRECISION,
3283 & MPI_SUM, MASTER, id%COMM, IERR)
3284 CALL MPI_REDUCE( TIME_FAC_SQ, TMP_TIME_FAC_SQ
3285 & , 1, MPI_DOUBLE_PRECISION,
3286 & MPI_SUM, MASTER, id%COMM, IERR)
3287 CALL MPI_REDUCE( TIME_LRTRSM, TMP_TIME_LRTRSM
3288 & , 1, MPI_DOUBLE_PRECISION,
3289 & MPI_SUM, MASTER, id%COMM, IERR)
3290 CALL MPI_REDUCE( TIME_FRTRSM, TMP_TIME_FRTRSM
3291 & , 1, MPI_DOUBLE_PRECISION,
3292 & MPI_SUM, MASTER, id%COMM, IERR)
3293 CALL MPI_REDUCE( TIME_FRFRONTS, TMP_TIME_FRFRONTS
3294 & , 1, MPI_DOUBLE_PRECISION,
3295 & MPI_SUM, MASTER, id%COMM, IERR)
3296 CALL MPI_REDUCE( TIME_LR_MODULE, TMP_TIME_LR_MODULE
3297 & , 1, MPI_DOUBLE_PRECISION,
3298 & MPI_SUM, MASTER, id%COMM, IERR)
3299.EQ. IF (id%MYIDMASTER) THEN
3300.GT. IF (id%NPROCS1) THEN
3301C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any
3302C number of procs
3303 MRY_LU_FR = TMP_MRY_LU_FR
3304 MRY_LU_LRGAIN = TMP_MRY_LU_LRGAIN
3305 MRY_CB_FR = TMP_MRY_CB_FR
3306 MRY_CB_LRGAIN = TMP_MRY_CB_LRGAIN
3307 FLOP_LRGAIN = TMP_FLOP_LRGAIN
3308 FLOP_PANEL = TMP_FLOP_PANEL
3309 FLOP_TRSM = TMP_FLOP_TRSM
3310 FLOP_TRSM_FR = TMP_FLOP_TRSM_FR
3311 FLOP_TRSM_LR = TMP_FLOP_TRSM_LR
3312 FLOP_UPDATE_FR = TMP_FLOP_UPDATE_FR
3313 FLOP_UPDATE_LR = TMP_FLOP_UPDATE_LR
3314 FLOP_UPDATE_LRLR3 = TMP_FLOP_UPDATE_LRLR3
3315 FLOP_COMPRESS = TMP_FLOP_COMPRESS
3316 FLOP_MIDBLK_COMPRESS = TMP_FLOP_MIDBLK_COMPRESS
3317 FLOP_FRSWAP_COMPRESS = TMP_FLOP_FRSWAP_COMPRESS
3318 FLOP_ACCUM_COMPRESS = TMP_FLOP_ACCUM_COMPRESS
3319 FLOP_CB_COMPRESS = TMP_FLOP_CB_COMPRESS
3320 FLOP_DECOMPRESS = TMP_FLOP_DECOMPRESS
3321 FLOP_CB_DECOMPRESS = TMP_FLOP_CB_DECOMPRESS
3322 FLOP_FRFRONTS = TMP_FLOP_FRFRONTS
3323 FLOP_FACTO_FR = TMP_FLOP_FACTO_FR
3324 CNT_NODES = TMP_CNT_NODES
3325 TIME_UPDATE = TMP_TIME_UPDATE /id%NPROCS
3326 TIME_UPDATE_LRLR1 = TMP_TIME_UPDATE_LRLR1 /id%NPROCS
3327 TIME_UPDATE_LRLR2 = TMP_TIME_UPDATE_LRLR2 /id%NPROCS
3328 TIME_UPDATE_LRLR3 = TMP_TIME_UPDATE_LRLR3 /id%NPROCS
3329 TIME_UPDATE_FRLR = TMP_TIME_UPDATE_FRLR /id%NPROCS
3330 TIME_UPDATE_FRFR = TMP_TIME_UPDATE_FRFR /id%NPROCS
3331 TIME_COMPRESS = TMP_TIME_COMPRESS /id%NPROCS
3332 TIME_MIDBLK_COMPRESS = TMP_TIME_MIDBLK_COMPRESS/id%NPROCS
3333 TIME_FRSWAP_COMPRESS = TMP_TIME_FRSWAP_COMPRESS/id%NPROCS
3334 TIME_DIAGCOPY = TMP_TIME_DIAGCOPY /id%NPROCS
3335 TIME_CB_COMPRESS = TMP_TIME_CB_COMPRESS /id%NPROCS
3336 TIME_PANEL = TMP_TIME_PANEL /id%NPROCS
3337 TIME_FAC_I = TMP_TIME_FAC_I /id%NPROCS
3338 TIME_FAC_MQ = TMP_TIME_FAC_MQ /id%NPROCS
3339 TIME_FAC_SQ = TMP_TIME_FAC_SQ /id%NPROCS
3340 TIME_LRTRSM = TMP_TIME_LRTRSM /id%NPROCS
3341 TIME_FRTRSM = TMP_TIME_FRTRSM /id%NPROCS
3342 TIME_FRFRONTS = TMP_TIME_FRFRONTS /id%NPROCS
3343 TIME_LR_MODULE = TMP_TIME_LR_MODULE /id%NPROCS
3344 TIME_DECOMP = TMP_TIME_DECOMP /id%NPROCS
3345 TIME_DECOMP_UCFS = TMP_TIME_DECOMP_UCFS /id%NPROCS
3346 TIME_DECOMP_ASM1 = TMP_TIME_DECOMP_ASM1 /id%NPROCS
3347 TIME_DECOMP_LOCASM2 = TMP_TIME_DECOMP_LOCASM2 /id%NPROCS
3348 TIME_DECOMP_MAPLIG1 = TMP_TIME_DECOMP_MAPLIG1 /id%NPROCS
3349 TIME_DECOMP_ASMS2S = TMP_TIME_DECOMP_ASMS2S /id%NPROCS
3350 TIME_DECOMP_ASMS2M = TMP_TIME_DECOMP_ASMS2M /id%NPROCS
3351 ENDIF
3352 CALL COMPUTE_GLOBAL_GAINS(id%KEEP8(110),id%RINFOG(3),
3353 & id%KEEP8(49), PROKG, MPG)
3354C Number of entries in factor INFOG(35) in
3355C compressed form is updated as long as
3356C BLR is activated, this independently of the
3357C fact that factors are saved in LR.
3358 CALL MUMPS_SETI8TOI4(id%KEEP8(49), id%INFOG(35))
3359 FRONTWISE = 0
3360C WRITE gains also compute stats stored in DKEEP array
3361 IF (LPOK) THEN
3362 IF (CNTL(7) < 0.0D0) THEN
3363C Warning : using negative values is an experimental and
3364C non recommended setting.
3365 WRITE(LP,'(/a/,a/,a/,a,a)')
3366 & ' warning in blr input setting',
3367 & ' cntl(7) < 0 is experimental: ',
3368 & ' rrqr precision = |cntl(7| x ||a_pre||, ',
3369 & ' where a_pre is the preprocessed matrix as defined',
3370 & ' in the users guide '
3371 ENDIF
3372 ENDIF
3373 CALL saveandwrite_gains(frontwise,
3374 & keep(489), id%DKEEP, n, id%ICNTL(36),
3375 & keep(487), keep(488), keep(490),
3376 & keep(491), keep(50), keep(486),
3377 & keep(472), keep(475), keep(478), keep(480),
3378 & keep(481),
3379 & keep(483), keep(484),
3380 & id%KEEP8(110), id%KEEP8(49),
3381 & keep(28), id%NPROCS, mpg, prokg)
3382C flops when BLR activated
3383 rinfog(14) = id%DKEEP(56)
3384 ELSE
3385 rinfog(14) = 0.0d00
3386 ENDIF
3387 ENDIF
3388C ==============================
3389C NULL PIVOTS AND RANK-REVEALING
3390C ==============================
3391 IF(keep(110) .EQ. 1) THEN
3392C -- make available to users the local number of null pivots detected
3393C -- with ICNTL(24) = 1.
3394 id%INFO(18) = keep(109)
3395 CALL mpi_allreduce( keep(109), keep(112), 1, mpi_integer,
3396 & mpi_sum, id%COMM, ierr)
3397 ELSE
3398 id%INFO(18) = 0
3399 keep(109) = 0
3400 keep(112) = 0
3401 ENDIF
3402 IF (id%MYID.EQ.master) THEN
3403C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(56).
3404 infog(28)=keep(112)+keep(17)
3405 ENDIF
3406C ========================================
3407C We now provide to the host the part of
3408C PIVNUL_LIST resulting from the processing
3409C of the root node and we update id%INFO(18)
3410C on the processor holding the root to
3411C include null pivots relative to the root
3412C ========================================
3413 IF (keep(17) .NE. 0) THEN
3414 IF (id%MYID .EQ. id_root) THEN
3415C Include in id%INFO(18) null pivots resulting
3416C from deficiency on the root. In this way,
3417C the sum of all id%INFO(18) is equal to INFOG(28).
3418 id%INFO(18)=id%INFO(18)+keep(17)
3419 ENDIF
3420 IF (id_root .EQ. master) THEN
3421 IF (id%MYID.EQ.master) THEN
3422C --------------------------------------------------
3423C Null pivots of root have been stored in
3424C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17).
3425C Shift them at the end of the list because:
3426C * this is what we need to build the null space
3427C * we would otherwise overwrite them on the host
3428C when gathering null pivots from other processors
3429C --------------------------------------------------
3430 DO i= keep(17), 1, -1
3431c DO I=1, KEEP(17) % incorrect
3432C when KEEP(112) < KEEP(109)+ KEEP(17)
3433 id%PIVNUL_LIST(keep(112)+i)=id%PIVNUL_LIST(keep(109)+i)
3434 ENDDO
3435 ENDIF
3436 ELSE
3437C ---------------------------------
3438C Null pivots of root must be sent
3439C from the processor responsible of
3440C the root to the host (or MASTER).
3441C ---------------------------------
3442 IF (id%MYID .EQ. id_root) THEN
3443 CALL mpi_send(id%PIVNUL_LIST(keep(109)+1), keep(17),
3444 & mpi_integer, master, zero_piv,
3445 & id%COMM, ierr)
3446 ELSE IF (id%MYID .EQ. master) THEN
3447 CALL mpi_recv(id%PIVNUL_LIST(keep(112)+1), keep(17),
3448 & mpi_integer, id_root, zero_piv,
3449 & id%COMM, status, ierr )
3450 ENDIF
3451 ENDIF
3452 ENDIF
3453C ===========================
3454C gather zero pivots indices
3455C on the host node
3456C ===========================
3457C In case of non working host, the following code also
3458C works considering that KEEP(109) is equal to 0 on
3459C the non-working host
3460 IF(keep(110) .EQ. 1) THEN
3461 ALLOCATE(itmp2(id%NPROCS),stat = ierr ) ! deallocated in 490
3462 IF ( ierr .GT. 0 ) THEN
3463 id%INFO(1)=-13
3464 id%INFO(2)=id%NPROCS
3465 END IF
3466 CALL mumps_propinfo( icntl(1), id%INFO(1),
3467 & id%COMM, id%MYID )
3468 IF (id%INFO(1).LT.0) GOTO 490
3469 CALL mpi_gather ( keep(109),1, mpi_integer,
3470 & itmp2(1), 1, mpi_integer,
3471 & master, id%COMM, ierr)
3472 IF(id%MYID .EQ. master) THEN
3473 posbuf = itmp2(1)+1
3474C First null pivot of master is in
3475C position 1 of global list
3476 keep(220)=1
3477 DO i = 1,id%NPROCS-1
3478 CALL mpi_recv(id%PIVNUL_LIST(posbuf), itmp2(i+1),
3479 & mpi_integer,i,
3480 & zero_piv, id%COMM, status, ierr)
3481C Send position POSBUF of first null pivot of proc I
3482C in global list. Will allow to quickly identify during
3483C the solve step if one is concerned by a global position
3484C K, 0 <= K <= INFOG(28).
3485 CALL mpi_send(posbuf, 1, mpi_integer, i, zero_piv,
3486 & id%COMM, ierr)
3487 posbuf = posbuf + itmp2(i+1)
3488 ENDDO
3489 ELSE
3490 CALL mpi_send( id%PIVNUL_LIST(1), keep(109), mpi_integer,
3491 & master,zero_piv, id%COMM, ierr)
3492 CALL mpi_recv( keep(220), 1, mpi_integer, master, zero_piv,
3493 & id%COMM, status, ierr )
3494 ENDIF
3495 ENDIF
3496C =====================================
3497C Statistics relative to min/max pivots
3498C =====================================
3499 CALL mpi_reduce( id%DKEEP(19), rinfog(19), 1,
3500 & mpi_double_precision,
3501 & mpi_min, master, id%COMM, ierr )
3502 CALL mpi_reduce( id%DKEEP(20), rinfog(20), 1,
3503 & mpi_double_precision,
3504 & mpi_min, master, id%COMM, ierr )
3505 CALL mpi_reduce( id%DKEEP(21), rinfog(21), 1,
3506 & mpi_double_precision,
3507 & mpi_max, master, id%COMM, ierr )
3508C =========================================
3509C Centralized number of swaps for pivoting
3510C =========================================
3511 CALL mpi_reduce( id%KEEP8(80), itemp8, 1, mpi_integer8,
3512 & mpi_sum, master, id%COMM, ierr )
3513 IF (id%MYID .EQ. master) THEN
3514 CALL mumps_seti8toi4(itemp8,id%INFOG(48))
3515 ENDIF
3516C ==========================================
3517C Centralized largest increase of panel size
3518C ==========================================
3519 CALL mpi_reduce( id%KEEP(425), id%INFOG(49), 1, mpi_integer,
3520 & mpi_max, master, id%COMM, ierr )
3521C =====================================
3522C Statistics concerning the determinant
3523C =====================================
3524C
3525C 1/ on the host better take into account null pivots if scaling:
3526C
3527C Since null pivots are excluded from the computation
3528C of the determinant, we also exclude the corresponding
3529C scaling entries. Since those entries have already been
3530C taken into account before the factorization, we multiply
3531C the determinant on the host by the scaling values corresponding
3532C to pivots in PIVNUL_LIST.
3533 IF (id%MYID.EQ.master .AND. lscal. and. keep(258).NE.0) THEN
3534 k = min(keep(143), keep(17))
3535 k = max(k, 0)
3536 DO i = 1, keep(112)+ k
3537c DO I = 1, id%INFOG(28) ! all null pivots + singular values
3539 & id%ROWSCA(id%PIVNUL_LIST(i)),
3540 & id%DKEEP(6), keep(259))
3542 & id%COLSCA(id%PIVNUL_LIST(i)),
3543 & id%DKEEP(6), keep(259))
3544 ENDDO
3545 ENDIF
3546C
3547C 2/ Swap signs depending on pivoting on each proc
3548C
3549 IF (keep(258).NE.0) THEN
3550C Return the determinant in INFOG(34) and RINFOG(12/13)
3551C In case of real arithmetic, initialize
3552C RINFOG(13) to 0 (no imaginary part and
3553C not touched by DMUMPS_DETER_REDUCTION)
3554 rinfog(13)=0.0d0
3555 IF (keep(260).EQ.-1) THEN ! Local to each processor
3556 id%DKEEP(6)=-id%DKEEP(6)
3557 ENDIF
3558C
3559C 3/ Perform a reduction
3560C
3562 & id%COMM, id%DKEEP(6), keep(259),
3563 & rinfog(12), infog(34), id%NPROCS)
3564C
3565C 4/ Swap sign if needed
3566C
3567 IF (id%KEEP(50).EQ.0 .AND. id%MYID.EQ. master) THEN
3568C Modify sign of determinant according
3569C to unsymmetric permutation (max-trans
3570C of max-weighted matching)
3571 IF (id%KEEP(23).NE.0) THEN
3573 & rinfog(12), id%N,
3574C id%STEP: used as workspace of size N still
3575C allocated on master; restored on exit
3576 & id%STEP(1),
3577 & id%UNS_PERM(1) )
3578C Remark that RINFOG(12/13) are modified only
3579C on the host but will be broadcast on exit
3580C from MUMPS (see DMUMPS_DRIVER)
3581 ENDIF
3582 ENDIF
3583 ENDIF
3584 490 IF (allocated(itmp2)) DEALLOCATE(itmp2)
3585 IF ( prokg ) THEN
3586C -----------------------------
3587C PRINT STATISTICS (on master)
3588C -----------------------------
3589 WRITE(mpg,99984) rinfog(2),rinfog(3),keep(52),
3590 & id%KEEP8(148),
3591 & id%KEEP8(128), infog(11), id%KEEP8(110)
3592 IF (id%KEEP(50) == 1 .OR. id%KEEP(50) == 2) THEN
3593 ! negative pivots
3594 WRITE(mpg, 99987) infog(12)
3595 END IF
3596 IF (id%KEEP(50) == 0) THEN
3597 ! off diag pivots
3598 WRITE(mpg, 99985) infog(12)
3599 END IF
3600 IF (id%KEEP(50) .NE. 1) THEN
3601 ! delayed pivots
3602 WRITE(mpg, 99982) infog(13)
3603 END IF
3604 IF (keep(97) .NE. 0) THEN
3605 ! tiny pivots
3606 WRITE(mpg, '(A,D16.4)')
3607 & ' Effective static pivoting thresh., CNTL(4) =', seuil
3608 WRITE(mpg, 99986) infog(25)
3609 ENDIF
3610 IF (id%KEEP(50) == 2) THEN
3611 !number of 2x2 pivots in type 1 nodes
3612 WRITE(mpg, 99988) keep(229)
3613 !number of 2x2 pivots in type 2 nodes
3614 WRITE(mpg, 99989) keep(230)
3615 ENDIF
3616 !number of zero pivots
3617 IF (keep(110) .NE.0) THEN
3618 WRITE(mpg, 99991) keep(112)
3619 ENDIF
3620 !Deficiency on root
3621 IF ( keep(19) .ne. 0 )
3622c IF ( KEEP(17) .ne. 0 )
3623 & WRITE(mpg, 99983) keep(17)
3624 !Total deficiency
3625 IF (keep(110).NE.0.OR.keep(19).NE.0)
3626c IF (KEEP(110).NE.0.OR.KEEP(17).NE.0)
3627 & WRITE(mpg, 99992) keep(17)+keep(112)
3628 ! Memory compress
3629 WRITE(mpg, 99981) infog(14)
3630 ! Extra copies due to ip stack in unsym case
3631 ! in core case (or OLD_OOC_PANEL)
3632 IF (id%KEEP8(108) .GT. 0_8) THEN
3633 WRITE(mpg, 99980) id%KEEP8(108)
3634 ENDIF
3635 IF ((keep(60).NE.0) .AND. infog(25).GT.0) THEN
3636 ! Schur on and tiny pivots set in last level
3637 ! before the Schur if KEEP(114)=0
3638 WRITE(mpg, '(A)')
3639 & " ** Warning Static pivoting was necessary"
3640 WRITE(mpg, '(A)')
3641 & " ** to factor interior variables with Schur ON"
3642 ENDIF
3643 IF (keep(258).NE.0) THEN
3644 WRITE(mpg,99978) rinfog(12)
3645 WRITE(mpg,99977) infog(34)
3646 ENDIF
3647 END IF
3648* ==========================================
3649*
3650* End of Factorization Phase
3651*
3652* ==========================================
3653C
3654C Goto 500 is done when
3655C LOAD_INIT
3656C OOC_INIT_FACTO
3657C MUMPS_FDM_INIT
3658#if ! defined(NO_FDM_DESCBAND)
3659C MUMPS_FDBD_INIT
3660#endif
3661#if ! defined(NO_FDM_MAPROW)
3662C MUMPS_FMRD_INIT
3663#endif
3664C are all called.
3665C
3666 500 CONTINUE
3667C Redo free DBLARR (as in end_driver.F)
3668C in case an error occurred after allocating
3669C DBLARR and before freeing it above.
3670 IF (id%KEEP(46).EQ.1 .AND.
3671 & id%KEEP(55).NE.0 .AND.
3672 & id%MYID.EQ.master .AND.
3673 & id%KEEP(52) .EQ. 0) THEN
3674 NULLIFY(id%DBLARR)
3675 ELSE
3676 IF (associated(id%DBLARR)) THEN
3677 DEALLOCATE(id%DBLARR)
3678 NULLIFY(id%DBLARR)
3679 ENDIF
3680 ENDIF
3681#if ! defined(NO_FDM_DESCBAND)
3682 IF (i_am_slave) THEN
3683 CALL mumps_fdbd_end(id%INFO(1)) ! INFO(1): input only
3684 ENDIF
3685#endif
3686#if ! defined(NO_FDM_MAPROW)
3687 IF (i_am_slave) THEN
3688 CALL mumps_fmrd_end(id%INFO(1)) ! INFO(1): input only
3689 ENDIF
3690#endif
3691 IF (i_am_slave) THEN
3692C Terminate BLR module except if it is still needed for solve
3693 IF (
3694 & (
3695 & (keep(486).EQ.2)
3696 & )
3697 & .AND. id%INFO(1).GE.0
3698 & ) THEN
3699C Store pointer to BLR_ARRAY in MUMPS structure
3700C (requires successful factorization otherwise module is freed)
3701 CALL dmumps_blr_mod_to_struc(id%BLRARRAY_ENCODING)
3702 ELSE
3703C INFO(1) positive or negative
3704 CALL dmumps_blr_end_module(id%INFO(1), id%KEEP8, id%KEEP(34))
3705 ENDIF
3706 ENDIF
3707 IF (i_am_slave) THEN
3708 CALL mumps_fdm_end('A')
3709C Terminate BLR module except if it is still needed for solve
3710 IF (
3711 & (
3712 & (keep(486).EQ.2)
3713 & )
3714 & .AND. id%INFO(1).GE.0
3715 & ) THEN
3716 CALL mumps_fdm_mod_to_struc('F', id%FDM_F_ENCODING,
3717 & id%INFO(1))
3718 IF (.NOT. associated(id%FDM_F_ENCODING)) THEN
3719 WRITE(*,*) "Internal error 2 in DMUMPS_FAC_DRIVER"
3720 ENDIF
3721 ELSE
3722 CALL mumps_fdm_end('F')
3723 ENDIF
3724 ENDIF
3725C
3726C Goto 514 is done when an
3727C error occurred in MUMPS_FDM_INIT
3728C or (after FDM_INIT but before
3729C OOC_INIT)
3730C
3731 514 CONTINUE
3732 IF ( i_am_slave ) THEN
3733 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
3734 CALL dmumps_ooc_end_facto(id,ierr)
3735 IF (id%ASSOCIATED_OOC_FILES) THEN
3736 id%ASSOCIATED_OOC_FILES = .false.
3737 ENDIF
3738 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3739 ENDIF
3740 IF (wk_user_provided) THEN
3741C at the end of a phase S is always freed when WK_USER provided
3742 NULLIFY(id%S)
3743 ELSE IF (keep(201).NE.0) THEN
3744C ----------------------------------------
3745C In OOC or if KEEP(201).EQ.-1 we always
3746C free S at end of factorization. As id%S
3747C may be unassociated in case of error
3748C during or before the allocation of id%S,
3749C we only free S when it was associated.
3750C ----------------------------------------
3751 IF (associated(id%S)) DEALLOCATE(id%S)
3752 NULLIFY(id%S) ! in all cases
3753 id%KEEP8(23)=0_8
3754 ENDIF
3755 ELSE ! host not working
3756 IF (wk_user_provided) THEN
3757C at the end of a phase S is always freed when WK_USER provided
3758 NULLIFY(id%S)
3759 ELSE
3760 IF (associated(id%S)) DEALLOCATE(id%S)
3761 NULLIFY(id%S) ! in all cases
3762 id%KEEP8(23)=0_8
3763 END IF
3764 END IF
3765C
3766C Goto 513 is done in case of error where LOAD_INIT was
3767C called but not OOC_INIT_FACTO.
3768 513 CONTINUE
3769 IF ( i_am_slave ) THEN
3770 CALL dmumps_load_end( id%INFO(1), id%NSLAVES, ierr )
3771 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3772 ENDIF
3773 CALL mumps_propinfo( icntl(1), id%INFO(1),
3774 & id%COMM, id%MYID )
3775C
3776C Goto 517 is done when an error occurs when GPU initialization
3777C has been performed but not LOAD_INIT or OOC_INIT_FACTO
3778C
3779 517 CONTINUE
3780C
3781C Goto 530 is done when an error occurs before
3782C the calls to GPU_INIT, LOAD_INIT and OOC_INIT_FACTO
3783 530 CONTINUE
3784C Fwd in facto: free RHS_MUMPS in case
3785C it was allocated.
3786 IF (rhs_mumps_allocated) DEALLOCATE(rhs_mumps)
3787 NULLIFY(rhs_mumps)
3788C
3789 id%KEEP8(26) = keep826_save
3790 RETURN
3791 120 FORMAT(/' Local redistrib: data local/sent =',i16,i16)
3792 125 FORMAT(/' Redistrib: total data local/sent =',i16,i16)
3793 130 FORMAT(//'****** FACTORIZATION STEP ********'/)
3794 160 FORMAT(
3795 & /' Elapsed time to reformat/distribute matrix =',f12.4)
3796 166 FORMAT(' Max difference from 1 after scaling the entries',
3797 & ' for ONE-NORM (option 7/8) =',d9.2)
3798 170 FORMAT(' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3799 & ' Size of internal working array S =',i16/
3800 & ' Size of internal working array IS =',i16/
3801 & ' Minimum (ICNTL(14)=0) size of S =',i16/
3802 & ' Minimum (ICNTL(14)=0) size of IS =',i16/
3803 & ' Real space for original matrix =',i16/
3804 & ' Integer space for original matrix =',i16/
3805 & ' INFO(3) Real space for factors (estimated) =',i16/
3806 & ' INFO(4) Integer space for factors (estim.) =',i16/
3807 & ' Maximum frontal size (estimated) =',i16)
3808 172 FORMAT(' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3809 & ' Number of working processes =',i16/
3810 & ' ICNTL(22) Out-of-core option =',i16/
3811 & ' ICNTL(35) BLR activation (eff. choice) =',i16/
3812 & ' ICNTL(14) Memory relaxation =',i16/
3813 & ' INFOG(3) Real space for factors (estimated)=',i16/
3814 & ' INFOG(4) Integer space for factors (estim.)=',i16/
3815 & ' Maximum frontal size (estimated) =',i16/
3816 & ' Number of nodes in the tree =',i16/
3817 & ' ICNTL(23) Memory allowed (value on host) =',i16/
3818 & ' Sum over all procs =',i16/
3819 & ' Memory provided by user, sum of LWK_USER =',i16/
3820 & ' Effective threshold for pivoting, CNTL(1) =',d16.4)
3821 173 FORMAT( ' Perform forward during facto, NRHS =',i16)
3822 174 FORMAT( ' KEEP(268) Relaxed pivoting effective value =',i16)
3823 180 FORMAT(/' Elapsed time for factorization =',f12.4)
3824 185 FORMAT(/' Elapsed time for (failed) factorization =',f12.4)
3825 187 FORMAT( ' Elapsed time under L0 =',f12.4)
3826 188 FORMAT( ' Elapsed time under L0 (avg/max across MPI) =',
3827 & f12.4,f12.4)
3828 189 FORMAT(/' Flops under L0 layer =',1pd12.3)
3829 190 FORMAT(/' Flops under L0 Layer (avg/max across MPI) =',
3830 & 1pd12.3,1pd12.3)
383199977 FORMAT( ' INFOG(34) Determinant (base 2 exponent) =',i16)
383299978 FORMAT( ' RINFOG(12) Determinant (real part) =',f16.8)
383399980 FORMAT( ' Extra copies due to In-Place stacking =',i16)
383499981 FORMAT( ' INFOG(14) Number of memory compress =',i16)
383599982 FORMAT( ' INFOG(13) Number of delayed pivots =',i16)
383699983 FORMAT( ' Nb of singularities detected by ICNTL(56) =',i16)
383799991 FORMAT( ' Nb of null pivots detected by ICNTL(24) =',i16)
383899992 FORMAT( ' INFOG(28) Estimated deficiency =',i16)
383999984 FORMAT(/'Leaving factorization with ...'/
3840 & ' RINFOG(2) Operations in node assembly =',1pd10.3/
3841 & ' ------(3) Operations in node elimination =',1pd10.3/
3842 & ' ICNTL (8) Scaling effectively used =',i16/
3843 & ' INFOG (9) Real space for factors =',i16/
3844 & ' INFOG(10) Integer space for factors =',i16/
3845 & ' INFOG(11) Maximum front size =',i16/
3846 & ' INFOG(29) Number of entries in factors =',i16)
384799985 FORMAT( ' INFOG(12) Number of off diagonal pivots =',i16)
384899986 FORMAT( ' INFOG(25) Number of tiny pivots(static) =',i16)
384999987 FORMAT( ' INFOG(12) Number of negative pivots =',i16)
385099988 FORMAT( ' Number of 2x2 pivots in type 1 nodes =',i16)
385199989 FORMAT( ' Number of 2x2 pivots in type 2 nodes =',i16)
#define mumps_abort
Definition VE_Metis.h:25
subroutine mumps_propinfo(icntl, info, comm, id)
subroutine dmumps_facto_recv_arrowhd2(n, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords, a, la, root, procnode_steps, slavef, perm, frere_steps, step, info1, info2)
subroutine dmumps_facto_send_arrowheads(n, nz, aspk, irn, icn, perm, lscal, colsca, rowsca, myid, slavef, procnode_steps, nbrecords, lp, comm, root, keep, keep8, fils, rg2l, intarr, lintarr, dblarr, ldblarr, ptraiw, ptrarw, frere_steps, step, a, la, istep_to_iniv2, i_am_cand, candidates)
subroutine dmumps_free_id_data_modules(id_fdm_f_encoding, id_blrarray_encoding, keep8, k34)
subroutine dmumps_fac_b(n, s_is_pointers, la, liw, sym_perm, na, lna, ne_steps, nfsiz, fils, step, frere, dad, cand, istep_to_iniv2, tab_pos_in_pere, ptrar, ldptrar, ptrist, ptlust_s, ptrfac, iw1, iw2, itloc, rhs_mumps, pool, lpool, cntl1, icntl, info, rinfo, keep, keep8, procnode_steps, slavef, comm_nodes, myid, myid_nodes, bufr, lbufr, lbufr_bytes, dmumps_lbuf, intarr, dblarr, root, nelt, frtptr, frtelt, comm_load, ass_irecv, seuil, seuil_ldlt_niv2, mem_distrib, dkeep, pivnul_list, lpn_list, lrgroups, ipool_b_l0_omp, lpool_b_l0_omp, ipool_a_l0_omp, lpool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, l0_omp_mapping, ll0_omp_mapping, thread_la, l0_omp_factors, ll0_omp_factors, i4_l0_omp, nbstats_i4, nbcols_i4, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition dfac_b.F:30
subroutine dmumps_deter_square(deter, nexp)
subroutine dmumps_updatedeter_scaling(piv, deter, nexp)
subroutine dmumps_deter_sign_perm(deter, n, visited, perm)
subroutine dmumps_deter_reduction(comm, deter_in, nexp_in, deter_out, nexp_out, nprocs)
subroutine dmumps_deter_scaling_inverse(deter, nexp)
subroutine dmumps_maxelt_size(eltptr, nelt, maxelt_size)
subroutine dmumps_elt_distrib(n, nelt, na_elt8, comm, myid, slavef, ielptr_loc8, relptr_loc8, eltvar_loc, eltval_loc, lintarr, ldblarr, keep, keep8, maxelt_size, frtptr, frtelt, a, la, fils, id, root)
subroutine dmumps_redistribution(n, nz_loc8, id, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords a, la, root, procnode_steps, slavef, perm, step, icntl, info, nsend8, nlocal8, istep_to_iniv2, candidates)
subroutine dmumps_print_allocated_mem(prok, prokg, print_maxavg, mp, mpg, info16, infog18, infog19, nslaves, irank, keep)
subroutine dmumps_avgmax_stat8(prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine dmumps_extract_schur_redrhs(id)
subroutine dmumps_anorminf(id, anorminf, lscal, eff_size_schur)
subroutine dmumps_fac_a(n, nz8, nsca, aspk, irn, icn, colsca, rowsca, wk, lwk8, wk_real, lwk_real, icntl, info)
subroutine dmumps_simscaleabs(irn_loc, jcn_loc, a_loc, nz_loc, m, n, numprocs, myid, comm, rpartvec, cpartvec, rsndrcvsz, csndrcvsz, registre, iwrk, iwrksz, intsz, resz, op, rowsca, colsca, wrkrc, iszwrkrc, sym, nb1, nb2, nb3, eps, onenormerr, infnormerr)
end diagonal values have been computed in the(sparse) matrix id.SOL
subroutine dmumps_max_mem(keep, keep8, myid, n, nelt, na, lna, nnz8, na_elt8, nslaves, memory_mbytes, eff, ooc_strat, blr_strat, perlu_on, memory_bytes, blr_case, sum_of_peaks, mem_eff_allocated, under_l0_omp, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition dtools.F:638
subroutine dmumps_set_blrstrat_and_maxs_k8(maxs_base8, maxs_base_relaxed8, blr_strat, keep, keep8)
Definition dtools.F:1165
subroutine dmumps_mem_allowed_set_maxs(maxs, blr_strat, ooc_strat, maxs_estim_relaxed8, keep, keep8, myid, n, nelt, na, lna, nslaves, icntl38, icntl39, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition dtools.F:1250
subroutine dmumps_mem_allowed_set_k75(maxs, myid, under_l0_omp, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition dtools.F:1432
subroutine dmumps_l0_compute_peak_allowed(myid, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition dtools.F:1479
subroutine dmumps_init_root_fac(n, root, fils, iroot, keep, info)
subroutine mpi_comm_split(comm, color, key, comm2, ierr)
Definition mpi.f:272
subroutine mpi_allreduce(sendbuf, recvbuf, cnt, datatype, operation, comm, ierr)
Definition mpi.f:103
subroutine mpi_comm_size(comm, size, ierr)
Definition mpi.f:263
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)
Definition mpi.f:205
subroutine mpi_gather(sendbuf, cnt, datatype, recvbuf, reccnt, rectype, root, comm, ierr)
Definition mpi.f:56
subroutine mpi_comm_free(comm, ierr)
Definition mpi.f:238
subroutine mpi_comm_rank(comm, rank, ierr)
Definition mpi.f:254
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine, public dmumps_buf_deall_small_buf(ierr)
subroutine, public dmumps_buf_alloc_small_buf(size, ierr)
subroutine, public dmumps_buf_deall_max_array()
subroutine, public dmumps_buf_max_array_minsize(nfs4father, ierr)
subroutine, public dmumps_buf_dist_irecv_size(dmumps_lbufr_bytes)
subroutine, public dmumps_buf_ini_myid(myid)
subroutine, public dmumps_init_l0_omp_factors(id_l0_omp_factors)
subroutine, public dmumps_free_l0_omp_factors(id_l0_omp_factors)
subroutine, public dmumps_load_end(info1, nslaves, ierr)
subroutine, public dmumps_load_set_inicost(cost_subtree_arg, k64, dk15, k375, maxs)
subroutine, public dmumps_load_init(id, memory_md_arg, maxs)
subroutine, public dmumps_load_update(check_flops, process_bande, inc_load, keep, keep8)
subroutine, public dmumps_blr_mod_to_struc(id_blrarray_encoding)
subroutine, public dmumps_blr_init_module(initial_size, info)
subroutine, public dmumps_blr_end_module(info1, keep8, k34, lrsolve_act_opt)
subroutine saveandwrite_gains(local, k489, dkeep, n, icntl36, depth, bcksz, nassmin, nfrontmin, sym, k486, k472, k475, k478, k480, k481, k483, k484, k8110, k849, nbtreenodes, nprocs, mpg, prokg)
Definition dlr_stats.F:578
subroutine init_stats_global(id)
Definition dlr_stats.F:344
subroutine, public dmumps_ooc_init_facto(id, maxs)
Definition dmumps_ooc.F:114
subroutine dmumps_ooc_end_facto(id, ierr)
Definition dmumps_ooc.F:459
subroutine dmumps_ooc_clean_pending(ierr)
Definition dmumps_ooc.F:446
integer used
Definition dmumps_ooc.F:20
subroutine dmumps_clean_ooc_data(id, ierr)
Definition dmumps_ooc.F:568
subroutine, public mumps_fdbd_init(initial_size, info)
subroutine, public mumps_fdbd_end(info1)
subroutine, public mumps_fmrd_init(initial_size, info)
subroutine, public mumps_fmrd_end(info1)
subroutine, public mumps_fdm_mod_to_struc(what, id_fdm_encoding, info)
subroutine, public mumps_fdm_init(what, initial_size, info)
subroutine, public mumps_fdm_end(what)
subroutine mumps_secfin(t)
subroutine mumps_seti8toi4(i8, i)
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)
subroutine mumps_set_ierror(size8, ierror)
subroutine mumps_mem_centralize(myid, comm, info, infog, irank)
subroutine mumps_secdeb(t)
subroutine mumps_npiv_critical_path(n, nsteps, step, frere, fils, na, lna, ne, maxnpivtree)

◆ dmumps_print_allocated_mem()

subroutine dmumps_print_allocated_mem ( logical, intent(in) prok,
logical, intent(in) prokg,
logical, intent(in) print_maxavg,
integer, intent(in) mp,
integer, intent(in) mpg,
integer, intent(in) info16,
integer, intent(in) infog18,
integer, intent(in) infog19,
integer, intent(in) nslaves,
integer, intent(in) irank,
integer, dimension(500), intent(in) keep )

Definition at line 3854 of file dfac_driver.F.

3856 IMPLICIT NONE
3857C
3858C Purpose:
3859C =======
3860C Print memory allocated during factorization
3861C - called at beginning of factorization in full-rank
3862C - called at end of factorization in low-rank (because
3863C of dynamic allocations)
3864C
3865 LOGICAL, INTENT(IN) :: PROK, PROKG, PRINT_MAXAVG
3866 INTEGER, INTENT(IN) :: MP, MPG, INFO16, INFOG18, INFOG19
3867 INTEGER, INTENT(IN) :: IRANK, NSLAVES
3868 INTEGER, INTENT(IN) :: KEEP(500)
3869C
3870 IF ( prokg ) THEN
3871 IF (print_maxavg) THEN
3872 WRITE( mpg,'(A,I12) ')
3873 & ' ** Memory allocated, max in Mbytes (INFOG(18)):',
3874 & infog18
3875 ENDIF
3876 WRITE( mpg,'(/A,I12) ')
3877 & ' ** Memory allocated, total in Mbytes (INFOG(19)):',
3878 & infog19
3879 END IF
3880 RETURN