OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
cfac_driver.F File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine cmumps_fac_driver (id)
subroutine cmumps_print_allocated_mem (prok, prokg, print_maxavg, mp, mpg, info16, infog18, infog19, nslaves, irank, keep)
subroutine cmumps_avgmax_stat8 (prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine cmumps_extract_schur_redrhs (id)

Function/Subroutine Documentation

◆ cmumps_avgmax_stat8()

subroutine cmumps_avgmax_stat8 ( logical, intent(in) prokg,
integer, intent(in) mpg,
integer(8), intent(in) val,
integer, intent(in) nslaves,
logical, intent(in) print_maxavg,
integer, intent(in) comm,
character*48 msg )

Definition at line 3879 of file cfac_driver.F.

3881 IMPLICIT NONE
3882 include 'mpif.h'
3883 LOGICAL, intent(in) :: PROKG
3884 INTEGER, intent(in) :: MPG
3885 INTEGER(8), intent(in) :: VAL
3886 INTEGER, intent(in) :: NSLAVES
3887 LOGICAL, intent(in) :: PRINT_MAXAVG
3888 INTEGER, intent(in) :: COMM
3889 CHARACTER*48 MSG
3890C Local
3891 INTEGER(8) MAX_VAL
3892 INTEGER IERR, MASTER
3893 REAL LOC_VAL, AVG_VAL
3894 parameter(master=0)
3895C
3896 CALL mumps_reducei8( val, max_val, mpi_max, master, comm)
3897 loc_val = real(val)/real(nslaves)
3898 CALL mpi_reduce( loc_val, avg_val, 1, mpi_real,
3899 & mpi_sum, master, comm, ierr )
3900 IF (prokg) THEN
3901 IF (print_maxavg) THEN
3902 WRITE(mpg,100) " Average", msg, int(avg_val,8)
3903 ELSE
3904 WRITE(mpg,110) msg, max_val
3905 ENDIF
3906 ENDIF
3907 RETURN
3908 100 FORMAT(a8,a48,i18)
3909 110 FORMAT(a48,i18)
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
Definition mpi.f:120
subroutine mumps_reducei8(in, out, mpi_op, root, comm)

◆ cmumps_extract_schur_redrhs()

subroutine cmumps_extract_schur_redrhs ( type(cmumps_struc) id)

Definition at line 3912 of file cfac_driver.F.

3914 IMPLICIT NONE
3915C
3916C Purpose
3917C =======
3918C
3919C Extract the Schur and possibly also the reduced right-hand side
3920C (if Fwd in facto) from the processor working on Schur and copy
3921C it into the user datastructures id%SCHUR and id%REDRHS on the host.
3922C This routine assumes that the integer list of the Schur has not
3923C been permuted and still corresponds to LISTVAR_SCHUR.
3924C
3925C If the Schur is centralized, the master of the Schur holds the
3926C Schur and possibly also the reduced right-hand side.
3927C If the Schur is distribued (already built in user's datastructure),
3928C then the master of the Schur may hold the reduced right-hand side,
3929C in which case it is available in root%RHS_CNTR_MASTER_ROOT.
3930C
3931 TYPE(CMUMPS_STRUC) :: id
3932C
3933C Local variables
3934C ===============
3935C
3936 include 'mpif.h'
3937 include 'mumps_tags.h'
3938 include 'mumps_headers.h'
3939 INTEGER :: STATUS(MPI_STATUS_SIZE)
3940 INTEGER :: IERR
3941 INTEGER, PARAMETER :: MASTER = 0
3942 INTEGER :: ID_SCHUR, SIZE_SCHUR, LD_SCHUR, IB, BL4
3943 INTEGER(4) :: I4 ! 32-bit even in 64-bit version
3944 INTEGER :: ROW_LENGTH, I
3945 INTEGER(8) :: SURFSCHUR8, BL8, SHIFT8
3946 INTEGER(8) :: ISCHUR_SRC, ISCHUR_DEST, ISCHUR_SYM, ISCHUR_UNS
3947C
3948C External functions
3949C ==================
3950C
3951 INTEGER MUMPS_PROCNODE
3952 EXTERNAL mumps_procnode
3953C Quick return in case factorization did not terminate correctly
3954 IF (id%INFO(1) .LT. 0) RETURN
3955C Quick return if Schur option off
3956 IF (id%KEEP(60) .EQ. 0) RETURN
3957C Get Schur id
3958 id_schur =mumps_procnode(
3959 & id%PROCNODE_STEPS(id%STEP(max(id%KEEP(20),id%KEEP(38)))),
3960 & id%KEEP(199))
3961 IF ( id%KEEP( 46 ) .NE. 1 ) THEN
3962 id_schur = id_schur + 1
3963 END IF
3964C Get size of Schur
3965 IF (id%MYID.EQ.id_schur) THEN
3966 IF (id%KEEP(60).EQ.1) THEN
3967C Sequential Schur
3968 ld_schur =
3969 & id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))+2+id%KEEP(ixsz))
3970 size_schur = ld_schur - id%KEEP(253)
3971 ELSE
3972C Parallel Schur
3973 ld_schur = -999999 ! not used
3974 size_schur = id%root%TOT_ROOT_SIZE
3975 ENDIF
3976 ELSE IF (id%MYID .EQ. master) THEN
3977 size_schur = id%KEEP(116)
3978 ld_schur = -44444 ! Not used
3979 ELSE
3980C Proc is not concerned with Schur, return
3981 RETURN
3982 ENDIF
3983 surfschur8 = int(size_schur,8)*int(size_schur,8)
3984C =================================
3985C Case of parallel Schur: if REDRHS
3986C was requested, obtain it directly
3987C from id%root%RHS_CNTR_MASTER_ROOT
3988C =================================
3989 IF (id%KEEP(60) .GT. 1) THEN
3990 IF (id%KEEP(221).EQ.1 .AND. id%KEEP(252).GT.0) THEN
3991 DO i = 1, id%KEEP(253)
3992 IF (id_schur.EQ.master) THEN ! Necessarily = id%MYID
3993 CALL ccopy(size_schur,
3994 & id%root%RHS_CNTR_MASTER_ROOT((i-1)*size_schur+1), 1,
3995 & id%REDRHS((i-1)*id%LREDRHS+1), 1)
3996 ELSE
3997 IF (id%MYID.EQ.id_schur) THEN
3998C Send
3999 CALL mpi_send(
4000 & id%root%RHS_CNTR_MASTER_ROOT((i-1)*size_schur+1),
4001 & size_schur,
4002 & mpi_complex,
4003 & master, tag_schur,
4004 & id%COMM, ierr )
4005 ELSE ! MYID.EQ.MASTER
4006C Receive
4007 CALL mpi_recv( id%REDRHS((i-1)*id%LREDRHS+1),
4008 & size_schur,
4009 & mpi_complex, id_schur, tag_schur,
4010 & id%COMM, status, ierr )
4011 ENDIF
4012 ENDIF
4013 ENDDO
4014C ------------------------------
4015C In case of parallel Schur, we
4016C free root%RHS_CNTR_MASTER_ROOT
4017C ------------------------------
4018 IF (id%MYID.EQ.id_schur) THEN
4019 DEALLOCATE(id%root%RHS_CNTR_MASTER_ROOT)
4020 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
4021 ENDIF
4022 ENDIF
4023C return because this is all we need to do
4024C in case of parallel Schur complement
4025 RETURN
4026 ENDIF
4027C ============================
4028C Centralized Schur complement
4029C ============================
4030C PTRAST has been freed at the moment of calling this
4031C routine. Schur is available through
4032C PTRFAC(IW( PTLUST_S( STEP(KEEP(20)) ) + 4 +KEEP(IXSZ) ))
4033 IF (id%KEEP(252).EQ.0) THEN
4034C CASE 1 (ORIGINAL CODE):
4035C Schur is contiguous on ID_SCHUR
4036 IF ( id_schur .EQ. master ) THEN ! Necessarily equals id%MYID
4037C ---------------------
4038C Copy Schur complement
4039C ---------------------
4040 CALL cmumps_copyi8size( surfschur8,
4041 & id%S(id%PTRFAC(id%STEP(id%KEEP(20)))),
4042 & id%SCHUR(1) )
4043 ELSE
4044C -----------------------------------------
4045C The processor responsible of the Schur
4046C complement sends it to the host processor
4047C Use blocks to avoid too large messages.
4048C -----------------------------------------
4049 bl8=int(huge(i4)/id%KEEP(35)/10,8)
4050 DO ib=1, int((surfschur8+bl8-1_8) / bl8)
4051 shift8 = int(ib-1,8) * bl8 ! Where to send
4052 bl4 = int(min(bl8,surfschur8-shift8)) ! Size of block
4053 IF ( id%MYID .eq. id_schur ) THEN
4054C Send Schur complement
4055 CALL mpi_send( id%S( shift8 +
4056 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4057 & +4+id%KEEP(ixsz)))),
4058 & bl4,
4059 & mpi_complex,
4060 & master, tag_schur,
4061 & id%COMM, ierr )
4062 ELSE IF ( id%MYID .eq. master ) THEN
4063C Receive Schur complement
4064 CALL mpi_recv( id%SCHUR(1_8 + shift8),
4065 & bl4,
4066 & mpi_complex, id_schur, tag_schur,
4067 & id%COMM, status, ierr )
4068 END IF
4069 ENDDO
4070 END IF
4071 ELSE
4072C CASE 2 (Fwd in facto): Schur is not contiguous on ID_SCHUR,
4073C process it row by row.
4074C
4075C 2.1: We first centralize Schur complement into id%SCHUR
4076 ischur_src = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4077 & +4+id%KEEP(ixsz)))
4078 ischur_dest= 1_8
4079 DO i=1, size_schur
4080 row_length = size_schur
4081 IF (id_schur.EQ.master) THEN ! Necessarily = id%MYID
4082 CALL ccopy(row_length, id%S(ischur_src), 1,
4083 & id%SCHUR(ischur_dest),1)
4084 ELSE
4085 IF (id%MYID.EQ.id_schur) THEN
4086C Send
4087 CALL mpi_send( id%S(ischur_src), row_length,
4088 & mpi_complex,
4089 & master, tag_schur,
4090 & id%COMM, ierr )
4091 ELSE
4092C Recv
4093 CALL mpi_recv( id%SCHUR(ischur_dest),
4094 & row_length,
4095 & mpi_complex, id_schur, tag_schur,
4096 & id%COMM, status, ierr )
4097 ENDIF
4098 ENDIF
4099 ischur_src = ischur_src+int(ld_schur,8)
4100 ischur_dest= ischur_dest+int(size_schur,8)
4101 ENDDO
4102C 2.2: Get REDRHS on host
4103C 2.2.1: Symmetric => REDRHS is available in last KEEP(253)
4104C rows of Schur structure on ID_SCHUR
4105C 2.2.2: Unsymmetric => REDRHS corresponds to last KEEP(253)
4106C columns. However it must be transposed.
4107 IF (id%KEEP(221).EQ.1) THEN ! Implies Fwd in facto
4108 ischur_sym = id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4109 & +4+id%KEEP(ixsz))) + int(size_schur,8) *
4110 & int(ld_schur,8)
4111 ischur_uns =
4112 & id%PTRFAC(id%IS(id%PTLUST_S(id%STEP(id%KEEP(20)))
4113 & +4+id%KEEP(ixsz))) + int(size_schur,8)
4114 ischur_dest = 1_8
4115 DO i = 1, id%KEEP(253)
4116 IF (id_schur .EQ. master) THEN ! necessarily = id%MYID
4117 IF (id%KEEP(50) .EQ. 0) THEN
4118 CALL ccopy(size_schur, id%S(ischur_uns), ld_schur,
4119 & id%REDRHS(ischur_dest), 1)
4120 ELSE
4121 CALL ccopy(size_schur, id%S(ischur_sym), 1,
4122 & id%REDRHS(ischur_dest), 1)
4123 ENDIF
4124 ELSE
4125 IF (id%MYID .NE. master) THEN
4126 IF (id%KEEP(50) .EQ. 0) THEN
4127C Use id%S(ISCHUR_SYM) as temporary contig. workspace
4128C of size SIZE_SCHUR.
4129 CALL ccopy(size_schur, id%S(ischur_uns), ld_schur,
4130 & id%S(ischur_sym), 1)
4131 ENDIF
4132 CALL mpi_send(id%S(ischur_sym), size_schur,
4133 & mpi_complex, master, tag_schur,
4134 & id%COMM, ierr )
4135 ELSE
4136 CALL mpi_recv(id%REDRHS(ischur_dest),
4137 & size_schur, mpi_complex, id_schur, tag_schur,
4138 & id%COMM, status, ierr )
4139 ENDIF
4140 ENDIF
4141 IF (id%KEEP(50).EQ.0) THEN
4142 ischur_uns = ischur_uns + int(ld_schur,8)
4143 ELSE
4144 ischur_sym = ischur_sym + int(ld_schur,8)
4145 ENDIF
4146 ischur_dest = ischur_dest + int(id%LREDRHS,8)
4147 ENDDO
4148 ENDIF
4149 ENDIF
4150 RETURN
subroutine cmumps_copyi8size(n8, src, dest)
Definition ctools.F:1823
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine mpi_recv(buf, cnt, datatype, source, tag, comm, status, ierr)
Definition mpi.f:461
subroutine mpi_send(buf, cnt, datatype, dest, tag, comm, ierr)
Definition mpi.f:480
initmumps id
integer function mumps_procnode(procinfo_inode, k199)

◆ cmumps_fac_driver()

subroutine cmumps_fac_driver ( type(cmumps_struc), target id)

Definition at line 14 of file cfac_driver.F.

15 USE cmumps_buf
16 USE cmumps_load
17 USE cmumps_ooc
26#if ! defined(NO_FDM_DESCBAND)
28#endif
29#if ! defined(NO_FDM_MAPROW)
31#endif
32!$ USE OMP_LIB
33C Derived datatype to pass pointers with implicit interfaces
35 IMPLICIT NONE
36C
37C Purpose
38C =======
39C
40C Performs scaling, sorting in arrowhead, then
41C distributes the matrix, and perform
42C factorization.
43C
44C
45 INTERFACE
46 SUBROUTINE cmumps_anorminf(id, ANORMINF, LSCAL, EFF_SIZE_SCHUR)
48 TYPE (CMUMPS_STRUC), TARGET :: id
49 REAL, INTENT(OUT) :: ANORMINF
50 LOGICAL, INTENT(IN) :: LSCAL
51 INTEGER, INTENT(IN) :: EFF_SIZE_SCHUR
52 END SUBROUTINE cmumps_anorminf
53 SUBROUTINE cmumps_free_id_data_modules(id_FDM_F_ENCODING,
54 & id_BLRARRAY_ENCODING, KEEP8, K34)
55# if defined(MUMPS_F2003)
56 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
57 & id_blrarray_encoding
58 CHARACTER, DIMENSION(:), POINTER, intent(inout) ::
59 & id_fdm_f_encoding
60# else
61 CHARACTER, DIMENSION(:), POINTER :: id_BLRARRAY_ENCODING
62 CHARACTER, DIMENSION(:), POINTER :: id_FDM_F_ENCODING
63# endif
64 INTEGER(8), intent(inout) :: KEEP8(150)
65 INTEGER, intent(in) :: K34
66 END SUBROUTINE cmumps_free_id_data_modules
67 END INTERFACE
68C
69C Parameters
70C ==========
71C
72 TYPE(CMUMPS_STRUC), TARGET :: id
73C
74C MPI
75C ===
76C
77 include 'mpif.h'
78 include 'mumps_tags.h'
79 INTEGER :: STATUS(MPI_STATUS_SIZE)
80 INTEGER :: IERR
81 INTEGER, PARAMETER :: MASTER = 0
82C
83C Local variables
84C ===============
85C
86 include 'mumps_headers.h'
87 INTEGER(8) :: NSEND8, NSEND_TOT8
88 INTEGER(8) :: NLOCAL8, NLOCAL_TOT8
89 INTEGER(4) :: I4
90 INTEGER :: LDPTRAR, NELT_arg, NBRECORDS
91 INTEGER :: ITMP, JTMP
92 INTEGER :: KEEP464COPY, KEEP465COPY
93 REAL :: RATIOK465
94 INTEGER(8) :: KEEP826_SAVE
95 INTEGER(8) :: K67, K68, K70, K74, K75
96 INTEGER(8) ITMP8
97 INTEGER MUMPS_PROCNODE
98 EXTERNAL mumps_procnode
99 INTEGER MP, LP, MPG, allocok
100 LOGICAL PROK, PROKG, LSCAL, LPOK, COMPUTE_ANORMINF
101C Reception buffer
102 INTEGER :: CMUMPS_LBUFR, CMUMPS_LBUFR_BYTES
103 INTEGER(8) :: CMUMPS_LBUFR_BYTES8 ! for intermediate computation
104 INTEGER, ALLOCATABLE, DIMENSION(:) :: BUFR
105C Size of send buffers (in bytes)
106 INTEGER :: CMUMPS_LBUF, CMUMPS_LBUF_INT
107 INTEGER(8) :: CMUMPS_LBUF8 ! for intermediate computation
108C
109 INTEGER PTRIST, PTRWB, MAXELT_SIZE,
110 & itloc, ipool, k28, lpool
111 INTEGER IRANK, ID_ROOT
112 INTEGER KKKK
113 INTEGER(8) :: NZ_locMAX8
114 INTEGER(8) MEMORY_MD_ARG
115 INTEGER(8) MAXS_BASE8, MAXS_BASE_RELAXED8
116 REAL CNTL4, AVG_FLOPS
117 INTEGER MIN_PERLU, MAXIS_ESTIM
118 INTEGER SUM_INFO22_THIS_NODE, MAX_SUM_INFO22_THIS_NODE
119C
120 TYPE (S_IS_POINTERS_T) :: S_IS_POINTERS
121 INTEGER MAXIS
122 INTEGER(8) :: MAXS
123C For S argument to arrowhead routines:
124 INTEGER(8) :: MAXS_ARG
125 COMPLEX, TARGET :: S_DUMMY_ARG(1)
126 COMPLEX, POINTER, DIMENSION(:) :: S_PTR_ARG
127 INTEGER NB_THREADS, NOMP
128 DOUBLE PRECISION TIMEAVG, TIMEMAX,
129 & flopavg, flopmax
130 REAL TMPTIME, TMPFLOP
131 INTEGER NPIV_CRITICAL_PATH, EFF_SIZE_SCHUR
132 DOUBLE PRECISION TIME, TIMEET
133 REAL ZERO, ONE, MONE
134 parameter( zero = 0.0e0, one = 1.0e0, mone = -1.0e0)
135 COMPLEX CZERO
136 parameter( czero = (0.0e0, 0.0e0) )
137 INTEGER PERLU, TOTAL_MBYTES, K231, K232, K233, BLR_STRAT
138 INTEGER, PARAMETER :: IDUMMY = -9999
139 LOGICAL, PARAMETER :: BDUMMY =.false.
140 INTEGER, PARAMETER :: PANEL_TABSIZE = 20
141 INTEGER COLOUR, COMM_FOR_SCALING ! For Simultaneous scaling
142 INTEGER LIWK, LWK_REAL
143 INTEGER(8) :: LWK
144C I_AM_SLAVE: used to determine if proc has the role of a slave
145C WK_USER_PROVIDED is set to true when WK_USER is provided by user
146 LOGICAL I_AM_SLAVE, PERLU_ON, WK_USER_PROVIDED, EARLYT3ROOTINS
147 LOGICAL PRINT_MAXAVG, PRINT_NODEINFO
148 REAL :: ANORMINF, SEUIL, SEUIL_LDLT_NIV2, Thresh_Seuil
149 REAL :: CNTL1, CNTL3, CNTL5, CNTL6, EPS
150 INTEGER N, LPN_LIST,POSBUF
151 INTEGER, DIMENSION (:), ALLOCATABLE :: ITMP2
152 INTEGER I,K
153 INTEGER(8) :: ITEMP8
154 INTEGER :: PARPIV_T1
155 INTEGER FRONTWISE
156C temporary variables for collecting stats from all processors
157 DOUBLE PRECISION :: TMP_MRY_LU_FR
158 DOUBLE PRECISION :: TMP_MRY_LU_LRGAIN
159 DOUBLE PRECISION :: TMP_MRY_CB_FR
160 DOUBLE PRECISION :: TMP_MRY_CB_LRGAIN
161 DOUBLE PRECISION :: TMP_FLOP_LRGAIN
162 DOUBLE PRECISION :: TMP_FLOP_TRSM
163 DOUBLE PRECISION :: TMP_FLOP_PANEL
164 DOUBLE PRECISION :: TMP_FLOP_FRFRONTS
165 DOUBLE PRECISION :: TMP_FLOP_TRSM_FR
166 DOUBLE PRECISION :: TMP_FLOP_TRSM_LR
167 DOUBLE PRECISION :: TMP_FLOP_UPDATE_FR
168 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LR
169 DOUBLE PRECISION :: TMP_FLOP_UPDATE_LRLR3
170 DOUBLE PRECISION :: TMP_FLOP_COMPRESS
171 DOUBLE PRECISION :: TMP_FLOP_DECOMPRESS
172 DOUBLE PRECISION :: TMP_FLOP_MIDBLK_COMPRESS
173 DOUBLE PRECISION :: TMP_FLOP_FRSWAP_COMPRESS
174 DOUBLE PRECISION :: TMP_FLOP_ACCUM_COMPRESS
175 DOUBLE PRECISION :: TMP_FLOP_CB_COMPRESS
176 DOUBLE PRECISION :: TMP_FLOP_CB_DECOMPRESS
177 DOUBLE PRECISION :: TMP_FLOP_FACTO_FR
178 INTEGER :: TMP_CNT_NODES
179 DOUBLE PRECISION :: TMP_TIME_UPDATE
180 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR1
181 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR2
182 DOUBLE PRECISION :: TMP_TIME_UPDATE_LRLR3
183 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRLR
184 DOUBLE PRECISION :: TMP_TIME_UPDATE_FRFR
185 DOUBLE PRECISION :: TMP_TIME_COMPRESS
186 DOUBLE PRECISION :: TMP_TIME_MIDBLK_COMPRESS
187 DOUBLE PRECISION :: TMP_TIME_FRSWAP_COMPRESS
188 DOUBLE PRECISION :: TMP_TIME_CB_COMPRESS
189 DOUBLE PRECISION :: TMP_TIME_PANEL
190 DOUBLE PRECISION :: TMP_TIME_FAC_I
191 DOUBLE PRECISION :: TMP_TIME_FAC_MQ
192 DOUBLE PRECISION :: TMP_TIME_FAC_SQ
193 DOUBLE PRECISION :: TMP_TIME_LRTRSM
194 DOUBLE PRECISION :: TMP_TIME_FRTRSM
195 DOUBLE PRECISION :: TMP_TIME_FRFRONTS
196 DOUBLE PRECISION :: TMP_TIME_LR_MODULE
197 DOUBLE PRECISION :: TMP_TIME_DIAGCOPY
198 DOUBLE PRECISION :: TMP_TIME_DECOMP
199 DOUBLE PRECISION :: TMP_TIME_DECOMP_UCFS
200 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASM1
201 DOUBLE PRECISION :: TMP_TIME_DECOMP_LOCASM2
202 DOUBLE PRECISION :: TMP_TIME_DECOMP_MAPLIG1
203 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2S
204 DOUBLE PRECISION :: TMP_TIME_DECOMP_ASMS2M
205C
206C Workspace.
207C
208 INTEGER, DIMENSION(:), ALLOCATABLE :: IWK
209 COMPLEX, DIMENSION(:), ALLOCATABLE :: WK
210 REAL, DIMENSION(:), ALLOCATABLE :: WK_REAL
211 INTEGER(8), DIMENSION(:), ALLOCATABLE :: IWK8
212 INTEGER, DIMENSION(:), ALLOCATABLE :: BURP
213 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCP
214 INTEGER, DIMENSION(:), ALLOCATABLE :: BURS
215 INTEGER, DIMENSION(:), ALLOCATABLE :: BUCS
216 INTEGER BUREGISTRE(12)
217 INTEGER BUINTSZ, BURESZ, BUJOB
218 INTEGER BUMAXMN, M, SCMYID, SCNPROCS
219 REAL SCONEERR, SCINFERR
220C
221C Parameters arising from the structure
222C =====================================
223C
224 INTEGER, POINTER :: JOB
225* Control parameters: see description in CMUMPSID
226 REAL,DIMENSION(:),POINTER::RINFO, RINFOG
227 REAL,DIMENSION(:),POINTER:: CNTL
228 INTEGER,DIMENSION(:),POINTER:: INFOG, KEEP
229 INTEGER, DIMENSION(:), POINTER :: MYIRN_loc, MYJCN_loc
230 COMPLEX, DIMENSION(:), POINTER :: MYA_loc
231 INTEGER, TARGET :: DUMMYIRN_loc(1), DUMMYJCN_loc(1)
232 COMPLEX, TARGET :: DUMMYA_loc(1)
233 INTEGER,DIMENSION(:),POINTER::ICNTL
234 EXTERNAL mumps_get_pool_length
235 INTEGER MUMPS_GET_POOL_LENGTH
236 INTEGER(8) :: TOTAL_BYTES
237 INTEGER(8) :: I8TMP, LWK_USER_SUM8
238C
239C External references
240C ===================
241 INTEGER numroc
242 EXTERNAL numroc
243 INTEGER:: NWORKING
244 LOGICAL:: MEM_EFF_ALLOCATED
245 INTEGER :: TOTAL_MBYTES_UNDER_L0
246 INTEGER(8):: TOTAL_BYTES_UNDER_L0
247C Fwd in facto:
248 COMPLEX, DIMENSION(:), POINTER :: RHS_MUMPS
249 LOGICAL :: RHS_MUMPS_ALLOCATED
250 INTEGER :: NB_ACTIVE_FRONTS_ESTIM
251 INTEGER :: NB_FRONTS_F_ESTIM
252C
253C
254 job=>id%JOB
255 rinfo=>id%RINFO
256 rinfog=>id%RINFOG
257 cntl=>id%CNTL
258 infog=>id%INFOG
259 keep=>id%KEEP
260 icntl=>id%ICNTL
261 IF (id%KEEP8(29) .NE. 0) THEN
262 myirn_loc=>id%IRN_loc
263 myjcn_loc=>id%JCN_loc
264 mya_loc=>id%A_loc
265 ELSE
266 myirn_loc=>dummyirn_loc
267 myjcn_loc=>dummyjcn_loc
268 mya_loc=>dummya_loc
269 ENDIF
270 n = id%N
271 eps = epsilon( zero )
272C TIMINGS: reset to 0
273 id%DKEEP(92)=0.0e0
274 id%DKEEP(93)=0.0e0
275 id%DKEEP(94)=0.0e0
276 id%DKEEP(97)=0.0e0
277 id%DKEEP(98)=0.0e0
278 id%DKEEP(56)=0.0e0
279C Count of MPI messages: reset to 0
280 id%KEEP(266)=0
281 id%KEEP(267)=0
282C MIN/MAX pivots reset to 0
283 id%DKEEP(19)=huge(0.0e0)
284 id%DKEEP(20)=huge(0.0e0)
285 id%DKEEP(21)=0.0e0
286C Number of symmetric swaps
287 id%KEEP8(80)=0_8
288C Largest increase of internal panel size
289 id%KEEP(425) =0
290C
291 print_maxavg = .NOT.(id%NSLAVES.EQ.1 .AND. keep(46).EQ.1)
292C Print per node informtation only in case ther are several
293C compute nodes (id%KEEP(412): #MPI procs on comupte node)
294 print_nodeinfo = print_maxavg .AND. id%NPROCS .NE. id%KEEP(412)
295C
296C BEGIN CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
297C Data from factorization is now freed asap
298C id%S, id%IS
299 IF (id%KEEP8(24).EQ.0_8) THEN
300C -- deallocate only when not provided/allocated by the user
301 IF (associated(id%S)) THEN
302 DEALLOCATE(id%S)
303 id%KEEP8(23)=0_8
304 NULLIFY(id%S)
305 ENDIF
306 ENDIF
307 IF (associated(id%IS)) THEN
308 DEALLOCATE(id%IS)
309 NULLIFY(id%IS)
310 ENDIF
311C Free BLR factors, if any
312 CALL cmumps_free_id_data_modules(id%FDM_F_ENCODING,
313 & id%BLRARRAY_ENCODING, id%KEEP8(1), id%KEEP(34))
314 IF (associated(id%root%RG2L_ROW))THEN
315 DEALLOCATE(id%root%RG2L_ROW)
316 NULLIFY(id%root%RG2L_ROW)
317 ENDIF
318 IF (associated(id%root%RG2L_COL))THEN
319 DEALLOCATE(id%root%RG2L_COL)
320 NULLIFY(id%root%RG2L_COL)
321 ENDIF
322 IF (associated( id%PTLUST_S )) THEN
323 DEALLOCATE(id%PTLUST_S)
324 NULLIFY(id%PTLUST_S)
325 ENDIF
326 IF (associated(id%PTRFAC)) THEN
327 DEALLOCATE(id%PTRFAC)
328 NULLIFY(id%PTRFAC)
329 END IF
330 IF (associated(id%RHSCOMP)) THEN
331 DEALLOCATE(id%RHSCOMP)
332 NULLIFY(id%RHSCOMP)
333 id%KEEP8(25)=0_8
334 ENDIF
335 IF (associated(id%POSINRHSCOMP_ROW)) THEN
336 DEALLOCATE(id%POSINRHSCOMP_ROW)
337 NULLIFY(id%POSINRHSCOMP_ROW)
338 ENDIF
339 IF (id%POSINRHSCOMP_COL_ALLOC) THEN
340 DEALLOCATE(id%POSINRHSCOMP_COL)
341 NULLIFY(id%POSINRHSCOMP_COL)
342 id%POSINRHSCOMP_COL_ALLOC = .false.
343 ENDIF
344C
345C END CASE OF ALLOCATED DATA FROM PREVIOUS CALLS
346C
347C Related to forward in facto functionality (referred to as "Fwd in facto")
348 NULLIFY(rhs_mumps)
349 rhs_mumps_allocated = .false.
350C -----------------------------------------------------------------------
351C Set WK_USER_PROVIDED to true when workspace WK_USER is provided by user
352C We can accept WK_USER to be provided on only one proc and
353C different values of WK_USER per processor
354C
355 IF (id%KEEP8(24).GT.0_8) THEN
356C We nullify S so that later when we test
357C if (associated(S) we can free space and reallocate it).
358 NULLIFY(id%S)
359 ENDIF
360C
361C -- KEEP8(24) can now then be reset safely
362 wk_user_provided = (id%LWK_USER.NE.0)
363 IF (wk_user_provided) THEN
364 IF (id%LWK_USER.GT.0) THEN
365 id%KEEP8(24) = int(id%LWK_USER,8)
366 ELSE
367 id%KEEP8(24) = -int(id%LWK_USER,8)* 1000000_8
368 ENDIF
369 ELSE
370 id%KEEP8(24) = 0_8
371 ENDIF
372C Compute sum of LWK_USER provided by user
373 lwk_user_sum8 = 0_8
374 CALL mpi_reduce ( id%KEEP8(24), lwk_user_sum8, 1, mpi_integer8,
375 & mpi_sum, master, id%COMM, ierr )
376C
377C KEEP8(26) might be modified
378C (element entry format)
379C but need be restore for
380C future factorisation
381C with different scaling option
382C
383 keep826_save = id%KEEP8(26)
384C In case of loop on factorization with
385C different scaling options, initialize
386C DKEEP(4:5) to 0.
387 id%DKEEP(4)=-1.0e0
388 id%DKEEP(5)=-1.0e0
389C Mapping information used during solve. In case of several facto+solve
390C it has to be recomputed. In case of several solves with the same
391C facto, it is not recomputed.
392 IF (associated(id%IPTR_WORKING)) THEN
393 DEALLOCATE(id%IPTR_WORKING)
394 NULLIFY(id%IPTR_WORKING)
395 END IF
396 IF (associated(id%WORKING)) THEN
397 DEALLOCATE(id%WORKING)
398 NULLIFY(id%WORKING)
399 END IF
400C
401C Units for printing
402C MP: diagnostics
403C LP: errors
404C
405 lp = icntl( 1 )
406 mp = icntl( 2 )
407 mpg = icntl( 3 )
408 lpok = ((lp.GT.0).AND.(id%ICNTL(4).GE.1))
409 prok = ((mp.GT.0).AND.(id%ICNTL(4).GE.2))
410 prokg = ( mpg .GT. 0 .and. id%MYID .eq. master )
411 prokg = (prokg.AND.(id%ICNTL(4).GE.2))
412 IF ( prok ) WRITE( mp, 130 )
413 IF ( prokg ) WRITE( mpg, 130 )
414C -------------------------------------
415C Depending on the type of parallelism,
416C the master can now (soon) potentially
417C have the role of a slave
418C -------------------------------------
419 i_am_slave = ( id%MYID .ne. master .OR.
420 & ( id%MYID .eq. master .AND.
421 & keep(46) .eq. 1 ) )
422C
423C Prepare work for out-of-core
424C
425 IF (id%MYID .EQ. master .AND. keep(201) .NE. -1) THEN
426C Note that if KEEP(201)=-1, then we have decided
427C at analysis phase that factors will not be stored
428C (neither in memory nor on disk). In that case,
429C ICNTL(22) is ignored.
430C -- ICNTL(22) must be set before facto phase
431C (=1 OOC on; =0 OOC off)
432C and cannot be changed for subsequent solve phases.
433 keep(201)=id%ICNTL(22)
434 IF (keep(201) .NE. 0) THEN
435# if defined(OLD_OOC_NOPANEL)
436 keep(201)=2
437# else
438 keep(201)=1
439# endif
440 ENDIF
441 ENDIF
442C ----------------------
443C Broadcast KEEP options
444C defined for facto:
445C ----------------------
446 CALL mpi_bcast( keep(12), 1, mpi_integer,
447 & master, id%COMM, ierr )
448 CALL mpi_bcast( keep(19), 1, mpi_integer,
449 & master, id%COMM, ierr )
450 CALL mpi_bcast( keep(21), 1, mpi_integer,
451 & master, id%COMM, ierr )
452 CALL mpi_bcast( keep(201), 1, mpi_integer,
453 & master, id%COMM, ierr )
454 CALL mpi_bcast( keep(459), 1, mpi_integer,
455 & master, id%COMM, ierr )
456 CALL mpi_bcast( keep(460), 1, mpi_integer,
457 & master, id%COMM, ierr )
458 IF ( keep(459) .GE. panel_tabsize ) THEN
459 IF ( lpok ) THEN
460 WRITE(lp,'(A,I4,A,I3)') " ** WARNING ** KEEP(459)=",keep(459),
461 & " too large, resetting to",panel_tabsize-1
462 ENDIF
463 keep(459) = panel_tabsize - 1
464 ENDIF
465 perlu = keep(12)
466 IF (id%MYID.EQ.master) THEN
467C KEEP(50) case
468C ==============
469C
470C KEEP(50) = 0 : matrix is unsymmetric
471C KEEP(50) /= 0 : matrix is symmetric
472C KEEP(50) = 1 : Ask L L^T on the root. Matrix is PSD.
473C KEEP(50) = 2 : Ask for L U on the root
474C KEEP(50) = 3 ... L D L^T ??
475C
476 cntl1 = id%CNTL(1)
477C ---------------------------------------
478C For symmetric (non general) matrices
479C set (directly) CNTL1 = 0.0
480C ---------------------------------------
481 keep(17)=0
482 IF ( keep(50) .eq. 1 ) THEN
483 IF (cntl1 .ne. zero ) THEN
484 IF ( prokg ) THEN
485 WRITE(mpg,'(A)')
486 & '** Warning : SPD solver called, resetting CNTL(1) to 0.0E0'
487 END IF
488 END IF
489 cntl1 = zero
490 END IF
491C CNTL1 threshold value must be between
492C 0.0 and 1.0 (for SYM=0) and 0.5 (for SYM=1,2)
493 IF (cntl1.GT.one) cntl1=one
494 IF (cntl1.LT.zero) cntl1=zero
495 IF (keep(50).NE.0.AND.cntl1.GT.0.5e0) THEN
496 cntl1 = 0.5e0
497 ENDIF
498 parpiv_t1 = id%KEEP(268)
499 IF (parpiv_t1.EQ.77) THEN
500 parpiv_t1 = 0
501#if defined(__ve__)
502 parpiv_t1 = -2
503#endif
504 ENDIF
505 IF (parpiv_t1.EQ.-3) THEN
506 parpiv_t1 = 0
507 ENDIF
508 IF ((parpiv_t1.LT.-3).OR.(parpiv_t1.GT.1)) THEN
509C out of range values
510 parpiv_t1 =0
511 ENDIF
512C note that KEEP(50).EQ.1 => CNTL1=0.0
513 IF (cntl1.EQ.0.0.OR.(keep(50).eq.1)) parpiv_t1 = 0
514C
515 IF (parpiv_t1.EQ.-2) THEN
516 IF (keep(19).NE.0) THEN
517C switch off PARPIV_T1 if RR activated
518C but do NOT switch off PARPIV_1 with null pivot detection
519 parpiv_t1 = 0
520 ENDIF
521 ENDIF
522 id%KEEP(269) = parpiv_t1
523 ENDIF
524 CALL mpi_bcast(cntl1, 1, mpi_real,
525 & master, id%COMM, ierr)
526 CALL mpi_bcast( keep(269), 1, mpi_integer,
527 & master, id%COMM, ierr )
528 IF (id%MYID.EQ.master) THEN
529C -----------------------------------------------------
530C Decoding of ICNTL(35) for factorization: same as
531C at analysis except that we store a copy of ICNTL(35)
532C in KEEP(486) instead of KEEP(494) and need to check
533C compatibility of KEEP(486) and KEEP(494): If LR was
534C not activated during analysis, it cannot be activated
535C at factorization.
536C ------------------------------------------------------
537 id%KEEP(486) = id%ICNTL(35)
538 IF (id%KEEP(486).EQ.1) THEN
539C -- Automatic BLR option setting
540 id%KEEP(486)= 2
541 ENDIF
542 IF ( id%KEEP(486).EQ.4) id%KEEP(486)=0
543 IF ((id%KEEP(486).LT.0).OR.(id%KEEP(486).GT.4)) THEN
544C Out of range values treated as 0
545 id%KEEP(486) = 0
546 ENDIF
547 IF ((keep(486).NE.0).AND.(keep(494).EQ.0)) THEN
548C To activate BLR during factorization,
549C ICNTL(35) must have been set at analysis.
550 IF (lpok) THEN
551 WRITE(lp,'(A)')
552 & " *** Error with BLR setting "
553 WRITE(lp,'(A)') " *** BLR was not activated during ",
554 & " analysis but is requested during factorization."
555 ENDIF
556 id%INFO(1)=-54
557 id%INFO(2)=0
558 GOTO 105
559 ENDIF
560 keep464copy = id%ICNTL(38)
561 IF (keep464copy.LT.0.OR.keep464copy.GT.1000) THEN
562C Out of range values treated as 1000
563 keep464copy = 1000
564 ENDIF
565 IF (id%KEEP(461).LT.1) THEN
566 id%KEEP(461) = 10
567 ENDIF
568 keep465copy=0
569 IF (id%ICNTL(36).EQ.1.OR.id%ICNTL(36).EQ.3) THEN
570 IF (cntl1.EQ.zero .OR. keep(468).LE.1) THEN
571 keep(475) = 3
572 ELSE IF ( (keep(269).GT.0).OR. (keep(269).EQ.-2)) THEN
573 keep(475) = 2
574 ELSE IF (keep(468).EQ.2) THEN
575 keep(475) = 2
576 ELSE
577 keep(475) = 1
578 ENDIF
579 ELSE
580 keep(475) = 0
581 ENDIF
582 keep(481)=0
583 IF (id%ICNTL(36).LT.0 .OR. id%ICNTL(36).GE.2) THEN
584C Only options 1 and 2 are allowed
585 keep(475) = 0
586 ENDIF
587C K489 is set according to ICNTL(37)
588 IF (id%ICNTL(37).EQ.0.OR.id%ICNTL(37).EQ.1) THEN
589 keep(489) = id%ICNTL(37)
590 ELSE
591C Other values treated as zero
592 keep(489) = 0
593 ENDIF
594 IF (keep(79).GE.1) THEN
595C CompressCB incompatible with type4,5,6 nodes
596 keep(489)=0
597 ENDIF
598 keep(489)=0
599C id%KEEP(476) \in [1,100]
600 IF ((id%KEEP(476).GT.100).OR.(id%KEEP(476).LT.1)) THEN
601 id%KEEP(476)= 50
602 ENDIF
603C id%KEEP(477) \in [1,100]
604 IF ((id%KEEP(477).GT.100).OR.(id%KEEP(477).LT.1)) THEN
605 id%KEEP(477)= 100
606 ENDIF
607C id%KEEP(483) \in [1,100]
608 IF ((id%KEEP(483).GT.100).OR.(id%KEEP(483).LT.1)) THEN
609 id%KEEP(483)= 50
610 ENDIF
611C id%KEEP(484) \in [1,100]
612 IF ((id%KEEP(484).GT.100).OR.(id%KEEP(484).LT.1)) THEN
613 id%KEEP(484)= 50
614 ENDIF
615C id%KEEP(480)=0,2,3,4,5,6
616 IF ((id%KEEP(480).GT.6).OR.(id%KEEP(480).LT.0)
617 & .OR.(id%KEEP(480).EQ.1)) THEN
618 id%KEEP(480)=0
619 ENDIF
620C id%KEEP(473)=0 or 1
621 IF ((id%KEEP(473).NE.0).AND.(id%KEEP(473).NE.1)) THEN
622 id%KEEP(473)=0
623 ENDIF
624C id%KEEP(474)=0,1,2,3
625 IF ((id%KEEP(474).GT.3).OR.(id%KEEP(474).LT.0)) THEN
626 id%KEEP(474)=0
627 ENDIF
628C id%KEEP(479)>0
629 IF (id%KEEP(479).LE.0) THEN
630 id%KEEP(479)=1
631 ENDIF
632 IF (id%KEEP(474).NE.0.AND.id%KEEP(480).EQ.0) THEN
633 id%KEEP(474) = 0
634 ENDIF
635 IF (id%KEEP(478).NE.0.AND.id%KEEP(480).LT.4) THEN
636 id%KEEP(478) = 0
637 ENDIF
638 IF (id%KEEP(480).GE.5 .OR.
639 & (id%KEEP(480).NE.0.AND.id%KEEP(474).EQ.3)) THEN
640 IF (id%KEEP(475).LT.2) THEN
641C Reset to 3 if 5 or to 4 if 6
642 id%KEEP(480) = id%KEEP(480) - 2
643 write(*,*) ' Resetting KEEP(480) to ', id%KEEP(480)
644 ENDIF
645 ENDIF
646 105 CONTINUE
647 ENDIF ! id%MYID .EQ. MASTER
648 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
649 & id%COMM, id%MYID )
650C
651 IF (id%INFO(1).LT.0) GOTO 530
652 CALL mpi_bcast( keep(473), 14, mpi_integer,
653 & master, id%COMM, ierr )
654 IF (keep(486).NE.0) THEN
655 CALL mpi_bcast( keep(489), 1, mpi_integer,
656 & master, id%COMM, ierr )
657 CALL mpi_bcast( keep464copy, 1, mpi_integer,
658 & master, id%COMM, ierr )
659 CALL mpi_bcast( keep465copy, 1, mpi_integer,
660 & master, id%COMM, ierr )
661 ENDIF
662 IF (id%MYID.EQ.master) THEN
663 IF (keep(217).GT.2.OR.keep(217).LT.0) THEN
664 keep(217)=0
665 ENDIF
666 keep(214)=keep(217)
667 IF (keep(214).EQ.0) THEN
668 IF (keep(201).NE.0) THEN ! OOC or no factors
669 keep(214)=1
670 ELSE
671 keep(214)=2
672 ENDIF
673 IF (keep(486).EQ.2) THEN
674 keep(214)=1
675 ENDIF
676 ENDIF
677 ENDIF
678 CALL mpi_bcast( keep(214), 1, mpi_integer,
679 & master, id%COMM, ierr )
680 IF (keep(201).NE.0) THEN
681C -- Low Level I/O strategy
682 CALL mpi_bcast( keep(99), 1, mpi_integer,
683 & master, id%COMM, ierr )
684 CALL mpi_bcast( keep(205), 1, mpi_integer,
685 & master, id%COMM, ierr )
686 CALL mpi_bcast( keep(211), 1, mpi_integer,
687 & master, id%COMM, ierr )
688 ENDIF
689C Fwd in facto: explicitly forbid
690C sparse RHS and A-1 computation
691 IF (id%KEEP(252).EQ.1 .AND. id%MYID.EQ.master) THEN
692 IF (id%ICNTL(20).EQ.1) THEN ! out-of-range => 0
693C NB: in doc ICNTL(20) only accessed during solve
694C In practice, will have failed earlier if RHS not allocated.
695C Still it looks safer to keep this test.
696 id%INFO(1)=-43
697 id%INFO(2)=20
698 IF (lpok) WRITE(lp,'(A)')
699 & ' ERROR: Sparse RHS is incompatible with forward',
700 & ' performed during factorization (ICNTL(32)=1)'
701 ELSE IF (id%ICNTL(30).NE.0) THEN ! out-of-range => 1
702 id%INFO(1)=-43
703 id%INFO(2)=30
704 IF (lpok) WRITE(lp,'(A)')
705 & ' ERROR: A-1 functionality incompatible with forward',
706 & ' performed during factorization (ICNTL(32)=1)'
707 ELSE IF (id%ICNTL(9) .NE. 1) THEN
708 id%INFO(1)=-43
709 id%INFO(2)=9
710 IF (lpok) WRITE(lp,'(A)')
711 & .NE.' ERROR: Transpose system (ICNTL(9)0) not ',
712 & ' compatible with forward performed during',
713 & ' factorization (ICNTL(32)=1)'
714 ENDIF
715 ENDIF
716 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
717 & id%COMM, id%MYID )
718C
719 IF (id%INFO(1).LT.0) GOTO 530
720C
721C The memory allowed is given by ICNTL(23) in Mbytes
722C 0 means that nothing is provided.
723C Save memory available, ICNTL(23) in KEEP8(4)
724C
725 IF ( icntl(23) .GT. 0 ) THEN
726 itmp = 1
727 ELSE
728 itmp = 0
729 ENDIF
730 CALL mpi_allreduce( itmp, jtmp, 1, mpi_integer,
731 & mpi_sum, id%COMM, ierr)
732 IF ( id%MYID.EQ.master ) THEN
733C Negative values considered 0
734 itmp = max(icntl(23),0)
735 END IF
736 CALL mpi_bcast( itmp, 1, mpi_integer,
737 & master, id%COMM, ierr )
738C JTMP: nb of procs with nonzero ICNTL(23)
739C ITMP: value of ICNTL(23) on master
740 IF ( itmp .GT. 0 .AND. jtmp .EQ. 1 ) THEN
741C ICNTL(23)>0 only on master
742 ELSE
743C Local values of ICNTL(23) are used, note that
744C they could all be zeros
745 itmp = icntl(23)
746 ENDIF
747C
748 itmp8 = int(itmp, 8)
749 id%KEEP8(4) = itmp8 * 1000000_8 ! convert to nb of bytes
750C Compute \sum of memories allowed
751 CALL mpi_reduce( id%KEEP8(4), itmp8, 1, mpi_integer8,
752 & mpi_sum, master, id%COMM, ierr )
753 itmp8 = itmp8 / 1000000_8 ! Use to print \sum_{ICNTL(23)}
754 IF ( prokg ) THEN
755 nworking = id%NSLAVES
756 WRITE( mpg, 172 ) nworking, id%ICNTL(22), keep(486),
757 & keep(12),
758 & id%KEEP8(111), keep(126), keep(127), keep(28),
759 & id%KEEP8(4)/1000000_8, itmp8, lwk_user_sum8, cntl1
760 IF (keep(252).GT.0)
761 & WRITE(mpg,173) keep(253)
762 IF (keep(269).NE.0)
763 & WRITE(mpg,174) keep(269)
764 ENDIF
765 IF (keep(201).LE.0) THEN
766C In-core version or no factors
767 keep(ixsz)=xsize_ic
768 ELSE IF (keep(201).EQ.2) THEN
769C OOC version, no panels
770 keep(ixsz)=xsize_ooc_nopanel
771 ELSE IF (keep(201).EQ.1) THEN
772C Panel versions:
773 IF (keep(50).EQ.0) THEN
774 keep(ixsz)=xsize_ooc_unsym
775 ELSE
776 keep(ixsz)=xsize_ooc_sym
777 ENDIF
778 ENDIF
779 IF ( keep(486) .NE. 0 ) THEN !LR is activated
780C Stats initialization for LR
782 END IF
783C
784* **********************************
785* Begin intializations regarding the
786* computation of the determinant
787* **********************************
788 IF (id%MYID.EQ.master) keep(258)=icntl(33)
789 CALL mpi_bcast(keep(258), 1, mpi_integer,
790 & master, id%COMM, ierr)
791 IF (keep(258) .NE. 0) THEN
792 keep(259) = 0 ! Initial exponent of the local determinant
793 keep(260) = 1 ! Number of permutations
794 id%DKEEP(6) = 1.0e0 ! real part of the local determinant
795 id%DKEEP(7) = 0.0e0 ! imaginary part of the local determinant
796 ENDIF
797* ********************************
798* End intializations regarding the
799* computation of the determinant
800* ********************************
801C
802* **********************
803* Begin of Scaling phase
804* **********************
805C
806C SCALING MANAGEMENT
807C * Options 1, 3, 4 centralized only
808C
809C * Options 7, 8 : also works for distributed matrix
810C
811C At this point, we have the scaling arrays allocated
812C on the master. They have been allocated on the master
813C inside the main MUMPS driver.
814C
815 CALL mpi_bcast(keep(52), 1, mpi_integer,
816 & master, id%COMM, ierr)
817 lscal = ((keep(52) .GT. 0) .AND. (keep(52) .LE. 8))
818 IF (lscal) THEN
819C
820 IF ( id%MYID.EQ.master ) THEN
821 CALL mumps_secdeb(timeet)
822 ENDIF
823C -----------------------
824C Retrieve parameters for
825C simultaneous scaling
826C -----------------------
827 IF (keep(52) .EQ. 7) THEN
828C -- Cheap setting of SIMSCALING (it is the default in 4.8.4)
829 k231= keep(231)
830 k232= keep(232)
831 k233= keep(233)
832 ELSEIF (keep(52) .EQ. 8) THEN
833C -- More expensive setting of SIMSCALING (it was the default in 4.8.1,2,3)
834 k231= keep(239)
835 k232= keep(240)
836 k233= keep(241)
837 ENDIF
838 CALL mpi_bcast(id%DKEEP(3),1,mpi_real,master,
839 & id%COMM,ierr)
840C
841 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
842 & keep(54).NE.0 ) THEN
843C ------------------------------
844C Scaling for distributed matrix
845C We need to allocate scaling
846C arrays on all processors, not
847C only the master.
848C ------------------------------
849 IF ( id%MYID .NE. master ) THEN
850 IF ( associated(id%COLSCA))
851 & DEALLOCATE( id%COLSCA )
852 IF ( associated(id%ROWSCA))
853 & DEALLOCATE( id%ROWSCA )
854 ALLOCATE( id%COLSCA(n), stat=ierr)
855 IF (ierr .GT.0) THEN
856 id%INFO(1)=-13
857 id%INFO(2)=n
858 ENDIF
859 ALLOCATE( id%ROWSCA(n), stat=ierr)
860 IF (ierr .GT.0) THEN
861 id%INFO(1)=-13
862 id%INFO(2)=n
863 ENDIF
864 ENDIF
865 m = n
866 bumaxmn=m
867 IF(n > bumaxmn) bumaxmn = n
868 liwk = 4*bumaxmn
869 ALLOCATE (iwk(liwk),burp(m),bucp(n),
870 & burs(2* (id%NPROCS)),bucs(2* (id%NPROCS)),
871 & stat=allocok)
872 IF (allocok > 0) THEN
873 id%INFO(1)=-13
874 id%INFO(2)=liwk+m+n+4* (id%NPROCS)
875 ENDIF
876C --- Propagate enventual error
877 CALL mumps_propinfo( icntl(1), id%INFO(1),
878 & id%COMM, id%MYID )
879 IF (id%INFO(1).LT.0) GOTO 517
880C -- estimation of memory and construction of partvecs
881 bujob = 1
882C -- LWK not used
883 lwk_real = 1
884 ALLOCATE(wk_real(lwk_real),
885 & stat=allocok)
886 IF (allocok > 0) THEN
887 id%INFO(1)=-13
888 id%INFO(2)=lwk_real
889 ENDIF
890C --- Propagate enventual error
891 CALL mumps_propinfo( icntl(1), id%INFO(1),
892 & id%COMM, id%MYID )
893 IF (id%INFO(1).LT.0) GOTO 517
895 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
896 & id%KEEP8(29),
897 & m, n, id%NPROCS, id%MYID, id%COMM,
898 & burp, bucp,
899 & burs, bucs, buregistre,
900 & iwk, liwk,
901 & buintsz, buresz, bujob,
902 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
903 & id%KEEP(50),
904 & k231, k232, k233,
905 & id%DKEEP(3),
906 & sconeerr, scinferr)
907 IF(liwk < buintsz) THEN
908 DEALLOCATE(iwk)
909 liwk = buintsz
910 ALLOCATE(iwk(liwk), stat=allocok)
911 IF (allocok > 0) THEN
912 id%INFO(1)=-13
913 id%INFO(2)=liwk
914 ENDIF
915 ENDIF
916 lwk_real = buresz
917 DEALLOCATE(wk_real)
918 ALLOCATE (wk_real(lwk_real), stat=allocok)
919 IF (allocok > 0) THEN
920 id%INFO(1)=-13
921 id%INFO(2)=lwk_real
922 ENDIF
923C --- Propagate enventual error
924 CALL mumps_propinfo( icntl(1), id%INFO(1),
925 & id%COMM, id%MYID )
926 IF (id%INFO(1).LT.0) GOTO 517
927C -- estimation of memory and construction of partvecs
928 bujob = 2
930 & myirn_loc(1), myjcn_loc(1), mya_loc(1),
931 & id%KEEP8(29),
932 & m, n, id%NPROCS, id%MYID, id%COMM,
933 & burp, bucp,
934 & burs, bucs, buregistre,
935 & iwk, liwk,
936 & buintsz, buresz, bujob,
937 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
938 & id%KEEP(50),
939 & k231, k232, k233,
940 & id%DKEEP(3),
941 & sconeerr, scinferr)
942 id%DKEEP(4) = sconeerr
943 id%DKEEP(5) = scinferr
944CXXXX
945 DEALLOCATE(iwk, wk_real,burp,bucp,burs, bucs)
946 ELSE IF ( keep(54) .EQ. 0 ) THEN
947C ------------------
948C Centralized matrix
949C ------------------
950 IF ((keep(52).EQ.7).OR.(keep(52).EQ.8)) THEN
951C -------------------------------
952C Create a communicator of size 1
953C -------------------------------
954 IF (id%MYID.EQ.master) THEN
955 colour = 0
956 ELSE
957 colour = mpi_undefined
958 ENDIF
959 CALL mpi_comm_split( id%COMM, colour, 0,
960 & comm_for_scaling, ierr )
961 IF (id%MYID.EQ.master) THEN
962 m = n
963 bumaxmn=n
964 IF(n > bumaxmn) bumaxmn = n
965 liwk = 1
966 ALLOCATE(iwk(liwk),burp(1),bucp(1),
967 & burs(1),bucs(1),
968 & stat=allocok)
969 IF (allocok > 0) THEN
970 id%INFO(1)=-13
971 id%INFO(2)=liwk+1+1+1+1
972 GOTO 400
973 ENDIF
974 lwk_real = m + n
975 ALLOCATE (wk_real(lwk_real), stat=allocok)
976 IF (allocok > 0) THEN
977 id%INFO(1)=-13
978 id%INFO(2)=lwk_real
979 GOTO 400
980 ENDIF
981 CALL mpi_comm_rank(comm_for_scaling, scmyid, ierr)
982 CALL mpi_comm_size(comm_for_scaling, scnprocs, ierr)
983 bujob = 1
985 & id%IRN(1), id%JCN(1), id%A(1),
986 & id%KEEP8(28),
987 & m, n, scnprocs, scmyid, comm_for_scaling,
988 & burp, bucp,
989 & burs, bucs, buregistre,
990 & iwk, liwk,
991 & buintsz, buresz, bujob,
992 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
993 & id%KEEP(50),
994 & k231, k232, k233,
995 & id%DKEEP(3),
996 & sconeerr, scinferr)
997 IF(lwk_real < buresz) THEN
998 id%INFO(1) = -136
999 GOTO 400
1000 ENDIF
1001 bujob = 2
1002 CALL cmumps_simscaleabs(id%IRN(1),
1003 & id%JCN(1), id%A(1),
1004 & id%KEEP8(28),
1005 & m, n, scnprocs, scmyid, comm_for_scaling,
1006 & burp, bucp,
1007 & burs, bucs, buregistre,
1008 & iwk, liwk,
1009 & buintsz, buresz, bujob,
1010 & id%ROWSCA(1), id%COLSCA(1), wk_real, lwk_real,
1011 & id%KEEP(50),
1012 & k231, k232, k233,
1013 & id%DKEEP(3),
1014 & sconeerr, scinferr)
1015 id%DKEEP(4) = sconeerr
1016 id%DKEEP(5) = scinferr
1017 400 CONTINUE
1018 IF (allocated(wk_real)) DEALLOCATE(wk_real)
1019 IF (allocated(iwk)) DEALLOCATE(iwk)
1020 IF (allocated(burp)) DEALLOCATE(burp)
1021 IF (allocated(bucp)) DEALLOCATE(bucp)
1022 IF (allocated(burs)) DEALLOCATE(burs)
1023 IF (allocated(bucs)) DEALLOCATE(bucs)
1024 ENDIF
1025C Centralized matrix: make DKEEP(4:5) available to all processors
1026 CALL mpi_bcast( id%DKEEP(4),2,mpi_real,
1027 & master, id%COMM, ierr )
1028 IF (id%MYID.EQ.master) THEN
1029C Communicator should only be
1030C freed on the master process
1031 CALL mpi_comm_free(comm_for_scaling, ierr)
1032 ENDIF
1033 CALL mumps_propinfo(icntl(1), id%INFO(1),
1034 & id%COMM, id%MYID)
1035 IF (id%INFO(1).LT.0) GOTO 517
1036 ELSE IF (id%MYID.EQ.master) THEN
1037C ----------------------------------
1038C Centralized scaling, options 1 to 6
1039C ----------------------------------
1040 IF (keep(52).GT.0 .AND. keep(52).LE.6) THEN
1041C ---------------------
1042C Allocate temporary
1043C workspace for scaling
1044C ---------------------
1045 IF ( keep(52) .eq. 5 .or.
1046 & keep(52) .eq. 6 ) THEN
1047C We have an explicit copy of the original
1048C matrix in complex format which should probably
1049C be avoided (but do we want to keep all
1050C those old scaling options ?)
1051 lwk = id%KEEP8(28)
1052 ELSE
1053 lwk = 1_8
1054 END IF
1055 lwk_real = 5 * n
1056 ALLOCATE( wk_real( lwk_real ), stat = ierr )
1057 IF ( ierr .GT. 0 ) THEN
1058 id%INFO(1) = -13
1059 id%INFO(2) = lwk_real
1060 GOTO 137
1061 END IF
1062 ALLOCATE( wk( lwk ), stat = ierr )
1063 IF ( ierr .GT. 0 ) THEN
1064 id%INFO(1) = -13
1065 CALL mumps_set_ierror(lwk, id%INFO(2))
1066 GOTO 137
1067 END IF
1068 CALL cmumps_fac_a(n, id%KEEP8(28), keep(52), id%A(1),
1069 & id%IRN(1), id%JCN(1),
1070 & id%COLSCA(1), id%ROWSCA(1),
1071 & wk, lwk, wk_real, lwk_real, icntl(1), id%INFO(1) )
1072 DEALLOCATE( wk_real )
1073 DEALLOCATE( wk )
1074 ENDIF
1075 ENDIF
1076 ENDIF ! Scaling distributed matrices or centralized
1077 IF (keep(125).NE.0) THEN
1078C ------------------------
1079C If we enable the scaling of the |A11 A12| block
1080C we et to 1 the scaling corresponding to the Schur
1081C complement matrix A22
1082C ------------------------
1083 IF ((keep(60).GT.0) .and. (keep(116).GT.0)) THEN
1084C Schur is active, reset Schur entries to ONE
1085 IF ( ((keep(52).EQ.7).OR.(keep(52).EQ.8)) .AND.
1086 & keep(54).NE.0 ) THEN
1087C Scaling available on all procs
1088 DO i=1, n
1089 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1090 id%COLSCA(i) = one
1091 id%ROWSCA(i) = one
1092 ENDIF
1093 ENDDO
1094 ELSE IF ( id%MYID .EQ. master) THEN
1095C Scaling available on master
1096 DO i=1, n
1097 IF (id%SYM_PERM(i).GT.id%N-keep(116)) THEN
1098 id%COLSCA(i) = one
1099 id%ROWSCA(i) = one
1100 ENDIF
1101 ENDDO
1102 ENDIF
1103 ENDIF
1104 ENDIF
1105 IF (id%MYID.EQ.master) THEN
1106 CALL mumps_secfin(timeet)
1107 id%DKEEP(92)=real(timeet)
1108C Print inf-norm after last KEEP(233) iterations of
1109C scaling option KEEP(52)=7 or 8 (SimScale)
1110C
1111 IF (prokg.AND.(keep(52).EQ.7.OR.keep(52).EQ.8)
1112 & .AND. (k233+k231+k232).GT.0) THEN
1113 IF (k232.GT.0) WRITE(mpg, 166) id%DKEEP(4)
1114 ENDIF
1115 ENDIF
1116 ENDIF ! LSCAL
1117C
1118C scaling might also be provided by the user
1119 lscal = (lscal .OR. (keep(52) .EQ. -1) .OR. keep(52) .EQ. -2)
1120 IF (lscal .AND. keep(258).NE.0 .AND. id%MYID .EQ. master) THEN
1121 DO i = 1, id%N
1122 CALL cmumps_updatedeter_scaling(id%ROWSCA(i),
1123 & id%DKEEP(6), ! determinant
1124 & keep(259)) ! exponent of the determinant
1125 ENDDO
1126 IF (keep(50) .EQ. 0) THEN ! unsymmetric
1127 DO i = 1, id%N
1128 CALL cmumps_updatedeter_scaling(id%COLSCA(i),
1129 & id%DKEEP(6), ! determinant
1130 & keep(259)) ! exponent of the determinant
1131 ENDDO
1132 ELSE
1133C -----------------------------------------
1134C In this case COLSCA = ROWSCA
1135C Since determinant was initialized to 1,
1136C compute square of the current determinant
1137C rather than going through COLSCA.
1138C -----------------------------------------
1139 CALL cmumps_deter_square(id%DKEEP(6), keep(259))
1140 ENDIF
1141C Now we should have taken the
1142C inverse of the scaling vectors
1143 CALL cmumps_deter_scaling_inverse(id%DKEEP(6), keep(259))
1144 ENDIF
1145C
1146C ********************
1147C End of Scaling phase
1148C At this point: either (matrix is distributed and KEEP(52)=7 or 8)
1149C in which case scaling arrays are allocated on all processors,
1150C or scaling arrays are only on the host processor.
1151C In case of distributed matrix input, we will free the scaling
1152C arrays on procs with MYID .NE. 0 after the all-to-all distribution
1153C of the original matrix.
1154C ********************
1155C
1156 137 CONTINUE
1157C Fwd in facto: in case of repeated factorizations
1158C with different Schur options we prefer to free
1159C systematically this array now than waiting for
1160C the root node. We rely on the fact that it is
1161C allocated or not during the solve phase so if
1162C it was allocated in a 1st call to facto and not
1163C in a second, we don't want the solve to think
1164C it was allocated in the second call.
1165 IF (associated(id%root%RHS_CNTR_MASTER_ROOT)) THEN
1166 DEALLOCATE (id%root%RHS_CNTR_MASTER_ROOT)
1167 NULLIFY (id%root%RHS_CNTR_MASTER_ROOT)
1168 ENDIF
1169C Fwd in facto: check that id%NRHS has not changed
1170 IF ( id%MYID.EQ.master.AND. keep(252).EQ.1 .AND.
1171 & id%NRHS .NE. id%KEEP(253) ) THEN
1172C Error: NRHS should not have
1173C changed since the analysis
1174 id%INFO(1)=-42
1175 id%INFO(2)=id%KEEP(253)
1176 ENDIF
1177C Fwd in facto: allocate and broadcast RHS_MUMPS
1178C to make it available on all processors.
1179 IF (id%KEEP(252) .EQ. 1) THEN
1180 IF ( id%MYID.NE.master ) THEN
1181 id%KEEP(254) = n ! Leading dimension
1182 id%KEEP(255) = n*id%KEEP(253) ! Tot size
1183 ALLOCATE(rhs_mumps(id%KEEP(255)),stat=ierr)
1184 IF (ierr > 0) THEN
1185 id%INFO(1)=-13
1186 id%INFO(2)=id%KEEP(255)
1187 IF (lpok)
1188 & WRITE(lp,*) 'ERROR while allocating RHS on a slave'
1189 NULLIFY(rhs_mumps)
1190 ENDIF
1191 rhs_mumps_allocated = .true.
1192 ELSE
1193C Case of non working master
1194 id%KEEP(254)=id%LRHS ! Leading dimension
1195 id%KEEP(255)=id%LRHS*(id%KEEP(253)-1)+id%N ! Tot size
1196 rhs_mumps=>id%RHS
1197 rhs_mumps_allocated = .false.
1198 IF (lscal) THEN
1199C Scale before broadcast: apply row
1200C scaling (remark that we assume no
1201C transpose).
1202 DO k=1, id%KEEP(253)
1203 DO i=1, n
1204 rhs_mumps( id%KEEP(254) * (k-1) + i )
1205 & = rhs_mumps( id%KEEP(254) * (k-1) + i )
1206 & * id%ROWSCA(i)
1207 ENDDO
1208 ENDDO
1209 ENDIF
1210 ENDIF
1211 ELSE
1212 id%KEEP(255)=1
1213 ALLOCATE(rhs_mumps(1),stat=ierr)
1214 IF (ierr > 0) THEN
1215 id%INFO(1)=-13
1216 id%INFO(2)=1
1217 IF (lpok)
1218 & WRITE(lp,*) 'ERREUR while allocating RHS on a slave'
1219 NULLIFY(rhs_mumps)
1220 ENDIF
1221 rhs_mumps_allocated = .true.
1222 ENDIF
1223 CALL mumps_propinfo( icntl(1), id%INFO(1),
1224 & id%COMM, id%MYID )
1225 IF ( id%INFO(1).lt.0 ) GOTO 517
1226 IF (keep(252) .EQ. 1) THEN
1227C
1228C Broadcast the columns of the right-hand side
1229C one by one. Leading dimension is keep(254)=N
1230C on procs with MYID > 0 but may be larger on
1231C the master processor.
1232 DO i= 1, id%KEEP(253)
1233 CALL mpi_bcast(rhs_mumps((i-1)*id%KEEP(254)+1), n,
1234 & mpi_complex, master,id%COMM,ierr)
1235 END DO
1236 ENDIF
1237C Keep a copy of ICNTL(24) and make it
1238C available on all working processors.
1239 keep(110)=id%ICNTL(24)
1240 CALL mpi_bcast(keep(110), 1, mpi_integer,
1241 & master, id%COMM, ierr)
1242C KEEP(110) defaults to 0 for out of range values
1243 IF (keep(110).NE.1) keep(110)=0
1244 IF (keep(219).NE.0) THEN
1245 CALL cmumps_buf_max_array_minsize(max(keep(108),1),ierr)
1246 IF (ierr .NE. 0) THEN
1247C ------------------------
1248C Error allocating CMUMPS_BUF
1249C ------------------------
1250 id%INFO(1) = -13
1251 id%INFO(2) = max(keep(108),1)
1252 END IF
1253 ENDIF
1254C -----------------------------------------------
1255C Depending on the option used for
1256C -detecting null pivots (ICNTL(24)/KEEP(110))
1257C CNTL(3) is used to set DKEEP(1)
1258C ( A row is considered as null if ||row|| < DKEEP(1) )
1259C CNTL(5) is then used to define if a large
1260C value is set on the diagonal or if a 1 is set
1261C and other values in the row are reset to zeros.
1262C SEUIL* corresponds to the minimum required
1263C absolute value of pivot.
1264C SEUIL_LDLT_NIV2 is used only in the
1265C case of SYM=2 within a niv2 node for which
1266C we have only a partial view of the fully summed rows.
1267 IF (id%MYID .EQ. master) cntl3 = id%CNTL(3)
1268 CALL mpi_bcast(cntl3, 1, mpi_real,
1269 & master, id%COMM, ierr)
1270 IF (id%MYID .EQ. master) cntl5 = id%CNTL(5)
1271 CALL mpi_bcast(cntl5, 1, mpi_real,
1272 & master, id%COMM, ierr)
1273 IF (id%MYID .EQ. master) cntl6 = id%CNTL(6)
1274 CALL mpi_bcast(cntl6, 1, mpi_real,
1275 & master, id%COMM, ierr)
1276 IF (id%MYID .EQ. master) id%DKEEP(8) = id%CNTL(7)
1277 CALL mpi_bcast(id%DKEEP(8), 1, mpi_real,
1278 & master, id%COMM, ierr)
1279 id%DKEEP(11) = id%DKEEP(8)/id%KEEP(461)
1280 id%DKEEP(12) = id%DKEEP(8)/id%KEEP(462)
1281 IF (keep(486).EQ.0) id%DKEEP(8) = zero
1282 compute_anorminf = .false.
1283 IF ( (keep(486) .NE. 0).AND. (id%DKEEP(8).LT.zero)) THEN
1284 compute_anorminf = .true.
1285 ENDIF
1286 IF (keep(19).NE.0) THEN
1287C Rank revealing factorisation
1288 compute_anorminf = .true.
1289 ENDIF
1290 IF (keep(110).NE.0) THEN
1291C Null pivot detection
1292 compute_anorminf = .true.
1293 ENDIF
1294 IF (id%DKEEP(8).LT.zero) THEN
1295C Experimental setting of CNTL(7)
1296 IF (compute_anorminf) THEN
1297 eff_size_schur = 0
1298 CALL cmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1299C If no schur ANORMINF fine for other cases
1300 ELSE
1301 anorminf = zero
1302 ENDIF
1303 id%DKEEP(8) = abs(id%DKEEP(8))*anorminf
1304C ANORMINF need be recomputed in case of schur
1305 IF ((keep(60).GT.0).AND.keep(116).GT.0) anorminf=zero
1306 ENDIF
1307C -------------------------------------------------------
1308C We compute ANORMINF, when needed, based on
1309C the infinite norm of Rowsca *A*Colsca
1310C and make it available on all working processes.
1311 IF (compute_anorminf) THEN
1312 eff_size_schur = 0
1313 IF (keep(60).GT.0) eff_size_schur = keep(116)
1314 CALL cmumps_anorminf( id , anorminf, lscal, eff_size_schur )
1315 ELSE
1316 anorminf = zero
1317 ENDIF
1318C
1319 IF ((keep(19).NE.0).OR.(keep(110).NE.0)) THEN
1320 IF (prokg) THEN
1321 IF (keep(19).NE.0) THEN
1322 WRITE(mpg,'(A,1PD16.4)')
1323 & ' CNTL(3) for null pivot rows/singularities =',cntl3
1324 ELSE
1325 WRITE(mpg,'(A,1PD16.4)')
1326 & ' CNTL(3) for null pivot row detection =',cntl3
1327 ENDIF
1328 ENDIF
1329 ENDIF
1330 IF (keep(19).EQ.0) THEN
1331C -- RR is off
1332 seuil = zero
1333 id%DKEEP(9) = zero
1334 ELSE
1335C -- RR is on
1336C
1337C CNTL(3) is the threshold used in the following to compute
1338C DKEEP(9) the threshold under which the sing val. are considered
1339C as null and from which we start to look for a gap between two
1340C sing val.
1341 IF (cntl3 .LT. zero) THEN
1342 id%DKEEP(9) = abs(cntl(3))
1343 ELSE IF (cntl3 .GT. zero) THEN
1344 id%DKEEP(9) = cntl3*anorminf
1345 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1346 ENDIF
1347 IF (prokg) THEN
1348 WRITE(mpg, '(A,I16)')
1349 & ' ICNTL(56) rank revealing effective value =',keep(19)
1350 WRITE(mpg,'(A,1PD16.4)')
1351 & ' ...Threshold for singularities on the root =',id%DKEEP(9)
1352 ENDIF
1353C RR postponing considers that pivot rows with norm smaller
1354C than SEUIL should be postponed.
1355C SEUIL should be bigger than DKEEP(9), this means that
1356C DKEEP(13) should be bigger than 1.
1357 thresh_seuil = id%DKEEP(13)
1358 IF (id%DKEEP(13).LT.1) thresh_seuil = 10
1359 seuil = id%DKEEP(9)*thresh_seuil
1360 IF (prokg) WRITE(mpg,'(A,1PD16.4)')
1361 & ' ...Threshold for postponing =',seuil
1362 ENDIF !end KEEP(19)
1363 seuil_ldlt_niv2 = seuil
1364C -------------------------------
1365C -- Null pivot row detection
1366C -------------------------------
1367 IF (keep(110).EQ.0) THEN
1368C -- Null pivot is off
1369C Initialize DKEEP(1) to a negative value
1370C in order to avoid detection of null pivots
1371C (test max(AMAX,RMAX,abs(PIVOT)).LE.PIVNUL
1372C in CMUMPS_FAC_I, where PIVNUL=DKEEP(1))
1373 id%DKEEP(1) = -1.0e0
1374 id%DKEEP(2) = zero
1375 ELSE
1376C -- Null pivot is on
1377 IF (keep(19).NE.0) THEN
1378C -- RR is on
1379C RR postponing considers that pivot rows of norm smaller that SEUIL
1380C should be postponed, but pivot rows smaller than DKEEP(1) are
1381C directly added to null space and thus considered as null pivot rows.
1382 IF ((id%DKEEP(10).LE.0).OR.(id%DKEEP(10).GT.1)) THEN
1383C DKEEP(10) is out of range, set to the default value 10-1
1384 id%DKEEP(1) = id%DKEEP(9)*1e-1
1385 ELSE
1386 id%DKEEP(1) = id%DKEEP(9)*id%DKEEP(10)
1387 ENDIF
1388 ELSE
1389C -- RR is off
1390C -- only Null pivot detection
1391C We keep strategy currently used in MUMPS 4.10.0
1392 IF (cntl3 .LT. zero) THEN
1393 id%DKEEP(1) = abs(cntl(3))
1394 ELSE IF (cntl3 .GT. zero) THEN
1395 id%DKEEP(1) = cntl3*anorminf
1396 ELSE ! (CNTL(3) .EQ. ZERO) THEN
1397c id%DKEEP(1) = NPIV_CRITICAL_PATH*EPS*ANORMINF
1399 & n, keep(28), id%STEP(1), id%FRERE_STEPS(1), id%FILS(1),
1400 & id%NA(1), id%LNA, id%NE_STEPS(1), npiv_critical_path )
1401 id%DKEEP(1) = sqrt(real(npiv_critical_path))*eps*anorminf
1402 ENDIF
1403 ENDIF ! fin rank revealing
1404 IF ((keep(110).NE.0).AND.(prokg)) THEN
1405 WRITE(mpg, '(A,I16)')
1406 & ' ICNTL(24) null pivot rows detection =',keep(110)
1407 WRITE(mpg,'(A,1PD16.4)')
1408 & ' ...Zero pivot detection threshold =',id%DKEEP(1)
1409 ENDIF
1410 IF (cntl5.GT.zero) THEN
1411 id%DKEEP(2) = cntl5 * anorminf
1412 IF (prokg) WRITE(mpg,'(A,1PD10.3)')
1413 & ' ...Fixation for null pivots =',id%DKEEP(2)
1414 ELSE
1415 IF (prokg) WRITE(mpg,*) '...Infinite fixation '
1416 IF (id%KEEP(50).EQ.0) THEN
1417C Unsym
1418 ! the user let us choose a fixation. set in NEGATIVE
1419 ! to detect during facto when to set row to zero !
1420 id%DKEEP(2) = -max(1.0e10*anorminf,
1421 & sqrt(huge(anorminf))/1.0e8)
1422 ELSE
1423C Sym
1424 id%DKEEP(2) = zero
1425 ENDIF
1426 ENDIF
1427 ENDIF ! fin null pivot detection.
1428C Find id of root node if RR is on
1429 IF (keep(53).NE.0) THEN
1430 id_root =mumps_procnode(id%PROCNODE_STEPS(id%STEP(keep(20))),
1431 & id%KEEP(199))
1432 IF ( keep( 46 ) .NE. 1 ) THEN
1433 id_root = id_root + 1
1434 END IF
1435 ENDIF
1436C Second pass: set parameters for null pivot detection
1437C Allocate PIVNUL_LIST in case of null pivot detection
1438 lpn_list = 1
1439 IF ( associated( id%PIVNUL_LIST) ) DEALLOCATE(id%PIVNUL_LIST)
1440 IF(keep(110) .EQ. 1) THEN
1441 lpn_list = n
1442 ENDIF
1443 IF (keep(19).NE.0 .AND.
1444 & (id_root.EQ.id%MYID .OR. id%MYID.EQ.master)) THEN
1445 lpn_list = n
1446 ENDIF
1447 ALLOCATE( id%PIVNUL_LIST(lpn_list),stat = ierr )
1448 IF ( ierr .GT. 0 ) THEN
1449 id%INFO(1)=-13
1450 id%INFO(2)=lpn_list
1451 END IF
1452 id%PIVNUL_LIST(1:lpn_list) = 0
1453 keep(109) = 0
1454C end set parameter for null pivot detection
1455 CALL mumps_propinfo( icntl(1), id%INFO(1),
1456 & id%COMM, id%MYID )
1457 IF ( id%INFO(1).lt.0 ) GOTO 517
1458C --------------------------------------------------------------
1459C STATIC PIVOTING
1460C -- Static pivoting only when RR and Null pivot detection OFF
1461C --------------------------------------------------------------
1462 keep(97) = 0
1463 IF ((keep(19).EQ.0).AND.(keep(110).EQ.0)) THEN
1464 IF (id%MYID .EQ. master) cntl4 = id%CNTL(4)
1465 CALL mpi_bcast( cntl4, 1, mpi_real,
1466 & master, id%COMM, ierr )
1467C
1468 IF ( cntl4 .GE. zero ) THEN
1469 keep(97) = 1
1470 IF ( cntl4 .EQ. zero ) THEN
1471C -- set seuil to sqrt(eps)*||A||
1472 IF(anorminf .EQ. zero) THEN
1473 eff_size_schur = 0
1474 IF (keep(60).GT.0) eff_size_schur = keep(116)
1475 CALL cmumps_anorminf( id , anorminf, lscal,
1476 & eff_size_schur )
1477 ENDIF
1478 seuil = sqrt(eps) * anorminf
1479 ELSE
1480 seuil = cntl4
1481 ENDIF
1482 seuil_ldlt_niv2 = seuil
1483 ELSE
1484 seuil = zero
1485 ENDIF
1486 ENDIF
1487C set number of tiny pivots / 2x2 pivots in types 1 /
1488C 2x2 pivots in types 2, to zero. This is because the
1489C user can call the factorization step several times.
1490 keep(98) = 0
1491 keep(103) = 0
1492 keep(105) = 0
1493 maxs = 1_8
1494*
1495* Start allocations
1496* *****************
1497*
1498C
1499C The slaves can now perform the factorization
1500C
1501C
1502C Allocate id%S on all nodes
1503C or point to user provided data WK_USER when LWK_USER>0
1504C =======================
1505C
1506C Compute BLR_STRAT and a first estimation
1507C of MAXS, the size of id%S
1509 & maxs_base8, maxs_base_relaxed8,
1510 & blr_strat,
1511 & id%KEEP(1), id%KEEP8(1))
1512C
1513 maxs = maxs_base_relaxed8
1514 IF (wk_user_provided) THEN
1515C -- Set MAXS to size of WK_USER_
1516 maxs = id%KEEP8(24)
1517 ENDIF
1518 CALL mumps_propinfo( icntl(1), id%INFO(1),
1519 & id%COMM, id%MYID )
1520 IF (id%INFO(1) .LT. 0) THEN
1521 GOTO 517
1522 ENDIF
1523C
1524 id%KEEP8(75) = huge(id%KEEP8(75))
1525 id%KEEP8(76) = huge(id%KEEP8(76))
1526 IF (i_am_slave) THEN
1527C
1528 IF (id%KEEP8(4) .NE. 0_8) THEN
1529C
1530 IF ( .NOT. wk_user_provided ) THEN
1531C Set MAXS given BLR_STRAT, KEEP(201) and MAXS_BASE_RELAXED8
1533 & maxs,
1534 & blr_strat, id%KEEP(201), maxs_base_relaxed8,
1535 & id%KEEP(1), id%KEEP8(1), id%MYID, id%N, id%NELT,
1536 & id%NA(1), id%LNA, id%NSLAVES,
1537 & keep464copy, keep465copy,
1538 & id%INFO(1), id%INFO(2)
1539 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1540 & size(id%I8_L0_OMP,2)
1541 & )
1542C Given MAXS and max memory allowed KEEP8(4)
1543C compute in KEEP8(75) the number of real/complex
1544C available for dynamic allocations
1546 & maxs, id%MYID,
1547 & .false., ! UNDER_L0_OMP
1548 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1549 & blr_strat, id%KEEP(201),
1550 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1551 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1552 & size(id%I8_L0_OMP,2)
1553 & )
1554 ELSE
1555C KEEP8(75) dow not include MAXS, since WK_USER is provided
1557 & 0_8, id%MYID,
1558 & .false., ! UNDER_L0_OMP
1559 & n, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1560 & blr_strat, id%KEEP(201),
1561 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1562 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1563 & size(id%I8_L0_OMP,2)
1564 & )
1565 ENDIF
1566 IF (keep(400) .GT.0) THEN
1567C ------------------------------
1568C compute KEEP8(75) under L0_OMP
1569C ------------------------------
1570C Save KEEP8(75) above L0_OMP to reset KEEP8(75)
1571C when starting FAC_PAR_M
1572 id%KEEP8(76) = id%KEEP8(75)
1574 & 0_8, ! MAXS=0_8
1575 & id%MYID,
1576 & .true., ! UNDER_L0_OMP
1577 & id%N, id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1578 & blr_strat, id%KEEP(201),
1579 & id%KEEP, id%KEEP8, id%INFO(1), id%INFO(2)
1580 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1581 & size(id%I8_L0_OMP,2)
1582 & )
1583C KEEP8(75) holds the number of entries that
1584C can be allocated underL0.
1585C It will be used during CMUMPS_FAC_L0_OMP to adjust the
1586C the size of MUMPS_TPS_ARR(ITH)%LA
1587 ENDIF
1588 ENDIF ! MEM_ALLOWED
1589C
1590 ENDIF ! I_AM_SLAVE THEN
1591C
1592 IF (i_am_slave) THEN
1593 IF ( (keep(400).GT.0) .AND. (keep(406).EQ.2) ) THEN
1594C Compute KEEP8(77) the peak authorized used by
1595C CMUMPS_PERFORM_COPIES
1597 & id%MYID, id%N,
1598 & id%NELT, id%NA(1), id%LNA, id%NSLAVES,
1599 & blr_strat, id%KEEP(201),
1600 & id%KEEP(1), id%KEEP8(1), id%INFO(1), id%INFO(2)
1601 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
1602 & size(id%I8_L0_OMP,2)
1603 & )
1604 ENDIF
1605 ENDIF ! I_AM_SLAVE)
1606C
1607 CALL mumps_propinfo( icntl(1), id%INFO(1),
1608 & id%COMM, id%MYID )
1609 IF (id%INFO(1) .LT. 0) THEN
1610 GOTO 517
1611 ENDIF
1612 CALL mumps_seti8toi4(maxs, id%INFO(39))
1613 CALL cmumps_avgmax_stat8(prokg, mpg, maxs, id%NSLAVES,
1614 & print_maxavg,
1615 & id%COMM, " Effective size of S (based on INFO(39))= ")
1616C
1617 IF ( i_am_slave ) THEN
1618C ------------------
1619C Dynamic scheduling
1620C ------------------
1621 CALL cmumps_load_set_inicost( dble(id%COST_SUBTREES),
1622 & keep(64), id%DKEEP(15), keep(375), maxs )
1623 k28=keep(28)
1624 memory_md_arg = min(int(perlu,8) * ( maxs_base8 / 100_8 + 1_8 ),
1625C Restrict freedom from dynamic scheduler when
1626C MEM_ALLOWED=ICNTL(23) is small (case where KEEP8(4)-MAXS_BASE8
1627C is negative after call to CMUMPS_MAX_MEM)
1628 & max(0_8, maxs-maxs_base8))
1629 CALL cmumps_load_init( id, memory_md_arg, maxs )
1630C
1631C Out-Of-Core (OOC) issues. Case where we ran one factorization OOC
1632C and the second one is in-core: we try to free OOC
1633C related data from previous factorization.
1634C
1635 CALL cmumps_clean_ooc_data(id, ierr)
1636 IF (ierr < 0) THEN
1637 id%INFO(1) = -90
1638 id%INFO(2) = 0
1639 GOTO 112
1640 ENDIF
1641 IF (keep(201) .GT. 0) THEN
1642C -------------------
1643C OOC initializations
1644C -------------------
1645 IF (keep(201).EQ.1 !PANEL Version
1646 & .AND.keep(50).EQ.0 ! Unsymmetric
1647 & .AND.keep(251).NE.2 ! Store L to disk
1648 & ) THEN
1649 id%OOC_NB_FILE_TYPE=2 ! declared in MUMPS_OOC_COMMON
1650 ELSE
1651 id%OOC_NB_FILE_TYPE=1 ! declared in MUMPS_OOC_COMMON
1652 ENDIF
1653C ------------------------------
1654C Dimension IO buffer, KEEP(100)
1655C ------------------------------
1656 IF (keep(205) .GT. 0) THEN
1657 keep(100) = keep(205)
1658 ELSE
1659 IF (keep(201).EQ.1) THEN ! PANEL version
1660 i8tmp = int(id%OOC_NB_FILE_TYPE,8) *
1661 & 2_8 * int(keep(226),8)
1662 ELSE
1663 i8tmp = 2_8 * id%KEEP8(119)
1664 ENDIF
1665 i8tmp = i8tmp + int(max(keep(12),0),8) *
1666 & (i8tmp/100_8+1_8)
1667C we want to avoid too large IO buffers.
1668C 12M corresponds to 100Mbytes given to buffers.
1669 i8tmp = min(i8tmp, 12000000_8)
1670 keep(100)=int(i8tmp)
1671 ENDIF
1672 IF (keep(201).EQ.1) THEN
1673C Panel version. Force the use of a buffer.
1674 IF ( keep(99) < 3 ) THEN
1675 keep(99) = keep(99) + 3
1676 ENDIF
1677 ENDIF
1678C --------------------------
1679C Reset KEEP(100) to 0 if no
1680C buffer is used for OOC.
1681C --------------------------
1682 IF (keep(99) .LT.3) keep(100)=0
1683 IF((dble(keep(100))*dble(keep(35))/dble(2)).GT.
1684 & (dble(1999999999)))THEN
1685 IF (prokg) THEN
1686 WRITE(mpg,*)id%MYID,': Warning: DIM_BUF_IO might be
1687 & too big for Filesystem'
1688 ENDIF
1689 ENDIF
1690 ALLOCATE (id%OOC_INODE_SEQUENCE(keep(28),
1691 & id%OOC_NB_FILE_TYPE),
1692 & stat=ierr)
1693 IF ( ierr .GT. 0 ) THEN
1694 id%INFO(1) = -13
1695 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1696 NULLIFY(id%OOC_INODE_SEQUENCE)
1697 GOTO 112
1698 ENDIF
1699 ALLOCATE (id%OOC_TOTAL_NB_NODES(id%OOC_NB_FILE_TYPE),
1700 & stat=ierr)
1701 IF ( ierr .GT. 0 ) THEN
1702 id%INFO(1) = -13
1703 id%INFO(2) = id%OOC_NB_FILE_TYPE
1704 NULLIFY(id%OOC_TOTAL_NB_NODES)
1705 GOTO 112
1706 ENDIF
1707 ALLOCATE (id%OOC_SIZE_OF_BLOCK(keep(28),
1708 & id%OOC_NB_FILE_TYPE),
1709 & stat=ierr)
1710 IF ( ierr .GT. 0 ) THEN
1711 id%INFO(1) = -13
1712 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1713 NULLIFY(id%OOC_SIZE_OF_BLOCK)
1714 GOTO 112
1715 ENDIF
1716 ALLOCATE (id%OOC_VADDR(keep(28),id%OOC_NB_FILE_TYPE),
1717 & stat=ierr)
1718 IF ( ierr .GT. 0 ) THEN
1719 id%INFO(1) = -13
1720 id%INFO(2) = id%OOC_NB_FILE_TYPE*keep(28)
1721 NULLIFY(id%OOC_VADDR)
1722 GOTO 112
1723 ENDIF
1724 ENDIF
1725 ENDIF
1726 112 CALL mumps_propinfo( icntl(1), id%INFO(1),
1727 & id%COMM, id%MYID )
1728 IF (id%INFO(1) < 0) THEN
1729C LOAD_END must be done but not OOC_END_FACTO
1730 GOTO 513
1731 ENDIF
1732 IF (i_am_slave) THEN
1733 IF (keep(201) .GT. 0) THEN
1734 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
1735 CALL cmumps_ooc_init_facto(id,maxs)
1736 ELSE
1737 WRITE(*,*) "Internal error in CMUMPS_FAC_DRIVER"
1738 CALL mumps_abort()
1739 ENDIF
1740 IF(id%INFO(1).LT.0)THEN
1741 GOTO 111
1742 ENDIF
1743 ENDIF
1744C First increment corresponds to the number of
1745C floating-point operations for subtrees allocated
1746C to the local processor.
1747 CALL cmumps_load_update(0,.false.,dble(id%COST_SUBTREES),
1748 & id%KEEP(1),id%KEEP8(1))
1749 IF (id%INFO(1).LT.0) GOTO 111
1750 END IF
1751C -----------------------
1752C Manage main workarray S
1753C -----------------------
1754 earlyt3rootins = keep(200) .EQ.0
1755 & .OR. ( keep(200) .LT. 0 .AND. keep(400) .EQ. 0 )
1756#if defined (LARGEMATRICES)
1757 IF ( id%MYID .ne. master ) THEN
1758#endif
1759 IF (.NOT.wk_user_provided) THEN
1760 IF ( earlyt3rootins ) THEN
1761C Standard allocation strategy
1762 ALLOCATE (id%S(maxs),stat=ierr)
1763 id%KEEP8(23) = maxs
1764 IF ( ierr .GT. 0 ) THEN
1765 id%INFO(1) = -13
1766 CALL mumps_set_ierror(maxs, id%INFO(2))
1767C On some platforms (IBM for example), an
1768C allocation failure returns a non-null pointer.
1769C Therefore we nullify S
1770 NULLIFY(id%S)
1771 id%KEEP8(23)=0_8
1772 ENDIF
1773 ENDIF
1774 ELSE
1775 id%S => id%WK_USER(1:id%KEEP8(24))
1776 id%KEEP8(23) = 0_8
1777 ENDIF
1778#if defined (LARGEMATRICES)
1779 END IF
1780#endif
1781C
1782 111 CALL mumps_propinfo( icntl(1), id%INFO(1),
1783 & id%COMM, id%MYID )
1784 IF ( id%INFO(1).LT.0 ) GOTO 514
1785C --------------------------
1786C Initialization of modules
1787C related to data management
1788C --------------------------
1789 nb_active_fronts_estim = 3
1790 nb_threads = 1
1791!$ NB_THREADS = OMP_GET_MAX_THREADS()
1792C
1793 nb_active_fronts_estim = 3*nb_threads
1794 IF (i_am_slave) THEN
1795C
1796 CALL mumps_fdm_init('A',nb_active_fronts_estim, id%INFO)
1797C
1798 IF ( (keep(486).EQ.2)
1799 & .OR. ((keep(489).NE.0).AND.(keep(400).GT.1))
1800 & ) THEN
1801C In case of LRSOLVE or CompressCB,
1802C initialize nb of handlers to nb of BLR
1803C nodes estimated at analysis
1804 nb_fronts_f_estim = keep(470)
1805 ELSE
1806 IF (keep(489).NE.0) THEN
1807C Compress CB and no L0 OMP (or 1 thread under L0):
1808C NB_ACTIVE_FRONTS_ESTIM is too small,
1809C to limit nb of reallocations make it twice larger
1810 nb_fronts_f_estim = 2*nb_active_fronts_estim
1811 ELSE
1812 nb_fronts_f_estim = nb_active_fronts_estim
1813 ENDIF
1814 ENDIF
1815 CALL mumps_fdm_init('F',nb_fronts_f_estim, id%INFO )
1816 IF (id%INFO(1) .LT. 0 ) GOTO 114
1817#if ! defined(NO_FDM_DESCBAND)
1818C Storage of DESCBAND information
1819 CALL mumps_fdbd_init( nb_active_fronts_estim, id%INFO )
1820#endif
1821#if ! defined(NO_FDM_MAPROW)
1822C Storage of MAPROW and ROOT2SON information
1823 CALL mumps_fmrd_init( nb_active_fronts_estim, id%INFO )
1824#endif
1825 CALL cmumps_blr_init_module( nb_fronts_f_estim, id%INFO )
1826 114 CONTINUE
1827 ENDIF
1828 CALL mumps_propinfo( icntl(1), id%INFO(1),
1829 & id%COMM, id%MYID )
1830C GOTO 500: one of the above module initializations failed
1831 IF ( id%INFO(1).LT.0 ) GOTO 500
1832C
1833C
1834C Allocate space for matrix in arrowhead
1835C ======================================
1836C
1837C CASE 1 : Matrix is assembled
1838C CASE 2 : Matrix is elemental
1839C
1840 IF ( keep(55) .eq. 0 ) THEN
1841C ------------------------------------
1842C Space has been allocated already for
1843C the integer part during analysis
1844C Only slaves need the arrowheads.
1845C ------------------------------------
1846 IF (associated( id%DBLARR)) THEN
1847 DEALLOCATE(id%DBLARR)
1848 NULLIFY(id%DBLARR)
1849 ENDIF
1850 IF ( i_am_slave .and. id%KEEP8(26) .ne. 0_8 ) THEN
1851 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = ierr )
1852 ELSE
1853 ALLOCATE( id%DBLARR( 1 ), stat =ierr )
1854 END IF
1855 IF ( ierr .NE. 0 ) THEN
1856 IF (lpok) THEN
1857 WRITE(lp,*) id%MYID,
1858 & ': Allocation error for DBLARR(',id%KEEP8(26),')'
1859 ENDIF
1860 id%INFO(1)=-13
1861 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1862 NULLIFY(id%DBLARR)
1863 GOTO 100
1864 END IF
1865 ELSE
1866C ----------------------------------------
1867C Allocate variable lists. Systematically.
1868C ----------------------------------------
1869 IF ( associated( id%INTARR ) ) THEN
1870 DEALLOCATE( id%INTARR )
1871 NULLIFY( id%INTARR )
1872 END IF
1873 IF ( i_am_slave .and. id%KEEP8(27) .ne. 0_8 ) THEN
1874 ALLOCATE( id%INTARR( id%KEEP8(27) ), stat = allocok )
1875 IF ( allocok .GT. 0 ) THEN
1876 id%INFO(1) = -13
1877 CALL mumps_set_ierror(id%KEEP8(27), id%INFO(2))
1878 NULLIFY(id%INTARR)
1879 GOTO 100
1880 END IF
1881 ELSE
1882 ALLOCATE( id%INTARR(1),stat=allocok )
1883 IF ( allocok .GT. 0 ) THEN
1884 id%INFO(1) = -13
1885 id%INFO(2) = 1
1886 NULLIFY(id%INTARR)
1887 GOTO 100
1888 END IF
1889 END IF
1890C -----------------------------
1891C Allocate real values.
1892C On master, if hybrid host and
1893C no scaling, avoid the copy.
1894C -----------------------------
1895 IF (associated( id%DBLARR)) THEN
1896 DEALLOCATE(id%DBLARR)
1897 NULLIFY(id%DBLARR)
1898 ENDIF
1899 IF ( i_am_slave ) THEN
1900 IF ( id%MYID_NODES .eq. master
1901 & .AND. keep(46) .eq. 1
1902 & .AND. keep(52) .eq. 0 ) THEN
1903C --------------------------
1904C Simple pointer association
1905C --------------------------
1906 id%DBLARR => id%A_ELT
1907 ELSE
1908C ----------
1909C Allocation
1910C ----------
1911 IF ( id%KEEP8(26) .ne. 0_8 ) THEN
1912 ALLOCATE( id%DBLARR( id%KEEP8(26) ), stat = allocok )
1913 IF ( allocok .GT. 0 ) THEN
1914 id%INFO(1) = -13
1915 CALL mumps_set_ierror(id%KEEP8(26), id%INFO(2))
1916 NULLIFY(id%DBLARR)
1917 GOTO 100
1918 END IF
1919 ELSE
1920 ALLOCATE( id%DBLARR(1), stat = allocok )
1921 IF ( allocok .GT. 0 ) THEN
1922 id%INFO(1) = -13
1923 id%INFO(2) = 1
1924 NULLIFY(id%DBLARR)
1925 GOTO 100
1926 END IF
1927 END IF
1928 END IF
1929 ELSE
1930 ALLOCATE( id%DBLARR(1), stat = allocok )
1931 IF ( allocok .GT. 0 ) THEN
1932 id%INFO(1) = -13
1933 id%INFO(2) = 1
1934 NULLIFY(id%DBLARR)
1935 GOTO 100
1936 END IF
1937 END IF
1938 END IF
1939C -----------------
1940C Also prepare some
1941C data for the root
1942C -----------------
1943 IF ( keep(38).NE.0 .AND. i_am_slave ) THEN
1944 CALL cmumps_init_root_fac( id%N,
1945 & id%root, id%FILS(1), keep(38), id%KEEP(1), id%INFO(1) )
1946 END IF
1947C
1948C
1949 100 CONTINUE
1950C ----------------
1951C Check for errors
1952C ----------------
1953 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
1954 & id%COMM, id%MYID )
1955 IF ( id%INFO(1).LT.0 ) GOTO 500
1956C
1957C -----------------------------------
1958C
1959C DISTRIBUTION OF THE ORIGINAL MATRIX
1960C
1961C -----------------------------------
1962C
1963C TIMINGS: computed (and printed) on the host
1964C Next line: global time for distrib(arrowheads,elts)
1965C on the host. Synchronization has been performed.
1966 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
1967C -------------------------------------------
1968C S_PTR_ARG / MAXS_ARG will be used for id%S
1969C argument to arrowhead/element distribution
1970C routines: if id%S is not allocated, we pass
1971C S_DUMMY_ARG instead, which is not accessed.
1972C -------------------------------------------
1973 IF (earlyt3rootins) THEN
1974 s_ptr_arg => id%S
1975 maxs_arg = maxs
1976 ELSE
1977 s_ptr_arg => s_dummy_arg
1978 maxs_arg = 1
1979 ENDIF
1980C
1981 IF ( keep( 55 ) .eq. 0 ) THEN
1982C ----------------------------
1983C Original matrix is assembled
1984C Arrowhead format to be used.
1985C ----------------------------
1986C KEEP8(26) and KEEP8(27) hold the number of entries for real/integer
1987C for the matrix in arrowhead format. They have been set by the
1988C analysis phase (CMUMPS_ANA_F and CMUMPS_ANA_G)
1989C
1990C ------------------------------------------------------------------
1991C Blocking is used for sending arrowhead records (I,J,VAL)
1992C buffer(1) is used to store number of bytes already packed
1993C buffer(2) number of records already packed
1994C KEEP(39) : Number of records (blocking factor)
1995C ------------------------------------------------------------------
1996C
1997C ---------------------------------------------
1998C In case of parallel root compute minimum
1999C size of workspace to receive arrowheads
2000C of root node. Will be used to check that
2001C MAXS is large enough for arrowheads (case
2002C of EARLYT3ROOTINS (KEEP(200)=0); if .NOT.
2003C EARLYT3ROOTINS (KEEP(200)=1), root will
2004C be assembled into id%S later and size of
2005C id%S will be checked later)
2006C ---------------------------------------------
2007 IF (earlyt3rootins .AND. keep(38).NE.0 .AND.
2008 & keep(60) .EQ.0 .AND. i_am_slave) THEN
2009 lwk = int(numroc( id%root%ROOT_SIZE, id%root%MBLOCK,
2010 & id%root%MYROW, 0, id%root%NPROW ),8)
2011 lwk = max( 1_8, lwk )
2012 lwk = lwk*
2013 & int(numroc( id%root%ROOT_SIZE, id%root%NBLOCK,
2014 & id%root%MYCOL, 0, id%root%NPCOL ),8)
2015 lwk = max( 1_8, lwk )
2016 ELSE
2017 lwk = 1_8
2018 ENDIF
2019C MAXS must be at least 1, and in case of
2020C parallel root, large enough to receive
2021C arrowheads of root.
2022 IF (maxs .LT. int(lwk,8)) THEN
2023 id%INFO(1) = -9
2024 CALL mumps_set_ierror(lwk, id%INFO(2))
2025 ENDIF
2026 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2027 & id%COMM, id%MYID )
2028 IF ( id%INFO(1).LT.0 ) GOTO 500
2029C
2030 IF ( keep(54) .eq. 0 ) THEN
2031C ================================================
2032C FIRST CASE : MATRIX IS NOT INITIALLY DISTRIBUTED
2033C ================================================
2034C A small integer workspace is needed to
2035C send the arrowheads.
2036 IF ( id%MYID .eq. master ) THEN
2037 ALLOCATE(iwk(id%N), stat=allocok)
2038 IF ( allocok .NE. 0 ) THEN
2039 id%INFO(1)=-13
2040 id%INFO(2)=id%N
2041 END IF
2042#if defined(LARGEMATRICES)
2043 ALLOCATE (wk(lwk),stat=ierr)
2044 IF ( ierr .GT. 0 ) THEN
2045 id%INFO(1) = -13
2046 CALL mumps_set_ierror(lwk, id%INFO(2))
2047 write(6,*) ' PB1 ALLOC LARGEMAT'
2048 ENDIF
2049#endif
2050 ENDIF
2051 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2052 & id%COMM, id%MYID )
2053 IF ( id%INFO(1).LT.0 ) GOTO 500
2054 IF ( id%MYID .eq. master ) THEN
2055C
2056C --------------------------------
2057C MASTER sends arowheads using the
2058C global communicator with ranks
2059C also in global communicator
2060C IWK is used as temporary
2061C workspace of size N.
2062C --------------------------------
2063 IF ( .not. associated( id%INTARR ) ) THEN
2064 ALLOCATE( id%INTARR( 1 ),stat=ierr)
2065 IF ( ierr .GT. 0 ) THEN
2066 id%INFO(1) = -13
2067 id%INFO(2) = 1
2068 NULLIFY(id%INTARR)
2069 write(6,*) ' PB2 ALLOC INTARR'
2070 CALL mumps_abort()
2071 ENDIF
2072 ENDIF
2073 nbrecords = keep(39)
2074 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2075 nbrecords = int(id%KEEP8(28))
2076 ENDIF
2077#if defined(LARGEMATRICES)
2078 CALL cmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2079 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2080 & lscal, id%COLSCA(1), id%ROWSCA(1),
2081 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2082 & nbrecords,
2083 & lp, id%COMM, id%root, keep,id%KEEP8,
2084 & id%FILS(1), iwk(1), ! workspace of size N
2085 &
2086 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2087 & id%PTRAR(1), id%PTRAR(id%N+1),
2088 & id%FRERE_STEPS(1), id%STEP(1), wk(1), lwk,
2089 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2090 & id%CANDIDATES(1,1))
2091C write(6,*) '!!! A,IRN,JCN are freed during factorization '
2092 DEALLOCATE (id%A)
2093 NULLIFY(id%A)
2094 DEALLOCATE (id%IRN)
2095 NULLIFY (id%IRN)
2096 DEALLOCATE (id%JCN)
2097 NULLIFY (id%JCN)
2098 IF (.NOT.wk_user_provided) THEN
2099 IF (earlyt3rootins) THEN
2100 ALLOCATE (id%S(maxs),stat=ierr)
2101 id%KEEP8(23) = maxs
2102 IF ( ierr .GT. 0 ) THEN
2103 id%INFO(1) = -13
2104 id%INFO(2) = maxs
2105 NULLIFY(id%S)
2106 id%KEEP8(23)=0_8
2107 write(6,*) ' PB2 ALLOC LARGEMAT',maxs
2108 CALL mumps_abort()
2109 ENDIF
2110 ENDIF
2111 ENDIF
2112 ELSE
2113 id%S => id%WK_USER(1:id%KEEP8(24))
2114 ENDIF
2115 IF (earlyt3rootins) THEN
2116 id%S(maxs-lwk+1_8:maxs) = wk(1_8:lwk)
2117 ENDIF
2118 DEALLOCATE (wk)
2119#else
2120 CALL cmumps_facto_send_arrowheads(id%N, id%KEEP8(28), id%A(1),
2121 & id%IRN(1), id%JCN(1), id%SYM_PERM(1),
2122 & lscal, id%COLSCA(1), id%ROWSCA(1),
2123 & id%MYID, id%NSLAVES, id%PROCNODE_STEPS(1),
2124 & nbrecords,
2125 & lp, id%COMM, id%root, keep(1),id%KEEP8(1),
2126 & id%FILS(1), iwk(1),
2127 &
2128 & id%INTARR(1), id%KEEP8(27), id%DBLARR(1), id%KEEP8(26),
2129 & id%PTRAR(1), id%PTRAR(id%N+1),
2130 & id%FRERE_STEPS(1), id%STEP(1), s_ptr_arg(1), maxs_arg,
2131 & id%ISTEP_TO_INIV2(1), id%I_AM_CAND(1),
2132 & id%CANDIDATES(1,1) )
2133#endif
2134 DEALLOCATE(iwk)
2135 ELSE
2136 nbrecords = keep(39)
2137 IF (id%KEEP8(28) .LT. int(nbrecords,8)) THEN
2138 nbrecords = int(id%KEEP8(28))
2139 ENDIF
2141 & id%DBLARR(1), id%KEEP8(26),
2142 & id%INTARR(1), id%KEEP8(27),
2143 & id%PTRAR( 1 ),
2144 & id%PTRAR(id%N+1),
2145 & keep( 1 ), id%KEEP8(1), id%MYID, id%COMM,
2146 & nbrecords,
2147 &
2148 & s_ptr_arg(1), maxs_arg,
2149 & id%root,
2150 & id%PROCNODE_STEPS(1), id%NSLAVES,
2151 & id%SYM_PERM(1), id%FRERE_STEPS(1), id%STEP(1),
2152 & id%INFO(1), id%INFO(2) )
2153 ENDIF
2154 ELSE
2155C
2156C =============================================
2157C SECOND CASE : MATRIX IS INITIALLY DISTRIBUTED
2158C =============================================
2159C Timing on master.
2160 IF (id%MYID.EQ.master) THEN
2161 CALL mumps_secdeb(time)
2162 END IF
2163 IF ( i_am_slave ) THEN
2164C ---------------------------------------------------
2165C In order to have possibly IRN_loc/JCN_loc/A_loc
2166C of size 0, avoid to pass them inside REDISTRIBUTION
2167C and pass id instead
2168C NZ_locMAX8 gives as a maximum buffer size (send/recv) used
2169C an upper bound to limit buffers on small matrices
2170C ---------------------------------------------------
2171 CALL mpi_allreduce(id%KEEP8(29), nz_locmax8, 1, mpi_integer8,
2172 & mpi_max, id%COMM_NODES, ierr)
2173 nbrecords = keep(39)
2174 IF (nz_locmax8 .LT. int(nbrecords,8)) THEN
2175 nbrecords = int(nz_locmax8)
2176 ENDIF
2177 CALL cmumps_redistribution( id%N,
2178 & id%KEEP8(29),
2179 & id,
2180 & id%DBLARR(1), id%KEEP8(26), id%INTARR(1),
2181 & id%KEEP8(27), id%PTRAR(1), id%PTRAR(id%N+1),
2182 & keep(1), id%KEEP8(1), id%MYID_NODES,
2183 & id%COMM_NODES, nbrecords,
2184 & s_ptr_arg(1), maxs_arg, id%root, id%PROCNODE_STEPS(1),
2185 & id%NSLAVES, id%SYM_PERM(1), id%STEP(1),
2186 & id%ICNTL(1), id%INFO(1), nsend8, nlocal8,
2187 & id%ISTEP_TO_INIV2(1),
2188 & id%CANDIDATES(1,1) )
2189 IF ( ( keep(52).EQ.7 ).OR. (keep(52).EQ.8) ) THEN
2190C -------------------------------------------------
2191C In that case, scaling arrays have been allocated
2192C on all processors. They were useful for matrix
2193C distribution. But we now really only need them
2194C on the host. In case of distributed solution, we
2195C will have to broadcast either ROWSCA or COLSCA
2196C (depending on MTYPE) but this is done later.
2197C
2198C In other words, on exit from the factorization,
2199C we want to have scaling arrays available only
2200C on the host.
2201C -------------------------------------------------
2202 IF ( id%MYID > 0 ) THEN
2203 IF (associated(id%ROWSCA)) THEN
2204 DEALLOCATE(id%ROWSCA)
2205 NULLIFY(id%ROWSCA)
2206 ENDIF
2207 IF (associated(id%COLSCA)) THEN
2208 DEALLOCATE(id%COLSCA)
2209 NULLIFY(id%COLSCA)
2210 ENDIF
2211 ENDIF
2212 ENDIF
2213#if defined(LARGEMATRICES)
2214C deallocate id%IRN_loc, id%JCN(loc) to free extra space
2215C Note that in this case IRN_loc cannot be used
2216C anymore during the solve phase for IR and Error analysis.
2217 IF (associated(id%IRN_loc)) THEN
2218 DEALLOCATE(id%IRN_loc)
2219 NULLIFY(id%IRN_loc)
2220 ENDIF
2221 IF (associated(id%JCN_loc)) THEN
2222 DEALLOCATE(id%JCN_loc)
2223 NULLIFY(id%JCN_loc)
2224 ENDIF
2225 IF (associated(id%A_loc)) THEN
2226 DEALLOCATE(id%A_loc)
2227 NULLIFY(id%A_loc)
2228 ENDIF
2229 write(6,*) ' Warning :',
2230 & ' id%A_loc, IRN_loc, JCN_loc deallocated !!! '
2231#endif
2232 IF (prok) THEN
2233 WRITE(mp,120) nlocal8, nsend8
2234 END IF
2235 END IF
2236 IF ( keep(46) .eq. 0 .AND. id%MYID.eq.master ) THEN
2237C ------------------------------
2238C The host is not working -> had
2239C no data from initial matrix
2240C ------------------------------
2241 nsend8 = 0_8
2242 nlocal8 = 0_8
2243 END IF
2244C --------------------------
2245C Put into some info/infog ?
2246C --------------------------
2247 CALL mpi_reduce( nsend8, nsend_tot8, 1, mpi_integer8,
2248 & mpi_sum, master, id%COMM, ierr )
2249 CALL mpi_reduce( nlocal8, nlocal_tot8, 1, mpi_integer8,
2250 & mpi_sum, master, id%COMM, ierr )
2251 IF ( prokg ) THEN
2252 WRITE(mpg,125) nlocal_tot8, nsend_tot8
2253 END IF
2254C
2255C -------------------------
2256C Check for possible errors
2257C -------------------------
2258 CALL mumps_propinfo( icntl(1), id%INFO(1),
2259 & id%COMM, id%MYID )
2260 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2261C
2262 ENDIF
2263 ELSE
2264C -------------------
2265C Matrix is elemental,
2266C provided on the
2267C master only
2268C -------------------
2269 IF ( id%MYID.eq.master)
2270 & CALL cmumps_maxelt_size( id%ELTPTR(1),
2271 & id%NELT,
2272 & maxelt_size )
2273C
2274C Perform the distribution of the elements.
2275C A this point,
2276C PTRAIW/PTRARW have been computed.
2277C INTARR/DBLARR have been allocated
2278C ELTPROC gives the mapping of elements
2279C
2280 CALL cmumps_elt_distrib( id%N, id%NELT, id%KEEP8(30),
2281 & id%COMM, id%MYID,
2282 & id%NSLAVES, id%PTRAR(1),
2283 & id%PTRAR(id%NELT+2),
2284 & id%INTARR(1), id%DBLARR(1), id%KEEP8(27), id%KEEP8(26),
2285 & id%KEEP(1), id%KEEP8(1), maxelt_size,
2286 & id%FRTPTR(1), id%FRTELT(1),
2287 & s_ptr_arg(1), maxs_arg, id%FILS(1),
2288 & id, id%root )
2289C ----------------
2290C Broadcast errors
2291C ----------------
2292 CALL mumps_propinfo( icntl(1), id%INFO(1),
2293 & id%COMM, id%MYID )
2294 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2295 END IF ! Element entry
2296C ------------------------
2297C Time the redistribution:
2298C ------------------------
2299 IF ( id%MYID.EQ.master) THEN
2300 CALL mumps_secfin(time)
2301 id%DKEEP(93) = real(time)
2302 IF (prokg) WRITE(mpg,160) id%DKEEP(93)
2303 END IF
2304 IF ( keep(400) .GT. 0 ) THEN
2305C L0-OMP was active at analysis and
2306C thus will be active at factorization
2307C We check the number of threads.
2308 nomp=1
2309!$ NOMP = omp_get_max_threads()
2310 IF ( nomp .NE. keep(400) ) THEN
2311 id%INFO(1)=-58
2312 id%INFO(2)=keep(400)
2313 IF (lpok) WRITE(lp,'(A,A,I5,A,I5)')
2314 &" FAILURE DETECTED IN FACTORIZATION: #threads for KEEP(401)",
2315 &" changed from",keep(400)," at analysis to", nomp
2316 ENDIF
2317C error check
2318 CALL mumps_propinfo( icntl(1), id%INFO(1),
2319 & id%COMM, id%MYID )
2320 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2321 ENDIF
2322C
2323C TIMINGS:
2324C Next line: elapsed time for factorization
2325 IF (id%MYID.EQ.master) CALL mumps_secdeb(time)
2326C
2327C Allocate buffers on the workers
2328C ===============================
2329C
2330 IF ( i_am_slave ) THEN
2331 CALL cmumps_buf_ini_myid(id%MYID_NODES)
2332C
2333C Some buffers are required to pack/unpack data and for
2334C receiving MPI messages.
2335C For packing/unpacking : the buffer must be large
2336C enough to send several messages while receives might not
2337C be posted yet.
2338C It is assumed that the size of an integer is held in KEEP(34)
2339C while the size of a complex is held in KEEP(35).
2340C BUFR and LBUFR are declared of type integer, since byte is not
2341C a standard datatype.
2342C We now use KEEP(43) or KEEP(379) and KEEP(44) or KEEP(380)
2343C as estimated at analysis to allocate appropriate buffer sizes
2344C
2345C Reception buffer
2346C ----------------
2347 IF (keep(486).NE.0) THEN
2348 cmumps_lbufr_bytes8 = int(keep( 380 ),8) * int(keep( 35 ),8)
2349 ELSE
2350 cmumps_lbufr_bytes8 = int(keep( 44 ),8) * int(keep( 35 ),8)
2351 ENDIF
2352C ---------------------------------------
2353C Ensure a reasonable minimal buffer size
2354C ---------------------------------------
2355 cmumps_lbufr_bytes8 = max( cmumps_lbufr_bytes8,
2356 & 100000_8 )
2357 IF ((keep(50).NE.0).AND.(keep(489).GT.0).AND.
2358 & (id%NSLAVES.GE.2)) THEN
2359C ----------------------------------------------------------
2360C Ensure large enough receive buffer in case of BLR with
2361C CompressCB for symmetric matrices.
2362C -----------------------------------------------------------
2363 ratiok465 = real(keep465copy)/real(1000)
2364 cmumps_lbufr_bytes8 = max(cmumps_lbufr_bytes8,
2365 & int(
2366 & ratiok465*
2367 & real(
2368 & int(keep(2)+1,8)*int(keep(142),8)*int(keep(35),8)
2369 & )
2370 & ,8)
2371 & )
2372 ENDIF
2373C
2374C If there is pivoting, size of the message might still increase.
2375C We use a relaxation (so called PERLU) to increase the estimate.
2376C
2377C Note: PERLU is a global estimate for pivoting.
2378C It may happen that one large contribution block size is increased
2379C by more than that.
2380C This is why we use an extra factor 2 relaxation coefficient for
2381C the relaxation of
2382C the reception buffer in the case where pivoting is allowed.
2383C A more dynamic strategy could be applied: if message to
2384C be received is larger than expected, reallocate a larger
2385C buffer. (But this won't work with IRECV.)
2386C Finally, one may want (as we are currently doing it for
2387C most messages)
2388C to cut large messages into a series of smaller ones.
2389C
2390 IF (keep(48).EQ.5) THEN
2391 min_perlu = 2
2392 ELSE
2393 min_perlu = 0
2394 ENDIF
2395C
2396 cmumps_lbufr_bytes8 = cmumps_lbufr_bytes8
2397 & + int( 2.0e0 * real(max(perlu,min_perlu))*
2398 & real(cmumps_lbufr_bytes8)/100e0, 8)
2399 cmumps_lbufr_bytes8 = min(cmumps_lbufr_bytes8,
2400 & int(huge(i4)-100,8))
2401 cmumps_lbufr_bytes = int( cmumps_lbufr_bytes8 )
2402 IF (keep(48)==5) THEN
2403C Since the buffer is going to be allocated, use
2404C it as the constraint for memory/granularity
2405C in hybrid scheduler
2406C
2407 id%KEEP8(21) = id%KEEP8(22) +
2408 & int( real(max(perlu,min_perlu))*
2409 & real(id%KEEP8(22))/100e0,8)
2410 ENDIF
2411C
2412C Now estimate the size for the buffer for asynchronous
2413C sends of contribution blocks (so called CB). We want to be able to send at
2414C least KEEP(213)/100 (two in general) messages at the
2415C same time.
2416C
2417C Send buffer
2418C -----------
2419 IF (keep(486).NE.0) THEN
2420 cmumps_lbuf8 = int( real(keep(213)) / 100.0e0 *
2421 & real(keep(379)) * real(keep(35)), 8 )
2422 ELSE
2423 cmumps_lbuf8 = int( real(keep(213)) / 100.0e0 *
2424 & real(keep(43)) * real(keep(35)), 8 )
2425 ENDIF
2426 cmumps_lbuf8 = max( cmumps_lbuf8, 100000_8 )
2427 cmumps_lbuf8 = cmumps_lbuf8
2428 & + int( 2.0e0 * real(max(perlu,min_perlu))*
2429 & real(cmumps_lbuf8)/100e0, 8)
2430C Make CMUMPS_LBUF8 small enough to be stored in a standard integer
2431 cmumps_lbuf8 = min(cmumps_lbuf8, int(huge(i4)-100,8))
2432C
2433C No reason to have send buffer smaller than receive buffer.
2434C This should never occur with the formulas above but just
2435C in case:
2436 cmumps_lbuf8 = max(cmumps_lbuf8, cmumps_lbufr_bytes8+3*keep(34))
2437 cmumps_lbuf = int(cmumps_lbuf8)
2438 IF(id%KEEP(48).EQ.4)THEN
2439 cmumps_lbufr_bytes=cmumps_lbufr_bytes*5
2440 cmumps_lbuf=cmumps_lbuf*5
2441 ENDIF
2442C
2443C Estimate size of buffer for small messages
2444C Each node can send ( NSLAVES - 1 ) messages to (NSLAVES-1) nodes
2445C
2446C KEEP(56) is the number of nodes of level II.
2447C Messages will be sent for the symmetric case
2448C for synchronisation issues.
2449C
2450C We take an upperbound
2451C
2452 cmumps_lbuf_int = ( keep(56) + id%NSLAVES * id%NSLAVES ) * 5
2453 & * keep(34)
2454 IF ( keep( 38 ) .NE. 0 ) THEN
2455C
2456C
2457 kkkk = mumps_procnode( id%PROCNODE_STEPS(id%STEP(keep(38))),
2458 & id%KEEP(199) )
2459 IF ( kkkk .EQ. id%MYID_NODES ) THEN
2460 cmumps_lbuf_int = cmumps_lbuf_int + 4 * keep(34) *
2461 & ( id%NSLAVES + id%NE_STEPS(id%STEP(keep(38)))
2462 & + min(keep(56), id%NE_STEPS(id%STEP(keep(38)))) * id%NSLAVES
2463 & )
2464 END IF
2465 END IF
2466C At this point, CMUMPS_LBUFR_BYTES, CMUMPS_LBUF
2467C and CMUMPS_LBUF_INT have been computed (all
2468C are in numbers of bytes).
2469 IF ( prok ) THEN
2470 WRITE( mp, 9999 ) cmumps_lbufr_bytes,
2471 & cmumps_lbuf, cmumps_lbuf_int
2472 END IF
2473 9999 FORMAT( /,' Allocated buffers',/,' ------------------',/,
2474 & ' Size of reception buffer in bytes ...... = ', i10,
2475 & /,
2476 & ' Size of async. emission buffer (bytes).. = ', i10,/,
2477 & ' Small emission buffer (bytes) .......... = ', i10)
2478C --------------------------
2479C Allocate small send buffer
2480C required for CMUMPS_FAC_B
2481C --------------------------
2482 CALL cmumps_buf_alloc_small_buf( cmumps_lbuf_int, ierr )
2483 IF ( ierr .NE. 0 ) THEN
2484 id%INFO(1)= -13
2485C convert to size in integer id%INFO(2)= CMUMPS_LBUF_INT
2486 id%INFO(2)= (cmumps_lbuf_int+keep(34)-1)/keep(34)
2487 IF (lpok) THEN
2488 WRITE(lp,*) id%MYID,
2489 & ':Allocation error in CMUMPS_BUF_ALLOC_SMALL_BUF'
2490 & ,id%INFO(2)
2491 ENDIF
2492 GO TO 110
2493 END IF
2494C
2495C --------------------------------------
2496C Allocate reception buffer on all procs
2497C This is done now.
2498C --------------------------------------
2499 cmumps_lbufr = (cmumps_lbufr_bytes+keep(34)-1)/keep(34)
2500 ALLOCATE( bufr( cmumps_lbufr ),stat=ierr )
2501 IF ( ierr .NE. 0 ) THEN
2502 id%INFO(1) = -13
2503 id%INFO(2) = cmumps_lbufr
2504 IF (lpok) THEN
2505 WRITE(lp,*)
2506 & ': Allocation error for BUFR(', cmumps_lbufr,
2507 & ') on MPI process',id%MYID
2508 ENDIF
2509 GO TO 110
2510 END IF
2511C -----------------------------------------
2512C Estimate MAXIS. IS will be allocated in
2513C CMUMPS_FAC_B. It will contain factors and
2514C contribution blocks integer information
2515C -----------------------------------------
2516C Relax integer workspace based on PERLU
2517 perlu = keep( 12 )
2518 IF (keep(201).GT.0) THEN
2519C OOC panel or non panel (note that
2520C KEEP(15)=KEEP(225) if non panel)
2521 maxis_estim = keep(225)
2522 ELSE
2523C In-core or reals for factors not stored
2524 maxis_estim = keep(15)
2525 ENDIF
2526 maxis = max( 1, int( min( int(huge(maxis),8),
2527 & int(maxis_estim,8) + 3_8 * max(int(perlu,8),10_8) *
2528 & ( int(maxis_estim,8) / 100_8 + 1_8 )
2529 & ) ! min
2530 & ) ! int
2531 & ) !max
2532C ----------------------------
2533C Allocate PTLUST_S and PTRFAC
2534C They will be used to access
2535C factors in the solve phase.
2536C They are also needed for
2537C CMUMPS_FAC_L0_OMP.
2538C ----------------------------
2539 ALLOCATE( id%PTLUST_S( id%KEEP(28) ), stat = ierr )
2540 IF ( ierr .NE. 0 ) THEN
2541 id%INFO(1)=-13
2542 id%INFO(2)=id%KEEP(28)
2543 IF (lpok) THEN
2544 WRITE(lp,*) id%MYID,
2545 & ': Allocation error for id%PTLUST_S(', id%KEEP(28),')'
2546 ENDIF
2547 NULLIFY(id%PTLUST_S)
2548 GOTO 110
2549 END IF
2550 ALLOCATE( id%PTRFAC( id%KEEP(28) ), stat = ierr )
2551 IF ( ierr .NE. 0 ) THEN
2552 id%INFO(1)=-13
2553 id%INFO(2)=id%KEEP(28)
2554 NULLIFY(id%PTRFAC)
2555 IF (lpok) THEN
2556 WRITE(lp,*) id%MYID,
2557 & ': Allocation error for id%PTRFAC(', id%KEEP(28),')'
2558 ENDIF
2559 GOTO 110
2560 END IF
2561C -----------------------------
2562C Reserve temporary workspace :
2563C IPOOL, PTRWB, ITLOC, PTRIST
2564C PTRWB will be subdivided again
2565C in routine CMUMPS_FAC_B
2566C -----------------------------
2567 ptrist = 1
2568 ptrwb = ptrist + id%KEEP(28)
2569 itloc = ptrwb + 2 * id%KEEP(28)
2570C Fwd in facto: ITLOC of size id%N + id%KEEP(253)
2571 ipool = itloc + id%N + id%KEEP(253)
2572C
2573C --------------------------------
2574C NA(1) is an upperbound for LPOOL
2575C --------------------------------
2576C Structure of the pool:
2577C ____________________________________________________
2578C | Subtrees | | Top nodes | 1 2 3 |
2579C ----------------------------------------------------
2580 lpool = mumps_get_pool_length(id%NA(1), id%KEEP(1),id%KEEP8(1))
2581 ALLOCATE( iwk( ipool + lpool - 1 ), stat = ierr )
2582 IF ( ierr .NE. 0 ) THEN
2583 id%INFO(1)=-13
2584 id%INFO(2)=ipool + lpool - 1
2585 IF (lpok) THEN
2586 WRITE(lp,*) id%MYID,
2587 & ': Allocation error for IWK(',ipool+lpool-1,')'
2588 ENDIF
2589 GOTO 110
2590 END IF
2591 ALLOCATE(iwk8( 2 * id%KEEP(28)), stat = ierr)
2592 IF ( ierr .NE. 0 ) THEN
2593 id%INFO(1)=-13
2594 id%INFO(2)=2 * id%KEEP(28)
2595 IF (lpok) THEN
2596 WRITE(lp,*) id%MYID,
2597 & ': Allocation error for IWKB(', 2*id%KEEP(28),')'
2598 ENDIF
2599 GOTO 110
2600 END IF
2601C
2602C Return to SPMD
2603C
2604 ENDIF
2605C
2606 110 CONTINUE
2607C ----------------
2608C Broadcast errors
2609C ----------------
2610 CALL mumps_propinfo( icntl(1), id%INFO(1),
2611 & id%COMM, id%MYID )
2612 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2613C
2614 IF ( i_am_slave ) THEN
2615C Store size of receive buffers in CMUMPS_LBUF module
2616 CALL cmumps_buf_dist_irecv_size( cmumps_lbufr_bytes )
2617 IF (prok) THEN
2618 WRITE( mp, 170 ) maxs, maxis, id%KEEP8(12), keep(15),
2619 & id%KEEP8(26), id%KEEP8(27), id%KEEP8(11), keep(26), keep(27)
2620 ENDIF
2621 END IF
2622C ===============================================================
2623C Before calling the main driver, CMUMPS_FAC_B,
2624C some statistics should be initialized to 0,
2625C even on the host node because they will be
2626C used in REDUCE operations afterwards.
2627C --------------------------------------------
2628C Size of factors written. It will be set to POSFAC in
2629C IC, otherwise we accumulate written factors in it.
2630 id%KEEP8(31)= 0_8
2631C Size of factors under L0 will be returned
2632C in id%KEEP8(64), not included in KEEP8(31))
2633C Number of entries in factors
2634 id%KEEP8(10) = 0_8
2635C KEEP8(8) will hold the volume of extra copies due to
2636C in-place stacking in fac_mem_stack.F
2637 id%KEEP8(8)=0_8
2638 id%INFO(9:14)=0
2639 rinfo(2:3)=zero
2640 IF ( i_am_slave ) THEN
2641C ------------------------------------
2642C Call effective factorization routine
2643C ------------------------------------
2644 IF ( keep(55) .eq. 0 ) THEN
2645 ldptrar = id%N
2646 ELSE
2647 ldptrar = id%NELT + 1
2648 END IF
2649 IF ( id%KEEP(55) .NE. 0 ) THEN
2650 nelt_arg = id%NELT
2651 ELSE
2652C ------------------------------
2653C Use size 1 to avoid complaints
2654C when using check bound options
2655C ------------------------------
2656 nelt_arg = 1
2657 END IF
2658 ENDIF
2659 IF (i_am_slave) THEN
2660 IF (associated(id%L0_OMP_MAPPING))
2661 & DEALLOCATE(id%L0_OMP_MAPPING)
2662 IF (keep(400) .GT. 0) THEN
2663 id%LL0_OMP_MAPPING = keep(28)
2664 ELSE
2665 id%LL0_OMP_MAPPING = 1
2666 ENDIF
2667 ALLOCATE(id%L0_OMP_MAPPING(id%LL0_OMP_MAPPING), stat=allocok)
2668 IF ( allocok > 0) THEN
2669 write(*,*) "Problem allocating L0_OMP_MAPPING",
2670 & ierr, keep(28)
2671 GOTO 115
2672 ENDIF
2673 IF (associated(id%L0_OMP_FACTORS)) THEN
2674 CALL cmumps_free_l0_omp_factors(id%L0_OMP_FACTORS)
2675 ENDIF
2676 IF (keep(400) .GT. 0) THEN
2677 id%LL0_OMP_FACTORS = keep(400)
2678 ELSE
2679 id%LL0_OMP_FACTORS = 1
2680 ENDIF
2681 ALLOCATE(id%L0_OMP_FACTORS(id%LL0_OMP_FACTORS),stat = allocok)
2682 IF (allocok > 0) THEN
2683 id%INFO(1)=-7
2684 id%INFO(2)=nb_threads
2685 GOTO 111
2686 ENDIF
2687 CALL cmumps_init_l0_omp_factors(id%L0_OMP_FACTORS)
2688 ENDIF
2689 115 CONTINUE
2690 CALL mumps_propinfo( icntl(1), id%INFO(1),
2691 & id%COMM, id%MYID )
2692 IF ( id%INFO( 1 ) .LT. 0 ) GOTO 500
2693C Compute DKEEP(17)
2694 avg_flops = rinfog(1)/(real(id%NSLAVES))
2695 id%DKEEP(17) = max( id%DKEEP(18), avg_flops/real(50) )
2696 &
2697 IF (prok.AND.id%MYID.EQ.master) THEN
2698 IF (id%NSLAVES.LE.1) THEN
2699 WRITE(mp,'(/A,A,1PD10.3)')
2700 &' Start factorization with total',
2701 &' estimated flops (RINFOG(1)) = ',
2702 & rinfog(1)
2703 ELSE
2704 WRITE(mp,'(/A,A,1PD10.3,A,1PD10.3)')
2705 &' Start factorization with total',
2706 &' estimated flops RINFOG(1) / Average per MPI proc = ',
2707 & rinfog(1), ' / ', avg_flops
2708 ENDIF
2709 ENDIF
2710 IF (i_am_slave) THEN
2711C IS/S pointers passed to CMUMPS_FAC_B with
2712C implicit interface through intermediate
2713C structure S_IS_POINTERS. IS will be allocated
2714C during CMUMPS_FAC_B.
2715C In case of L0OMP, id%IS and id%S are allocated during
2716C CMUMPS_FAC_B, and only after L0OMP nodes are processed,
2717C in order to limit the global memory peak.
2718 s_is_pointers%IW => id%IS; NULLIFY(id%IS)
2719 s_is_pointers%A => id%S ; NULLIFY(id%S)
2720 CALL cmumps_fac_b(id%N,s_is_pointers,maxs,maxis,id%SYM_PERM(1),
2721 & id%NA(1),id%LNA,id%NE_STEPS(1),id%ND_STEPS(1), id%FILS(1),
2722 & id%STEP(1),id%FRERE_STEPS(1),id%DAD_STEPS(1),id%CANDIDATES(1,1),
2723 & id%ISTEP_TO_INIV2(1),id%TAB_POS_IN_PERE(1,1), id%PTRAR(1),
2724 & ldptrar,iwk(ptrist),id%PTLUST_S(1),id%PTRFAC(1),iwk(ptrwb),iwk8,
2725 & iwk(itloc),rhs_mumps(1),iwk(ipool),lpool,cntl1,icntl(1),
2726 & id%INFO(1), rinfo(1),keep(1),id%KEEP8(1),id%PROCNODE_STEPS(1),
2727 & id%NSLAVES,id%COMM_NODES,id%MYID,id%MYID_NODES,bufr,cmumps_lbufr
2728 & , cmumps_lbufr_bytes, cmumps_lbuf, id%INTARR(1),id%DBLARR(1),
2729 & id%root, nelt_arg, id%FRTPTR(1), id%FRTELT(1),id%COMM_LOAD,
2730 & id%ASS_IRECV, seuil, seuil_ldlt_niv2, id%MEM_DIST(0),
2731 & id%DKEEP(1), id%PIVNUL_LIST(1), lpn_list, id%LRGROUPS(1)
2732 & ,id%IPOOL_B_L0_OMP(1),id%LPOOL_B_L0_OMP,
2733 & id%IPOOL_A_L0_OMP(1),id%LPOOL_A_L0_OMP,id%L_VIRT_L0_OMP,
2734 & id%VIRT_L0_OMP(1), id%VIRT_L0_OMP_MAPPING(1),id%L_PHYS_L0_OMP,
2735 & id%PHYS_L0_OMP(1), id%PERM_L0_OMP(1), id%PTR_LEAFS_L0_OMP(1),
2736 & id%L0_OMP_MAPPING(1),id%LL0_OMP_MAPPING,
2737 & id%THREAD_LA, id%L0_OMP_FACTORS(1), id%LL0_OMP_FACTORS,
2738 & id%I4_L0_OMP(1,1), size(id%I4_L0_OMP,1), size(id%I4_L0_OMP,2),
2739 & id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1), size(id%I8_L0_OMP,2)
2740 & )
2741 id%IS => s_is_pointers%IW; NULLIFY(s_is_pointers%IW)
2742 id%S => s_is_pointers%A ; NULLIFY(s_is_pointers%A)
2743C
2744C ------------------------------
2745C Deallocate temporary workspace
2746C ------------------------------
2747 DEALLOCATE( iwk )
2748 DEALLOCATE( iwk8 )
2749 ENDIF
2750C ---------------------------------
2751C Free some workspace corresponding
2752C to the original matrix in
2753C arrowhead or elemental format.
2754C -----
2755C Note : INTARR was not allocated
2756C during factorization in the case
2757C of an assembled matrix.
2758C ---------------------------------
2759 IF ( keep(55) .eq. 0 ) THEN
2760C
2761C ----------------
2762C Assembled matrix
2763C ----------------
2764 IF (associated( id%DBLARR)) THEN
2765 DEALLOCATE(id%DBLARR)
2766 NULLIFY(id%DBLARR)
2767 ENDIF
2768C
2769 ELSE
2770C
2771C ----------------
2772C Elemental matrix
2773C ----------------
2774 IF (associated(id%INTARR)) THEN
2775 DEALLOCATE( id%INTARR)
2776 NULLIFY( id%INTARR )
2777 ENDIF
2778C ------------------------------------
2779C For the master from an hybrid host
2780C execution without scaling, then real
2781C values have not been copied !
2782C -------------------------------------
2783 IF ( id%MYID_NODES .eq. master
2784 & .AND. keep(46) .eq. 1
2785 & .AND. keep(52) .eq. 0 ) THEN
2786 NULLIFY( id%DBLARR )
2787 ELSE
2788 IF (associated( id%DBLARR)) THEN
2789 DEALLOCATE(id%DBLARR)
2790 NULLIFY(id%DBLARR)
2791 ENDIF
2792 END IF
2793 END IF
2794C Memroy statistics
2795C -----------------------------------
2796C If QR (Keep(19)) is not zero, and if
2797C the host does not have the information
2798C (ie is not slave), send information
2799C computed on the slaves during facto
2800C to the host.
2801C -----------------------------------
2802 IF ( keep(19) .NE. 0 ) THEN
2803 IF ( keep(46) .NE. 1 ) THEN
2804C Host was not working during facto_root
2805C Send him the information
2806 IF ( id%MYID .eq. master ) THEN
2807 CALL mpi_recv( keep(17), 1, mpi_integer, 1, defic_tag,
2808 & id%COMM, status, ierr )
2809 CALL mpi_recv( keep(143), 1, mpi_integer, 1, defic_tag,
2810 & id%COMM, status, ierr )
2811 ELSE IF ( id%MYID .EQ. 1 ) THEN
2812 CALL mpi_send( keep(17), 1, mpi_integer, 0, defic_tag,
2813 & id%COMM, ierr )
2814 CALL mpi_send( keep(143), 1, mpi_integer, 0, defic_tag,
2815 & id%COMM, ierr )
2816 END IF
2817 END IF
2818 END IF
2819C --------------------------------
2820C Deallocate communication buffers
2821C They will be reallocated
2822C in the solve.
2823C --------------------------------
2824 IF (allocated(bufr)) DEALLOCATE(bufr)
2825 CALL cmumps_buf_deall_small_buf( ierr )
2826C//PIV
2827 IF (keep(219).NE.0) THEN
2829 ENDIF
2830C
2831C Check for errors.
2832C After CMUMPS_FAC_B every slave is aware of an error.
2833C If master is included in computations, the call below should
2834C not be necessary.
2835 CALL mumps_propinfo( icntl(1), id%INFO(1),
2836 & id%COMM, id%MYID )
2837C
2839 IF (keep(201) .GT. 0) THEN
2840 IF ((keep(201).EQ.1) .OR. (keep(201).EQ.2)) THEN
2841 IF ( i_am_slave ) THEN
2842 CALL cmumps_ooc_clean_pending(ierr)
2843 IF(ierr.LT.0)THEN
2844 id%INFO(1)=ierr
2845 id%INFO(2)=0
2846 ENDIF
2847 ENDIF
2848 CALL mumps_propinfo( id%ICNTL(1), id%INFO(1),
2849 & id%COMM, id%MYID )
2850C We want to collect statistics even in case of
2851C error to understand if it is due to numerical
2852C issues
2853CC IF ( id%INFO(1) < 0 ) GOTO 500
2854 END IF
2855 END IF
2856 IF (id%MYID.EQ.master) THEN
2857 CALL mumps_secfin(time)
2858 id%DKEEP(94)=real(time)
2859 IF (keep(400).GT.0) THEN
2860C Facto time above L0_OMP = total time - facto time under L0_OMP
2861 id%DKEEP(96)=id%DKEEP(94)-id%DKEEP(95)
2862 ENDIF
2863 ENDIF
2864C =====================================================================
2865C COMPUTE MEMORY ALLOCATED BY MUMPS, INFO(16)
2866C ---------------------------------------------
2867 mem_eff_allocated = .true.
2868 CALL cmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2869 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2870 & id%KEEP8(30),
2871 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2872 & blr_strat, .true., total_bytes,
2873 & idummy, bdummy, mem_eff_allocated
2874 & , .false. ! UNDER_L0_OMP
2875 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2876 & size(id%I8_L0_OMP,2)
2877 & )
2878 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2879 CALL cmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2880 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2881 & id%KEEP8(30),
2882 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2883 & blr_strat, .true., total_bytes_under_l0,
2884 & idummy, bdummy, mem_eff_allocated
2885 & , .true. ! UNDER_L0_OMP
2886 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2887 & size(id%I8_L0_OMP,2)
2888 & )
2889 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2890 total_bytes = max(total_bytes, total_bytes_under_l0)
2891 ENDIF
2892 IF (id%KEEP8(24).NE.0) THEN
2893C WK_USER is not part of memory allocated by MUMPS
2894C and is not counted, id%KEEP8(23) should be zero
2895 id%INFO(16) = total_mbytes
2896 ELSE
2897C Note that even for the case of ICNTL(23)>0
2898C we report here the memory effectively allocated
2899C that can be smaller than ICNTL(23) !
2900 id%INFO(16) = total_mbytes
2901 ENDIF
2902C ----------------------------------------------------
2903C Centralize memory statistics on the host
2904C id%INFOG(18) = size of mem in Mbytes for facto,
2905C for the processor using largest memory
2906C id%INFOG(19) = size of mem in Mbytes for facto,
2907C sum over all processors
2908C ----------------------------------------------------
2909 CALL mumps_mem_centralize( id%MYID, id%COMM,
2910 & id%INFO(16), id%INFOG(18), irank )
2911 CALL cmumps_print_allocated_mem( prok, prokg, print_maxavg,
2912 & mp, mpg, id%INFO(16), id%INFOG(18), id%INFOG(19),
2913 & id%NSLAVES, irank,
2914 & id%KEEP(1) )
2915C If WK_USER is provided, this excludes WK_USER
2916 IF (prok ) THEN
2917 WRITE(mp,'(A,I12) ')
2918 & ' ** Eff. min. Space MBYTES for facto (INFO(16)):',
2919 & total_mbytes
2920 ENDIF
2921C ========================(INFO(16) RELATED)======================
2922C ---------------------------------------
2923C COMPUTE EFFECTIVE MEMORY USED INFO(22)
2924C ---------------------------------------
2925 perlu_on = .true.
2926 mem_eff_allocated = .false.
2927 CALL cmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2928 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2929 & id%KEEP8(30),
2930 & id%NSLAVES, total_mbytes, .true., id%KEEP(201),
2931 & blr_strat, perlu_on, total_bytes,
2932 & idummy, bdummy, mem_eff_allocated
2933 & , .false. ! UNDER_L0_OMP
2934 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2935 & size(id%I8_L0_OMP,2)
2936 & )
2937 IF (keep(400) .GT. 0 ) THEN ! L0 activated
2938 CALL cmumps_max_mem( id%KEEP(1),id%KEEP8(1),
2939 & id%MYID, n, id%NELT, id%NA(1), id%LNA, id%KEEP8(28),
2940 & id%KEEP8(30),
2941 & id%NSLAVES, total_mbytes_under_l0, .true., id%KEEP(201),
2942 & blr_strat, perlu_on, total_bytes_under_l0,
2943 & idummy, bdummy, mem_eff_allocated
2944 & , .true. ! UNDER_L0_OMP
2945 & , id%I8_L0_OMP(1,1), size(id%I8_L0_OMP,1),
2946 & size(id%I8_L0_OMP,2)
2947 & )
2948 total_mbytes = max(total_mbytes,total_mbytes_under_l0)
2949 total_bytes = max(total_bytes, total_bytes_under_l0)
2950 ENDIF
2951C -- TOTAL_BYTES and TOTAL_MBYTES includes both static
2952C -- (MAXS) and BLR structures computed as the SUM of the PEAKS
2953C -- (KEEP8(67) + KEEP8(70))
2954 id%KEEP8(7) = total_bytes
2955C -- INFO(22) holds the effective space (in Mbytes) used by MUMPS
2956C -- (it includes part of WK_USER used if provided by user)
2957 id%INFO(22) = total_mbytes
2958C ----------------------------------------------------
2959C Centralize memory statistics on the host
2960C INFOG(21) = size of effective mem (Mbytes) for facto,
2961C for the processor using largest memory
2962C INFOG(22) = size of effective mem (Mbytes) for facto,
2963C sum over all processors
2964C ----------------------------------------------------
2965 CALL mumps_mem_centralize( id%MYID, id%COMM,
2966 & id%INFO(22), id%INFOG(21), irank )
2967 IF ( prokg ) THEN
2968 IF (print_maxavg) THEN
2969 WRITE( mpg,'(A,I12) ')
2970 & ' ** Memory effectively used, max in Mbytes (INFOG(21)):',
2971 & id%INFOG(21)
2972 ENDIF
2973 WRITE( mpg,'(A,I12) ')
2974 & ' ** Memory effectively used, total in Mbytes (INFOG(22)):',
2975 & id%INFOG(22)
2976 END IF
2977 sum_info22_this_node=0
2978 CALL mpi_reduce( id%INFO(22), sum_info22_this_node, 1,
2979 & mpi_integer,
2980 & mpi_sum, 0, id%KEEP(411), ierr )
2981 CALL mpi_reduce( sum_info22_this_node, max_sum_info22_this_node,
2982 & 1, mpi_integer, mpi_max, 0, id%COMM, ierr )
2983 IF (prokg .AND. print_nodeinfo) THEN
2984 WRITE(mpg,'(A,I12)')
2985 & ' ** Max. effective space per compute node, in MBytes :',
2986 & max_sum_info22_this_node
2987 ENDIF
2988C
2989 IF (i_am_slave) THEN
2990 k67 = id%KEEP8(67)
2991 k68 = id%KEEP8(68)
2992 k70 = id%KEEP8(70)
2993 k74 = id%KEEP8(74)
2994 k75 = id%KEEP8(75)
2995 ELSE
2996 k67 = 0_8
2997 k68 = 0_8
2998 k70 = 0_8
2999 k74 = 0_8
3000 k75 = 0_8
3001 ENDIF
3002C -- Save the number of entries effectively used
3003C in main working array S
3004 CALL mumps_seti8toi4(k67,id%INFO(21))
3005C
3006C
3007 IF (keep(400) .GT.0 ) THEN
3008 IF (.NOT. i_am_slave) THEN
3009 id%DKEEP(95) = 0.0e0
3010 id%DKEEP(16) = 0.0e0
3011 ENDIF
3012 IF (id%NPROCS .GT. 1) THEN
3013C Compute average and max (across MPI's)
3014 CALL mpi_reduce(id%DKEEP(95), tmptime, 1,
3015 & mpi_real, mpi_sum, master, id%COMM, ierr)
3016 IF (id%MYID.EQ.master) timeavg=dble(tmptime)
3017 CALL mpi_reduce(id%DKEEP(16), tmpflop, 1,
3018 & mpi_real, mpi_sum, master, id%COMM, ierr)
3019 IF (id%MYID.EQ.master) flopavg=dble(tmpflop)
3020 IF (id%MYID.EQ.master) THEN
3021 timeavg = timeavg / id%NSLAVES
3022 flopavg = flopavg / id%NSLAVES
3023 ENDIF
3024 CALL mpi_reduce(id%DKEEP(95), tmptime, 1,
3025 & mpi_real, mpi_max, master, id%COMM, ierr)
3026 IF (id%MYID.EQ.master) timemax=dble(tmptime)
3027 CALL mpi_reduce(id%DKEEP(16), tmpflop, 1,
3028 & mpi_real, mpi_max, master, id%COMM, ierr)
3029 IF (id%MYID.EQ.master) flopmax=dble(tmpflop)
3030C (PROKG may only be true on master)
3031 IF ( prokg ) THEN
3032 WRITE(mpg,190) flopavg, flopmax
3033 WRITE(mpg,188) timeavg, timemax
3034 ENDIF
3035 ELSE
3036C Print DKEEP(95) directly without reduction
3037 IF ( prokg ) THEN
3038 WRITE(mpg,189) id%DKEEP(16)
3039 WRITE(mpg,187) id%DKEEP(95)
3040 ENDIF
3041 ENDIF
3042 ENDIF
3043 IF ( prokg ) THEN
3044 IF (id%INFO(1) .GE.0) THEN
3045 WRITE(mpg,180) id%DKEEP(94)
3046 ELSE
3047 WRITE(mpg,185) id%DKEEP(94)
3048 ENDIF
3049 ENDIF
3050C
3051C Sum RINFO(2) : total number of flops for assemblies
3052C Sum RINFO(3) : total number of flops for eliminations
3053C Initialize RINFO(4) in case BLR was not activated
3054 rinfo(4) = rinfo(3)
3055C
3056C Should work even if the master does some work
3057C
3058 CALL mpi_reduce( rinfo(2), rinfog(2), 2,
3059 & mpi_real,
3060 & mpi_sum, master, id%COMM, ierr)
3061C Reduce needed to dimension small working array
3062C on all procs during CMUMPS_GATHER_SOLUTION
3063 keep(247) = 0
3064 CALL mpi_reduce( keep(246), keep(247), 1, mpi_integer,
3065 & mpi_max, master, id%COMM, ierr)
3066C
3067C Reduce compression times: get max compression times
3068 CALL mpi_reduce( id%DKEEP(97), id%DKEEP(98), 1,
3069 & mpi_real,
3070 & mpi_max, master, id%COMM, ierr)
3071C
3072 CALL mpi_reduce( rinfo(2), rinfog(2), 2,
3073 & mpi_real,
3074 & mpi_sum, master, id%COMM, ierr)
3075 CALL mumps_reducei8( id%KEEP8(31)+id%KEEP8(64),id%KEEP8(6),
3076 & mpi_sum, master, id%COMM )
3077C
3078 IF (id%MYID.EQ.0) THEN
3079C In MegaBytes
3080 rinfog(16) = real(id%KEEP8(6)*int(keep(35),8))/real(1e6)
3081 IF (keep(201).LE.0) THEN
3082 rinfog(16) = zero
3083 ENDIF
3084 ENDIF
3085 CALL mumps_reducei8( id%KEEP8(48),id%KEEP8(148), mpi_sum,
3086 & master, id%COMM )
3087 CALL mumps_seti8toi4(id%KEEP8(148), infog(9))
3088C
3089 CALL mpi_reduce( int(id%INFO(10),8), id%KEEP8(128),
3090 & 1, mpi_integer8,
3091 & mpi_sum, master, id%COMM, ierr)
3092 IF (id%MYID.EQ.master) THEN
3093 CALL mumps_seti8toi4(id%KEEP8(128), id%INFOG(10))
3094 ENDIF
3095C Use MPI_MAX for this one to get largest front size
3096 CALL mpi_allreduce( id%INFO(11), infog(11), 1, mpi_integer,
3097 & mpi_max, id%COMM, ierr)
3098C make maximum effective frontal size available on all procs
3099C for solve phase
3100C (Note that INFO(11) includes root size on root master)
3101 keep(133) = infog(11)
3102 CALL mpi_reduce( id%INFO(12), infog(12), 3, mpi_integer,
3103 & mpi_sum, master, id%COMM, ierr)
3104 CALL mpi_reduce( keep(103), infog(25), 1, mpi_integer,
3105 & mpi_sum, master, id%COMM, ierr)
3106 keep(229) = infog(25)
3107 CALL mpi_reduce( keep(105), infog(25), 1, mpi_integer,
3108 & mpi_sum, master, id%COMM, ierr)
3109 keep(230) = infog(25)
3110C
3111 id%INFO(25) = keep(98)
3112 CALL mpi_allreduce( id%INFO(25), infog(25), 1, mpi_integer,
3113 & mpi_sum, id%COMM, ierr)
3114C Extra copies due to in-place stacking
3115 CALL mumps_reducei8( id%KEEP8(8), id%KEEP8(108), mpi_sum,
3116 & master, id%COMM )
3117C Entries in factors
3118 CALL mumps_seti8toi4(id%KEEP8(10), id%INFO(27))
3119 CALL mumps_reducei8( id%KEEP8(10),id%KEEP8(110), mpi_sum,
3120 & master, id%COMM )
3121 CALL mumps_seti8toi4(id%KEEP8(110), infog(29))
3122C Initialize INFO(28)/INFOG(35) in case BLR not activated
3123 id%INFO(28) = id%INFO(27)
3124 infog(35) = infog(29)
3125C ==============================
3126C LOW-RANK
3127C ==============================
3128 IF ( keep(486) .NE. 0 ) THEN !LR is activated
3129C Compute and Save local amount of flops in case of BLR
3130 rinfo(4) = real(flop_frfronts + flop_facto_fr - flop_lrgain
3132C
3133C Compute and Save local number of entries in compressed factors
3134C
3135 itmp8 = id%KEEP8(10) - int(mry_lu_lrgain,8)
3136 CALL mumps_seti8toi4( itmp8, id%INFO(28))
3137C
3138 CALL mpi_reduce( mry_lu_lrgain, tmp_mry_lu_lrgain
3139 & , 1, mpi_double_precision,
3140 & mpi_sum, master, id%COMM, ierr)
3141 CALL mpi_reduce( mry_lu_fr, tmp_mry_lu_fr
3142 & , 1, mpi_double_precision,
3143 & mpi_sum, master, id%COMM, ierr)
3144 CALL mpi_reduce( mry_cb_fr, tmp_mry_cb_fr
3145 & , 1, mpi_double_precision,
3146 & mpi_sum, master, id%COMM, ierr)
3147 CALL mpi_reduce( mry_cb_lrgain, tmp_mry_cb_lrgain
3148 & , 1, mpi_double_precision,
3149 & mpi_sum, master, id%COMM, ierr)
3150 CALL mpi_reduce( flop_lrgain, tmp_flop_lrgain
3151 & , 1, mpi_double_precision,
3152 & mpi_sum, master, id%COMM, ierr)
3153 CALL mpi_reduce( flop_trsm_fr, tmp_flop_trsm_fr
3154 & , 1, mpi_double_precision,
3155 & mpi_sum, master, id%COMM, ierr)
3156 CALL mpi_reduce( flop_trsm_lr, tmp_flop_trsm_lr
3157 & , 1, mpi_double_precision,
3158 & mpi_sum, master, id%COMM, ierr)
3159 CALL mpi_reduce( flop_update_fr, tmp_flop_update_fr
3160 & , 1, mpi_double_precision,
3161 & mpi_sum, master, id%COMM, ierr)
3162 CALL mpi_reduce( flop_update_lr, tmp_flop_update_lr
3163 & , 1, mpi_double_precision,
3164 & mpi_sum, master, id%COMM, ierr)
3166 & tmp_flop_frswap_compress
3167 & , 1, mpi_double_precision,
3168 & mpi_sum, master, id%COMM, ierr)
3170 & tmp_flop_midblk_compress
3171 & , 1, mpi_double_precision,
3172 & mpi_sum, master, id%COMM, ierr)
3173 CALL mpi_reduce( flop_update_lrlr3, tmp_flop_update_lrlr3
3174 & , 1, mpi_double_precision,
3175 & mpi_sum, master, id%COMM, ierr)
3176 CALL mpi_reduce(flop_accum_compress, tmp_flop_accum_compress
3177 & , 1, mpi_double_precision,
3178 & mpi_sum, master, id%COMM, ierr)
3179 CALL mpi_reduce( flop_trsm, tmp_flop_trsm
3180 & , 1, mpi_double_precision,
3181 & mpi_sum, master, id%COMM, ierr)
3182 CALL mpi_reduce( flop_panel, tmp_flop_panel
3183 & , 1, mpi_double_precision,
3184 & mpi_sum, master, id%COMM, ierr)
3185 CALL mpi_reduce( flop_frfronts, tmp_flop_frfronts
3186 & , 1, mpi_double_precision,
3187 & mpi_sum, master, id%COMM, ierr)
3188 CALL mpi_reduce( flop_compress, tmp_flop_compress
3189 & , 1, mpi_double_precision,
3190 & mpi_sum, master, id%COMM, ierr)
3191 CALL mpi_reduce( flop_decompress, tmp_flop_decompress
3192 & , 1, mpi_double_precision,
3193 & mpi_sum, master, id%COMM, ierr)
3194 CALL mpi_reduce( flop_cb_compress, tmp_flop_cb_compress
3195 & , 1, mpi_double_precision,
3196 & mpi_sum, master, id%COMM, ierr)
3197 CALL mpi_reduce( flop_cb_decompress,tmp_flop_cb_decompress
3198 & , 1, mpi_double_precision,
3199 & mpi_sum, master, id%COMM, ierr)
3200 CALL mpi_reduce( flop_facto_fr, tmp_flop_facto_fr
3201 & , 1, mpi_double_precision,
3202 & mpi_sum, master, id%COMM, ierr)
3203 CALL mpi_reduce( cnt_nodes,tmp_cnt_nodes
3204 & , 1, mpi_integer,
3205 & mpi_sum, master, id%COMM, ierr)
3206 IF (id%NPROCS.GT.1) THEN
3210 & , 1, mpi_double_precision,
3211 & mpi_sum, master, id%COMM, ierr)
3212 IF (id%MYID.EQ.master) THEN
3214 ENDIF
3216 & , 1, mpi_double_precision,
3217 & mpi_min, master, id%COMM, ierr)
3219 & , 1, mpi_double_precision,
3220 & mpi_max, master, id%COMM, ierr)
3221 ENDIF ! NPROCS > 1
3222 CALL mpi_reduce( time_update, tmp_time_update
3223 & , 1, mpi_double_precision,
3224 & mpi_sum, master, id%COMM, ierr)
3225 CALL mpi_reduce( time_update_lrlr1, tmp_time_update_lrlr1
3226 & , 1, mpi_double_precision,
3227 & mpi_sum, master, id%COMM, ierr)
3228 CALL mpi_reduce( time_update_lrlr2, tmp_time_update_lrlr2
3229 & , 1, mpi_double_precision,
3230 & mpi_sum, master, id%COMM, ierr)
3231 CALL mpi_reduce( time_update_lrlr3, tmp_time_update_lrlr3
3232 & , 1, mpi_double_precision,
3233 & mpi_sum, master, id%COMM, ierr)
3234 CALL mpi_reduce( time_update_frlr, tmp_time_update_frlr
3235 & , 1, mpi_double_precision,
3236 & mpi_sum, master, id%COMM, ierr)
3237 CALL mpi_reduce( time_update_frfr, tmp_time_update_frfr
3238 & , 1, mpi_double_precision,
3239 & mpi_sum, master, id%COMM, ierr)
3240 CALL mpi_reduce( time_diagcopy, tmp_time_diagcopy
3241 & , 1, mpi_double_precision,
3242 & mpi_sum, master, id%COMM, ierr)
3243 CALL mpi_reduce( time_compress,tmp_time_compress
3244 & , 1, mpi_double_precision,
3245 & mpi_sum, master, id%COMM, ierr)
3247 & tmp_time_midblk_compress
3248 & , 1, mpi_double_precision,
3249 & mpi_sum, master, id%COMM, ierr)
3251 & tmp_time_frswap_compress
3252 & , 1, mpi_double_precision,
3253 & mpi_sum, master, id%COMM, ierr)
3254 CALL mpi_reduce( time_cb_compress, tmp_time_cb_compress
3255 & , 1, mpi_double_precision,
3256 & mpi_sum, master, id%COMM, ierr)
3257 CALL mpi_reduce( time_decomp, tmp_time_decomp
3258 & , 1, mpi_double_precision,
3259 & mpi_sum, master, id%COMM, ierr)
3260 CALL mpi_reduce( time_decomp_ucfs, tmp_time_decomp_ucfs
3261 & , 1, mpi_double_precision,
3262 & mpi_sum, master, id%COMM, ierr)
3263 CALL mpi_reduce( time_decomp_asm1, tmp_time_decomp_asm1
3264 & , 1, mpi_double_precision,
3265 & mpi_sum, master, id%COMM, ierr)
3266 CALL mpi_reduce(time_decomp_locasm2, tmp_time_decomp_locasm2
3267 & , 1, mpi_double_precision,
3268 & mpi_sum, master, id%COMM, ierr)
3269 CALL mpi_reduce(time_decomp_maplig1, tmp_time_decomp_maplig1
3270 & , 1, mpi_double_precision,
3271 & mpi_sum, master, id%COMM, ierr)
3272 CALL mpi_reduce( time_decomp_asms2s, tmp_time_decomp_asms2s
3273 & , 1, mpi_double_precision,
3274 & mpi_sum, master, id%COMM, ierr)
3275 CALL mpi_reduce( time_decomp_asms2m, tmp_time_decomp_asms2m
3276 & , 1, mpi_double_precision,
3277 & mpi_sum, master, id%COMM, ierr)
3278 CALL mpi_reduce( time_panel, tmp_time_panel
3279 & , 1, mpi_double_precision,
3280 & mpi_sum, master, id%COMM, ierr)
3281 CALL mpi_reduce( time_fac_i, tmp_time_fac_i
3282 & , 1, mpi_double_precision,
3283 & mpi_sum, master, id%COMM, ierr)
3284 CALL mpi_reduce( time_fac_mq, tmp_time_fac_mq
3285 & , 1, mpi_double_precision,
3286 & mpi_sum, master, id%COMM, ierr)
3287 CALL mpi_reduce( time_fac_sq, tmp_time_fac_sq
3288 & , 1, mpi_double_precision,
3289 & mpi_sum, master, id%COMM, ierr)
3290 CALL mpi_reduce( time_lrtrsm, tmp_time_lrtrsm
3291 & , 1, mpi_double_precision,
3292 & mpi_sum, master, id%COMM, ierr)
3293 CALL mpi_reduce( time_frtrsm, tmp_time_frtrsm
3294 & , 1, mpi_double_precision,
3295 & mpi_sum, master, id%COMM, ierr)
3296 CALL mpi_reduce( time_frfronts, tmp_time_frfronts
3297 & , 1, mpi_double_precision,
3298 & mpi_sum, master, id%COMM, ierr)
3299 CALL mpi_reduce( time_lr_module, tmp_time_lr_module
3300 & , 1, mpi_double_precision,
3301 & mpi_sum, master, id%COMM, ierr)
3302 IF (id%MYID.EQ.master) THEN
3303 IF (id%NPROCS.GT.1) THEN
3304C rename the stat variable so that COMPUTE_GLOBAL_GAINS can work for any
3305C number of procs
3306 mry_lu_fr = tmp_mry_lu_fr
3307 mry_lu_lrgain = tmp_mry_lu_lrgain
3308 mry_cb_fr = tmp_mry_cb_fr
3309 mry_cb_lrgain = tmp_mry_cb_lrgain
3310 flop_lrgain = tmp_flop_lrgain
3311 flop_panel = tmp_flop_panel
3312 flop_trsm = tmp_flop_trsm
3313 flop_trsm_fr = tmp_flop_trsm_fr
3314 flop_trsm_lr = tmp_flop_trsm_lr
3315 flop_update_fr = tmp_flop_update_fr
3316 flop_update_lr = tmp_flop_update_lr
3317 flop_update_lrlr3 = tmp_flop_update_lrlr3
3318 flop_compress = tmp_flop_compress
3319 flop_midblk_compress = tmp_flop_midblk_compress
3320 flop_frswap_compress = tmp_flop_frswap_compress
3321 flop_accum_compress = tmp_flop_accum_compress
3322 flop_cb_compress = tmp_flop_cb_compress
3323 flop_decompress = tmp_flop_decompress
3324 flop_cb_decompress = tmp_flop_cb_decompress
3325 flop_frfronts = tmp_flop_frfronts
3326 flop_facto_fr = tmp_flop_facto_fr
3327 cnt_nodes = tmp_cnt_nodes
3328 time_update = tmp_time_update /id%NPROCS
3329 time_update_lrlr1 = tmp_time_update_lrlr1 /id%NPROCS
3330 time_update_lrlr2 = tmp_time_update_lrlr2 /id%NPROCS
3331 time_update_lrlr3 = tmp_time_update_lrlr3 /id%NPROCS
3332 time_update_frlr = tmp_time_update_frlr /id%NPROCS
3333 time_update_frfr = tmp_time_update_frfr /id%NPROCS
3334 time_compress = tmp_time_compress /id%NPROCS
3335 time_midblk_compress = tmp_time_midblk_compress/id%NPROCS
3336 time_frswap_compress = tmp_time_frswap_compress/id%NPROCS
3337 time_diagcopy = tmp_time_diagcopy /id%NPROCS
3338 time_cb_compress = tmp_time_cb_compress /id%NPROCS
3339 time_panel = tmp_time_panel /id%NPROCS
3340 time_fac_i = tmp_time_fac_i /id%NPROCS
3341 time_fac_mq = tmp_time_fac_mq /id%NPROCS
3342 time_fac_sq = tmp_time_fac_sq /id%NPROCS
3343 time_lrtrsm = tmp_time_lrtrsm /id%NPROCS
3344 time_frtrsm = tmp_time_frtrsm /id%NPROCS
3345 time_frfronts = tmp_time_frfronts /id%NPROCS
3346 time_lr_module = tmp_time_lr_module /id%NPROCS
3347 time_decomp = tmp_time_decomp /id%NPROCS
3348 time_decomp_ucfs = tmp_time_decomp_ucfs /id%NPROCS
3349 time_decomp_asm1 = tmp_time_decomp_asm1 /id%NPROCS
3350 time_decomp_locasm2 = tmp_time_decomp_locasm2 /id%NPROCS
3351 time_decomp_maplig1 = tmp_time_decomp_maplig1 /id%NPROCS
3352 time_decomp_asms2s = tmp_time_decomp_asms2s /id%NPROCS
3353 time_decomp_asms2m = tmp_time_decomp_asms2m /id%NPROCS
3354 ENDIF
3355 CALL compute_global_gains(id%KEEP8(110),id%RINFOG(3),
3356 & id%KEEP8(49), prokg, mpg)
3357C Number of entries in factor INFOG(35) in
3358C compressed form is updated as long as
3359C BLR is activated, this independently of the
3360C fact that factors are saved in LR.
3361 CALL mumps_seti8toi4(id%KEEP8(49), id%INFOG(35))
3362 frontwise = 0
3363C WRITE gains also compute stats stored in DKEEP array
3364 IF (lpok) THEN
3365 IF (cntl(7) < 0.0e0) THEN
3366C Warning : using negative values is an experimental and
3367C non recommended setting.
3368 WRITE(lp,'(/A/,A/,A/,A,A)')
3369 & ' WARNING in BLR input setting',
3370 & ' CNTL(7) < 0 is experimental: ',
3371 & ' RRQR precision = |CNTL(7| x ||A_pre||, ',
3372 & ' where A_pre is the preprocessed matrix as defined',
3373 & ' in the Users guide '
3374 ENDIF
3375 ENDIF
3376 CALL saveandwrite_gains(frontwise,
3377 & keep(489), id%DKEEP, n, id%ICNTL(36),
3378 & keep(487), keep(488), keep(490),
3379 & keep(491), keep(50), keep(486),
3380 & keep(472), keep(475), keep(478), keep(480),
3381 & keep(481),
3382 & keep(483), keep(484),
3383 & id%KEEP8(110), id%KEEP8(49),
3384 & keep(28), id%NPROCS, mpg, prokg)
3385C flops when BLR activated
3386 rinfog(14) = id%DKEEP(56)
3387 ELSE
3388 rinfog(14) = 0.0e00
3389 ENDIF
3390 ENDIF
3391C ==============================
3392C NULL PIVOTS AND RANK-REVEALING
3393C ==============================
3394 IF(keep(110) .EQ. 1) THEN
3395C -- make available to users the local number of null pivots detected
3396C -- with ICNTL(24) = 1.
3397 id%INFO(18) = keep(109)
3398 CALL mpi_allreduce( keep(109), keep(112), 1, mpi_integer,
3399 & mpi_sum, id%COMM, ierr)
3400 ELSE
3401 id%INFO(18) = 0
3402 keep(109) = 0
3403 keep(112) = 0
3404 ENDIF
3405 IF (id%MYID.EQ.master) THEN
3406C INFOG(28) deficiency resulting from ICNTL(24) and ICNTL(56).
3407 infog(28)=keep(112)+keep(17)
3408 ENDIF
3409C ========================================
3410C We now provide to the host the part of
3411C PIVNUL_LIST resulting from the processing
3412C of the root node and we update id%INFO(18)
3413C on the processor holding the root to
3414C include null pivots relative to the root
3415C ========================================
3416 IF (keep(17) .NE. 0) THEN
3417 IF (id%MYID .EQ. id_root) THEN
3418C Include in id%INFO(18) null pivots resulting
3419C from deficiency on the root. In this way,
3420C the sum of all id%INFO(18) is equal to INFOG(28).
3421 id%INFO(18)=id%INFO(18)+keep(17)
3422 ENDIF
3423 IF (id_root .EQ. master) THEN
3424 IF (id%MYID.EQ.master) THEN
3425C --------------------------------------------------
3426C Null pivots of root have been stored in
3427C PIVNUL_LIST(KEEP(109)+1:KEEP(109)+KEEP(17).
3428C Shift them at the end of the list because:
3429C * this is what we need to build the null space
3430C * we would otherwise overwrite them on the host
3431C when gathering null pivots from other processors
3432C --------------------------------------------------
3433 DO i= keep(17), 1, -1
3434c DO I=1, KEEP(17) % incorrect
3435C when KEEP(112) < KEEP(109)+ KEEP(17)
3436 id%PIVNUL_LIST(keep(112)+i)=id%PIVNUL_LIST(keep(109)+i)
3437 ENDDO
3438 ENDIF
3439 ELSE
3440C ---------------------------------
3441C Null pivots of root must be sent
3442C from the processor responsible of
3443C the root to the host (or MASTER).
3444C ---------------------------------
3445 IF (id%MYID .EQ. id_root) THEN
3446 CALL mpi_send(id%PIVNUL_LIST(keep(109)+1), keep(17),
3447 & mpi_integer, master, zero_piv,
3448 & id%COMM, ierr)
3449 ELSE IF (id%MYID .EQ. master) THEN
3450 CALL mpi_recv(id%PIVNUL_LIST(keep(112)+1), keep(17),
3451 & mpi_integer, id_root, zero_piv,
3452 & id%COMM, status, ierr )
3453 ENDIF
3454 ENDIF
3455 ENDIF
3456C ===========================
3457C gather zero pivots indices
3458C on the host node
3459C ===========================
3460C In case of non working host, the following code also
3461C works considering that KEEP(109) is equal to 0 on
3462C the non-working host
3463 IF(keep(110) .EQ. 1) THEN
3464 ALLOCATE(itmp2(id%NPROCS),stat = ierr ) ! deallocated in 490
3465 IF ( ierr .GT. 0 ) THEN
3466 id%INFO(1)=-13
3467 id%INFO(2)=id%NPROCS
3468 END IF
3469 CALL mumps_propinfo( icntl(1), id%INFO(1),
3470 & id%COMM, id%MYID )
3471 IF (id%INFO(1).LT.0) GOTO 490
3472 CALL mpi_gather ( keep(109),1, mpi_integer,
3473 & itmp2(1), 1, mpi_integer,
3474 & master, id%COMM, ierr)
3475 IF(id%MYID .EQ. master) THEN
3476 posbuf = itmp2(1)+1
3477C First null pivot of master is in
3478C position 1 of global list
3479 keep(220)=1
3480 DO i = 1,id%NPROCS-1
3481 CALL mpi_recv(id%PIVNUL_LIST(posbuf), itmp2(i+1),
3482 & mpi_integer,i,
3483 & zero_piv, id%COMM, status, ierr)
3484C Send position POSBUF of first null pivot of proc I
3485C in global list. Will allow to quickly identify during
3486C the solve step if one is concerned by a global position
3487C K, 0 <= K <= INFOG(28).
3488 CALL mpi_send(posbuf, 1, mpi_integer, i, zero_piv,
3489 & id%COMM, ierr)
3490 posbuf = posbuf + itmp2(i+1)
3491 ENDDO
3492 ELSE
3493 CALL mpi_send( id%PIVNUL_LIST(1), keep(109), mpi_integer,
3494 & master,zero_piv, id%COMM, ierr)
3495 CALL mpi_recv( keep(220), 1, mpi_integer, master, zero_piv,
3496 & id%COMM, status, ierr )
3497 ENDIF
3498 ENDIF
3499C =====================================
3500C Statistics relative to min/max pivots
3501C =====================================
3502 CALL mpi_reduce( id%DKEEP(19), rinfog(19), 1,
3503 & mpi_real,
3504 & mpi_min, master, id%COMM, ierr )
3505 CALL mpi_reduce( id%DKEEP(20), rinfog(20), 1,
3506 & mpi_real,
3507 & mpi_min, master, id%COMM, ierr )
3508 CALL mpi_reduce( id%DKEEP(21), rinfog(21), 1,
3509 & mpi_real,
3510 & mpi_max, master, id%COMM, ierr )
3511C =========================================
3512C Centralized number of swaps for pivoting
3513C =========================================
3514 CALL mpi_reduce( id%KEEP8(80), itemp8, 1, mpi_integer8,
3515 & mpi_sum, master, id%COMM, ierr )
3516 IF (id%MYID .EQ. master) THEN
3517 CALL mumps_seti8toi4(itemp8,id%INFOG(48))
3518 ENDIF
3519C ==========================================
3520C Centralized largest increase of panel size
3521C ==========================================
3522 CALL mpi_reduce( id%KEEP(425), id%INFOG(49), 1, mpi_integer,
3523 & mpi_max, master, id%COMM, ierr )
3524C =====================================
3525C Statistics concerning the determinant
3526C =====================================
3527C
3528C 1/ on the host better take into account null pivots if scaling:
3529C
3530C Since null pivots are excluded from the computation
3531C of the determinant, we also exclude the corresponding
3532C scaling entries. Since those entries have already been
3533C taken into account before the factorization, we multiply
3534C the determinant on the host by the scaling values corresponding
3535C to pivots in PIVNUL_LIST.
3536 IF (id%MYID.EQ.master .AND. lscal. and. keep(258).NE.0) THEN
3537 k = min(keep(143), keep(17))
3538 k = max(k, 0)
3539 DO i = 1, keep(112)+ k
3540c DO I = 1, id%INFOG(28) ! all null pivots + singular values
3542 & id%ROWSCA(id%PIVNUL_LIST(i)),
3543 & id%DKEEP(6), keep(259))
3545 & id%COLSCA(id%PIVNUL_LIST(i)),
3546 & id%DKEEP(6), keep(259))
3547 ENDDO
3548 ENDIF
3549C
3550C 2/ Swap signs depending on pivoting on each proc
3551C
3552 IF (keep(258).NE.0) THEN
3553C Return the determinant in INFOG(34) and RINFOG(12/13)
3554 IF (keep(260).EQ.-1) THEN ! Local to each processor
3555 id%DKEEP(6)=-id%DKEEP(6)
3556 id%DKEEP(7)=-id%DKEEP(7)
3557 ENDIF
3558C
3559C 3/ Perform a reduction
3560C
3562 & id%COMM, id%DKEEP(6), keep(259),
3563 & rinfog(12), infog(34), id%NPROCS)
3564C
3565C 4/ Swap sign if needed
3566C
3567 IF (id%KEEP(50).EQ.0 .AND. id%MYID.EQ. master) THEN
3568C Modify sign of determinant according
3569C to unsymmetric permutation (max-trans
3570C of max-weighted matching)
3571 IF (id%KEEP(23).NE.0) THEN
3573 & rinfog(12), id%N,
3574C id%STEP: used as workspace of size N still
3575C allocated on master; restored on exit
3576 & id%STEP(1),
3577 & id%UNS_PERM(1) )
3578C Remark that RINFOG(12/13) are modified only
3579C on the host but will be broadcast on exit
3580C from MUMPS (see CMUMPS_DRIVER)
3581 ENDIF
3582 ENDIF
3583 ENDIF
3584 490 IF (allocated(itmp2)) DEALLOCATE(itmp2)
3585 IF ( prokg ) THEN
3586C -----------------------------
3587C PRINT STATISTICS (on master)
3588C -----------------------------
3589 WRITE(mpg,99984) rinfog(2),rinfog(3),keep(52),
3590 & id%KEEP8(148),
3591 & id%KEEP8(128), infog(11), id%KEEP8(110)
3592 IF (id%KEEP(50) == 0) THEN
3593 ! off diag pivots
3594 WRITE(mpg, 99985) infog(12)
3595 END IF
3596 IF (id%KEEP(50) .NE. 1) THEN
3597 ! delayed pivots
3598 WRITE(mpg, 99982) infog(13)
3599 END IF
3600 IF (keep(97) .NE. 0) THEN
3601 ! tiny pivots
3602 WRITE(mpg, '(A,D16.4)')
3603 & ' Effective static pivoting thresh., CNTL(4) =', seuil
3604 WRITE(mpg, 99986) infog(25)
3605 ENDIF
3606 IF (id%KEEP(50) == 2) THEN
3607 !number of 2x2 pivots in type 1 nodes
3608 WRITE(mpg, 99988) keep(229)
3609 !number of 2x2 pivots in type 2 nodes
3610 WRITE(mpg, 99989) keep(230)
3611 ENDIF
3612 !number of zero pivots
3613 IF (keep(110) .NE.0) THEN
3614 WRITE(mpg, 99991) keep(112)
3615 ENDIF
3616 !Deficiency on root
3617 IF ( keep(19) .ne. 0 )
3618c IF ( KEEP(17) .ne. 0 )
3619 & WRITE(mpg, 99983) keep(17)
3620 !Total deficiency
3621 IF (keep(110).NE.0.OR.keep(19).NE.0)
3622c IF (KEEP(110).NE.0.OR.KEEP(17).NE.0)
3623 & WRITE(mpg, 99992) keep(17)+keep(112)
3624 ! Memory compress
3625 WRITE(mpg, 99981) infog(14)
3626 ! Extra copies due to ip stack in unsym case
3627 ! in core case (or OLD_OOC_PANEL)
3628 IF (id%KEEP8(108) .GT. 0_8) THEN
3629 WRITE(mpg, 99980) id%KEEP8(108)
3630 ENDIF
3631 IF ((keep(60).NE.0) .AND. infog(25).GT.0) THEN
3632 ! Schur on and tiny pivots set in last level
3633 ! before the Schur if KEEP(114)=0
3634 WRITE(mpg, '(A)')
3635 & " ** Warning Static pivoting was necessary"
3636 WRITE(mpg, '(A)')
3637 & " ** to factor interior variables with Schur ON"
3638 ENDIF
3639 IF (keep(258).NE.0) THEN
3640 WRITE(mpg,99978) rinfog(12)
3641 WRITE(mpg,99979) rinfog(13)
3642 WRITE(mpg,99977) infog(34)
3643 ENDIF
3644 END IF
3645* ==========================================
3646*
3647* End of Factorization Phase
3648*
3649* ==========================================
3650C
3651C Goto 500 is done when
3652C LOAD_INIT
3653C OOC_INIT_FACTO
3654C MUMPS_FDM_INIT
3655#if ! defined(NO_FDM_DESCBAND)
3656C MUMPS_FDBD_INIT
3657#endif
3658#if ! defined(NO_FDM_MAPROW)
3659C MUMPS_FMRD_INIT
3660#endif
3661C are all called.
3662C
3663 500 CONTINUE
3664C Redo free DBLARR (as in end_driver.F)
3665C in case an error occurred after allocating
3666C DBLARR and before freeing it above.
3667 IF (id%KEEP(46).EQ.1 .AND.
3668 & id%KEEP(55).NE.0 .AND.
3669 & id%MYID.EQ.master .AND.
3670 & id%KEEP(52) .EQ. 0) THEN
3671 NULLIFY(id%DBLARR)
3672 ELSE
3673 IF (associated(id%DBLARR)) THEN
3674 DEALLOCATE(id%DBLARR)
3675 NULLIFY(id%DBLARR)
3676 ENDIF
3677 ENDIF
3678#if ! defined(NO_FDM_DESCBAND)
3679 IF (i_am_slave) THEN
3680 CALL mumps_fdbd_end(id%INFO(1)) ! INFO(1): input only
3681 ENDIF
3682#endif
3683#if ! defined(NO_FDM_MAPROW)
3684 IF (i_am_slave) THEN
3685 CALL mumps_fmrd_end(id%INFO(1)) ! INFO(1): input only
3686 ENDIF
3687#endif
3688 IF (i_am_slave) THEN
3689C Terminate BLR module except if it is still needed for solve
3690 IF (
3691 & (
3692 & (keep(486).EQ.2)
3693 & )
3694 & .AND. id%INFO(1).GE.0
3695 & ) THEN
3696C Store pointer to BLR_ARRAY in MUMPS structure
3697C (requires successful factorization otherwise module is freed)
3698 CALL cmumps_blr_mod_to_struc(id%BLRARRAY_ENCODING)
3699 ELSE
3700C INFO(1) positive or negative
3701 CALL cmumps_blr_end_module(id%INFO(1), id%KEEP8, id%KEEP(34))
3702 ENDIF
3703 ENDIF
3704 IF (i_am_slave) THEN
3705 CALL mumps_fdm_end('A')
3706C Terminate BLR module except if it is still needed for solve
3707 IF (
3708 & (
3709 & (keep(486).EQ.2)
3710 & )
3711 & .AND. id%INFO(1).GE.0
3712 & ) THEN
3713 CALL mumps_fdm_mod_to_struc('F', id%FDM_F_ENCODING,
3714 & id%INFO(1))
3715 IF (.NOT. associated(id%FDM_F_ENCODING)) THEN
3716 WRITE(*,*) "Internal error 2 in CMUMPS_FAC_DRIVER"
3717 ENDIF
3718 ELSE
3719 CALL mumps_fdm_end('F')
3720 ENDIF
3721 ENDIF
3722C
3723C Goto 514 is done when an
3724C error occurred in MUMPS_FDM_INIT
3725C or (after FDM_INIT but before
3726C OOC_INIT)
3727C
3728 514 CONTINUE
3729 IF ( i_am_slave ) THEN
3730 IF ((keep(201).EQ.1).OR.(keep(201).EQ.2)) THEN
3731 CALL cmumps_ooc_end_facto(id,ierr)
3732 IF (id%ASSOCIATED_OOC_FILES) THEN
3733 id%ASSOCIATED_OOC_FILES = .false.
3734 ENDIF
3735 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3736 ENDIF
3737 IF (wk_user_provided) THEN
3738C at the end of a phase S is always freed when WK_USER provided
3739 NULLIFY(id%S)
3740 ELSE IF (keep(201).NE.0) THEN
3741C ----------------------------------------
3742C In OOC or if KEEP(201).EQ.-1 we always
3743C free S at end of factorization. As id%S
3744C may be unassociated in case of error
3745C during or before the allocation of id%S,
3746C we only free S when it was associated.
3747C ----------------------------------------
3748 IF (associated(id%S)) DEALLOCATE(id%S)
3749 NULLIFY(id%S) ! in all cases
3750 id%KEEP8(23)=0_8
3751 ENDIF
3752 ELSE ! host not working
3753 IF (wk_user_provided) THEN
3754C at the end of a phase S is always freed when WK_USER provided
3755 NULLIFY(id%S)
3756 ELSE
3757 IF (associated(id%S)) DEALLOCATE(id%S)
3758 NULLIFY(id%S) ! in all cases
3759 id%KEEP8(23)=0_8
3760 END IF
3761 END IF
3762C
3763C Goto 513 is done in case of error where LOAD_INIT was
3764C called but not OOC_INIT_FACTO.
3765 513 CONTINUE
3766 IF ( i_am_slave ) THEN
3767 CALL cmumps_load_end( id%INFO(1), id%NSLAVES, ierr )
3768 IF (ierr.LT.0 .AND. id%INFO(1) .GE. 0) id%INFO(1) = ierr
3769 ENDIF
3770 CALL mumps_propinfo( icntl(1), id%INFO(1),
3771 & id%COMM, id%MYID )
3772C
3773C Goto 517 is done when an error occurs when GPU initialization
3774C has been performed but not LOAD_INIT or OOC_INIT_FACTO
3775C
3776 517 CONTINUE
3777C
3778C Goto 530 is done when an error occurs before
3779C the calls to GPU_INIT, LOAD_INIT and OOC_INIT_FACTO
3780 530 CONTINUE
3781C Fwd in facto: free RHS_MUMPS in case
3782C it was allocated.
3783 IF (rhs_mumps_allocated) DEALLOCATE(rhs_mumps)
3784 NULLIFY(rhs_mumps)
3785C
3786 id%KEEP8(26) = keep826_save
3787 RETURN
3788 120 FORMAT(/' Local redistrib: data local/sent =',i16,i16)
3789 125 FORMAT(/' Redistrib: total data local/sent =',i16,i16)
3790 130 FORMAT(//'****** FACTORIZATION STEP ********'/)
3791 160 FORMAT(
3792 & /' Elapsed time to reformat/distribute matrix =',f12.4)
3793 166 FORMAT(' Max difference from 1 after scaling the entries',
3794 & ' for ONE-NORM (option 7/8) =',d9.2)
3795 170 FORMAT(' STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3796 & ' Size of internal working array S =',i16/
3797 & ' Size of internal working array IS =',i16/
3798 & ' Minimum (ICNTL(14)=0) size of S =',i16/
3799 & ' Minimum (ICNTL(14)=0) size of IS =',i16/
3800 & ' Real space for original matrix =',i16/
3801 & ' Integer space for original matrix =',i16/
3802 & ' INFO(3) Real space for factors (estimated) =',i16/
3803 & ' INFO(4) Integer space for factors (estim.) =',i16/
3804 & ' Maximum frontal size (estimated) =',i16)
3805 172 FORMAT(' GLOBAL STATISTICS PRIOR NUMERICAL FACTORIZATION ...'/
3806 & ' Number of working processes =',i16/
3807 & ' ICNTL(22) Out-of-core option =',i16/
3808 & ' ICNTL(35) BLR activation (eff. choice) =',i16/
3809 & ' ICNTL(14) Memory relaxation =',i16/
3810 & ' INFOG(3) Real space for factors (estimated)=',i16/
3811 & ' INFOG(4) Integer space for factors (estim.)=',i16/
3812 & ' Maximum frontal size (estimated) =',i16/
3813 & ' Number of nodes in the tree =',i16/
3814 & ' ICNTL(23) Memory allowed (value on host) =',i16/
3815 & ' Sum over all procs =',i16/
3816 & ' Memory provided by user, sum of LWK_USER =',i16/
3817 & ' Effective threshold for pivoting, CNTL(1) =',d16.4)
3818 173 FORMAT( ' Perform forward during facto, NRHS =',i16)
3819 174 FORMAT( ' KEEP(268) Relaxed pivoting effective value =',i16)
3820 180 FORMAT(/' Elapsed time for factorization =',f12.4)
3821 185 FORMAT(/' Elapsed time for (failed) factorization =',f12.4)
3822 187 FORMAT( ' Elapsed time under L0 =',f12.4)
3823 188 FORMAT( ' Elapsed time under L0 (avg/max across MPI) =',
3824 & f12.4,f12.4)
3825 189 FORMAT(/' Flops under L0 layer =',1pd12.3)
3826 190 FORMAT(/' Flops under L0 Layer (avg/max across MPI) =',
3827 & 1pd12.3,1pd12.3)
382899977 FORMAT( ' INFOG(34) Determinant (base 2 exponent) =',i16)
382999978 FORMAT( ' RINFOG(12) Determinant (real part) =',f16.8)
383099979 FORMAT( ' RINFOG(12) Determinant (imaginary part) =',f16.8)
383199980 FORMAT( ' Extra copies due to In-Place stacking =',i16)
383299981 FORMAT( ' INFOG(14) Number of memory compress =',i16)
383399982 FORMAT( ' INFOG(13) Number of delayed pivots =',i16)
383499983 FORMAT( ' Nb of singularities detected by ICNTL(56) =',i16)
383599991 FORMAT( ' Nb of null pivots detected by ICNTL(24) =',i16)
383699992 FORMAT( ' INFOG(28) Estimated deficiency =',i16)
383799984 FORMAT(/'Leaving factorization with ...'/
3838 & ' RINFOG(2) Operations in node assembly =',1pd10.3/
3839 & ' ------(3) Operations in node elimination =',1pd10.3/
3840 & ' ICNTL (8) Scaling effectively used =',i16/
3841 & ' INFOG (9) Real space for factors =',i16/
3842 & ' infog(10) Integer space for factors =',I16/
3843 & ' infog(11) maximum front size =',I16/
3844 & ' infog(29) number of entries in factors =',I16)
384599985 FORMAT( ' infog(12) number of off diagonal pivots =',I16)
384699986 FORMAT( ' infog(25) number of tiny pivots(static) =',I16)
384799988 FORMAT( ' number of 2x2 pivots in type 1 nodes =',I16)
384899989 FORMAT( ' number of 2x2 pivots in type 2 nodes =',I16)
#define mumps_abort
Definition VE_Metis.h:25
subroutine mumps_propinfo(icntl, info, comm, id)
subroutine cmumps_facto_send_arrowheads(n, nz, aspk, irn, icn, perm, lscal, colsca, rowsca, myid, slavef, procnode_steps, nbrecords, lp, comm, root, keep, keep8, fils, rg2l, intarr, lintarr, dblarr, ldblarr, ptraiw, ptrarw, frere_steps, step, a, la, istep_to_iniv2, i_am_cand, candidates)
subroutine cmumps_facto_recv_arrowhd2(n, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords, a, la, root, procnode_steps, slavef, perm, frere_steps, step, info1, info2)
subroutine cmumps_free_id_data_modules(id_fdm_f_encoding, id_blrarray_encoding, keep8, k34)
subroutine cmumps_fac_b(n, s_is_pointers, la, liw, sym_perm, na, lna, ne_steps, nfsiz, fils, step, frere, dad, cand, istep_to_iniv2, tab_pos_in_pere, ptrar, ldptrar, ptrist, ptlust_s, ptrfac, iw1, iw2, itloc, rhs_mumps, pool, lpool, cntl1, icntl, info, rinfo, keep, keep8, procnode_steps, slavef, comm_nodes, myid, myid_nodes, bufr, lbufr, lbufr_bytes, cmumps_lbuf, intarr, dblarr, root, nelt, frtptr, frtelt, comm_load, ass_irecv, seuil, seuil_ldlt_niv2, mem_distrib, dkeep, pivnul_list, lpn_list, lrgroups, ipool_b_l0_omp, lpool_b_l0_omp, ipool_a_l0_omp, lpool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, l0_omp_mapping, ll0_omp_mapping, thread_la, l0_omp_factors, ll0_omp_factors, i4_l0_omp, nbstats_i4, nbcols_i4, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition cfac_b.F:30
subroutine cmumps_deter_square(deter, nexp)
subroutine cmumps_deter_reduction(comm, deter_in, nexp_in, deter_out, nexp_out, nprocs)
subroutine cmumps_updatedeter_scaling(piv, deter, nexp)
subroutine cmumps_deter_sign_perm(deter, n, visited, perm)
subroutine cmumps_deter_scaling_inverse(deter, nexp)
subroutine cmumps_elt_distrib(n, nelt, na_elt8, comm, myid, slavef, ielptr_loc8, relptr_loc8, eltvar_loc, eltval_loc, lintarr, ldblarr, keep, keep8, maxelt_size, frtptr, frtelt, a, la, fils, id, root)
subroutine cmumps_maxelt_size(eltptr, nelt, maxelt_size)
subroutine cmumps_redistribution(n, nz_loc8, id, dblarr, ldblarr, intarr, lintarr, ptraiw, ptrarw, keep, keep8, myid, comm, nbrecords a, la, root, procnode_steps, slavef, perm, step, icntl, info, nsend8, nlocal8, istep_to_iniv2, candidates)
subroutine cmumps_avgmax_stat8(prokg, mpg, val, nslaves, print_maxavg, comm, msg)
subroutine cmumps_print_allocated_mem(prok, prokg, print_maxavg, mp, mpg, info16, infog18, infog19, nslaves, irank, keep)
subroutine cmumps_extract_schur_redrhs(id)
subroutine cmumps_fac_a(n, nz8, nsca, aspk, irn, icn, colsca, rowsca, wk, lwk8, wk_real, lwk_real, icntl, info)
subroutine cmumps_anorminf(id, anorminf, lscal, eff_size_schur)
subroutine cmumps_simscaleabs(irn_loc, jcn_loc, a_loc, nz_loc, m, n, numprocs, myid, comm, rpartvec, cpartvec, rsndrcvsz, csndrcvsz, registre, iwrk, iwrksz, intsz, resz, op, rowsca, colsca, wrkrc, iszwrkrc, sym, nb1, nb2, nb3, eps, onenormerr, infnormerr)
subroutine cmumps_mem_allowed_set_maxs(maxs, blr_strat, ooc_strat, maxs_estim_relaxed8, keep, keep8, myid, n, nelt, na, lna, nslaves, icntl38, icntl39, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ctools.F:1250
subroutine cmumps_max_mem(keep, keep8, myid, n, nelt, na, lna, nnz8, na_elt8, nslaves, memory_mbytes, eff, ooc_strat, blr_strat, perlu_on, memory_bytes, blr_case, sum_of_peaks, mem_eff_allocated, under_l0_omp, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ctools.F:638
subroutine cmumps_l0_compute_peak_allowed(myid, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ctools.F:1479
subroutine cmumps_set_blrstrat_and_maxs_k8(maxs_base8, maxs_base_relaxed8, blr_strat, keep, keep8)
Definition ctools.F:1165
subroutine cmumps_mem_allowed_set_k75(maxs, myid, under_l0_omp, n, nelt, na, lna, nslaves, blr_strat, ooc_strat, keep, keep8, iflag, ierror, i8_l0_omp, nbstats_i8, nbcols_i8)
Definition ctools.F:1432
subroutine cmumps_init_root_fac(n, root, fils, iroot, keep, info)
subroutine mpi_comm_split(comm, color, key, comm2, ierr)
Definition mpi.f:272
subroutine mpi_allreduce(sendbuf, recvbuf, cnt, datatype, operation, comm, ierr)
Definition mpi.f:103
subroutine mpi_comm_size(comm, size, ierr)
Definition mpi.f:263
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)
Definition mpi.f:205
subroutine mpi_gather(sendbuf, cnt, datatype, recvbuf, reccnt, rectype, root, comm, ierr)
Definition mpi.f:56
subroutine mpi_comm_free(comm, ierr)
Definition mpi.f:238
subroutine mpi_comm_rank(comm, rank, ierr)
Definition mpi.f:254
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine, public cmumps_buf_deall_small_buf(ierr)
subroutine, public cmumps_buf_dist_irecv_size(cmumps_lbufr_bytes)
subroutine, public cmumps_buf_ini_myid(myid)
subroutine, public cmumps_buf_deall_max_array()
subroutine, public cmumps_buf_max_array_minsize(nfs4father, ierr)
subroutine, public cmumps_buf_alloc_small_buf(size, ierr)
subroutine, public cmumps_init_l0_omp_factors(id_l0_omp_factors)
subroutine, public cmumps_free_l0_omp_factors(id_l0_omp_factors)
subroutine, public cmumps_load_end(info1, nslaves, ierr)
subroutine, public cmumps_load_set_inicost(cost_subtree_arg, k64, dk15, k375, maxs)
subroutine, public cmumps_load_init(id, memory_md_arg, maxs)
subroutine, public cmumps_load_update(check_flops, process_bande, inc_load, keep, keep8)
subroutine, public cmumps_blr_end_module(info1, keep8, k34, lrsolve_act_opt)
subroutine, public cmumps_blr_mod_to_struc(id_blrarray_encoding)
subroutine, public cmumps_blr_init_module(initial_size, info)
double precision flop_accum_compress
Definition clr_stats.F:38
double precision flop_update_fr
Definition clr_stats.F:24
double precision time_cb_compress
Definition clr_stats.F:58
double precision flop_facto_lr
Definition clr_stats.F:24
double precision time_update_frlr
Definition clr_stats.F:53
double precision flop_midblk_compress
Definition clr_stats.F:38
double precision time_compress
Definition clr_stats.F:55
double precision time_decomp_locasm2
Definition clr_stats.F:72
double precision flop_decompress
Definition clr_stats.F:38
double precision time_decomp
Definition clr_stats.F:69
double precision time_frfronts
Definition clr_stats.F:67
double precision avg_flop_facto_lr
Definition clr_stats.F:81
double precision time_decomp_ucfs
Definition clr_stats.F:70
double precision flop_trsm
Definition clr_stats.F:24
double precision max_flop_facto_lr
Definition clr_stats.F:83
double precision flop_panel
Definition clr_stats.F:24
double precision flop_update_lr
Definition clr_stats.F:24
double precision time_update
Definition clr_stats.F:49
double precision mry_cb_lrgain
Definition clr_stats.F:17
double precision flop_frswap_compress
Definition clr_stats.F:38
double precision time_update_frfr
Definition clr_stats.F:54
double precision flop_cb_compress
Definition clr_stats.F:38
double precision time_frswap_compress
Definition clr_stats.F:57
double precision time_decomp_asm1
Definition clr_stats.F:71
double precision flop_trsm_lr
Definition clr_stats.F:24
double precision flop_trsm_fr
Definition clr_stats.F:24
double precision time_decomp_asms2m
Definition clr_stats.F:75
double precision time_update_lrlr1
Definition clr_stats.F:50
double precision mry_lu_lrgain
Definition clr_stats.F:17
double precision time_midblk_compress
Definition clr_stats.F:56
double precision time_lr_module
Definition clr_stats.F:59
subroutine saveandwrite_gains(local, k489, dkeep, n, icntl36, depth, bcksz, nassmin, nfrontmin, sym, k486, k472, k475, k478, k480, k481, k483, k484, k8110, k849, nbtreenodes, nprocs, mpg, prokg)
Definition clr_stats.F:578
double precision flop_frfronts
Definition clr_stats.F:38
double precision time_fac_sq
Definition clr_stats.F:66
double precision time_fac_mq
Definition clr_stats.F:65
double precision min_flop_facto_lr
Definition clr_stats.F:82
double precision mry_cb_fr
Definition clr_stats.F:17
double precision time_decomp_maplig1
Definition clr_stats.F:73
double precision flop_lrgain
Definition clr_stats.F:24
double precision time_lrtrsm
Definition clr_stats.F:61
double precision flop_update_lrlr3
Definition clr_stats.F:24
double precision time_diagcopy
Definition clr_stats.F:68
double precision time_fac_i
Definition clr_stats.F:64
double precision flop_compress
Definition clr_stats.F:38
double precision time_panel
Definition clr_stats.F:63
subroutine compute_global_gains(nb_entries_factor, flop_number, nb_entries_factor_withlr, prokg, mpg)
Definition clr_stats.F:535
double precision time_update_lrlr2
Definition clr_stats.F:51
double precision time_update_lrlr3
Definition clr_stats.F:52
integer cnt_nodes
Definition clr_stats.F:23
subroutine init_stats_global(id)
Definition clr_stats.F:344
double precision flop_facto_fr
Definition clr_stats.F:24
double precision mry_lu_fr
Definition clr_stats.F:17
double precision time_frtrsm
Definition clr_stats.F:62
double precision flop_cb_decompress
Definition clr_stats.F:38
double precision time_decomp_asms2s
Definition clr_stats.F:74
subroutine cmumps_clean_ooc_data(id, ierr)
Definition cmumps_ooc.F:568
subroutine cmumps_ooc_end_facto(id, ierr)
Definition cmumps_ooc.F:459
subroutine, public cmumps_ooc_init_facto(id, maxs)
Definition cmumps_ooc.F:114
subroutine cmumps_ooc_clean_pending(ierr)
Definition cmumps_ooc.F:446
subroutine, public mumps_fdbd_init(initial_size, info)
subroutine, public mumps_fdbd_end(info1)
subroutine, public mumps_fmrd_init(initial_size, info)
subroutine, public mumps_fmrd_end(info1)
subroutine, public mumps_fdm_mod_to_struc(what, id_fdm_encoding, info)
subroutine, public mumps_fdm_init(what, initial_size, info)
subroutine, public mumps_fdm_end(what)
subroutine static(v, vr, a, ar, ms, in, igrnod, weight_md, wfext)
Definition static.F:33
subroutine mumps_secfin(t)
subroutine mumps_seti8toi4(i8, i)
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)
subroutine mumps_set_ierror(size8, ierror)
subroutine mumps_mem_centralize(myid, comm, info, infog, irank)
subroutine mumps_secdeb(t)
subroutine mumps_npiv_critical_path(n, nsteps, step, frere, fils, na, lna, ne, maxnpivtree)

◆ cmumps_print_allocated_mem()

subroutine cmumps_print_allocated_mem ( logical, intent(in) prok,
logical, intent(in) prokg,
logical, intent(in) print_maxavg,
integer, intent(in) mp,
integer, intent(in) mpg,
integer, intent(in) info16,
integer, intent(in) infog18,
integer, intent(in) infog19,
integer, intent(in) nslaves,
integer, intent(in) irank,
integer, dimension(500), intent(in) keep )

Definition at line 3851 of file cfac_driver.F.

3853 IMPLICIT NONE
3854C
3855C Purpose:
3856C =======
3857C Print memory allocated during factorization
3858C - called at beginning of factorization in full-rank
3859C - called at end of factorization in low-rank (because
3860C of dynamic allocations)
3861C
3862 LOGICAL, INTENT(IN) :: PROK, PROKG, PRINT_MAXAVG
3863 INTEGER, INTENT(IN) :: MP, MPG, INFO16, INFOG18, INFOG19
3864 INTEGER, INTENT(IN) :: IRANK, NSLAVES
3865 INTEGER, INTENT(IN) :: KEEP(500)
3866C
3867 IF ( prokg ) THEN
3868 IF (print_maxavg) THEN
3869 WRITE( mpg,'(A,I12) ')
3870 & ' ** Memory allocated, max in Mbytes (INFOG(18)):',
3871 & infog18
3872 ENDIF
3873 WRITE( mpg,'(/A,I12) ')
3874 & ' ** Memory allocated, total in Mbytes (INFOG(19)):',
3875 & infog19
3876 END IF
3877 RETURN