128 IMPLICIT NONE
129 INTEGER :: INODE, IFATH, IGRANDFATH, SPECIAL_ROOT,
130 & NFRONT, NPIV, LEAF, VARNUM, IERR
131 LOGICAL :: INODE_IS_A_LEAF
132 INTEGER(8) :: NFRONT8, NPIV8
133 INTEGER(8) :: SUM_CB, MAX_MEM
134 DOUBLE PRECISION :: COST_NODE
135 LOGICAL :: IN_L0INIT, SKIP_ABOVE
136 LOGICAL, EXTERNAL :: MUMPS_ROOTSSARBR, MUMPS_IN_OR_ROOT_SSARBR
137 INTEGER, EXTERNAL :: MUMPS_GET_POOL_LENGTH, MUMPS_TYPENODE
138 IF (associated(ipool_b_l0_omp)) THEN
139 WRITE(*,*) " Internal error 1 MUMPS_ANA_INITIALIZE_L0_OMP"
141 ENDIF
142 IF (associated(ipool_a_l0_omp)) THEN
143 WRITE(*,*) " Internal error 2 MUMPS_ANA_INITIALIZE_L0_OMP"
145 ENDIF
146 IF (associated(virt_l0_omp)) THEN
147 WRITE(*,*) " Internal error 3 MUMPS_ANA_INITIALIZE_L0_OMP"
149 ENDIF
150 IF (associated(virt_l0_omp_mapping)) THEN
151 WRITE(*,*) " Internal error 4 MUMPS_ANA_INITIALIZE_L0_OMP"
153 ENDIF
154 IF (associated(perm_l0_omp)) THEN
155 WRITE(*,*) " Internal error 5 MUMPS_ANA_INITIALIZE_L0_OMP"
157 ENDIF
158 IF (associated(ptr_leafs_l0_omp)) THEN
159 WRITE(*,*) " Internal error 6 MUMPS_ANA_INITIALIZE_L0_OMP"
161 ENDIF
162 ierr = idll_create( l0_omp_dll )
163 ierr = idll_create( leafs_above_l0_omp_dll )
164 ALLOCATE( threads_charge( nb_threads ), stat=ierr )
165 IF (ierr .GT. 0) THEN
166 info(1) = -7
167 info(2) = nb_threads
168 IF (lpok) WRITE(lp,150) 'THREADS_CHARGE'
169 GOTO 500
170 ENDIF
171 ALLOCATE( costs_mono_thread( nsteps ), stat=ierr )
172 IF(ierr.GT.0) THEN
173 info(1) = -7
174 info(2) = nsteps
175 IF (lpok) WRITE(lp, 150) ' COSTS_MONO_THREAD'
176 GOTO 500
177 ENDIF
178 ALLOCATE( costs_multi_thread( nsteps ), stat=ierr )
179 IF(ierr.GT.0) THEN
180 info(1) = -7
181 info(2) = nsteps
182 IF (lpok) WRITE(lp, 150) ' COSTS_MULTI_THREAD'
183 GOTO 500
184 ENDIF
185 ALLOCATE( schur_memory( nsteps ), stat=ierr )
186 IF(ierr.GT.0) THEN
187 info(1) = -7
188 info(2) = nsteps
189 IF (lpok) WRITE(lp, 150) ' SCHUR_MEMORY'
190 GOTO 500
191 ENDIF
192 ALLOCATE( subtree_factor_memory( nsteps ), stat=ierr )
193 IF(ierr.GT.0) THEN
194 info(1) = -7
195 info(2) = nsteps
196 IF (lpok) WRITE(lp, 150) ' SCHUR_FACTOR_MEMORY'
197 GOTO 500
198 ENDIF
199 ALLOCATE( subtree_memory( nsteps ), stat=ierr )
200 IF(ierr.GT.0) THEN
201 info(1) = -7
202 info(2) = nsteps
203 IF (lpok) WRITE(lp, 150) ' SUBTREE_MEMORY'
204 GOTO 500
205 ENDIF
206 ALLOCATE( cp_nstk_steps( nsteps ), stat=ierr )
207 IF(ierr.GT.0) THEN
208 info(1) = -7
209 info(2) = nsteps
210 IF (lpok) WRITE(lp, 150) ' CP_NSTK_STEPS'
211 GOTO 500
212 ENDIF
214 ALLOCATE( ipool_b_l0_omp( lpool_b_l0_omp) , stat=ierr )
215 IF(ierr.GT.0) THEN
216 info(1) = -7
217 info(2) = nsteps
218 IF (lpok) WRITE(lp, 150) ' id%IPOOL_B_L0_OMP'
219 GOTO 500
220 ENDIF
221 costs_mono_thread = 0.0d0
222 costs_multi_thread = 0.0d0
223 cost_under = 0.0d0
224 cost_above = 0.0d0
225 cost_total_best = huge(cost_total_best)
226 schur_memory = 0_8
227 subtree_factor_memory = 0_8
228 subtree_memory = 0_8
229 factor_size_under_l0 = 0_8
230 cp_nstk_steps(:) = nstk_steps(:)
231 IF (keep(403).NE.0) THEN
233 ENDIF
235 & myid_nodes,
236 & keep(199), na(1), lna,
237 & keep(1), keep8(1), step(1),
238 & procnode_steps(1),
239 & ipool_b_l0_omp(1), lpool_b_l0_omp)
240 leaf = leaf - 1
241 nbleaf_myid = leaf
242 IF (nbleaf_myid .EQ. 0) THEN
243 RETURN
244 ENDIF
245 90 CONTINUE
246 inode = ipool_b_l0_omp( leaf )
247 leaf = leaf - 1
248 inode_is_a_leaf=.true.
249 95 CONTINUE
250 nfront = nd( step( inode ) )
251 nfront8= int(nfront,8)
252 npiv = 0
253 varnum = inode
254 DO WHILE (varnum .GT. 0 )
255 npiv = npiv + 1
256 varnum = fils( varnum )
257 END DO
258 npiv8=int(npiv,8)
259 varnum = - varnum
260 IF (keep(50) .EQ. 0) THEN
261 schur_memory( step( inode ) ) =
262 & (nfront8 - npiv8)*(nfront8 - npiv8)
263 IF (keep(251) .EQ. 0) THEN
264 subtree_factor_memory( step( inode ) ) = nfront8 * nfront8
265 & - schur_memory( step( inode ) )
266 ELSE
267 subtree_factor_memory( step( inode ) ) = 0_8
268 END IF
269 ELSE
270 schur_memory( step( inode ) ) =
271 & (nfront8 - npiv8)*(nfront8 + 1_8 - npiv8)/2_8
272 IF (keep(251) .EQ. 0) THEN
273 subtree_factor_memory( step( inode ) ) = nfront8 * npiv8
274 ELSE
275 subtree_factor_memory( step( inode ) ) = 0_8
276 END IF
277 END IF
278 sum_cb = 0_8
279 max_mem = 0_8
280 IF (keep(403) .EQ. 0) THEN
282 & sym, 1, cost_node )
283 costs_mono_thread( step( inode ) ) = cost_node
284 ELSE
285 CALL cost_bench (npiv, nfront-npiv, 1, keep(50), cost_node)
286 costs_mono_thread( step( inode ) ) = cost_node
287 CALL cost_bench (npiv,nfront-npiv,nb_threads,keep(50),cost_node)
288 costs_multi_thread( step( inode ) ) = cost_node
289 END IF
290 DO WHILE (varnum .GT. 0 )
291 costs_mono_thread( step( inode ) ) =
292 & costs_mono_thread( step( inode ) )
293 & +
294 & costs_mono_thread( step( varnum ) )
295 max_mem =
max(max_mem,
296 & subtree_memory( step( varnum ) ) + sum_cb )
297 sum_cb = sum_cb + schur_memory( step( varnum ) ) +
298 & subtree_factor_memory( step( varnum ) )
299 subtree_factor_memory( step( inode ) ) =
300 & subtree_factor_memory( step( inode ) )
301 & + subtree_factor_memory( step( varnum ) )
302 varnum = frere( step( varnum ) )
303 END DO
304 subtree_memory( step( inode ) ) =
305 &
max( max_mem, nfront8*nfront8 + sum_cb )
306 ifath = dad( step( inode ) )
307 IF (ifath .NE. 0) THEN
308 igrandfath = dad( step( ifath ) )
309 ELSE
310 igrandfath = 0
311 ENDIF
312 special_root =
max(keep(38), keep(20))
313 skip_above = .false.
314 in_l0init = .false.
315 IF ( inode .EQ. special_root ) THEN
316 in_l0init = .false.
317 IF (inode_is_a_leaf) THEN
318 skip_above = .true.
319 GOTO 80
320 ELSE
321 WRITE(*,*) " Internal error 1 in MUMPS_ANA_INITIALIZE_L0_OMP",
322 & inode, special_root
324 ENDIF
325 ENDIF
326 IF ( ifath .NE. 0 .AND. ifath .EQ. keep(38) ) THEN
327 in_l0init = .false.
328 IF (inode_is_a_leaf) THEN
329 skip_above = .true.
330 GOTO 80
331 ELSE
332 WRITE(*,*) " Internal error 2 in MUMPS_ANA_INITIALIZE_L0_OMP",
333 & inode, ifath, keep(38)
335 ENDIF
336 ENDIF
337 IF ( slavef_during_mapping > 1 ) THEN
339 & procnode_steps( step( inode ) ), keep(199) )
341 & procnode_steps( step( inode ) ), keep(199) )
342 &) THEN
343 in_l0init = .false.
344 IF (inode_is_a_leaf) THEN
345 skip_above = .true.
346 GOTO 80
347 ELSE
348 WRITE(*,*)
349 & " Internal error 3 in MUMPS_ANA_INITIALIZE_L0_OMP",
350 & inode
352 ENDIF
353 ENDIF
354 ENDIF
355 IF (ifath.NE.0) THEN
357 in_l0init = .false.
358 IF (inode_is_a_leaf) THEN
359 skip_above = .true.
360 GOTO 80
361 ELSE
362 WRITE(*,*)
363 & " Internal error 5 in MUMPS_ANA_INITIALIZE_L0_OMP",
364 & inode, ifath
366 ENDIF
367 ENDIF
368 ENDIF
370 in_l0init = .false.
371 IF (inode_is_a_leaf) THEN
372 skip_above = .true.
373 GOTO 80
374 ELSE
375 WRITE(*,*)
376 & " Internal error 6 in MUMPS_ANA_INITIALIZE_L0_OMP",
377 & inode
379 ENDIF
380 ENDIF
381 IF ( ifath .EQ. 0 ) THEN
382 in_l0init = .true.
383 GOTO 80
384 ELSE
385 IF ( ifath .EQ. keep(20) ) THEN
386 in_l0init = .true.
387 GOTO 80
388 ENDIF
389 IF ( igrandfath .EQ. keep(38) .AND. keep(38) .NE. 0 ) THEN
390 in_l0init = .true.
391 GOTO 80
392 ENDIF
393 IF ( slavef_during_mapping > 1 ) THEN
395 & procnode_steps( step( ifath ) ), keep(199) )) THEN
396 in_l0init = .true.
397 GOTO 80
398 ENDIF
399 ENDIF
400 ENDIF
401 80 CONTINUE
402 IF (.NOT. skip_above) THEN
403 IF (keep(50).EQ.0) THEN
404 factor_size_under_l0 = factor_size_under_l0 +
405 & npiv8 * ( nfront8 + nfront8 - npiv8 )
406 ELSE
407 factor_size_under_l0 = factor_size_under_l0 +
408 & nfront8 * npiv8
409 ENDIF
410 ENDIF
411 IF ( in_l0init ) THEN
413 ELSE IF ( skip_above ) THEN
414 ierr = idll_push_back( leafs_above_l0_omp_dll, inode )
415 IF ( .NOT. inode_is_a_leaf ) THEN
416 WRITE(*,*)
417 & " Internal error 7 in MUMPS_ANA_INITIALIZE_L0_OMP",
418 & inode
420 ENDIF
421 ipool_b_l0_omp(leaf+1) = -inode
422 ELSE
423 cp_nstk_steps( step( ifath ) ) =
424 & cp_nstk_steps( step( ifath ) ) - 1
425 IF ( cp_nstk_steps( step( ifath ) ) .EQ. 0 ) THEN
426 inode = ifath
427 inode_is_a_leaf = .false.
428 GOTO 95
429 ENDIF
430 END IF
431 IF ( leaf .GT. 0 ) THEN
432 GOTO 90
433 END IF
434 500 CONTINUE
435 RETURN
436 150 FORMAT(
437 & /' ** ALLOC FAILURE IN MUMPS_ANA_INITIALIZE_L0_OMP FOR ',
438 & a30)
subroutine cost_bench(npiv, nschur, nb_core, sym, cost)
subroutine read_bench(arith, k50)