OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
ana_omp_m.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
15 CONTAINS
16 SUBROUTINE mumps_ana_l0_omp( NB_THREADS, N, NSTEPS, SYM, SLAVEF,
17 & DAD, FRERE, FILS, NSTK_STEPS, ND, STEP, PROCNODE_STEPS, KEEP,
18 & KEEP8, MYID_NODES, NA, LNA, ARITH, LPOOL_B_L0_OMP,
19 & IPOOL_B_L0_OMP, LPOOL_A_L0_OMP, IPOOL_A_L0_OMP,
20 & L_VIRT_L0_OMP, VIRT_L0_OMP, VIRT_L0_OMP_MAPPING,
21 & L_PHYS_L0_OMP, PHYS_L0_OMP, PERM_L0_OMP, PTR_LEAFS_L0_OMP,
22 & THREAD_LA, INFO, ICNTL )
23 USE mumps_idll
24 USE mumps_ddll
25 IMPLICIT NONE
26 include 'mpif.h'
27 INTEGER, INTENT ( IN ) :: NB_THREADS, N, NSTEPS, SYM
28 INTEGER, INTENT ( IN ) :: SLAVEF, MYID_NODES
29 INTEGER, INTENT ( IN ) :: LNA
30 INTEGER, INTENT ( IN ) :: DAD (:), FRERE (:)
31 INTEGER, INTENT ( IN ) :: FILS (:)
32 INTEGER, INTENT ( IN ) :: NSTK_STEPS (:)
33 INTEGER, INTENT ( IN ) :: ND (:), STEP (:)
34 INTEGER, INTENT ( IN ) :: PROCNODE_STEPS(:)
35 INTEGER, INTENT ( IN ) :: KEEP ( : )
36 INTEGER(8), INTENT ( IN ) :: KEEP8(:)
37 INTEGER, INTENT ( IN ) :: NA ( : )
38 CHARACTER(1), INTENT(IN) :: ARITH
39 INTEGER, INTENT ( OUT ) :: LPOOL_B_L0_OMP
40 INTEGER, INTENT ( OUT ) :: LPOOL_A_L0_OMP
41 INTEGER, INTENT ( OUT ) :: L_PHYS_L0_OMP
42 INTEGER, INTENT ( OUT ) :: L_VIRT_L0_OMP
43 INTEGER, DIMENSION(:), POINTER :: IPOOL_B_L0_OMP
44 INTEGER, DIMENSION(:), POINTER :: IPOOL_A_L0_OMP
45 INTEGER, DIMENSION(:), POINTER :: PHYS_L0_OMP
46 INTEGER, DIMENSION(:), POINTER :: VIRT_L0_OMP, VIRT_L0_OMP_MAPPING
47 INTEGER, DIMENSION(:), POINTER :: PERM_L0_OMP
48 INTEGER, DIMENSION(:), POINTER :: PTR_LEAFS_L0_OMP
49 INTEGER(8), INTENT ( OUT ) :: THREAD_LA
50 INTEGER, INTENT(INOUT) :: INFO(80)
51 INTEGER, INTENT(IN) :: ICNTL(60)
52 LOGICAL :: LPOK
53 INTEGER :: LP
54 INTEGER :: NB_REPEAT_ACCEPTL0, NB_MAX_IN_L0_ACCEPTL0
55 INTEGER :: THRESH_MEM, SLAVEF_DURING_MAPPING
56 REAL :: THRESH_EQUILIB
57 DOUBLE PRECISION, DIMENSION(1,1,1) :: BENCH
58 INTEGER :: INODE
59 INTEGER :: NBLEAF_MYID
60 DOUBLE PRECISION :: COST_UNDER, COST_ABOVE, COST_TOTAL_BEST
61 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: THREADS_CHARGE
62 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: COSTS_MONO_THREAD
63 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: COSTS_MULTI_THREAD
64 INTEGER(8), DIMENSION(:), ALLOCATABLE :: SCHUR_MEMORY
65 INTEGER(8), DIMENSION(:), ALLOCATABLE :: SUBTREE_FACTOR_MEMORY
66 INTEGER(8), DIMENSION(:), ALLOCATABLE :: SUBTREE_MEMORY
67 INTEGER(8) :: FACTOR_SIZE_UNDER_L0, FACTOR_SIZE_PER_MPI
68 INTEGER, DIMENSION(:), ALLOCATABLE :: CP_NSTK_STEPS
69 TYPE ( IDLL_T ), POINTER :: L0_OMP_DLL
70 TYPE ( IDLL_T ), POINTER :: LEAFS_ABOVE_L0_OMP_DLL
71 INTEGER :: I
72 thresh_equilib = real(keep(408))/real(100)
73 IF ((thresh_equilib.GT..99).OR.(thresh_equilib.LT.0.01)) THEN
74 thresh_equilib = 0.9
75 ENDIF
76 thresh_mem = keep(397)
77 IF ((thresh_mem.LT.-1).OR.(thresh_mem.GT.100)) thresh_mem=100
78 IF (thresh_mem.EQ.-1) THEN
79 IF (nb_threads.EQ.2) THEN
80 thresh_mem = 50
81 ELSEIF (nb_threads.LE.4) THEN
82 thresh_mem = 60
83 ELSEIF (nb_threads.LT.8) THEN
84 thresh_mem = 70
85 ELSEIF (nb_threads.LE.12) THEN
86 thresh_mem = 80
87 ELSEIF (nb_threads.LE.20) THEN
88 thresh_mem = 85
89 ELSEIF (nb_threads.LE.36) THEN
90 thresh_mem = 90
91 ELSE
92 thresh_mem = 95
93 ENDIF
94 ENDIF
95 slavef_during_mapping = slavef
96 factor_size_per_mpi = keep8(101) / slavef_during_mapping
97 IF ( keep(261) .EQ. 0) THEN
98 WRITE(*,*)"KEEP(261) MUST BE SET TO 1 IN ORDER TO USE
99 & MULTITHREADED TREE PARALLELISM"
100 CALL mumps_abort()
101 END IF
102 lp = icntl(1)
103 lpok = ( lp .GT. 0 .AND. icntl(4) .GE.1 )
104 nb_repeat_acceptl0 = -1
105 nb_max_in_l0_acceptl0 = -1
107 IF (info(1) .LT. 0) GOTO 500
10810 CONTINUE
109 DO WHILE ( .NOT. mumps_ana_accept_l0_omp() )
110 IF (info(1) .LT. 0) GOTO 500
111 CALL l0_remove_node ( inode )
112 IF (inode .LT. 0) THEN
113 DO i = 1, lpool_b_l0_omp
114 IF (ipool_b_l0_omp( i ) .EQ. (- inode)) THEN
115 ipool_b_l0_omp( i ) = inode
116 GOTO 10
117 END IF
118 END DO
119 END IF
120 CALL l0_insert_children ( inode )
121 END DO
123 500 CONTINUE
125 RETURN
126 CONTAINS
128 IMPLICIT NONE
129 INTEGER :: INODE, IFATH, IGRANDFATH, SPECIAL_ROOT,
130 & nfront, npiv, leaf, varnum, ierr
131 LOGICAL :: INODE_IS_A_LEAF
132 INTEGER(8) :: NFRONT8, NPIV8
133 INTEGER(8) :: SUM_CB, MAX_MEM
134 DOUBLE PRECISION :: COST_NODE
135 LOGICAL :: IN_L0INIT, SKIP_ABOVE
136 LOGICAL, EXTERNAL :: MUMPS_ROOTSSARBR, MUMPS_IN_OR_ROOT_SSARBR
137 INTEGER, EXTERNAL :: MUMPS_GET_POOL_LENGTH, MUMPS_TYPENODE
138 IF (associated(ipool_b_l0_omp)) THEN
139 WRITE(*,*) " Internal error 1 MUMPS_ANA_INITIALIZE_L0_OMP"
140 CALL mumps_abort()
141 ENDIF
142 IF (associated(ipool_a_l0_omp)) THEN
143 WRITE(*,*) " Internal error 2 MUMPS_ANA_INITIALIZE_L0_OMP"
144 CALL mumps_abort()
145 ENDIF
146 IF (associated(virt_l0_omp)) THEN
147 WRITE(*,*) " Internal error 3 MUMPS_ANA_INITIALIZE_L0_OMP"
148 CALL mumps_abort()
149 ENDIF
150 IF (associated(virt_l0_omp_mapping)) THEN
151 WRITE(*,*) " Internal error 4 MUMPS_ANA_INITIALIZE_L0_OMP"
152 CALL mumps_abort()
153 ENDIF
154 IF (associated(perm_l0_omp)) THEN
155 WRITE(*,*) " Internal error 5 MUMPS_ANA_INITIALIZE_L0_OMP"
156 CALL mumps_abort()
157 ENDIF
158 IF (associated(ptr_leafs_l0_omp)) THEN
159 WRITE(*,*) " Internal error 6 MUMPS_ANA_INITIALIZE_L0_OMP"
160 CALL mumps_abort()
161 ENDIF
162 ierr = idll_create( l0_omp_dll )
163 ierr = idll_create( leafs_above_l0_omp_dll )
164 ALLOCATE( threads_charge( nb_threads ), stat=ierr )
165 IF (ierr .GT. 0) THEN
166 info(1) = -7
167 info(2) = nb_threads
168 IF (lpok) WRITE(lp,150) 'THREADS_CHARGE'
169 GOTO 500
170 ENDIF
171 ALLOCATE( costs_mono_thread( nsteps ), stat=ierr )
172 IF(ierr.GT.0) THEN
173 info(1) = -7
174 info(2) = nsteps
175 IF (lpok) WRITE(lp, 150) ' COSTS_MONO_THREAD'
176 GOTO 500
177 ENDIF
178 ALLOCATE( costs_multi_thread( nsteps ), stat=ierr )
179 IF(ierr.GT.0) THEN
180 info(1) = -7
181 info(2) = nsteps
182 IF (lpok) WRITE(lp, 150) ' COSTS_MULTI_THREAD'
183 GOTO 500
184 ENDIF
185 ALLOCATE( schur_memory( nsteps ), stat=ierr )
186 IF(ierr.GT.0) THEN
187 info(1) = -7
188 info(2) = nsteps
189 IF (lpok) WRITE(lp, 150) ' SCHUR_MEMORY'
190 GOTO 500
191 ENDIF
192 ALLOCATE( subtree_factor_memory( nsteps ), stat=ierr )
193 IF(ierr.GT.0) THEN
194 info(1) = -7
195 info(2) = nsteps
196 IF (lpok) WRITE(lp, 150) ' SCHUR_FACTOR_MEMORY'
197 GOTO 500
198 ENDIF
199 ALLOCATE( subtree_memory( nsteps ), stat=ierr )
200 IF(ierr.GT.0) THEN
201 info(1) = -7
202 info(2) = nsteps
203 IF (lpok) WRITE(lp, 150) ' SUBTREE_MEMORY'
204 GOTO 500
205 ENDIF
206 ALLOCATE( cp_nstk_steps( nsteps ), stat=ierr )
207 IF(ierr.GT.0) THEN
208 info(1) = -7
209 info(2) = nsteps
210 IF (lpok) WRITE(lp, 150) ' CP_NSTK_STEPS'
211 GOTO 500
212 ENDIF
213 lpool_b_l0_omp=mumps_get_pool_length(na(1),keep(1),keep8(1))
214 ALLOCATE( ipool_b_l0_omp( lpool_b_l0_omp) , stat=ierr )
215 IF(ierr.GT.0) THEN
216 info(1) = -7
217 info(2) = nsteps
218 IF (lpok) WRITE(lp, 150) ' id%IPOOL_B_L0_OMP'
219 GOTO 500
220 ENDIF
221 costs_mono_thread = 0.0d0
222 costs_multi_thread = 0.0d0
223 cost_under = 0.0d0
224 cost_above = 0.0d0
225 cost_total_best = huge(cost_total_best)
226 schur_memory = 0_8
227 subtree_factor_memory = 0_8
228 subtree_memory = 0_8
229 factor_size_under_l0 = 0_8
230 cp_nstk_steps(:) = nstk_steps(:)
231 IF (keep(403).NE.0) THEN
232 CALL read_bench( arith, keep(50) )
233 ENDIF
234 CALL mumps_init_pool_dist(n, leaf,
235 & myid_nodes,
236 & keep(199), na(1), lna,
237 & keep(1), keep8(1), step(1),
238 & procnode_steps(1),
239 & ipool_b_l0_omp(1), lpool_b_l0_omp)
240 leaf = leaf - 1
241 nbleaf_myid = leaf
242 IF (nbleaf_myid .EQ. 0) THEN
243 RETURN
244 ENDIF
245 90 CONTINUE
246 inode = ipool_b_l0_omp( leaf )
247 leaf = leaf - 1
248 inode_is_a_leaf=.true.
249 95 CONTINUE
250 nfront = nd( step( inode ) )
251 nfront8= int(nfront,8)
252 npiv = 0
253 varnum = inode
254 DO WHILE (varnum .GT. 0 )
255 npiv = npiv + 1
256 varnum = fils( varnum )
257 END DO
258 npiv8=int(npiv,8)
259 varnum = - varnum
260 IF (keep(50) .EQ. 0) THEN
261 schur_memory( step( inode ) ) =
262 & (nfront8 - npiv8)*(nfront8 - npiv8)
263 IF (keep(251) .EQ. 0) THEN
264 subtree_factor_memory( step( inode ) ) = nfront8 * nfront8
265 & - schur_memory( step( inode ) )
266 ELSE
267 subtree_factor_memory( step( inode ) ) = 0_8
268 END IF
269 ELSE
270 schur_memory( step( inode ) ) =
271 & (nfront8 - npiv8)*(nfront8 + 1_8 - npiv8)/2_8
272 IF (keep(251) .EQ. 0) THEN
273 subtree_factor_memory( step( inode ) ) = nfront8 * npiv8
274 ELSE
275 subtree_factor_memory( step( inode ) ) = 0_8
276 END IF
277 END IF
278 sum_cb = 0_8
279 max_mem = 0_8
280 IF (keep(403) .EQ. 0) THEN
281 CALL mumps_get_flops_cost ( nfront, npiv, npiv,
282 & sym, 1, cost_node )
283 costs_mono_thread( step( inode ) ) = cost_node
284 ELSE
285 CALL cost_bench (npiv, nfront-npiv, 1, keep(50), cost_node)
286 costs_mono_thread( step( inode ) ) = cost_node
287 CALL cost_bench (npiv,nfront-npiv,nb_threads,keep(50),cost_node)
288 costs_multi_thread( step( inode ) ) = cost_node
289 END IF
290 DO WHILE (varnum .GT. 0 )
291 costs_mono_thread( step( inode ) ) =
292 & costs_mono_thread( step( inode ) )
293 & +
294 & costs_mono_thread( step( varnum ) )
295 max_mem = max(max_mem,
296 & subtree_memory( step( varnum ) ) + sum_cb )
297 sum_cb = sum_cb + schur_memory( step( varnum ) ) +
298 & subtree_factor_memory( step( varnum ) )
299 subtree_factor_memory( step( inode ) ) =
300 & subtree_factor_memory( step( inode ) )
301 & + subtree_factor_memory( step( varnum ) )
302 varnum = frere( step( varnum ) )
303 END DO
304 subtree_memory( step( inode ) ) =
305 & max( max_mem, nfront8*nfront8 + sum_cb )
306 ifath = dad( step( inode ) )
307 IF (ifath .NE. 0) THEN
308 igrandfath = dad( step( ifath ) )
309 ELSE
310 igrandfath = 0
311 ENDIF
312 special_root = max(keep(38), keep(20))
313 skip_above = .false.
314 in_l0init = .false.
315 IF ( inode .EQ. special_root ) THEN
316 in_l0init = .false.
317 IF (inode_is_a_leaf) THEN
318 skip_above = .true.
319 GOTO 80
320 ELSE
321 WRITE(*,*) " Internal error 1 in MUMPS_ANA_INITIALIZE_L0_OMP",
322 & inode, special_root
323 CALL mumps_abort()
324 ENDIF
325 ENDIF
326 IF ( ifath .NE. 0 .AND. ifath .EQ. keep(38) ) THEN
327 in_l0init = .false.
328 IF (inode_is_a_leaf) THEN
329 skip_above = .true.
330 GOTO 80
331 ELSE
332 WRITE(*,*) " Internal error 2 in MUMPS_ANA_INITIALIZE_L0_OMP",
333 & inode, ifath, keep(38)
334 CALL mumps_abort()
335 ENDIF
336 ENDIF
337 IF ( slavef_during_mapping > 1 ) THEN
338 IF (mumps_rootssarbr(
339 & procnode_steps( step( inode ) ), keep(199) )
340 & .OR. .NOT. mumps_in_or_root_ssarbr(
341 & procnode_steps( step ( inode ) ), keep(199) )
342 &) THEN
343 in_l0init = .false.
344 IF (inode_is_a_leaf) THEN
345 skip_above = .true.
346 GOTO 80
347 ELSE
348 WRITE(*,*)
349 & " Internal error 3 in MUMPS_ANA_INITIALIZE_L0_OMP",
350 & inode
351 CALL mumps_abort()
352 ENDIF
353 ENDIF
354 ENDIF
355 IF (ifath.NE.0) THEN
356 IF ( mumps_typenode(step(ifath),keep(199)).EQ.2) THEN
357 in_l0init = .false.
358 IF (inode_is_a_leaf) THEN
359 skip_above = .true.
360 GOTO 80
361 ELSE
362 WRITE(*,*)
363 & " Internal error 5 in MUMPS_ANA_INITIALIZE_L0_OMP",
364 & inode, ifath
365 CALL mumps_abort()
366 ENDIF
367 ENDIF
368 ENDIF
369 IF ( mumps_typenode(step(inode),keep(199)).EQ.2) THEN
370 in_l0init = .false.
371 IF (inode_is_a_leaf) THEN
372 skip_above = .true.
373 GOTO 80
374 ELSE
375 WRITE(*,*)
376 & " Internal error 6 in MUMPS_ANA_INITIALIZE_L0_OMP",
377 & inode
378 CALL mumps_abort()
379 ENDIF
380 ENDIF
381 IF ( ifath .EQ. 0 ) THEN
382 in_l0init = .true.
383 GOTO 80
384 ELSE
385 IF ( ifath .EQ. keep(20) ) THEN
386 in_l0init = .true.
387 GOTO 80
388 ENDIF
389 IF ( igrandfath .EQ. keep(38) .AND. keep(38) .NE. 0 ) THEN
390 in_l0init = .true.
391 GOTO 80
392 ENDIF
393 IF ( slavef_during_mapping > 1 ) THEN
394 IF (mumps_rootssarbr(
395 & procnode_steps( step( ifath ) ), keep(199) )) THEN
396 in_l0init = .true.
397 GOTO 80
398 ENDIF
399 ENDIF
400 ENDIF
401 80 CONTINUE
402 IF (.NOT. skip_above) THEN
403 IF (keep(50).EQ.0) THEN
404 factor_size_under_l0 = factor_size_under_l0 +
405 & npiv8 * ( nfront8 + nfront8 - npiv8 )
406 ELSE
407 factor_size_under_l0 = factor_size_under_l0 +
408 & nfront8 * npiv8
409 ENDIF
410 ENDIF
411 IF ( in_l0init ) THEN
412 CALL l0_insert_node ( l0_omp_dll, inode )
413 ELSE IF ( skip_above ) THEN
414 ierr = idll_push_back( leafs_above_l0_omp_dll, inode )
415 IF ( .NOT. inode_is_a_leaf ) THEN
416 WRITE(*,*)
417 & " Internal error 7 in MUMPS_ANA_INITIALIZE_L0_OMP",
418 & inode
419 CALL mumps_abort()
420 ENDIF
421 ipool_b_l0_omp(leaf+1) = -inode
422 ELSE
423 cp_nstk_steps( step( ifath ) ) =
424 & cp_nstk_steps( step( ifath ) ) - 1
425 IF ( cp_nstk_steps( step( ifath ) ) .EQ. 0 ) THEN
426 inode = ifath
427 inode_is_a_leaf = .false.
428 GOTO 95
429 ENDIF
430 END IF
431 IF ( leaf .GT. 0 ) THEN
432 GOTO 90
433 END IF
434 500 CONTINUE
435 RETURN
436 150 FORMAT(
437 & /' ** ALLOC FAILURE IN MUMPS_ANA_INITIALIZE_L0_OMP FOR ',
438 & a30)
439 END SUBROUTINE mumps_ana_initialize_l0_omp
440 SUBROUTINE l0_insert_node ( DLL, INODE )
441 IMPLICIT NONE
442 INTEGER, INTENT ( IN ) :: INODE
443 TYPE ( IDLL_T ), POINTER :: DLL
444 INTEGER :: IERR
445 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
446 ierr = idll_iterator_begin( dll, idll_node )
447 DO WHILE ( associated ( idll_node ) )
448 IF ( costs_mono_thread( step( idll_node%ELMT ) )
449 & .GT.
450 & costs_mono_thread( step( inode ) ) ) THEN
451 idll_node => idll_node%NEXT
452 ELSE
453 EXIT
454 END IF
455 END DO
456 IF ( .NOT. associated ( idll_node ) ) THEN
457 ierr = idll_push_back(dll, inode)
458 ELSE
459 ierr = idll_insert_before(dll, idll_node, inode)
460 ENDIF
461 RETURN
462 END SUBROUTINE l0_insert_node
463 SUBROUTINE l0_insert_children ( I_FATHER )
464 IMPLICIT NONE
465 INTEGER, INTENT ( IN ) :: I_FATHER
466 INTEGER :: I_SON, IERR
467 TYPE ( IDLL_T ), POINTER :: SON_DLL
468 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
469 ierr = idll_create( son_dll )
470 i_son = i_father
471 DO WHILE ( i_son .GT. 0 )
472 i_son = fils( i_son )
473 END DO
474 i_son = - i_son
475 IF ( i_son .EQ. 0 ) THEN
476 RETURN
477 END IF
478 DO WHILE ( i_son .GT. 0 )
479 CALL l0_insert_node ( son_dll, i_son )
480 i_son = frere( step( i_son ) )
481 END DO
482 ierr = idll_iterator_begin( l0_omp_dll, idll_node )
483 ierr = idll_pop_front( son_dll, i_son )
484 IF ( ierr .NE. 0 ) THEN
485 GOTO 190
486 END IF
487 IF ( .NOT. associated( idll_node ) ) THEN
488 DO
489 ierr = idll_push_back( l0_omp_dll, i_son )
490 ierr = idll_pop_front( son_dll, i_son )
491 IF ( ierr .NE. 0 ) THEN
492 GOTO 190
493 END IF
494 END DO
495 ELSE
496 DO
497 IF ( costs_mono_thread( step( i_son )) .LE.
498 & costs_mono_thread( step( idll_node%ELMT ) ) ) THEN
499 IF ( associated ( idll_node%NEXT ) ) THEN
500 idll_node => idll_node%NEXT
501 ELSE
502 ierr = idll_push_back(l0_omp_dll, i_son)
503 ierr = idll_pop_front( son_dll, i_son )
504 IF ( ierr .NE. 0 ) THEN
505 GOTO 190
506 END IF
507 END IF
508 ELSE
509 ierr = idll_insert_before(l0_omp_dll, idll_node,i_son)
510 ierr = idll_pop_front( son_dll, i_son )
511 IF ( ierr .NE. 0 ) THEN
512 GOTO 190
513 END IF
514 END IF
515 END DO
516 END IF
517190 CONTINUE
518 ierr = idll_destroy( son_dll )
519 RETURN
520 END SUBROUTINE l0_insert_children
521 SUBROUTINE l0_remove_node ( INODE )
522 IMPLICIT NONE
523 INTEGER, INTENT ( OUT ) :: INODE
524 INTEGER :: I_SON, IERR, NPIV
525 ierr = idll_pop_front( l0_omp_dll, inode )
526 i_son = inode
527 npiv = 0
528 DO WHILE ( i_son .GT. 0 )
529 npiv = npiv + 1
530 i_son = fils( i_son )
531 END DO
532 i_son = - i_son
533 IF (keep(50) .EQ. 0) THEN
534 factor_size_under_l0 = factor_size_under_l0 -
535 & int(npiv, 8) * int(2 * nd(step(inode)) - npiv, 8)
536 ELSE
537 factor_size_under_l0 = factor_size_under_l0 -
538 & int(npiv, 8) * int(nd(step(inode)), 8)
539 ENDIF
540 IF ( i_son .EQ. 0 ) THEN
541 ierr = idll_push_back( leafs_above_l0_omp_dll, inode )
542 inode = -inode
543 ELSE IF (inode .GT. 0) THEN
544 cost_above = cost_above + costs_multi_thread(step( inode ))
545 END IF
546 RETURN
547 END SUBROUTINE l0_remove_node
549 LOGICAL :: mumps_ana_accept_l0_omp
550 INTEGER :: i, i_less_charged, IERR, nb_in_l0
551 DOUBLE PRECISION :: lightest_charge, heaviest_charge
552 TYPE ( idll_node_t ), POINTER :: idll_node
553 threads_charge = 0.0d0
554 nb_in_l0 = 0
555 ierr = idll_iterator_begin( l0_omp_dll, idll_node )
556 DO WHILE ( associated ( idll_node ) )
557 nb_in_l0 = nb_in_l0 + 1
558 i_less_charged = 1
559 lightest_charge = threads_charge( 1 )
560 DO i = 2, nb_threads
561 IF ( threads_charge( i ) .LT. lightest_charge ) THEN
562 i_less_charged = i
563 lightest_charge = threads_charge( i )
564 END IF
565 END DO
566 threads_charge( i_less_charged ) =
567 & threads_charge( i_less_charged )
568 & +
569 & costs_mono_thread( step( idll_node%ELMT ) )
570 idll_node => idll_node%NEXT
571 END DO
572 nb_max_in_l0_acceptl0 = max(nb_max_in_l0_acceptl0, nb_in_l0)
573 lightest_charge = threads_charge( 1 )
574 heaviest_charge = threads_charge( 1 )
575 DO i = 2, nb_threads
576 IF ( threads_charge( i ) .LT. lightest_charge ) THEN
577 lightest_charge = threads_charge( i )
578 ELSEIF ( threads_charge( i ) .GT. heaviest_charge ) THEN
579 heaviest_charge = threads_charge( i )
580 END IF
581 END DO
582 cost_under = heaviest_charge
583 IF (keep(403) .EQ. 0) THEN
585 & (
586 & dble(lightest_charge)/(dble(heaviest_charge)+1.d-12)
587 & .GT.thresh_equilib .AND.
588 &
589 & factor_size_under_l0 .LE.
590 & factor_size_per_mpi * int(thresh_mem,8) / 100_8
591 &
592 & )
593 & .OR.
594 & ( nb_in_l0 .LT. nb_max_in_l0_acceptl0 .AND.
595 & lightest_charge .EQ. 0.0d0 )
596 & .OR. ( nb_in_l0 .EQ. 0 )
598 IF (associated(phys_l0_omp)) THEN
599 DEALLOCATE(phys_l0_omp)
600 nullify(phys_l0_omp)
601 ENDIF
602 ierr = idll_2_array( l0_omp_dll, phys_l0_omp, l_phys_l0_omp )
603 IF (ierr .EQ. -2) THEN
604 info(1) = -7
605 info(2) = l_phys_l0_omp
606 RETURN
607 ENDIF
608 END IF
609 ELSE
610 IF (cost_under + cost_above .LT. cost_total_best) THEN
611 IF (associated(phys_l0_omp)) THEN
612 DEALLOCATE(phys_l0_omp)
613 nullify(phys_l0_omp)
614 ENDIF
615 ierr = idll_2_array( l0_omp_dll, phys_l0_omp, l_phys_l0_omp )
616 cost_total_best = cost_under + cost_above
617 nb_repeat_acceptl0 = 100
618 END IF
619 nb_repeat_acceptl0 = nb_repeat_acceptl0- 1
620 mumps_ana_accept_l0_omp = (nb_repeat_acceptl0 .EQ. 0)
621 END IF
622 RETURN
623 END FUNCTION mumps_ana_accept_l0_omp
625 IMPLICIT NONE
626 INTEGER :: INODE, OLD_INODE, I, J, K, LEAF, IERR
627 DOUBLE PRECISION :: LIGHTEST_CHARGE
628 INTEGER :: I_LESS_CHARGED
629 INTEGER(8) :: SUM_CB, MAX_MEM, MAX_MEM_ALL_THREADS
630 INTEGER :: MAX_TASK_PER_THREAD
631 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
632 INTEGER, DIMENSION(:,:), ALLOCATABLE :: THREADS_TASKS
633 INTEGER, DIMENSION(:), ALLOCATABLE :: NB_TASK_PER_THREAD
634 INTEGER, DIMENSION(:), ALLOCATABLE :: INV_PERM_L0_OMP
635 EXTERNAL :: mumps_get_pool_length
636 INTEGER :: MUMPS_GET_POOL_LENGTH
637 IF (keep(402) .EQ. 0) THEN
638 l_virt_l0_omp = nb_threads + 1
639 ELSE
640 l_virt_l0_omp = l_phys_l0_omp + 1
641 END IF
642 lpool_a_l0_omp = mumps_get_pool_length(na(1),keep(1),keep8(1))
643 ALLOCATE ( virt_l0_omp( max(l_virt_l0_omp,1) ),
644 & virt_l0_omp_mapping( max(l_virt_l0_omp,1) ),
645 & stat=ierr )
646 IF(ierr.GT.0) THEN
647 info(1)=-7
648 info(2)=2*max(l_virt_l0_omp,1)
649 IF (lpok) WRITE(lp,150) 'id%VIRT_L0_OMP[_MAPPING]'
650 GOTO 300
651 ENDIF
652 ALLOCATE ( perm_l0_omp( max(l_phys_l0_omp,1) ), stat=ierr )
653 IF(ierr.GT.0) THEN
654 info(1)=-7
655 info(2)=max(l_phys_l0_omp,1)
656 IF (lpok) WRITE(lp,150) 'id%PERM_L0_OMP'
657 GOTO 300
658 ENDIF
659 ALLOCATE ( ptr_leafs_l0_omp( l_phys_l0_omp + 1 ), stat=ierr )
660 IF(ierr.GT.0) THEN
661 info(1)=-7
662 info(2)=max(l_phys_l0_omp,1)
663 IF (lpok) WRITE(lp,150) 'id%PTR_LEAFS_L0_OMP'
664 GOTO 300
665 ENDIF
666 ALLOCATE ( ipool_a_l0_omp( lpool_a_l0_omp ), stat=ierr )
667 IF(ierr.GT.0) THEN
668 info(1)=-7
669 info(2)=lpool_a_l0_omp
670 IF (lpok) WRITE(lp,150) 'id%IPOOL_A_L0_OMP'
671 GOTO 300
672 ENDIF
673 ALLOCATE ( nb_task_per_thread( nb_threads ), stat=ierr )
674 IF(ierr.GT.0) THEN
675 info(1)=-7
676 info(2)=nb_threads
677 IF (lpok) WRITE(lp,150) 'NB_TASK_PER_THREAD'
678 GOTO 300
679 ENDIF
680 ALLOCATE ( inv_perm_l0_omp( l_phys_l0_omp ), stat=ierr )
681 IF(ierr.GT.0) THEN
682 WRITE(*,*) "Allocation Error in MUMPS_ANA_FINALIZE_L0_OMP"
683 CALL mumps_abort()
684 ENDIF
685 nb_task_per_thread = 0
686 threads_charge = 0.0d0
687 DO i = 1, l_phys_l0_omp
688 i_less_charged = 1
689 lightest_charge = threads_charge( 1 )
690 DO j = 2, nb_threads
691 IF ( threads_charge( j ) .LT. lightest_charge ) THEN
692 i_less_charged = j
693 lightest_charge = threads_charge( j )
694 IF (threads_charge( j ) .EQ. 0) THEN
695 EXIT
696 ENDIF
697 END IF
698 END DO
699 nb_task_per_thread( i_less_charged ) =
700 & nb_task_per_thread( i_less_charged ) + 1
701 IF (keep(402) .NE. 0) THEN
702 virt_l0_omp_mapping(i) = i_less_charged
703 ENDIF
704 threads_charge( i_less_charged ) =
705 & threads_charge( i_less_charged )
706 & +
707 & costs_mono_thread( step( phys_l0_omp( i ) ) )
708 END DO
709 IF (keep(402) .EQ. 0) THEN
710 DO i = 1, nb_threads
711 virt_l0_omp_mapping(i) = i
712 ENDDO
713 ENDIF
714 virt_l0_omp_mapping(l_virt_l0_omp) = -999999
715 max_task_per_thread = 0
716 DO i = 1, nb_threads
717 max_task_per_thread = max(max_task_per_thread,
718 & nb_task_per_thread( i ) )
719 END DO
720 ALLOCATE ( threads_tasks( nb_threads, max_task_per_thread ),
721 & stat=ierr )
722 IF(ierr.GT.0) THEN
723 info(1)=-7
724 info(2)=nb_threads*max_task_per_thread
725 IF (lpok) WRITE(lp,150) 'THREADS_TASK'
726 GOTO 300
727 ENDIF
728 nb_task_per_thread = 0
729 threads_charge = 0.0d0
730 threads_tasks = 0
731 DO i = 1, l_phys_l0_omp
732 i_less_charged = 1
733 lightest_charge = threads_charge( 1 )
734 DO j = 2, nb_threads
735 IF ( threads_charge( j ) .LT. lightest_charge ) THEN
736 i_less_charged = j
737 lightest_charge = threads_charge( j )
738 END IF
739 END DO
740 nb_task_per_thread( i_less_charged ) =
741 & nb_task_per_thread( i_less_charged ) + 1
742 threads_tasks( i_less_charged, nb_task_per_thread
743 & ( i_less_charged ) ) = phys_l0_omp( i )
744 threads_charge( i_less_charged ) =
745 & threads_charge( i_less_charged )
746 & +
747 & costs_mono_thread( step( phys_l0_omp( i ) ) )
748 END DO
749 max_mem_all_threads = 0_8
750 DO i = 1, nb_threads
751 sum_cb = 0_8
752 max_mem = 0_8
753 DO j = 1, nb_task_per_thread( i )
754 max_mem = max( max_mem, subtree_memory( step(
755 & threads_tasks(i,j) ) ) + sum_cb )
756 sum_cb = sum_cb
757 & +schur_memory(step(threads_tasks(i,j)))
758 & +subtree_factor_memory(
759 & step(threads_tasks(i,j)))
760 END DO
761 max_mem = max( max_mem, sum_cb )
762 IF (keep(402) .EQ. 0) THEN
763 threads_charge( i ) = dble(max_mem)
764 END IF
765 max_mem_all_threads = max( max_mem_all_threads, max_mem )
766 END DO
767 max_mem_all_threads = ( max_mem_all_threads
768 & * int(100 + keep(12),8) ) / 100_8
769 thread_la = max(max_mem_all_threads,6_8)
770 IF (keep(402) .EQ. 0) THEN
771 k = 1
772 DO i = 1, nb_threads
773 virt_l0_omp(i) = k
774 DO j = 1, nb_task_per_thread( i )
775 phys_l0_omp(k) = threads_tasks(i,j)
776 k = k + 1
777 END DO
778 END DO
779 virt_l0_omp(nb_threads+1) = k
780 ELSE
781 DO i = 1, l_virt_l0_omp
782 virt_l0_omp(i) = i
783 END DO
784 END IF
785 DO i = 1, l_phys_l0_omp
786 inv_perm_l0_omp( i ) = i
787 END DO
788 IF ( l_phys_l0_omp .GT. 1 ) THEN
789 CALL mumps_quick_sort_phys_l0( n, step(1), phys_l0_omp(1),
790 & inv_perm_l0_omp, l_phys_l0_omp, 1, l_phys_l0_omp )
791 ENDIF
792 DO i = 1, l_phys_l0_omp
793 perm_l0_omp( inv_perm_l0_omp( i ) ) = i
794 END DO
795 j = nbleaf_myid
796 ptr_leafs_l0_omp( 1 ) = j
797 DO i = 1, l_phys_l0_omp
798 old_inode = 0
799 inode = phys_l0_omp( i )
800 DO WHILE ( inode .NE. 0 )
801 old_inode = inode
802 DO WHILE ( inode .GT. 0 )
803 inode = fils( inode )
804 END DO
805 inode = - inode
806 END DO
807 DO WHILE ( ipool_b_l0_omp( j ) .NE. old_inode )
808 j = j - 1
809 END DO
810 j = j - 1
811 ptr_leafs_l0_omp( i + 1 ) = j
812 END DO
813 cp_nstk_steps(:) = nstk_steps(:)
814 ipool_a_l0_omp = 0
815 leaf = 1
816 ierr = idll_iterator_begin( leafs_above_l0_omp_dll, idll_node )
817 DO WHILE ( associated( idll_node ) )
818 ipool_a_l0_omp( leaf ) = idll_node%ELMT
819 leaf = leaf + 1
820 idll_node => idll_node%NEXT
821 END DO
822 DO i = 1 , l_phys_l0_omp
823 IF ( dad( step( phys_l0_omp(i) ) ) .NE. 0 ) THEN
824 cp_nstk_steps( step( dad( step( phys_l0_omp(i) ) ) ) ) =
825 & cp_nstk_steps( step( dad( step( phys_l0_omp(i) ) ) ) )-1
826 IF (cp_nstk_steps(step(dad(step(phys_l0_omp(i))))) .EQ. 0)THEN
827 ipool_a_l0_omp( leaf ) = dad(step(phys_l0_omp( i )))
828 leaf = leaf + 1
829 END IF
830 END IF
831 END DO
832 leaf = leaf - 1
833 ipool_a_l0_omp(lpool_a_l0_omp) = leaf
834 ipool_a_l0_omp(lpool_a_l0_omp-1) = 0
835 ipool_a_l0_omp(lpool_a_l0_omp-2) = 0
836 IF (leaf .GT. 1) THEN
837 CALL mumps_quick_sort_ipool_po( n, step(1),
838 & ipool_a_l0_omp(1), leaf, 1, leaf )
839 ENDIF
840 300 CONTINUE
841 IF (allocated(nb_task_per_thread)) DEALLOCATE (nb_task_per_thread)
842 IF (allocated(inv_perm_l0_omp )) DEALLOCATE ( inv_perm_l0_omp )
843 IF (allocated(threads_tasks )) DEALLOCATE (threads_tasks )
844 RETURN
845 150 FORMAT(
846 & /' ** ALLOC FAILURE IN MUMPS_ANA_FINALIZE_L0_OMP FOR ',
847 & a30)
848 END SUBROUTINE mumps_ana_finalize_l0_omp
850 INTEGER :: IERR
851 IF (allocated(threads_charge)) DEALLOCATE(threads_charge )
852 IF (allocated(cp_nstk_steps )) DEALLOCATE(cp_nstk_steps )
853 IF (allocated(costs_mono_thread)) DEALLOCATE(costs_mono_thread )
854 IF (allocated(costs_multi_thread)) DEALLOCATE(costs_multi_thread)
855 IF (allocated(schur_memory)) DEALLOCATE(schur_memory )
856 IF (allocated(subtree_factor_memory))
857 & DEALLOCATE(subtree_factor_memory)
858 IF (allocated(subtree_memory)) DEALLOCATE(subtree_memory )
859 ierr = idll_destroy( leafs_above_l0_omp_dll )
860 ierr = idll_destroy( l0_omp_dll )
861 RETURN
862 END SUBROUTINE mumps_ana_free_l0_workspace
863 SUBROUTINE read_bench(ARITH, K50)
864 IMPLICIT NONE
865 INTEGER, INTENT(in) :: K50
866 CHARACTER(1), INTENT(in) :: ARITH
867 INTEGER NLINES, INDEX_NPIV, INDEX_NSCHUR, NB_CORE
868 INTEGER V, S, OLD_V, OLD_S, I
869 parameter(nlines=2812)
870 DOUBLE PRECISION :: AUX
871 CHARACTER(1) :: K50_STR
872 index_npiv = 0
873 index_nschur = 0
874 old_v = -1
875 old_s = -1
876 WRITE(k50_str,'(I1)') k50
877 OPEN(1,file=arith//'benchmark_sym_'//k50_str//'.csv')
878 DO I=1,NLINES
879 READ(1,*) V, S, NB_CORE, AUX
880.NE. IF (V OLD_V) THEN
881 INDEX_NPIV = INDEX_NPIV + 1
882 OLD_V = V
883 END IF
884.GT. IF (S OLD_S) THEN
885 INDEX_NSCHUR = INDEX_NSCHUR + 1
886 OLD_S = S
887.LT. ELSEIF (S OLD_S) THEN
888 INDEX_NSCHUR = 1
889 OLD_S = S
890 END IF
891 BENCH (INDEX_NPIV, INDEX_NSCHUR, NB_CORE) = AUX
892 END DO
893 CLOSE(1)
894 RETURN
895 END SUBROUTINE READ_BENCH
896 SUBROUTINE COST_BENCH (NPIV, NSCHUR, NB_CORE, SYM, COST)
897 IMPLICIT NONE
898 INTEGER, INTENT(IN) :: NPIV, NSCHUR, NB_CORE, SYM
899 DOUBLE PRECISION, INTENT(OUT) :: COST
900 INTEGER V, VV, S, SS
901 INTEGER LOW_INDEX_NPIV, LOW_INDEX_NSCHUR
902 INTEGER HIGH_INDEX_NPIV, HIGH_INDEX_NSCHUR
903 DOUBLE PRECISION :: APROX_COST_FLOPS, REAL_COST_FLOPS
904.LE. IF (NPIV 10) THEN
905 LOW_INDEX_NPIV = NPIV
906 V = NPIV
907 VV = NPIV + 1
908.LE. ELSEIF (NPIV 100) THEN
909 LOW_INDEX_NPIV = 9 + NPIV/10
910 V = (NPIV/10)*10
911 VV = (NPIV/10+1)*10
912.LE. ELSEIF (NPIV 1000) THEN
913 LOW_INDEX_NPIV = 18 + NPIV/100
914 V = (NPIV/100)*100
915 VV = (NPIV/100+1)*100
916.LE. ELSEIF (NPIV 10000) THEN
917 LOW_INDEX_NPIV = 27 + NPIV/1000
918 V = (NPIV/1000)*1000
919 VV = (NPIV/1000+1)*1000
920 ELSE
921 LOW_INDEX_NPIV = 37
922 V = (NPIV/10000)*10000
923 VV = (NPIV/10000+1)*10000
924 END IF
925.LE. IF (NSCHUR 10) THEN
926 LOW_INDEX_NSCHUR = NSCHUR + 1
927 S = NSCHUR
928 SS = NSCHUR + 1
929.LE. ELSEIF (NSCHUR 100) THEN
930 LOW_INDEX_NSCHUR = 10 + NSCHUR/10
931 S = (NSCHUR/10)*10
932 SS = (NSCHUR/10+1)*10
933.LE. ELSEIF (NSCHUR 1000) THEN
934 LOW_INDEX_NSCHUR = 19 + NSCHUR/100
935 S = (NSCHUR/100)*100
936 SS = (NSCHUR/100+1)*100
937.LE. ELSEIF (NSCHUR 10000) THEN
938 LOW_INDEX_NSCHUR = 28 + NSCHUR/1000
939 S = (NSCHUR/1000)*1000
940 SS = (NSCHUR/1000+1)*1000
941 ELSE
942 LOW_INDEX_NSCHUR = 38
943 S = (NSCHUR/10000)*10000
944 SS = (NSCHUR/10000+1)*10000
945 END IF
946.LT. IF (V 10000) THEN
947.LT. IF (S 10000) THEN
948 HIGH_INDEX_NPIV = LOW_INDEX_NPIV + 1
949 HIGH_INDEX_NSCHUR = LOW_INDEX_NSCHUR + 1
950 COST = (BENCH(LOW_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE)
951 & *(VV - NPIV)*(SS - NSCHUR)
952 & +BENCH(LOW_INDEX_NPIV, HIGH_INDEX_NSCHUR, NB_CORE)
953 & *(VV - NPIV)*(NSCHUR - S)
954 & +BENCH(HIGH_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE)
955 & *(NPIV - V)*(SS - NSCHUR)
956 & +BENCH(HIGH_INDEX_NPIV, HIGH_INDEX_NSCHUR, NB_CORE)
957 & *(NPIV - V)*(NSCHUR - S))
958 & /((VV - V)*(SS - S))
959 ELSE
960 HIGH_INDEX_NPIV = LOW_INDEX_NPIV + 1
961 HIGH_INDEX_NSCHUR = LOW_INDEX_NSCHUR
962 COST = (BENCH(LOW_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE)
963 & *(VV - NPIV)
964 & +BENCH(HIGH_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE)
965 & *(NPIV - V))
966 & /(VV - V)
967 CALL MUMPS_GET_FLOPS_COST ( NPIV+NSCHUR, NPIV, NPIV,
968 & SYM, 1, REAL_COST_FLOPS )
969 CALL MUMPS_GET_FLOPS_COST ( V+S, V, V,
970 & SYM, 1, APROX_COST_FLOPS )
971 COST = COST * (REAL_COST_FLOPS/APROX_COST_FLOPS)
972 END IF
973 ELSE
974 IF (NSCHUR < 10000) THEN
975 HIGH_INDEX_NPIV = LOW_INDEX_NPIV
976 HIGH_INDEX_NSCHUR = LOW_INDEX_NSCHUR + 1
977 COST = (BENCH(LOW_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE)
978 & *(SS - NSCHUR)
979 & +BENCH(LOW_INDEX_NPIV, HIGH_INDEX_NSCHUR, NB_CORE)
980 & *(NSCHUR - S))
981 & /(SS - S)
982 CALL MUMPS_GET_FLOPS_COST ( NPIV+NSCHUR, NPIV, NPIV,
983 & SYM, 1, REAL_COST_FLOPS )
984 CALL MUMPS_GET_FLOPS_COST ( V+S, V, V,
985 & SYM, 1, APROX_COST_FLOPS )
986 COST = COST * (REAL_COST_FLOPS/APROX_COST_FLOPS)
987 ELSE
988 HIGH_INDEX_NPIV = LOW_INDEX_NPIV
989 HIGH_INDEX_NSCHUR = LOW_INDEX_NSCHUR
990 COST = (BENCH(LOW_INDEX_NPIV, LOW_INDEX_NSCHUR, NB_CORE))
991 CALL MUMPS_GET_FLOPS_COST ( NPIV+NSCHUR, NPIV, NPIV,
992 & SYM, 1, REAL_COST_FLOPS )
993 CALL MUMPS_GET_FLOPS_COST ( V+S, V, V,
994 & SYM, 1, APROX_COST_FLOPS )
995 COST = COST * (REAL_COST_FLOPS/APROX_COST_FLOPS)
996 END IF
997 END IF
998 END SUBROUTINE COST_BENCH
999 END SUBROUTINE MUMPS_ANA_L0_OMP
1000 END MODULE MUMPS_ANA_OMP_M
1001 RECURSIVE SUBROUTINE MUMPS_QUICK_SORT_IPOOL_PO( N, STEP,
1002 & INTLIST, TAILLE, LO, HI )
1003 IMPLICIT NONE
1004 INTEGER N, TAILLE
1005 INTEGER STEP( N )
1006 INTEGER INTLIST( TAILLE )
1007 INTEGER LO, HI
1008 INTEGER I,J
1009 INTEGER ISWAP, PIVOT
1010 I = LO
1011 J = HI
1012 PIVOT = STEP(INTLIST((I+J)/2))
1013 10 IF (STEP(INTLIST(I)) > PIVOT) THEN
1014 I=I+1
1015 GOTO 10
1016 ENDIF
1017 20 IF (STEP(INTLIST(J)) < PIVOT) THEN
1018 J=J-1
1019 GOTO 20
1020 ENDIF
1021 IF (I < J) THEN
1022 ISWAP = INTLIST(I)
1023 INTLIST(I) = INTLIST(J)
1024 INTLIST(J)=ISWAP
1025 ENDIF
1026 IF ( I <= J) THEN
1027 I = I+1
1028 J = J-1
1029 ENDIF
1030 IF ( I <= J ) GOTO 10
1031 IF ( LO < J ) CALL MUMPS_QUICK_SORT_IPOOL_PO(N, STEP,
1032 & INTLIST, TAILLE, LO, J)
1033 IF ( I < HI ) CALL MUMPS_QUICK_SORT_IPOOL_PO(N, STEP,
1034 & INTLIST, TAILLE, I, HI)
1035 RETURN
1036 END SUBROUTINE MUMPS_QUICK_SORT_IPOOL_PO
1037 RECURSIVE SUBROUTINE MUMPS_QUICK_SORT_PHYS_L0( N, STEP,
1038 & INTLIST, INVPERM, TAILLE, LO, HI )
1039 IMPLICIT NONE
1040 INTEGER N, TAILLE
1041 INTEGER STEP( N )
1042 INTEGER INTLIST( TAILLE )
1043 INTEGER INVPERM( TAILLE )
1044 INTEGER LO, HI
1045 INTEGER I,J
1046 INTEGER ISWAP, PIVOT
1047 INTEGER dswap
1048 I = LO
1049 J = HI
1050 PIVOT = STEP(INTLIST((I+J)/2))
1051 10 IF (STEP(INTLIST(I)) < PIVOT) THEN
1052 I=I+1
1053 GOTO 10
1054 ENDIF
1055 20 IF (STEP(INTLIST(J)) > PIVOT) THEN
1056 J=J-1
1057 GOTO 20
1058 ENDIF
1059 IF (I < J) THEN
1060 ISWAP = INTLIST(I)
1061 INTLIST(I) = INTLIST(J)
1062 INTLIST(J)=ISWAP
1063 dswap = INVPERM(I)
1064 INVPERM(I) = INVPERM(J)
1065 INVPERM(J) = dswap
1066 ENDIF
1067 IF ( I <= J) THEN
1068 I = I+1
1069 J = J-1
1070 ENDIF
1071 IF ( I <= J ) GOTO 10
1072 IF ( LO < J ) CALL MUMPS_QUICK_SORT_PHYS_L0(N, STEP,
1073 & INTLIST, INVPERM, TAILLE, LO, J)
1074 IF ( I < HI ) CALL MUMPS_QUICK_SORT_PHYS_L0(N, STEP,
1075 & INTLIST, INVPERM, TAILLE, I, HI)
1076 RETURN
1077 END SUBROUTINE MUMPS_QUICK_SORT_PHYS_L0
1078 SUBROUTINE MUMPS_ANA_OMP_RETURN()
1079#if defined(BLR_MT)
1080#if ! defined(_OPENMP)
1081 COMPILATION FAILURE: -DBLR_MT requires compilation with openmp
1082 Please modify Makefile.inc and do 'make clean; make'
1083#endif
1084#endif
1085 RETURN
1086 END SUBROUTINE MUMPS_ANA_OMP_RETURN
#define mumps_abort
Definition VE_Metis.h:25
subroutine l0_remove_node(inode)
Definition ana_omp_m.F:522
subroutine l0_insert_node(dll, inode)
Definition ana_omp_m.F:441
logical function mumps_ana_accept_l0_omp()
Definition ana_omp_m.F:549
subroutine cost_bench(npiv, nschur, nb_core, sym, cost)
Definition ana_omp_m.F:897
subroutine mumps_ana_finalize_l0_omp()
Definition ana_omp_m.F:625
subroutine read_bench(arith, k50)
Definition ana_omp_m.F:864
recursive subroutine mumps_quick_sort_phys_l0(n, step, intlist, invperm, taille, lo, hi)
Definition ana_omp_m.F:1039
subroutine l0_insert_children(i_father)
Definition ana_omp_m.F:464
subroutine mumps_ana_initialize_l0_omp()
Definition ana_omp_m.F:128
recursive subroutine mumps_quick_sort_ipool_po(n, step, intlist, taille, lo, hi)
Definition ana_omp_m.F:1003
subroutine mumps_ana_free_l0_workspace()
Definition ana_omp_m.F:850
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
Definition estim_flops.F:74
#define max(a, b)
Definition macros.h:21
subroutine mumps_ana_l0_omp(nb_threads, n, nsteps, sym, slavef, dad, frere, fils, nstk_steps, nd, step, procnode_steps, keep, keep8, myid_nodes, na, lna, arith, lpool_b_l0_omp, ipool_b_l0_omp, lpool_a_l0_omp, ipool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, thread_la, info, icntl)
Definition ana_omp_m.F:23
integer function idll_push_back(dll, elmt)
integer function idll_2_array(dll, array, length)
integer function idll_iterator_begin(dll, ptr)
integer function idll_create(dll)
integer function idll_destroy(dll)
integer function idll_pop_front(dll, elmt)
integer function idll_insert_before(dll, node_after, elmt)
subroutine mumps_init_pool_dist(n, leaf, myid_nodes, k199, na, lna, keep, keep8, step, procnode_steps, ipool, lpool)
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)