OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
mumps_static_mapping.F
Go to the documentation of this file.
1C
2C This file is part of MUMPS 5.5.1, released
3C on Tue Jul 12 13:17:24 UTC 2022
4C
5C
6C Copyright 1991-2022 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
7C Mumps Technologies, University of Bordeaux.
8C
9C This version of MUMPS is provided to you free of charge. It is
10C released under the CeCILL-C license
11C (see doc/CeCILL-C_V1-en.txt, doc/CeCILL-C_V1-fr.txt, and
12C https://cecill.info/licences/Licence_CeCILL-C_V1-en.html)
13C
16 IMPLICIT NONE
17 PRIVATE
20 integer,pointer,dimension(:,:),SAVE::cv_cand
21 integer,pointer,dimension(:),SAVE::cv_par2_nodes
22 integer,SAVE::cv_slavef,cv_nb_niv2,cv_lp,cv_mp
23 integer, parameter:: tsplit_beg=4
24 integer, parameter:: tsplit_mid=5
25 integer, parameter:: tsplit_last=6
26 integer,parameter::cv_invalid=-9999
27 DOUBLE PRECISION,parameter::cv_d_invalid=-9999.d0
28 integer,parameter::cv_equilib_flops=1
29 integer,parameter::cv_equilib_mem=2
30 integer,parameter::cv_error_memalloc = -13
31 integer,parameter::cv_error_memdeloc = -96
32 integer,dimension(:),allocatable,save :: mem_distribtmp
33 integer, dimension(:),allocatable, save :: table_of_process
34 integer,dimension(:),allocatable,save :: mem_distribmpi
35 integer, save ::ke69,nb_arch_nodes
36 logical,dimension(:),allocatable,save :: allowed_nodes
37 integer,dimension(:),allocatable,save :: score
39 integer::nodenumber
40 type(nodelist),pointer::next
41 end type nodelist
43 integer, pointer, dimension(:)::t2_nodenumbers
44 integer, pointer, dimension(:,:)::t2_cand
45 DOUBLE PRECISION, pointer, dimension(:)::t2_candcostw,
46 & t2_candcostm
47 integer:: nmb_t2s
48 end type alloc_arraytype
50 integer:: new_ison,new_ifather,old_keep2
51 DOUBLE PRECISION:: ncostw_oldinode,ncostm_oldinode,
52 & tcostw_oldinode,tcostm_oldinode
53 end type splitting_data
55 integer, dimension(:), pointer :: ind_proc
56 end type procs4node_t
57 DOUBLE PRECISION, pointer, dimension(:) ::
70 DOUBLE PRECISION :: mincostw
71 DOUBLE PRECISION:: cv_costw_upper,cv_costm_upper,
75 integer,pointer,dimension(:):: cv_nodetype,cv_nodelayer,
77 integer,dimension(:),pointer::
79 & cv_procnode,cv_ssarbr,cv_icntl
80 integer(8),dimension(:),pointer::cv_keep8
81 type(alloc_arraytype),pointer,dimension(:)::cv_layer_p2node
82 DOUBLE PRECISION,dimension(:),pointer:: cv_ncostw,
85 type(procs4node_t),dimension(:),pointer :: cv_prop_map
86 integer, dimension(:), pointer :: cv_sizeofblocks
87 logical :: cv_blkon
88 contains
89 subroutine mumps_distribute(n,slavef,icntl,info,
90 & ne,nfsiz,frere,fils,keep,KEEP8,
91 & procnode,ssarbr,nbsa,peak,istat
92 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
93 & )
94 implicit none
95 integer,intent(in)::n,slavef
96 integer, intent(inout),TARGET:: ne(n),nfsiz(n),
97 & procnode(n),ssarbr(n),frere(n),fils(n),keep(500),
98 & icntl(60),info(80)
99 integer, intent(in) :: lsizeofblocks
100 integer, intent(in) :: sizeofblocks(lsizeofblocks)
101 INTEGER(8) keep8(150)
102 integer,intent(out)::nbsa,istat
103 integer ierr,nmb_thislayer,layernmb,mapalgo,allocok,i
104 integer,pointer,dimension(:)::thislayer
105 integer,parameter::memonly=1,floponly=2,hybrid=3
106 DOUBLE PRECISION::
107 & maxwork,minwork,maxmem,minmem,workbalance,membalance
108 DOUBLE PRECISION:: cost_root_node
109 DOUBLE PRECISION,dimension(:),allocatable:: work_per_proc
110 integer,dimension(:),allocatable::id_son
111 logical :: cont
112 character (len=48):: err_rep,subname
113 DOUBLE PRECISION peak
114 logical :: blkon
115 blkon = (sizeofblocks(1).GT.0)
116 cv_blkon = blkon
117 istat=-1
118 subname='DISTRIBUTE'
119 cv_lp=icntl(1)
120 cv_mp=icntl(3)
121 IF (icntl(4).LT.2) cv_mp=0
122 nullify(thislayer)
123 err_rep='INITPART1'
124 call mumps_initpart1(n,slavef,
125 & frere,fils,nfsiz,ne,keep,keep8,icntl,info,
126 & procnode,ssarbr,peak,ierr
127 & , sizeofblocks, lsizeofblocks
128 & )
129 if (ierr.ne.0) goto 99999
130 err_rep='PROCINIT'
131 call mumps_procinit(istat=ierr)
132 if (ierr.ne.0) goto 99999
133 err_rep='CALCCOST'
134 call mumps_calccosts(ierr)
135 if (ierr.ne.0) goto 99999
136 err_rep='ROOTLIST'
137 call mumps_rootlist(ierr)
138 if (ierr.ne.0) goto 99999
139 err_rep='LAYERL0'
140 call mumps_layerl0(ierr)
141 if (ierr.ne.0) goto 99999
142 if (ierr.ne.0) goto 99999
143 err_rep='INITPART2'
144 call mumps_initpart2(ierr)
145 if (ierr.ne.0) goto 99999
146 err_rep='WORKMEM_'
149 & maxwork,minwork,maxmem,minmem)
150 if(maxwork.gt.0.0d0) then
151 workbalance=minwork/maxwork
152 else
153 workbalance=0.0d0
154 endif
155 if(maxmem.gt.0.0d0) then
156 membalance=minmem/maxmem
157 else
158 membalance=0.0d0
159 endif
160 err_rep='mem_alloc'
161 allocate(thislayer(cv_maxnodenmb),stat=allocok)
162 if (allocok.gt.0) then
165 if(cv_lp.gt.0)
166 & write(cv_lp,*)'memory allocation error in ',subname
167 ierr = cv_error_memalloc
168 goto 99999
169 end if
170 cont=.TRUE.
171 layernmb=0
172 mapalgo=floponly
173 err_rep='select_type3'
174 call MUMPS_SELECT_TYPE3(ierr)
175.ne. if (ierr0) goto 99999
176.ne..and..eq. IF (cv_keep(38) 0 cv_keep(60) 0 ) THEN
177 call MUMPS_GET_FLOPS_COST(cv_nfsiz(keep(38)),
178 & cv_nfsiz(keep(38)), cv_nfsiz(keep(38)),
179 & cv_keep(50), 3, cost_root_node)
180 cost_root_node = cost_root_node / dble(cv_slavef)
181 do i=1, cv_slavef
182 cv_proc_memused(i)=cv_proc_memused(i)+
183 & dble(cv_nfsiz(keep(38)))*dble(cv_nfsiz(keep(38)))/
184 & dble(cv_slavef)
185 cv_proc_workload(i)=cv_proc_workload(i)+dble(cost_root_node)
186 enddo
187 ENDIF
188.OR..le. do while((cont)(layernmbcv_maxlayer))
189 err_rep='find_this'
190 call MUMPS_FIND_THISLAYER(layernmb,thislayer,nmb_thislayer,
191 & ierr)
192.ne. if (ierr0) goto 99999
193 err_rep='do_splitting'
194.gt. if(cv_keep(82) 0) then
195.gt. if(layernmb0) call MUMPS_SPLIT_DURING_MAPPING
196 & (layernmb,thislayer,nmb_thislayer,ierr)
197 endif
198.ne. if (ierr0) goto 99999
199 err_rep='assign_types'
200 call MUMPS_ASSIGN_TYPES(layernmb,thislayer,nmb_thislayer,
201 & ierr)
202.ne. if (ierr0) goto 99999
203.gt. if(layernmb0) then
204.eq..OR..eq..OR. if ((cv_keep(24)1)(cv_keep(24)2)
205.eq..OR..eq. & (cv_keep(24)4)(cv_keep(24)6)) then
206 err_rep='costs_layer_t2'
207 call MUMPS_COSTS_LAYER_T2(layernmb,nmb_thislayer,ierr)
208.eq..OR..eq. elseif((cv_keep(24)8)(cv_keep(24)10)
209.OR..eq..OR..eq. & (cv_keep(24)12)(cv_keep(24)14)
210.OR..eq..OR..eq. & (cv_keep(24)16)(cv_keep(24)18)) then
211 err_rep='costs_layer_t2pm'
212 call MUMPS_COSTS_LAYER_T2PM(layernmb,nmb_thislayer,ierr)
213 else
214 err_rep='wrong strategy for costs_layer_t2'
215 ierr = -9999
216 endif
217.ne. if (ierr0) goto 99999
218 err_rep='workmem_'
219 call MUMPS_WORKMEM_IMBALANCE(
220 & cv_proc_workload,cv_proc_memused,
221 & maxwork,minwork,maxmem,minmem)
222.gt. if(maxwork0.0D0) then
223 workbalance=minwork/maxwork
224 else
225 workbalance=0.0D0
226 endif
227.gt. if(maxmem0.0D0) then
228 membalance=minmem/maxmem
229 else
230 membalance=0.0D0
231 endif
232.eq. if(mapalgomemonly) then
233 err_rep='map_layer'
234 call MUMPS_MAP_LAYER(layernmb,thislayer,
235 & nmb_thislayer,cv_equilib_mem,ierr)
236.ne. if (ierr0) goto 99999
237.eq. elseif(mapalgofloponly) then
238 err_rep='map_layer'
239 call MUMPS_MAP_LAYER(layernmb,thislayer,
240 & nmb_thislayer,cv_equilib_flops,ierr)
241.ne. if (ierr0) goto 99999
242.eq. elseif(mapalgohybrid) then
243 if (workbalance <= membalance) then
244 err_rep='map_layer'
245 call MUMPS_MAP_LAYER(layernmb,thislayer,
246 & nmb_thislayer,cv_equilib_flops,ierr)
247.ne. if (ierr0) goto 99999
248 else
249 err_rep='map_layer'
250 call MUMPS_MAP_LAYER(layernmb,thislayer,
251 & nmb_thislayer,cv_equilib_mem,ierr)
252.ne. if (ierr0) goto 99999
253 endif
254 else
255.gt. if(cv_lp0)
256 & write(cv_lp,*)'unknown mapalgo in ',subname
257 return
258 endif
259 endif
260 layernmb=layernmb+1
261 err_rep='higher_layer'
262 call MUMPS_HIGHER_LAYER(layernmb,thislayer,
263 & nmb_thislayer,cont,ierr)
264.ne. if (ierr0) goto 99999
265 end do
266.EQ..OR..EQ..OR. IF ( (cv_keep(79)0)(cv_keep(79)3)
267.EQ..OR..EQ. & (cv_keep(79)5)(cv_keep(79)7)
268 & ) THEN
269.gt. if(cv_slavef4) then
270 err_rep='postprocess'
272 endif
273 ENDIF
274 err_rep='SETUP_CAND'
275 call mumps_setup_cand(ierr)
276 if (ierr.ne.0) goto 99999
277 err_rep='ENCODE_PROC'
278 call mumps_encode_procnode(ierr)
279 if (ierr.ne.0) goto 99999
280 err_rep='STORE_GLOB'
281 call mumps_store_globals(ne,nfsiz,frere,fils,keep,keep8,
282 & info,procnode,ssarbr,nbsa)
283 err_rep='mem_dealloc'
284 deallocate(thislayer,stat=ierr)
285 if (ierr.ne.0) then
286 if(cv_lp.gt.0)
287 & write(cv_lp,*)'Memory deallocation error in ',subname
288 ierr = cv_error_memdeloc
289 goto 99999
290 endif
291 err_rep='TERMGLOB'
292 call mumps_termglob(ierr)
293 if (ierr.ne.0) goto 99999
294 istat=0
295 return
29699999 continue
297 if(cv_lp.gt.0) then
298 write(cv_lp,*)'Error in ',subname,', layernmb=',layernmb
299 write(cv_lp,*)'procedure reporting the error: ',err_rep
300 endif
301 if(ierr.eq.cv_error_memalloc) then
302 info(1) = cv_info(1)
303 info(2) = cv_info(2)
304 endif
305 istat=ierr
306 return
307 CONTAINS
308 subroutine mumps_accept_l0(
309 & map_strat,workload,memused,accepted,
310 & istat)
311 implicit none
312 integer,intent(in)::map_strat
313 DOUBLE PRECISION,dimension(:),intent(in)::workload, memused
314 logical,intent(out)::accepted
315 integer,intent(out)::istat
316 DOUBLE PRECISION maxi,mini,mean,stddev, dpkeep102
317 integer i,nmb
318 character (len=48):: subname
319 logical alternative_criterion
320 DOUBLE PRECISION::
321 & minflops , minmem,
322 & cl_rate, dv_rate
323 istat=-1
324 if ( cv_keep(72) .EQ. 1) then
325 minflops = 2.0d0
326 minmem=50.0d0
327 cl_rate =0.8d0
328 dv_rate=0.2d0
329 else
330 IF (cv_keep(198).NE.0) THEN
331 minflops = 5.0d8
332 minmem=5.0d7
333 cl_rate =0.8d0
334 dv_rate=0.2d0
335 ELSE
336 minflops = 5.0d7
337 minmem=5.0d6
338 cl_rate =0.8d0
339 dv_rate=0.2d0
340 ENDIF
341 endif
342 dpkeep102 = dble(cv_keep(102))
343 IF (cv_keep(198).NE.0) THEN
344 IF (cv_slavef.LT.3)THEN
345 dpkeep102 = dble(150)
346 ELSEIF (cv_slavef.LT.5)THEN
347 dpkeep102 = dble(200)
348 ELSEIF (cv_slavef.LT.8)THEN
349 dpkeep102 = dble(250)
350 ELSEIF (cv_slavef.LT.32)THEN
351 dpkeep102 = dble(275)
352 ELSEIF (cv_slavef.LT.512)THEN
353 dpkeep102 = dble(300)
354 ELSEIF (cv_slavef.GE.512)THEN
355 dpkeep102 = dble(400)
356 ENDIF
357 ENDIF
358 subname='ACCEPT_L0'
359 accepted=.false.
360 alternative_criterion=.false.
361 if(map_strat.eq.cv_equilib_flops) then
362 maxi=maxval(workload)
363 mini=minval(workload)
364 if (maxi.lt.minflops) then
365 accepted=.true.
366 elseif(maxi.le.(dpkeep102/dble(100))*mini)then
367 accepted=.true.
368 endif
369 if ((.NOT.accepted).AND.(alternative_criterion)) then
370 mean=sum(workload)/max(dble(cv_slavef),dble(1))
371 stddev=dble(0)
372 do i=1,cv_slavef
373 stddev=stddev+
374 & (abs(workload(i)-mean)*abs(workload(i)-mean))
375 enddo
376 stddev=sqrt(stddev/max(dble(cv_slavef),dble(1)))
377 nmb=count(mask=abs(workload-mean)<stddev)
378 if((dble(nmb)/max(dble(cv_slavef),dble(1)).gt.cl_rate)
379 & .AND.(stddev.lt.dv_rate*mean)) accepted=.true.
380 endif
381 elseif(map_strat.eq.cv_equilib_mem) then
382 maxi=maxval(memused)
383 mini=minval(memused)
384 if (maxi.lt.minmem) then
385 accepted=.true.
386 else if(cv_slavef.lt.48) then
387 if (maxi.le.dble(2)*mini) accepted=.true.
388 else if(cv_slavef.lt.128) then
389 if (maxi.le.dble(4)*mini) accepted=.true.
390 else if(cv_slavef.lt.256) then
391 if (maxi.le.dble(6)*mini) accepted=.true.
392 else if(cv_slavef.lt.512) then
393 if (maxi.le.dble(8)*mini) accepted=.true.
394 else if(cv_slavef.gt.512) then
395 if (maxi.le.dble(10)*mini) accepted=.true.
396 end if
397 endif
398 istat=0
399 return
400 end subroutine mumps_accept_l0
401 subroutine mumps_arrangel0(map_strat,layerL0end,workload,memused,
402 & procnode,istat,respect_prop)
403 implicit none
404 integer, intent(in)::map_strat, layerL0end
405 DOUBLE PRECISION,dimension(:),intent(out)::workload, memused
406 integer, intent(out)::procnode(:),istat
407 logical, intent(in), OPTIONAL:: respect_prop
408 integer i,j,ierr, nodenumber,proc
409 DOUBLE PRECISION work,mem
410 character (len=48):: err_rep,subname
411 istat=-1
412 subname='ARRANGEL0'
413 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
414 & then
415 if(cv_lp.gt.0)
416 & write(cv_lp,*)'Error:tcost must be allocated in ',subname
417 return
418 end if
419 if((map_strat.ne.cv_equilib_flops).and.
420 & (map_strat.ne.cv_equilib_mem)) return
421 do i=1,cv_n
422 procnode(i)=cv_invalid
423 end do
424 do i=1,cv_slavef
425 workload(i)=cv_proc_workload(i)
426 memused(i)=cv_proc_memused(i)
427 end do
428 do i=cv_layerl0_start,layerl0end
429 nodenumber=cv_layerl0_array(i)
430 work=cv_tcostw(nodenumber)
431 mem=cv_tcostm(nodenumber)
432 err_rep='FIND_BEST_PROC'
433 if(present(respect_prop)) then
434 call mumps_find_best_proc(nodenumber,map_strat,work,mem,
435 & workload,memused,proc,ierr,respect_prop)
436 else
437 call mumps_find_best_proc(nodenumber,map_strat,work,mem,
438 & workload,memused,proc,ierr)
439 endif
440 if(ierr.eq.0) then
441 procnode(nodenumber)=proc
442 else
443 if(cv_lp.gt.0)
444 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
445 do j=1,cv_slavef
446 workload(j)=cv_proc_workload(j)
447 memused(j)=cv_proc_memused(j)
448 end do
449 do j=1,cv_n
450 procnode(j)=cv_invalid
451 end do
452 return
453 end if
454 end do
455 istat=0
456 return
457 end subroutine mumps_arrangel0
458 subroutine mumps_assign_types( layernmb,thislayer,nmb_thislayer,
459 & istat )
460 implicit none
461 integer,intent(in)::layernmb,thislayer(:),
462 & nmb_thislayer
463 integer,intent(out)::istat
464 integer i,in,npiv,nfront,inode,inoderoot,par_nodes_in_layer,
465 & dummy,allocok
466 character (len=48):: subname
467 istat=-1
468 subname='ASSIGN_TYPES'
469 if((layernmb.lt.0).or.(layernmb.gt.cv_maxlayer)) return
470 if(cv_slavef.eq.1) then
471 if(layernmb.eq.0) then
472 do inode=1,cv_n
473 cv_nodetype(inode)=0
474 end do
475 end if
476 else if(layernmb.eq.0) then
477 do i=1,nmb_thislayer
478 inode=thislayer(i)
479 inoderoot=inode
480 if(cv_nodetype(inode).ne.cv_invalid) cycle
481 cv_nodetype(inode)=0
482 30 continue
483 in = inode
484 do while (in .ne. 0)
485 inode = in
486 do while (in .gt. 0)
487 in = cv_fils(in)
488 end do
489 if (in.lt.0) in=-in
490 end do
491 10 continue
492 if ( inode .ne. inoderoot ) then
493 cv_nodetype(inode)=-1
494 in = cv_frere(inode)
495 inode = abs(in)
496 if (in .lt. 0) then
497 go to 10
498 else
499 go to 30
500 end if
501 end if
502 end do
503 else
504 do i=1,nmb_thislayer
505 inode=thislayer(i)
506 in = inode
507 npiv = 0
508 do while (in.gt.0)
509 if (cv_blkon) then
510 npiv = npiv + cv_sizeofblocks(in)
511 else
512 npiv = npiv + 1
513 endif
514 in = cv_fils(in)
515 end do
516 nfront = cv_nfsiz(inode)
517 if(cv_nodetype(inode).ne.cv_invalid) cycle
518 if( ( mumps_istype2bysize(nfront,npiv)) .AND.
519 & (in.ne.0)) then
520 cv_nodetype(inode)=2
521 else
522 cv_nodetype(inode)=1
523 end if
524 end do
525 end if
526 if(layernmb.gt.0) then
527 par_nodes_in_layer=0
528 do i=1,nmb_thislayer
529 inode=thislayer(i)
530 if (mumps_is_node_of_type2(inode))
531 & par_nodes_in_layer=par_nodes_in_layer+1
532 enddo
533 if(par_nodes_in_layer.gt.0) then
534 allocate(
535 &cv_layer_p2node(layernmb)%t2_nodenumbers(par_nodes_in_layer),
536 &cv_layer_p2node(layernmb)%t2_cand(par_nodes_in_layer,cv_slavef+1),
537 &cv_layer_p2node(layernmb)%t2_candcostw(par_nodes_in_layer),
538 &cv_layer_p2node(layernmb)%t2_candcostm(par_nodes_in_layer),
539 & stat=allocok)
540 if (allocok.gt.0) then
542 cv_info(2) = (3+cv_slavef+1)*par_nodes_in_layer
543 istat = cv_error_memalloc
544 if(cv_lp.gt.0)
545 & write(cv_lp,*)'memory allocation error in ',subname
546 return
547 end if
548 cv_layer_p2node(layernmb)%nmb_t2s=par_nodes_in_layer
549 dummy=1
550 do i=1,nmb_thislayer
551 inode=thislayer(i)
552 if (mumps_is_node_of_type2(inode)) then
553 cv_layer_p2node(layernmb)%t2_nodenumbers(dummy)=inode
554 cv_layer_p2node(layernmb)%t2_cand(dummy,:)=0
555 cv_layer_p2node(layernmb)%t2_candcostw(dummy)
556 & =cv_d_invalid
557 cv_layer_p2node(layernmb)%t2_candcostm(dummy)
558 & =cv_d_invalid
559 dummy=dummy+1
560 endif
561 enddo
562 else
563 nullify(cv_layer_p2node(layernmb)%t2_nodenumbers,
564 & cv_layer_p2node(layernmb)%t2_cand,
565 & cv_layer_p2node(layernmb)%t2_candcostw,
566 & cv_layer_p2node(layernmb)%t2_candcostm)
567 end if
568 endif
569 istat=0
570 return
571 end subroutine mumps_assign_types
572 function mumps_bit_get(procs4node,procnumber)
573 implicit none
574 integer,intent(in)::procs4node(cv_size_ind_proc)
575 integer,intent(in)::procnumber
576 logical :: mumps_bit_get
577 integer pos1,pos2
578 pos1 = (procnumber-1)/cv_bitsize_of_int +1
579 pos2 = mod(procnumber-1,cv_bitsize_of_int)
580 mumps_bit_get=btest(procs4node(pos1),pos2)
581 return
582 end function mumps_bit_get
583 function mumps_bit_get4proc(inode,procnumber)
584!DEC$ NOOPTIMIZE
585 implicit none
586 integer, intent(in)::inode,procnumber
587 logical :: mumps_bit_get4proc
588 integer pos1,pos2
589 mumps_bit_get4proc=.false.
590 if((procnumber.lt.1).or.(procnumber.gt.cv_slavef)) return
591 if(.not.associated(cv_prop_map(inode)%ind_proc)) return
592 pos1 = (procnumber-1)/cv_bitsize_of_int +1
593 pos2 = mod(procnumber-1,cv_bitsize_of_int)
595 & (cv_prop_map(inode)%ind_proc(pos1),pos2)
596 return
597 end function mumps_bit_get4proc
598 subroutine mumps_bit_set(procs4node,procnumber,istat)
599 implicit none
600 integer, intent(inout)::procs4node(cv_size_ind_proc)
601 integer,intent(in)::procnumber
602 integer, intent(out)::istat
603 integer pos1,pos2
604 istat = -1
605 if((procnumber.lt.1).or.(procnumber.gt.cv_slavef)) return
606 if(cv_bitsize_of_int.le.0) return
607 pos1 = (procnumber-1)/cv_bitsize_of_int +1
608 pos2 = mod(procnumber-1,cv_bitsize_of_int)
609 procs4node(pos1)=ibset(procs4node(pos1),pos2)
610 istat = 0
611 return
612 end subroutine mumps_bit_set
613 subroutine mumps_calccosts(istat)
614 implicit none
615 integer,intent(out)::istat
616 integer i
617 DOUBLE PRECISION :: maxcostw_root
618 istat = -1
619 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
620 & then
621 if(cv_lp.gt.0)
622 & write(cv_lp,*)
623 & 'Error: tcost must be allocated in MUMPS_CALCCOSTS'
624 return
625 end if
626 maxcostw_root = 0d0
627 do i=1,cv_n
628 if (cv_frere(i).eq.cv_n+1) then
629 cv_tcostw(i)=0.0d0
630 cv_ncostw(i)=0.0d0
631 cv_tcostm(i)=0.0d0
632 cv_ncostm(i)=0.0d0
633 elseif (cv_frere(i).eq.0) then
634 cv_depth(i)=1
635 call mumps_treecosts(i)
636 maxcostw_root = max(maxcostw_root,cv_tcostw(i))
637 end if
638 end do
639 istat = 0
640 mincostw = 1.0d0+maxcostw_root/(dble(cv_maxnsteps)*
641 & dble(10*cv_slavef) )
642 return
643 end subroutine mumps_calccosts
644 subroutine mumps_calcnodecosts(npiv,nfront,costw,costm)
645 implicit none
646 integer,intent(in)::npiv,nfront
647 DOUBLE PRECISION,intent(out)::costw,costm
648 character (len=48):: subname
649 subname='CALCNODECOSTS'
650 if((npiv.le.1).and.(nfront.le.1)) then
651 costw = dble(0)
652 costm = dble(1)
653 else
654 if((cv_keep(494).ne.0).and.(cv_keep(471).ge.0).and.
655 & (npiv.ge.cv_keep(490)).and.(nfront.ge.cv_keep(491))) then
656 WRITE(*,*) " *** Temp internal error in MUMPS_CALCNODECOSTS:"
657 CALL mumps_abort()
658 call mumps_calcnodecosts_blr(npiv, nfront, costw, costm,
659 & cv_keep(471), cv_keep(472), cv_keep(475),
660 & cv_keep(488), cv_keep(50))
661 else
662 if(cv_keep(50).eq.0) then
663 costw= 2.0d0*dble(nfront)*dble(npiv)*dble(nfront-npiv-1)
664 & + dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
665 & + dble(2*nfront-npiv-1) * dble(npiv) / dble(2)
666 costm= dble(npiv)*(dble(2*nfront)-dble(npiv))
667 else
668 costw= dble(npiv) *
669 & (dble(nfront)*dble(nfront)+dble(2*nfront) -
670 & dble(nfront+1) * dble(npiv+1) +
671 & dble(npiv+1) * dble(2*npiv+1) / dble(6))
672 costm= dble(npiv) * dble(nfront)
673 end if
674 end if
675 end if
676 if((costw.lt.0.0d0).or.(costm.lt.0.0d0)) then
677 endif
678 return
679 end subroutine mumps_calcnodecosts
680 SUBROUTINE mumps_calcnodecosts_blr(NPIV, NFRONT, COSTW, COSTM,
681 & K471, K472, K475, K488, SYM)
682 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
683 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
684 INTEGER :: IBCKSZ
685 DOUBLE PRECISION :: B,R,M,N
686 m = dble(npiv)
687 n = dble(nfront)
688 CALL compute_blr_vcs(k472, ibcksz, k488, npiv)
689 b = dble(ibcksz)
690 b = min(b,m)
691 IF (k471.EQ.0) THEN
692 r = 1.0d0
693 ELSEIF (k471.EQ.1) THEN
694 r = sqrt(dble(n))
695 ELSE
696 WRITE(*,*) 'Internal error in MUMPS_CALCNODECOSTS_BLR', k471
697 CALL mumps_abort()
698 ENDIF
699 r = min(r,b/2)
700 IF (sym.EQ.0) THEN
701 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/3.0d0
702 IF (k475.EQ.0) THEN
703 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
704 ELSEIF (k475.EQ.1) THEN
705 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*(r+b)
706 ELSEIF (k475.EQ.2) THEN
707 costw = costw + m/(b*b)*(2.0d0*n-3.0d0*m-2.0d0*b) * b*b*r
708 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
709 ELSEIF (k475.EQ.3) THEN
710 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
711 ENDIF
712 costw = costw + 2.0d0*m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
713 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
714 & (n-m)*(n-m)*m/(b*b*b)
715 & + (n-m)/b*(m/b-1.0d0)*m/b
716 & + (m/b-1.0d0)*m/b*(2.0d0*m/b-1.0d0)/6.0d0
717 & )
718 costm = m*(2.0d0*n-m)/(b*b) * 2.0d0*b*r
719 ELSE
720 costw = m/b * b*(b+1.0d0)*(2.0d0*b+1.0d0)/6.0d0
721 IF (k475.EQ.0.OR.k475.EQ.1) THEN
722 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*b
723 ELSEIF (k475.EQ.2) THEN
724 costw = costw + (n-m)*m/(b*b) * b*b*r
725 & + (m/b-1.0d0)*m/b*(m/b-1.0d0)/6.0d0 * b*b*b
726 ELSEIF (k475.EQ.3) THEN
727 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * b*b*r
728 ENDIF
729 costw = costw + m/(b*b)*(n-(m+b)/2.0d0) * 2.0d0*b*b*r
730 costw = costw + (4.0d0*b*r*r + 2.0d0*b*b*r) * (
731 & (n-m)*(n-m)*m/(b*b*b)/2.0d0
732 & + (n-m)/b*(m/b-1.0d0)*m/b/2.0d0
733 & + (m/b-1.0d0)*m/b*(m/b+1.0d0)/6.0d0
734 & )
735 costm = m*n/(b*b) * 2.0d0*b*r
736 ENDIF
737 END SUBROUTINE mumps_calcnodecosts_blr
738 SUBROUTINE mumps_costs_blr_t2_master(NPIV, NFRONT,
739 & COSTW, COSTM, K471, K472, K475, K488, SYM)
740 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472, K475, K488
741 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
742 INTEGER :: IBCKSZ
743 DOUBLE PRECISION :: B,R,M,N
744 M = dble(npiv)
745 n = dble(nfront)
746 CALL compute_blr_vcs(k472, ibcksz, k488, npiv)
747 b = dble(ibcksz)
748 b = min(b,m)
749 IF (k471.EQ.0) THEN
750 r = 1.0d0
751 ELSEIF (k471.EQ.1) THEN
752 r = sqrt(dble(n))
753 ELSE
754 WRITE(*,*) 'Internal error in ',
756 CALL MUMPS_ABORT()
757 ENDIF
758 R = MIN(R,B/2)
759.EQ. IF (SYM0) THEN
760 COSTW = M/B * B*(B+1.0D0)*(2.0D0*B+1.0D0)/3.0D0
761.EQ. IF (K4750) THEN
762 COSTW = COSTW + M/(B*B)*(N-(M+B)/2.0D0) * B*B*B
763 & + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*B
764.EQ. ELSEIF (K4751) THEN
765 COSTW = COSTW + M/(B*B)*(N-(M+B)/2.0D0) * B*B*B
766 & + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*R
767.EQ. ELSEIF (K4752) THEN
768 COSTW = COSTW + M/(B*B)*(N-M) * B*B*R
769 & + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*(B+R)
770.EQ. ELSEIF (K4753) THEN
771 COSTW = COSTW + M/(B*B)*(N-(M+B)/2.0D0) * B*B*R
772 & + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*R
773 ENDIF
774 COSTW = COSTW + M/(B*B)*(N-(M+B)/2.0D0) * 2.0D0*B*B*R
775 & + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * 2.0D0*B*B*R
776 COSTW = COSTW + (4.0D0*B*R*R + 2.0D0*B*B*R) * (
777 & (N-M)/B*(M/B-1.0D0)*M/B/2.0D0
778 & + (M/B-1.0D0)*M/B*(2.0D0*M/B-1.0D0)/6.0D0
779 & )
780 COSTM = M*N/(B*B) * 2.0D0*B*R
781 ELSE
782 COSTW = M/B * B*(B+1.0D0)*(2.0D0*B+1.0D0)/6.0D0
783.LE. IF (K4752) THEN
784 COSTW = COSTW + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*B
785.EQ. ELSEIF (K4753) THEN
786 COSTW = COSTW + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0 * B*B*R
787 ENDIF
788 COSTW = COSTW + (M/B-1.0D0)*M/B*(M/B-1.0D0)/6.0D0
789 & * 2.0D0*B*B*R
790 COSTW = COSTW + (4.0D0*B*R*R + 2.0D0*B*B*R) * (
791 & (M/B-1.0D0)*M/B*(M/B+1.0D0)/6.0D0
792 & )
793 COSTM = M*M/(B*B) * 2.0D0*B*R
794 ENDIF
795 END SUBROUTINE MUMPS_COSTS_BLR_T2_MASTER
796 SUBROUTINE MUMPS_COSTS_BLR_T2_SLAVE(NPIV, NFRONT,
797 & NROW, COSTW, COSTM, K471, K472, K475, K488, SYM)
798 INTEGER, INTENT(IN) :: NPIV, NFRONT, SYM, K471, K472,
799 & K475, K488
800 DOUBLE PRECISION, INTENT(IN) :: NROW
801 DOUBLE PRECISION, INTENT(OUT) :: COSTW, COSTM
802 INTEGER :: IBCKSZ
803 DOUBLE PRECISION :: B,R,M,N,P
804 M = NROW
805 N = DBLE(NFRONT)
806 P = DBLE(NPIV)
807 CALL COMPUTE_BLR_VCS(K472, IBCKSZ, K488, NPIV)
808 B = DBLE(IBCKSZ)
809 B = MIN(B,M)
810.EQ. IF (K4710) THEN
811 R = 1.0D0
812.EQ. ELSEIF (K4711) THEN
813 R = SQRT(DBLE(N))
814 ELSE
815 WRITE(*,*) 'internal error in ',
817 CALL MUMPS_ABORT()
818 ENDIF
819 R = MIN(R,B/2)
820 COSTW = 0.0D0
821.EQ. IF (K4750) THEN
822 COSTW = COSTW + (M*P)/(B*B) * B*B*B
823 ELSE
824 COSTW = COSTW + (M*P)/(B*B) * B*B*R
825 ENDIF
826 COSTW = COSTW + (M*P)/(B*B) * 2.0D0*B*B*R
827.EQ. IF (SYM0) THEN
828 COSTW = COSTW + (4.0D0*B*R*R + 2.0D0*B*B*R) * (
829 & M/B*(P/B-1.0D0)*P/B/2.0D0
830 & + (N-M)*M*P/(B*B*B)
831 & )
832 ELSE
833 COSTW = COSTW + (4.0D0*B*R*R + 2.0D0*B*B*R) * (
834 & M/B*(P/B-1.0D0)*P/B/2.0D0
835 & + (N-M)*M*P/(B*B*B)/2.0D0
836 & )
837 ENDIF
838 COSTM = M*P/(B*B) * 2.0D0*B*R
839 END SUBROUTINE MUMPS_COSTS_BLR_T2_SLAVE
840 subroutine MUMPS_COSTS_LAYER_T2(layernmb,nmb_thislayer,istat)
841 implicit none
842 integer,intent(in)::layernmb,nmb_thislayer
843 integer,intent(out)::istat
844 integer in,inode,j,kmax,npiv,nfront,ncb,ncol,
845 & min_needed,max_needed,more_than_needed,total_nmb_cand,
846 & nmb_type2_thislayer,fraction,
847 & total_cand_layer,cand_strat, keep48_loc
848 DOUBLE PRECISION flop1,work_type2_thislayer,
849 & relative_weight,workmaster,nrow
850 logical force_cand
851 character (len=48):: subname
852 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
853 & MUMPS_BLOC2_GET_NSLAVESMAX
854 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN,
855 & MUMPS_BLOC2_GET_NSLAVESMAX
856 istat=-1
857 subname='costs_layer_t2'
858.lt. if (cv_keep(24)1) then
859.gt. if(cv_lp0)
860 & write(cv_lp,*)'error in ',subname,'. wrong keep24'
861 return
862 endif
863.eq. force_cand=(mod(cv_keep(24),2)0)
864 cand_strat=cv_keep(24)/2
865 nmb_type2_thislayer=cv_layer_p2node(layernmb)%nmb_t2s
866.gt. if (nmb_type2_thislayer0) then
867 work_type2_thislayer=0.0D0
868 do j=1,nmb_type2_thislayer
869 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
870 work_type2_thislayer=work_type2_thislayer+cv_ncostw(inode)
871 end do
872.le. if(cv_relax0.0D0) then
873.gt. if(cv_lp0)
874 & write(cv_lp,*)'error in ',subname,'. wrong cv_relax'
875 return
876 endif
877 total_cand_layer=int(cv_relax*dble(cv_slavef))
878 do j=1,nmb_type2_thislayer
879 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
880 nfront=cv_nfsiz(inode)
881 npiv=0
882 in=inode
883.gt. do while(in0)
884 if (cv_BLKON) then
885 npiv = npiv + cv_SIZEOFBLOCKS(in)
886 else
887 npiv=npiv+1
888 endif
889 in=cv_fils(in)
890 end do
891 ncb=nfront-npiv
892 kmax = MUMPS_REG_GETKMAX(cv_keep8(21),ncb)
893 if (force_cand) then
894 if (cv_keep(50) == 0) then
895 keep48_loc=0
896 else
897 keep48_loc=3
898 endif
899.EQ. if (cv_keep(48)5) keep48_loc = 5
900 min_needed = MUMPS_BLOC2_GET_NSLAVESMIN(
901 & cv_slavef, keep48_loc,cv_keep8(21),
902 & cv_keep(50),nfront,ncb,
903 & cv_keep(375), cv_keep(119))
904 max_needed = MUMPS_BLOC2_GET_NSLAVESMAX(
905 & cv_slavef, keep48_loc,cv_keep8(21),
906 & cv_keep(50),nfront,ncb,
907 & cv_keep(375), cv_keep(119))
908.eq. if(cand_strat1) then
909 more_than_needed = 0
910.eq. elseif (cand_strat2) then
911.gt. if(work_type2_thislayer0.0D0) then
912 relative_weight=cv_ncostw(inode)/work_type2_thislayer
913 else
914 relative_weight = 0.0D0
915 endif
916 fraction=nint(relative_weight *
917 & dble(total_cand_layer))
918 more_than_needed=min(max(0,cv_slavef-1-min_needed),
919 & max(0,fraction-min_needed) )
920.eq. elseif (cand_strat3) then
921 more_than_needed=cv_slavef-1-min_needed
922 else
923.gt. if(cv_lp0)
924 & write(cv_lp,*)'unknown cand. strategy in ',subname
925 return
926 endif
927 total_nmb_cand=min(min_needed+more_than_needed,
928 & cv_slavef-1)
929 total_nmb_cand=min(total_nmb_cand,max_needed)
930 else
931 total_nmb_cand=0
932 endif
933 cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
934 & = total_nmb_cand
935.eq. if(cv_keep(50)0) then
936 flop1=dble(2*npiv)*dble(nfront)-
937 & dble(npiv+nfront)*dble(npiv+1)
938 flop1= dble(npiv)*flop1 +
939 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
940 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
941 else
942 flop1=dble(npiv)*
943 & ( dble(npiv)*dble(npiv)+dble(npiv)-
944 & dble(npiv*npiv+npiv+1) )+
945 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
946 endif
947 cv_ncostw(inode)=flop1
948.gt. if(total_nmb_cand0) then
949 nrow = dble(max(min(dble(ncb)/dble(total_nmb_cand),
950 & dble(kmax)),
951 & dble(ncb)/dble(cv_slavef-1)))
952.gt. elseif(cv_slavef1) then
953 nrow = dble(max(dble(kmax),
954 & dble(ncb)/dble(cv_slavef-1)))
955 else
956 nrow = dble(ncb)
957 endif
958.eq. if(cv_keep(50)0) then
959 flop1 = dble(npiv)*dble(nrow)+
960 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
961 else
962 ncol= nfront
963 flop1 = dble(npiv)*dble(nrow)*
964 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
965 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
966.gt. if (workmasterflop1) flop1=workmaster
967 endif
968 cv_layer_p2node(layernmb)%t2_candcostw(j)=flop1
969.eq. if(cv_keep(50)0) then
970 cv_ncostm(inode)=dble(npiv)*dble(nfront)
971 else
972 cv_ncostm(inode)=dble(npiv)*dble(npiv)
973 endif
974.eq. if(cv_keep(50)0) then
975 cv_layer_p2node(layernmb)%t2_candcostm(j)
976 & =dble(npiv)*dble(nrow)
977 else
978 cv_layer_p2node(layernmb)%t2_candcostm(j)
979 & =dble(npiv)*dble(nrow)
980 endif
981 end do
982 endif
983 istat=0
984 return
985 end subroutine MUMPS_COSTS_LAYER_T2
986 subroutine MUMPS_COSTS_LAYER_T2PM(layernmb,nmb_thislayer,istat)
987!DEC$ OPTIMIZE:1
988 implicit none
989 integer,intent(in)::layernmb,nmb_thislayer
990 integer,intent(out)::istat
991 integer in,inode,j,jj,kmax,npiv,nfront,ncb,ncol,
992 & total_nmb_cand,nmb_type2_thislayer,
993 & total_cand_layer,npropmap,min_needed,
994 & keep48_loc
995 DOUBLE PRECISION flop1,work_type2_thislayer,
996 & relative_weight,workmaster,nrow
997 DOUBLE PRECISION save_ncostw, save_ncostm
998 LOGICAL SPLITNODE, BLRNODE
999 character (len=48):: subname
1000 integer MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1001 external MUMPS_REG_GETKMAX, MUMPS_BLOC2_GET_NSLAVESMIN
1002 istat=-1
1003 SPLITNODE=.FALSE.
1004 BLRNODE=.FALSE.
1005 save_ncostw = 1.0D0
1006 save_ncostm = 1.0D0
1007 subname='costs_layer_t2pm'
1008.ne..AND..ne. if((cv_keep(24)8)(cv_keep(24)10)
1009.AND..ne..AND..ne. & (cv_keep(24)12)(cv_keep(24)14)
1010.AND..ne..AND..ne. & (cv_keep(24)16)(cv_keep(24)18)) then
1011.gt. if(cv_lp0)
1012 & write(cv_lp,*)'error in ',subname,'. wrong keep24'
1013 return
1014 endif
1015 nmb_type2_thislayer=cv_layer_p2node(layernmb)%nmb_t2s
1016.gt. if (nmb_type2_thislayer0) then
1017 total_cand_layer=0
1018 work_type2_thislayer=0.0D0
1019 do j=1,nmb_type2_thislayer
1020 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
1021 work_type2_thislayer=work_type2_thislayer+cv_ncostw(inode)
1022 npropmap=0
1023 do jj=1,cv_slavef
1024 if( MUMPS_BIT_GET4PROC(inode,jj))
1025 & npropmap=npropmap+1
1026 end do
1027 total_cand_layer=total_cand_layer+npropmap
1028 end do
1029 do j=1,nmb_type2_thislayer
1030 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(j)
1031 nfront=cv_nfsiz(inode)
1032.GT. SPLITNODE = (abs(cv_nodetype(inode))3)
1033 IF (SPLITNODE) THEN
1034 save_ncostw = cv_ncostw(inode)
1035 save_ncostm = cv_ncostm(inode)
1036 ENDIF
1037 npiv=0
1038 in=inode
1039.gt. do while(in0)
1040 if (cv_BLKON) then
1041 npiv = npiv + cv_SIZEOFBLOCKS(in)
1042 else
1043 npiv=npiv+1
1044 endif
1045 in=cv_fils(in)
1046 end do
1047 ncb=nfront-npiv
1048 kmax = MUMPS_REG_GETKMAX(cv_keep8(21),ncb)
1049.lt. if(kmax1) then
1050 kmax = max(kmax,1)
1051 endif
1052 if (cv_keep(50) == 0) then
1053 keep48_loc=0
1054 else
1055 keep48_loc=3
1056 endif
1057.EQ. if (cv_keep(48)5) keep48_loc = 5
1058 min_needed= MUMPS_BLOC2_GET_NSLAVESMIN
1059 & (cv_slavef, keep48_loc,cv_keep8(21),
1060 & cv_keep(50),nfront,ncb,
1061 & cv_keep(375), cv_keep(119))
1062.lt. if(min_needed1) then
1063.gt. if(cv_lp0)
1064 & write(cv_lp,*)'error in ',subname,'.neg min_needed'
1065 return
1066 endif
1067.eq..OR..eq..OR. if ((cv_keep(24)8)(cv_keep(24)14)
1068.eq. & (cv_keep(24)18)) then
1069 npropmap=0
1070 do jj=1,cv_slavef
1071 if( MUMPS_BIT_GET4PROC(inode,jj))
1072 & npropmap=npropmap+1
1073 end do
1074 total_nmb_cand=max(npropmap-1,min_needed)
1075.eq. elseif(cv_keep(24)10) then
1076.gt. if(work_type2_thislayer0.0D0) then
1077 relative_weight=cv_ncostw(inode)/work_type2_thislayer
1078 else
1079 relative_weight = 0.0D0
1080 endif
1081 total_nmb_cand=nint(relative_weight *
1082 & dble(total_cand_layer))
1083 total_nmb_cand=max(total_nmb_cand-1,min_needed)
1084.eq..OR..eq. elseif((cv_keep(24)12)(cv_keep(24)16)) then
1085.lt. if(layernmbcv_dist_L0_mixed_strat_bound) then
1086.gt. if(cv_mp0)then
1087 write(cv_mp,*)'strat', cv_keep(24),
1088 & ': use 8 on layer',layernmb
1089 endif
1090 npropmap=0
1091 do jj=1,cv_slavef
1092 if( MUMPS_BIT_GET4PROC(inode,jj))
1093 & npropmap=npropmap+1
1094 end do
1095 total_nmb_cand=max(npropmap-1,min_needed)
1096 else
1097.gt. if(cv_mp0)then
1098 write(cv_mp,*)'strat', cv_keep(24),
1099 & ': use 10 on layer',layernmb
1100 endif
1101.gt. if(work_type2_thislayer0.0D0) then
1102 relative_weight=cv_ncostw(inode)/work_type2_thislayer
1103 else
1104 relative_weight = 0.0D0
1105 endif
1106 total_nmb_cand=nint(relative_weight *
1107 & dble(total_cand_layer))
1108 total_nmb_cand=max(total_nmb_cand-1,min_needed)
1109 endif
1110 else
1111.gt. if(cv_lp0)
1112 & write(cv_lp,*)'unknown cand. strategy in ',subname
1113 return
1114 endif
1115 total_nmb_cand=max(total_nmb_cand,1)
1116 total_nmb_cand=min(total_nmb_cand,cv_slavef-1)
1117 total_nmb_cand=min(total_nmb_cand,ncb)
1118 cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
1119 & = total_nmb_cand
1120.ne..and..ge..and. BLRNODE = ((cv_keep(494)0)(cv_keep(471)0)
1121.ge..and..ge. & (npivcv_keep(490))(nfrontcv_keep(491)))
1122 IF (BLRNODE) THEN
1123 call MUMPS_COSTS_BLR_T2_MASTER(npiv, nfront,
1124 & cv_ncostw(inode), cv_ncostm(inode),
1125 & cv_keep(471), cv_keep(472), cv_keep(475),
1126 & cv_keep(488), cv_keep(50))
1127 ELSE
1128.eq. if(cv_keep(50)0) then
1129 flop1=dble(2*npiv)*dble(nfront)-
1130 & dble(npiv+nfront)*dble(npiv+1)
1131 flop1= dble(npiv)*flop1 +
1132 & dble(2 * npiv-npiv-1)*dble(npiv)/dble(2)+
1133 & dble(npiv)*dble(npiv+1)*dble(2*npiv+1)/dble(3)
1134 else
1135 flop1=dble(npiv)*
1136 & ( dble(npiv)*dble(npiv)+dble(npiv)-
1137 & dble(npiv*npiv+npiv+1) )+
1138 & (dble(npiv)*dble(npiv+1)*dble(2*npiv+1))/dble(6)
1139 endif
1140 cv_ncostw(inode)=flop1
1141 ENDIF
1142 IF (SPLITNODE) THEN
1143 cv_layer_p2node(layernmb)%t2_candcostw(j)=
1144 & max(save_ncostw - cv_ncostw(inode), 1.0D0)
1145 ELSE
1146.gt. if(total_nmb_cand0) then
1147 nrow = dble(max(min(dble(ncb)/dble(total_nmb_cand),
1148 & dble(kmax)),
1149 & dble(ncb)/dble(cv_slavef-1)))
1150.gt. elseif(cv_slavef1) then
1151 nrow = dble(max(dble(kmax),
1152 & dble(ncb)/dble(cv_slavef-1)))
1153 else
1154 nrow = dble(ncb)
1155 endif
1156 IF (BLRNODE) THEN
1157 call MUMPS_COSTS_BLR_T2_SLAVE(npiv, nfront,
1158 & nrow,
1159 & cv_layer_p2node(layernmb)%t2_candcostw(j),
1160 & cv_layer_p2node(layernmb)%t2_candcostm(j),
1161 & cv_keep(471), cv_keep(472), cv_keep(475),
1162 & cv_keep(488), cv_keep(50))
1163 ELSE
1164.eq. if(cv_keep(50)0) then
1165 flop1 = dble(npiv)*dble(nrow)+
1166 & dble(nrow)*dble(npiv)*dble(2*nfront-npiv-1)
1167 else
1168 ncol= nfront
1169 flop1 = dble(npiv)*dble(nrow)*
1170 & (dble(2*ncol)-dble(nrow)-dble(npiv)+dble(1))
1171 workmaster = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1172.gt. if (workmasterflop1) flop1=workmaster
1173 endif
1174 cv_layer_p2node(layernmb)%t2_candcostw(j)=flop1
1175 ENDIF
1176 ENDIF
1177.NOT. IF (BLRNODE) THEN
1178.eq. if(cv_keep(50)0) then
1179 cv_ncostm(inode)=dble(npiv)*dble(nfront)
1180 else
1181 cv_ncostm(inode)=dble(npiv)*dble(npiv)
1182 endif
1183 ENDIF
1184 IF (SPLITNODE) THEN
1185 cv_layer_p2node(layernmb)%t2_candcostm(j) =
1186 & max(save_ncostm - cv_ncostm(inode), 1.0D0)
1187.NOT. ELSEIF (BLRNODE) THEN
1188.eq. if(cv_keep(50)0) then
1189 cv_layer_p2node(layernmb)%t2_candcostm(j)
1190 & =dble(npiv)*dble(nrow)
1191 else
1192 cv_layer_p2node(layernmb)%t2_candcostm(j)
1193 & =dble(npiv)*dble(nrow)
1194 endif
1195 ENDIF
1196 end do
1197 endif
1198 istat=0
1199 return
1200 end subroutine MUMPS_COSTS_LAYER_T2PM
1201 subroutine MUMPS_SPLIT_DURING_MAPPING(
1202 & layernmb,thislayer,nmb_thislayer,
1203 & istat )
1204 implicit none
1205 integer,intent(in)::layernmb,nmb_thislayer
1206 integer,intent(in)::thislayer(:)
1207 integer,intent(out)::istat
1208 integer i,j,k1,k2,k3,ierr,inode,nfront,npiv,
1209 & npropmap, inode_tmp, allocok
1210 logical doit
1211 integer, allocatable, dimension(:) :: npivsplit
1212 integer :: lnpivsplit
1213 integer :: bsize
1214 integer :: k1_temp, npiv_beg, npiv_end
1215 character (len=48):: err_rep,subname
1216 istat=-1
1217 subname='split_during_mapping'
1218.lt..or..gt. if((layernmb0)(layernmbcv_maxlayer)) return
1219.eq. if (cv_slavef1) then
1220 return
1221 endif
1222.ne. if (cv_icntl(59) 0) then
1223 istat = 0
1224 return
1225 endif
1226 lnpivsplit = cv_keep(108)
1227 allocate(npivsplit(lnpivsplit),stat=allocok)
1228.NE. if (allocok 0) then
1229 cv_info(1) = cv_error_memalloc
1230 cv_info(2) = lnpivsplit
1231 istat = cv_error_memalloc
1232.gt. if(cv_lp0)
1233 & write(cv_lp,*)'memory allocation error in ',subname
1234 return
1235 endif
1236 do i=1,nmb_thislayer
1237 ierr=0
1238 inode=thislayer(i)
1239 nfront = cv_nfsiz(inode)
1240 inode_tmp=inode
1241 npiv=0
1242 do while (inode_tmp.gt.0)
1243 if (cv_blkon) then
1244 npiv = npiv + cv_sizeofblocks(inode_tmp)
1245 else
1246 npiv=npiv+1
1247 endif
1248 inode_tmp=cv_fils(inode_tmp)
1249 end do
1250 if (inode_tmp .eq. 0) cycle
1251 npropmap=0
1252 do j=1,cv_slavef
1253 if( mumps_bit_get4proc(inode,j)) then
1254 npropmap=npropmap+1
1255 endif
1256 end do
1257 IF ((keep(376) .EQ.1)
1258 & ) THEN
1259 err_rep='GET_SPLIT_4_PERF'
1260 CALL mumps_get_split_4_perf(inode, nfront, npiv,
1261 & dble(npropmap),
1262 & k1, lnpivsplit, npivsplit, n, cv_frere(1),
1263 & cv_keep(1),
1265 & istat)
1266 k3=k1
1267 doit = .true.
1268 GOTO 200
1269 ENDIF
1270 IF ((cv_keep(79) .EQ.0).OR.(cv_keep(79).GE.5)) THEN
1271 err_rep='GET_SPLIT_INKPART'
1272 call mumps_get_split_inkpart(inode,
1273 & doit,npiv,nfront,npropmap,k1,k3,
1274 & ierr)
1275 ELSE
1276 err_rep='GET_MEMSPLIT_INKPART'
1277 call mumps_get_memsplit_inkpart(inode,
1278 & doit,npiv,nfront,npropmap,k2,ierr)
1279 k1=k2
1280 k3=k2
1281 ENDIF
1282 if (ierr.eq.0) then
1283 if (lnpivsplit < k1) then
1284 write(*,*) 'error in', subname, lnpivsplit, k1, cv_keep(108)
1285 call mumps_abort()
1286 endif
1287 bsize = max(npiv/k1,1)
1288 if (cv_blkon) then
1289 inode_tmp = inode
1290 npiv_beg = 0
1291 npiv_end = 0
1292 k1_temp = 0
1293 do while (inode_tmp.gt.0)
1294 npiv_end = npiv_end + cv_sizeofblocks(inode_tmp)
1295 if (npiv_end-npiv_beg.ge.bsize) then
1296 k1_temp = k1_temp+1
1297 npivsplit(k1_temp) = npiv_end-npiv_beg
1298 npiv_beg = npiv_end
1299 if ( ( (npiv-npiv_beg).gt.0) .and.
1300 & (npiv-npiv_beg.LT.2*bsize)
1301 & ) then
1302 k1_temp = k1_temp+1
1303 npivsplit(k1_temp) = npiv - npiv_beg
1304 exit
1305 endif
1306 endif
1307 inode_tmp=cv_fils(inode_tmp)
1308 enddo
1309 if (k1_temp.eq.0) then
1310 k1_temp = 1
1311 npivsplit(1) = npiv
1312 else
1313 if (npiv_end.gt.npiv_beg) then
1314 k1_temp = k1_temp+1
1315 npivsplit(k1_temp) = npiv_end-npiv_beg
1316 endif
1317 endif
1318 k1 = k1_temp
1319 else
1320 do j = 1, k1-1
1321 npivsplit(j)= bsize
1322 enddo
1323 npivsplit(k1) = npiv-bsize*(k1-1)
1324 endif
1325 endif
1326 200 CONTINUE
1327 if(ierr.ne.0) then
1328 if(cv_lp.gt.0)
1329 & write(cv_lp,*)'Error reported by ',
1330 & err_rep,' in ',subname
1331 istat =ierr
1332 goto 100
1333 endif
1334 if ( ( k1.le.1).or.(k3.le.1).or.(.NOT.doit) ) cycle
1335 err_rep='SPLITNODE_INKPART'
1336 call mumps_splitnode_intree( inode, nfront, npiv, k1,
1337 & lnpivsplit, npivsplit, cv_keep(1), n, cv_fils(1),
1338 & cv_frere(1),
1339 & cv_nfsiz(1), cv_ne(1), cv_info(5),
1340 & cv_nsteps, cv_nodetype(1), ierr
1341 & , sizeofblocks, lsizeofblocks
1342 & , blkon
1343 & )
1344 if(ierr.ne.0) then
1345 if(cv_lp.gt.0)
1346 & write(cv_lp,*)'Error reported by ',err_rep,
1347 & ' in ',subname
1348 istat = ierr
1349 goto 100
1350 endif
1351 err_rep='SPLITNODE_UPDATE'
1352 call mumps_splitnode_update( inode, nfront, npiv, k1,
1353 & lnpivsplit, npivsplit,
1354 & ierr)
1355 if(ierr.ne.0) then
1356 if(cv_lp.gt.0)
1357 & write(cv_lp,*)'Error reported by ',err_rep,
1358 & ' in ',subname
1359 istat = ierr
1360 goto 100
1361 endif
1362 end do
1363 istat=0
1364 100 continue
1365 deallocate(npivsplit)
1366 return
1367 end subroutine mumps_split_during_mapping
1368 subroutine mumps_get_split_inkpart(inode,
1369 & doit,npiv,nfront,npropmap,k1,k3,istat)
1370 implicit none
1371 integer,intent(in)::inode
1372 logical,intent(out)::doit
1373 integer,intent(in) :: npiv, nfront, npropmap
1374 integer,intent(out) :: istat
1375 integer,intent(out) ::k1,k3
1376 integer npiv2,nfront2,npiv_son2
1377 integer ncb,kmax,keep48_loc,nslaves_max,
1378 & nslaves_estim,strat,kk
1379 DOUBLE PRECISION wk_master,wk_master2,wk_slave2
1380 integer MUMPS_REG_GETKMAX,
1383 external MUMPS_REG_GETKMAX
1384 external MUMPS_BLOC2_GET_NSLAVESMAX
1385 external MUMPS_BLOC2_GET_NSLAVESMIN
1386 doit=.false.
1387 k1=1
1388 k3 =1
1389 istat=-1
1390 doit=.true.
1391 if (cv_nodetype(inode) .gt. 0) then
1392 doit=.false.
1393 istat = 0
1394 return
1395 endif
1396 if ( (cv_frere(inode).eq.0) ) then
1397 doit=.false.
1398 istat = 0
1399 return
1400 endif
1401 npiv_son2 = max(npiv/2,1)
1402 if( .not. mumps_istype2bysize(nfront,npiv_son2) )then
1403 doit=.false.
1404 istat = 0
1405 return
1406 endif
1407 ncb = nfront - npiv
1408 kmax = mumps_reg_getkmax(cv_keep8(21),ncb)
1409 if (cv_keep(50) == 0) then
1410 keep48_loc=0
1411 else
1412 keep48_loc=3
1413 endif
1414 if (cv_keep(48).EQ.5) keep48_loc = 5
1415 if(npropmap .gt. cv_keep(83)) then
1416 nslaves_max = mumps_bloc2_get_nslavesmax(
1417 & cv_slavef, keep48_loc, cv_keep8(21),
1418 & cv_keep(50), nfront,
1419 & max(max(ncb,nfront-cv_keep(420)),1),
1420 & cv_keep(375), cv_keep(119))
1421 nslaves_estim = min(npropmap-1,nslaves_max)
1422 nslaves_estim = max(nslaves_estim,1)
1423 else
1424 nslaves_max = mumps_bloc2_get_nslavesmax(
1425 & cv_slavef, keep48_loc, cv_keep8(21),
1426 & cv_keep(50), nfront, ncb, cv_keep(375),
1427 & cv_keep(119) )
1428 nslaves_estim = mumps_bloc2_get_nslavesmin(
1429 & cv_slavef, keep48_loc,cv_keep8(21),
1430 & cv_keep(50), nfront, ncb,
1431 & cv_keep(375), cv_keep(119) )
1432 nslaves_estim = max(nslaves_estim,1)
1433 nslaves_estim = min(nslaves_estim,nslaves_max)
1434 endif
1435 if (cv_keep(50).eq.0) then
1436 wk_master = (dble(2)/dble(3))*
1437 & dble(npiv)*dble(npiv)*dble(npiv)+
1438 & dble(npiv)*dble(npiv)*dble(nfront-npiv)
1439 else
1440 wk_master = dble(npiv)*dble(npiv)*dble(npiv)/dble(3)
1441 end if
1442 strat = cv_keep(62)
1443 doit = .true.
1444 k1 = cv_keep(82)
1445 k3 = cv_keep(82)
1446 do kk=1,cv_keep(82)-1
1447 npiv2 = npiv/kk
1448 nfront2 = nfront-npiv+npiv2
1449 if (npiv2 .le. max(6*cv_keep(6),0).or.
1450 & (nfront2.le.cv_keep(9)) ) then
1451 k1 = max(1,kk-1)
1452 exit
1453 endif
1454 wk_master2 = wk_master / dble(kk)
1455 if (cv_keep(50).eq.0) then
1456 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1457 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1458 else
1459 wk_slave2 =
1460 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1461 & / dble(nslaves_estim)
1462 endif
1463 if(wk_master2.le.
1464 & (1.0d0 +dble(kk*strat)/dble(100))*wk_slave2) then
1465 k1 = kk
1466 exit
1467 endif
1468 enddo
1469 do kk=1,cv_keep(82)-1
1470 npiv2 = npiv/kk
1471 nfront2 = nfront
1472 if (npiv2 .le. max(6*cv_keep(6),0)) then
1473 k3 = max(1,kk-1)
1474 exit
1475 endif
1476 wk_master2 = wk_master / dble(kk)
1477 if (cv_keep(50).eq.0) then
1478 wk_slave2 = ( dble(npiv2)*dble(nfront2-npiv2) *
1479 & dble(2*nfront2-npiv2) ) / dble(nslaves_estim)
1480 else
1481 wk_slave2 =
1482 & ( dble(npiv2)*dble(nfront2-npiv2)*dble(nfront2) )
1483 & / dble(nslaves_estim)
1484 endif
1485 if(wk_master2.le.wk_slave2) then
1486 k3 = kk
1487 exit
1488 endif
1489 enddo
1490 k3 = min(npiv,k3)
1491 k1 = min(npiv,k1)
1492 IF (cv_keep(79).GE.1) THEN
1493 k1=min(k1, npropmap-1)
1494 k3=min(k3, npropmap-1)
1495 ENDIF
1496 if(k3 .lt. k1) then
1497 k3 = k1
1498 endif
1499 istat=0
1500 return
1501 end subroutine mumps_get_split_inkpart
1503 & doit,npiv,nfront,npropmap,k2,istat)
1504 implicit none
1505 integer,intent(in) :: inode
1506 logical,intent(out) :: doit
1507 integer,intent(in) :: npiv,nfront,npropmap
1508 integer,intent(out) :: istat
1509 integer,intent(out) :: k2
1510 integer npiv2,npiv_son2
1511 integer kk
1512 DOUBLE PRECISION mem_master, mem_slave
1513 doit=.false.
1514 k2=1
1515 istat=-1
1516 doit=.true.
1517 if (cv_nodetype(inode) .gt. 0) then
1518 doit=.false.
1519 istat = 0
1520 return
1521 endif
1522 if (cv_frere(inode).eq.0
1523 & ) then
1524 doit=.false.
1525 istat = 0
1526 return
1527 endif
1528 if ((nfront-npiv).lt.npropmap.OR.
1529 & (npropmap.le.0) ) then
1530 doit=.false.
1531 istat = 0
1532 return
1533 endif
1534 npiv_son2 = max(npiv/2,1)
1535 if( .not. mumps_istype2bysize(nfront,npiv_son2) )then
1536 doit=.false.
1537 istat = 0
1538 return
1539 endif
1540 doit = .true.
1541 k2 = min(cv_keep(82),npropmap-1)
1542 do kk=1,min(cv_keep(82)-1, npropmap-1)
1543 npiv2 = npiv/kk
1544 if(npiv2 .eq. 0) then
1545 k2 = max(1,kk-1)
1546 exit
1547 endif
1548 mem_slave = dble(nfront-npiv)*dble(nfront)/
1549 & dble(npropmap-kk+1)
1550 mem_master = dble(npiv2)*dble(nfront)
1551 if(mem_master.le.
1552 & (1.0d0 +dble(cv_keep(62))/dble(100))*mem_slave) then
1553 k2 = kk
1554 exit
1555 endif
1556 enddo
1557 k2 = max(k2, 1)
1558 k2 = min(k2, npiv)
1559 istat=0
1560 return
1561 end subroutine mumps_get_memsplit_inkpart
1562 subroutine mumps_splitnode_update(inode,nfront,npiv,k,
1563 & lnpivsplit, npivsplit,
1564 & istat)
1565 implicit none
1566 integer, intent(in)::nfront,npiv
1567 integer, intent(in):: k
1568 integer, intent(in)::lnpivsplit
1569 integer, intent(in)::npivsplit(lnpivsplit)
1570 integer, intent(in):: inode
1571 integer, intent(out)::istat
1572 integer lev,npiv_father,
1573 & npiv_son,nfrontk,npivk,next_father
1574 DOUBLE PRECISION:: ncostm,ncostw,ncostm_ison,ncostw_ison,
1575 & ncostm_ifather,ncostw_ifather
1576 integer::ison,ifather
1577 character (len=48):: subname
1578 istat=-1
1579 subname='SPLITNODE_UPDATE'
1580 npiv_son = npivsplit(1)
1581 ison = inode
1582 next_father = -frere(ison)
1583 ncostw=cv_ncostw(inode)
1584 ncostm=cv_ncostm(inode)
1585 nfrontk = nfront
1586 npivk = npiv
1587 call mumps_calcnodecosts(npiv_son,nfrontk,
1588 & ncostw_ison,ncostm_ison)
1589 cv_ncostw(ison)=ncostw_ison
1590 cv_ncostm(ison)=ncostm_ison
1591 if(associated(cv_tcostw)) cv_tcostw(ison) = cv_tcostw(inode)
1592 & -ncostw +cv_ncostw(ison)
1593 if(associated(cv_tcostm)) cv_tcostm(ison) = cv_tcostm(inode)
1594 & -ncostm +cv_ncostm(ison)
1595 do lev = 1, k-1
1596 ifather = next_father
1597 next_father = -frere(ifather)
1598 npiv_son= abs(npivsplit(lev))
1599 npiv_father=abs(npivsplit(lev+1))
1600 call mumps_calcnodecosts(npiv_father,nfrontk-npiv_son,
1601 & ncostw_ifather,ncostm_ifather)
1602 cv_ncostw(ifather)=ncostw_ifather
1603 cv_ncostm(ifather)=ncostm_ifather
1604 if(associated(cv_tcostw))
1605 & cv_tcostw(ifather) = cv_tcostw(ison)+cv_ncostw(ifather)
1606 if(associated(cv_tcostm))
1607 & cv_tcostm(ifather) = cv_tcostm(ison)+cv_ncostm(ifather)
1609 if(lev .gt. 1) then
1610 call mumps_propmap4split(inode,ison,ierr)
1611 if(ierr.ne.0) then
1612 if(cv_lp.gt.0)
1613 & write(cv_lp,*)'PROPMAP4SPLIT error in ',subname
1614 istat = ierr
1615 return
1616 endif
1617 endif
1618 nfrontk = nfrontk-npiv_son
1619 npivk = npivk - npiv_son
1620 ison = ifather
1621 enddo
1622 if (npivk .ne. npiv_father) then
1623 write(*,*) "Error 1 in MUMPS_SPLITNODE_UPDATE"
1624 call mumps_abort()
1625 endif
1626 call mumps_propmap4split(inode,ifather,ierr)
1627 if(ierr.ne.0) then
1628 if(cv_lp.gt.0)
1629 & write(cv_lp,*)'PROPMAP4SPLIT error in ',subname
1630 istat = ierr
1631 return
1632 endif
1633 cv_ncostw(inode) = ncostw
1634 cv_ncostm(inode) = ncostm
1635 istat = 0
1636 return
1637 end subroutine mumps_splitnode_update
1638 function mumps_is_node_of_type2 (inode)
1639 implicit none
1640 integer, intent(in) :: inode
1641 logical :: mumps_is_node_of_type2
1642 if (
1643 & (cv_nodetype(inode) .EQ. 2 ) .OR.
1644 & (cv_nodetype(inode) .EQ. tsplit_beg ) .OR.
1645 & (cv_nodetype(inode) .EQ. tsplit_mid ) .OR.
1646 & (cv_nodetype(inode) .EQ. -tsplit_mid ) .OR.
1647 & (cv_nodetype(inode) .EQ. tsplit_last) .OR.
1648 & (cv_nodetype(inode) .EQ. -tsplit_last)
1649 & ) then
1650 mumps_is_node_of_type2 = .true.
1651 else
1652 mumps_is_node_of_type2 = .false.
1653 endif
1654 return
1655 end function mumps_is_node_of_type2
1656 subroutine mumps_encode_procnode(istat)
1657 implicit none
1658 integer, intent(out)::istat
1659 integer i,in,inode
1660 character (len=48):: subname
1661 integer, external :: MUMPS_ENCODE_TPN_IPROC
1662 istat=-1
1663 subname='ENCODE_PROCNODE'
1664 do i=1,cv_nbsa
1665 inode=cv_ssarbr(i)
1666 cv_nodetype(inode)=0
1667 in=cv_fils(inode)
1668 do while (in>0)
1669 in=cv_fils(in)
1670 end do
1671 in=-in
1672 do while(in.gt.0)
1673 call mumps_typeinssarbr(in)
1674 in=cv_frere(in)
1675 enddo
1676 enddo
1677 do i=1,cv_n
1678 if (cv_frere(i).lt.cv_n+1) then
1679 if(cv_nodetype(i).eq.cv_invalid) then
1680 if(cv_lp.gt.0)
1681 & write(cv_lp,*)'Error in ',subname
1682 return
1683 endif
1684 if (i.eq.cv_keep(38)) then
1685 if (cv_nodetype(i).ne.3) then
1686 cv_nodetype(i)=3
1687 endif
1688 endif
1690 & cv_procnode(i)-1, cv_keep(199))
1691 in=cv_fils(i)
1692 do while (in>0)
1694 in=cv_fils(in)
1695 end do
1696 end if
1697 end do
1698 istat = 0
1699 return
1700 end subroutine mumps_encode_procnode
1701 subroutine mumps_fathson_replace(ifather,istat)
1702 implicit none
1703 integer,intent(in)::ifather
1704 integer,intent(out)::istat
1705 integer in,son,oldl0end
1706 logical father_has_sons
1707 character (len=48):: subname
1708 istat=-1
1709 subname='FATHSON_REPLACE'
1710 father_has_sons=.true.
1711 in=ifather
1712 do while (in.gt.0)
1713 in=cv_fils(in)
1714 end do
1715 if(in.eq.0) then
1716 cv_nodelayer(ifather)=1
1717 cv_keep(262)=cv_keep(262)+1
1718 father_has_sons=.false.
1719 end if
1720 if(cv_layerl0_end-cv_layerl0_start.gt.0) then
1722 elseif(father_has_sons) then
1724 else
1725 istat=1
1726 cv_nodelayer(ifather)=0
1727 return
1728 endif
1730 oldl0end = cv_layerl0_end
1731 if (father_has_sons) then
1732 son=-in
1733 son=-in
1734 10 continue
1736 if (cv_tcostw(son).GT.mincostw)
1741 if((cv_frere(son).gt.0).and.(cv_frere(son).lt.cv_n+1)) then
1742 son=cv_frere(son)
1743 goto 10
1744 end if
1745 endif
1750 if(cv_layerl0_end.gt.oldl0end) then
1751 call mumps_sort_msort(ierr,cv_layerl0_end-oldl0end,
1752 & cv_layerl0_array(oldl0end+1:cv_layerl0_end),
1754 if(ierr.ne.0) then
1755 if(cv_lp.gt.0)
1756 & write(cv_lp,*) 'Error reported by MUMPS_SORT_MSORT in',
1757 & subname
1758 istat = ierr
1759 return
1760 endif
1761 call mumps_sort_mmerge(
1762 & cv_layerl0_start,oldl0end,oldl0end-cv_layerl0_start+1,
1763 & oldl0end+1,cv_layerl0_end,cv_layerl0_end-oldl0end,
1766 if(ierr.ne.0) then
1767 if(cv_lp.gt.0)
1768 & write(cv_lp,*)
1769 & 'Error reported by MUMPS_SORT_MMERGE in',
1770 & subname
1771 istat = ierr
1772 return
1773 endif
1774 endif
1775 istat=0
1776 return
1777 end subroutine mumps_fathson_replace
1778 subroutine mumps_find_best_proc(inode,map_strat,work,mem,
1779 & workload,memused,proc,istat,respect_prop)
1780!DEC$ NOOPTIMIZE
1781 implicit none
1782 integer, intent(in)::inode,map_strat
1783 DOUBLE PRECISION,intent(in)::work,mem
1784 DOUBLE PRECISION,dimension(:),intent(inout)::workload, memused
1785 integer,intent(out):: proc,istat
1786 logical,intent(in),OPTIONAL::respect_prop
1787 integer i
1788 logical respect_proportional
1789 DOUBLE PRECISION dummy
1790 character (len=48):: subname
1791 istat=-1
1792 respect_proportional=.false.
1793 if(present(respect_prop)) respect_proportional=respect_prop
1794 subname='FIND_BEST_PROC'
1795 proc=-1
1796 if((map_strat.ne.cv_equilib_flops).and.
1797 & (map_strat.ne.cv_equilib_mem)) return
1798 dummy=huge(dummy)
1799 do i=cv_slavef,1,-1
1800 if (
1801 & ((.NOT.respect_proportional)
1802 & .OR.
1803 & (mumps_bit_get4proc(inode,i).AND.respect_proportional))
1804 & .AND.
1805 & (((workload(i).lt.dummy).AND.
1806 & (map_strat.eq.cv_equilib_flops))
1807 & .OR.
1808 & ((memused(i).lt.dummy).AND.
1809 & (map_strat.eq.cv_equilib_mem))))then
1810 if((.not.cv_constr_work).or.
1811 & (workload(i)+work.lt.cv_proc_maxwork(i))) then
1812 if((.not.cv_constr_mem).or.
1813 & (memused(i)+mem.lt.cv_proc_maxmem(i))) then
1814 proc=i
1815 if(map_strat.eq.cv_equilib_flops) then
1816 dummy=workload(i)
1817 elseif(map_strat.eq.cv_equilib_mem) then
1818 dummy=memused(i)
1819 endif
1820 end if
1821 end if
1822 end if
1823 end do
1824 if (proc.ne.-1) then
1825 workload(proc)=workload(proc)+work
1826 memused(proc)=memused(proc)+mem
1827 istat=0
1828 end if
1829 return
1830 end subroutine mumps_find_best_proc
1831 subroutine mumps_find_thislayer(nmb,
1832 & thislayer,nmb_thislayer,istat)
1833 implicit none
1834 integer, intent(in)::nmb
1835 integer,intent(out) :: thislayer(:)
1836 integer,intent(out) :: nmb_thislayer,istat
1837 integer i
1838 character (len=48):: subname
1839 istat=-1
1840 subname='FIND_THISLAYER'
1841 thislayer=0
1842 nmb_thislayer=0
1843 if((nmb.lt.0).or.(nmb.gt.cv_maxlayer)) return
1844 do i=1,cv_n
1845 if(cv_nodelayer(i).eq.nmb) then
1846 nmb_thislayer=nmb_thislayer+1
1847 if(nmb_thislayer.gt.cv_maxnodenmb) then
1848 if(cv_lp.gt.0)
1849 & write(cv_lp,*)'Problem with nmb_thislayer in ',subname
1850 return
1851 endif
1852 thislayer(nmb_thislayer)=i
1853 end if
1854 end do
1855 istat=0
1856 return
1857 end subroutine mumps_find_thislayer
1858 subroutine mumps_higher_layer(startlayer,thislayer,
1859 & nmb_thislayer,cont,istat)
1860 implicit none
1861 integer,intent(in)::startlayer,nmb_thislayer
1862 integer,intent(in)::thislayer(:)
1863 logical,intent(inout)::cont
1864 integer,intent(out)::istat
1865 integer :: visited
1866 integer il,i,current,in,ifather
1867 logical father_valid,upper_layer_exists
1868 character (len=48):: subname
1869 istat=-1
1870 subname='HIGHER_LAYER'
1871 if(.NOT.cont) return
1872 if(startlayer.lt.1) return
1873 current=startlayer-1
1874 visited = -current-1
1875 upper_layer_exists=.false.
1876 if (current.eq.0) then
1877 do i=1,cv_n
1878 if (cv_nodelayer(i).ne.current) then
1879 if(cv_nodelayer(i).eq.1) then
1880 upper_layer_exists=.true.
1881 exit
1882 endif
1883 endif
1884 enddo
1885 endif
1886 do il=1,nmb_thislayer
1887 i = thislayer(il)
1888 in=i
1889 if (cv_nodetype(in).eq.tsplit_beg) then
1890 do while (cv_frere(in).lt.0)
1891 ifather = -cv_frere(in)
1892 if (abs(cv_nodetype(ifather)).eq.tsplit_mid) then
1893 in = ifather
1894 cv_nodelayer(in) = -visited-1
1895 cycle
1896 else if (abs(cv_nodetype(ifather)).eq.tsplit_last) then
1897 in = ifather
1898 cv_nodelayer(in) = current
1899 exit
1900 else
1901 write(6,*) ' Internal error 1 in MUMPS_HIGHER_LAYER'
1902 call mumps_abort()
1903 endif
1904 end do
1905 endif
1906 enddo
1907 do il=1,nmb_thislayer
1908 i = thislayer(il)
1909 if (cv_nodelayer(i).lt.current) cycle
1910 in=i
1911 if (cv_nodetype(in).eq.tsplit_beg) then
1912 cv_nodelayer(in) = visited
1913 do while (cv_frere(in).lt.0)
1914 ifather = -cv_frere(in)
1915 if (abs(cv_nodetype(ifather)).eq.tsplit_mid) then
1916 in = ifather
1917 cv_nodelayer(in) = -visited-1
1918 cycle
1919 else if (abs(cv_nodetype(ifather)).eq.tsplit_last) then
1920 in = ifather
1921 exit
1922 else
1923 write(6,*) ' Internal error 1 in MUMPS_HIGHER_LAYER',
1924 & cv_nodetype(ifather)
1925 call mumps_abort()
1926 endif
1927 end do
1928 endif
1929 if(cv_frere(in).eq.0) cycle
1930 cv_nodelayer(in) = visited
1931 father_valid=.true.
1932 do while(cv_frere(in).gt.0)
1933 if (cv_nodelayer(cv_frere(in)).gt.current) then
1934 father_valid=.false.
1935 in = cv_frere(in)
1936 cycle
1937 endif
1938 if (cv_nodelayer(cv_frere(in)).eq.visited) exit
1939 in=cv_frere(in)
1940 if (cv_nodelayer(in).eq.current) then
1941 cv_nodelayer(in) = visited
1942 endif
1943 end do
1944 if (.not.father_valid .or. cv_frere(in).gt.0) then
1945 cycle
1946 endif
1947 ifather=-cv_frere(in)
1948 if(cv_nodelayer(ifather).eq.current+1) then
1949 cycle
1950 endif
1951 in=ifather
1952 do while (cv_fils(in).gt.0)
1953 in=cv_fils(in)
1954 end do
1955 in=-cv_fils(in)
1956 if(cv_nodelayer(in).gt.current) then
1957 father_valid=.false.
1958 else
1959 father_valid=.true.
1960 do while(cv_frere(in).gt.0)
1961 in=cv_frere(in)
1962 if(cv_nodelayer(in).gt.current) then
1963 father_valid=.false.
1964 exit
1965 endif
1966 if(cv_nodelayer(in).eq.visited) then
1967 exit
1968 endif
1969 end do
1970 endif
1971 if(father_valid) then
1972 cv_nodelayer(ifather)=current+1
1973 upper_layer_exists=.true.
1974 end if
1975 end do
1976 if (upper_layer_exists) then
1977 current=current+1
1978 cv_maxlayer=current
1979 cont=.true.
1980 else
1981 cv_maxlayer=current
1982 cont=.false.
1983 endif
1984 do il=1,nmb_thislayer
1985 i = thislayer(il)
1986 if (cv_nodelayer(i).eq.visited) cv_nodelayer(i) = -visited-1
1987 enddo
1988 istat=0
1989 return
1990 end subroutine mumps_higher_layer
1991 subroutine mumps_initpart1(n,slavef,
1992 & frere,fils,nfsiz,ne,keep,KEEP8,icntl,info,
1993 & procnode,ssarbr,peak,istat
1994 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
1995 & )
1996 implicit none
1997 integer, intent(in)::n,slavef
1998 integer, intent(in), TARGET:: frere(n),fils(n),nfsiz(n),ne(n),
1999 & keep(500),icntl(60),info(80),
2000 & procnode(n),ssarbr(n)
2001 INTEGER(8), intent(in), TARGET:: KEEP8(150)
2002 integer,intent(out)::istat
2003 integer, intent(in) :: LSIZEOFBLOCKS
2004 integer, intent(in), TARGET :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
2005 integer i,allocok,rest
2006 DOUBLE PRECISION peak
2007 character (len=48):: subname
2008 istat=-1
2014 nullify(cv_sizeofblocks)
2015 cv_sizeofblocks => sizeofblocks
2016 subname='INITPART1'
2017 cv_n=n
2018 cv_slavef=slavef
2019 cv_keep=>keep
2020 cv_keep8=>keep8
2021 if(cv_keep(82) .lt. 0) then
2022 write(cv_lp,*)
2023 & 'Warning in mumps_static_mapping : splitting is set off'
2024 cv_keep(82) = 0
2025 endif
2026 if(cv_keep(83) .lt. 0) then
2027 write(cv_lp,*)
2028 & 'warning in mumps_static_mapping : keep(83) reset to 0'
2029 cv_keep(83) = 0
2030 endif
2031 if(slavef.gt.1) then
2033 cv_maxdepth = slavef
2034 else
2035 cv_maxdepth = 0
2037 endif
2038 cv_bitsize_of_int = bit_size(n)
2039 if(cv_bitsize_of_int.le.0) then
2040 if(cv_lp.gt.0)
2041 & write(cv_lp,*)'Problem with bit size in ',subname
2042 return
2043 endif
2044 rest = mod(cv_slavef,cv_bitsize_of_int)
2045 if (rest.eq.0) then
2047 else
2049 endif
2050 allocate(cv_ncostw(n),cv_tcostw(n),cv_ncostm(n),cv_tcostm(n),
2052 & cv_layerworkload(slavef),cv_layermemused(slavef),
2053 & cv_prop_map(n),stat=allocok)
2054 if (allocok.gt.0) then
2056 cv_info(2) = 8*n+2*cv_slavef
2057 istat = cv_error_memalloc
2058 if(cv_lp.gt.0)
2059 & write(cv_lp,*)'memory allocation error in ',subname
2060 return
2061 end if
2062 if(cv_keep(82) .eq. 0) then
2063 if(cv_lp.gt.0)
2064 & write(cv_lp,*)' No splitting during static mapping '
2065 endif
2066 cv_frere=>frere
2067 cv_fils=>fils
2068 cv_nfsiz=>nfsiz
2069 cv_ne=>ne
2070 cv_icntl=>icntl
2071 cv_info=>info
2072 cv_procnode=>procnode
2073 cv_ssarbr=>ssarbr
2074 cv_ssarbr=0
2076 cv_nsteps=keep(28)
2077 if((keep(28).gt.n).OR.(keep(28).lt.0)) then
2078 if(cv_lp.gt.0)
2079 & write(cv_lp,*)'problem with nsteps in ',subname
2080 return
2081 end if
2082 cv_costw_upper=0.0d0
2083 cv_costm_upper=0.0d0
2084 cv_costw_layer0=0.0d0
2085 cv_costm_layer0=0.0d0
2086 cv_costw_total=0.0d0
2087 cv_costm_total=0.0d0
2088 cv_nodelayer=n+2
2090 cv_l0wthresh=0.0d0
2091 cv_splitthresh=0.45d0
2092 cv_relax=dble(1) + dble(max(0,keep(68)))/dble(100)
2093 cv_maxlayer=0
2095 cv_layerworkload=dble(0)
2096 cv_layermemused=dble(0)
2100 cv_last_splitting%new_ifather=cv_invalid
2102 cv_last_splitting%ncostw_oldinode=cv_d_invalid
2103 cv_last_splitting%ncostm_oldinode=cv_d_invalid
2104 cv_last_splitting%tcostw_oldinode=cv_d_invalid
2105 cv_last_splitting%tcostm_oldinode=cv_d_invalid
2106 do i=1,cv_n
2107 nullify(cv_prop_map(i)%ind_proc)
2108 end do
2109 istat=0
2110 return
2111 end subroutine mumps_initpart1
2112 subroutine mumps_initpart2(istat)
2113 implicit none
2114 integer,intent(out)::istat
2115 integer i,allocok,inode,in,inoderoot,ierr,maxcut
2116 character (len=48):: subname
2117 istat=-1
2118 subname='INITPART2'
2119 if(associated(cv_layerl0_array))deallocate(cv_layerl0_array)
2120 if(associated(cv_layerl0_sorted_costw))
2121 & deallocate(cv_layerl0_sorted_costw)
2122 deallocate(cv_depth,cv_tcostw,cv_tcostm,stat=ierr)
2123 if(ierr.ne.0) then
2124 if(cv_lp.gt.0)
2125 & write(cv_lp,*)'Memory deallocation error in ',subname
2126 istat = cv_error_memdeloc
2127 return
2128 end if
2129 if(cv_maxnsteps.lt.1) then
2130 if(cv_lp.gt.0)
2131 & write(cv_lp,*)'problem with maxnsteps in ',subname
2132 return
2133 end if
2135 do i=1,cv_nbsa
2136 inode=cv_ssarbr(i)
2137 inoderoot=inode
2138 300 continue
2139 in = inode
2140 do while (in.ne.0)
2141 inode = in
2142 do while (in.gt.0)
2143 in = cv_fils(in)
2144 end do
2145 if (in.lt.0) in=-in
2146 end do
2147 100 continue
2148 if (inode.ne.inoderoot) then
2150 in = cv_frere(inode)
2151 inode = abs(in)
2152 if (in.lt.0) then
2153 go to 100
2154 else
2155 go to 300
2156 end if
2157 end if
2158 end do
2159 if(cv_keep(82) .gt. 0) then
2160 maxcut = min((cv_keep(82)-1)*cv_maxnodenmb,cv_n)
2163 endif
2164 nullify(cv_layer_p2node)
2165 if(cv_maxnodenmb.lt.0) then
2166 if(cv_lp.gt.0)
2167 & write(cv_lp,*)'problem with maxnodenmb in ',subname
2168 return
2169 elseif(cv_maxnodenmb.lt.1) then
2170 cv_maxnodenmb = 1
2171 end if
2172 allocate(cv_layer_p2node(cv_maxnodenmb),stat=allocok)
2173 if (allocok.gt.0) then
2176 istat = cv_error_memalloc
2177 if(cv_lp.gt.0)
2178 & write(cv_lp,*)'memory allocation error in ',subname
2179 return
2180 end if
2181 do i=1,cv_maxnodenmb
2182 nullify(cv_layer_p2node(i)%t2_nodenumbers,
2183 & cv_layer_p2node(i)%t2_cand,
2184 & cv_layer_p2node(i)%t2_candcostw,
2185 & cv_layer_p2node(i)%t2_candcostm)
2186 cv_layer_p2node(i)%nmb_t2s=0
2187 enddo
2188 istat = 0
2189 end subroutine mumps_initpart2
2190 function mumps_istype2bysize(nfront,npiv)
2191 implicit none
2192 logical::mumps_istype2bysize
2193 integer,intent(in)::nfront,npiv
2194 mumps_istype2bysize=.false.
2195 if( (nfront - npiv > cv_keep(9))
2196 & .and. ((npiv > cv_keep(4)).or.(.true.))
2197 & .and. (cv_icntl(59).eq.0) ) mumps_istype2bysize=.true.
2198 return
2199 end function mumps_istype2bysize
2200 subroutine mumps_layerl0(istat)
2201 implicit none
2202 integer,intent(out)::istat
2203 integer i,ierr,inode
2204 logical accepted
2205 integer,parameter::map_strat=cv_equilib_flops
2206 character (len=48):: err_rep,subname
2207 logical use_geist_ng_replace, skiparrangeL0
2208 INTEGER MINSIZE_L0
2209 INTEGER CURRENT_SIZE_L0
2210 istat=-1
2211 subname='LAYERL0'
2212 accepted=.false.
2213 IF (cv_keep(72).EQ.2) THEN
2214 minsize_l0 = 6*cv_slavef
2215 ELSE
2216 IF (cv_keep(198).NE.0) THEN
2217 IF (cv_keep(198).EQ.1) THEN
2218 minsize_l0 = 3*cv_slavef
2219 ELSE
2220 minsize_l0 = 2*cv_slavef
2221 ENDIF
2222 ELSE
2223 minsize_l0 = 3*cv_slavef
2224 ENDIF
2225 ENDIF
2226 55 continue
2227 skiparrangel0 = .false.
2228 do while(.not.accepted)
2229 IF (cv_keep(198).EQ.2) THEN
2230 current_size_l0 = layerl0_endforarrangel0
2231 ELSE
2232 current_size_l0 = layerl0_endforarrangel0
2233 ENDIF
2234 IF ( ( (current_size_l0.LT.minsize_l0)
2235 & .OR. skiparrangel0
2236 & )
2237 & .AND.
2238 & (cv_layerl0_end.LT.cv_maxnsteps/2) ) THEN
2239 accepted = .false.
2240 ELSE
2241 err_rep='ARRANGEL0'
2244 & cv_procnode,ierr)
2245 if(ierr.ne.0) then
2246 if(cv_lp.gt.0)
2247 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2248 istat = ierr
2249 return
2250 end if
2251 err_rep='ACCEPT_L0'
2252 call mumps_accept_l0(map_strat,
2254 & accepted,ierr)
2255 if(ierr.ne.0) then
2256 if(cv_lp.gt.0)
2257 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2258 istat = ierr
2259 return
2260 end if
2261 ENDIF
2262 IF (cv_keep(198).EQ.0) THEN
2263 IF (cv_slavef.GT.16)
2264 & skiparrangel0 = .NOT.skiparrangel0
2265 ENDIF
2266 if (accepted.OR.(cv_costw_total.le.0.0d0)) then
2267 exit
2268 elseif(((cv_costw_layer0/cv_costw_total).gt.cv_l0wthresh) .AND.
2269 & (.true.))then
2270 err_rep='MAX_TCOST_L0'
2272 use_geist_ng_replace = .true.
2273 if(use_geist_ng_replace) then
2274 err_rep='FATHSON_REPLACE'
2275 call mumps_fathson_replace(inode,ierr)
2276 if(ierr.eq.1) then
2277 accepted=.true.
2278 elseif(ierr.ne.0) then
2279 if(cv_lp.gt.0)
2280 & write(cv_lp,*)
2281 & 'Error rep. by ',err_rep,' in ',subname
2282 istat = ierr
2283 return
2284 endif
2285 endif
2286 else
2287 accepted=.true.
2288 end if
2289 end do
2290 accepted=.true.
2291 if (accepted) then
2292 else
2293 goto 55
2294 endif
2295 err_rep='LIST2LAYER'
2296 call mumps_list2layer(ierr)
2297 if(ierr.ne.0) then
2298 if(cv_lp.gt.0)
2299 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2300 istat = ierr
2301 return
2302 end if
2303 err_rep='MAKE_PROPMAP'
2304 call mumps_make_propmap(ierr)
2305 if(ierr.ne.0) then
2306 if(cv_lp.gt.0)
2307 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2308 istat = ierr
2309 return
2310 end if
2311 if ( cv_keep(75).EQ.1 ) then
2312 call mumps_arrangel0(map_strat, cv_layerl0_end,
2314 & cv_procnode,ierr, respect_prop=.true.)
2315 if(ierr.ne.0) then
2316 if(cv_lp.gt.0)
2317 & write(cv_lp,*)'Error reported by ',err_rep,' in ',subname
2318 istat = ierr
2319 return
2320 end if
2321 else if (layerl0_endforarrangel0.LT.cv_layerl0_end) THEN
2322 call mumps_arrangel0(map_strat, cv_layerl0_end,
2324 & cv_procnode,ierr)
2325 endif
2327 do i=1,cv_slavef
2330 end do
2331 istat=0
2332 return
2333 end subroutine mumps_layerl0
2334 subroutine mumps_list2layer(istat)
2335 implicit none
2336 integer, intent(out)::istat
2337 character (len=48):: subname
2338 integer i,inode
2339 istat=-1
2340 subname='LIST2LAYER'
2342 cv_nbsa=0
2344 inode=cv_layerl0_array(i)
2345 if(inode.gt.0) then
2347 & ,max(cv_depth(inode)-cv_mixed_strat_bound,0))
2348 cv_nodelayer(inode)=0
2350 cv_ssarbr(cv_nbsa)=inode
2351 endif
2352 enddo
2353 istat=0
2354 return
2355 end subroutine mumps_list2layer
2356 subroutine mumps_make_propmap(istat)
2357 implicit none
2358 integer,intent(out)::istat
2359 integer i,pctr,pctr2,ierr
2360 character (len=48):: subname
2361 INTEGER, ALLOCATABLE, DIMENSION(:) :: procindex
2362 INTEGER :: allocok
2363 subname = "MUMPS_MAKE_PROPMAP"
2364 istat = -1
2365 ALLOCATE(procindex(cv_size_ind_proc),stat=allocok)
2366 IF (allocok > 0) THEN
2369 istat = cv_error_memalloc
2370 if(cv_lp.gt.0)
2371 & write(cv_lp,*) 'Memory allocation error in ',subname
2372 return
2373 ENDIF
2374 pctr=cv_n
2376 do i=1,cv_slavef
2377 call mumps_bit_set(procindex,i,ierr)
2378 if(ierr.ne.0) then
2379 if(cv_lp.gt.0)write(cv_lp,*)
2380 & 'MUMPS_BIT_SET signalled error to',subname
2381 istat = ierr
2382 GOTO 999
2383 end if
2384 end do
2385 do i=1,cv_n
2386 if(cv_frere(i).eq.0) then
2387 if(.NOT.associated(cv_prop_map(i)%ind_proc)) then
2388 call mumps_propmap_init(i,ierr)
2389 if(ierr.ne.0) then
2390 if(cv_lp.gt.0)
2391 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
2392 & ,subname
2393 istat = ierr
2394 GOTO 999
2395 end if
2396 endif
2397 cv_prop_map(i)%ind_proc = procindex
2398 call mumps_propmap(i,pctr,ierr)
2399 if(ierr.ne.0) then
2400 if(cv_lp.gt.0)write(cv_lp,*)
2401 & 'PROPMAP signalled error to',subname
2402 istat = ierr
2403 GOTO 999
2404 endif
2405 if((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
2406 call mumps_mod_propmap(i,pctr2,ierr)
2407 if(ierr.ne.0) then
2408 if(cv_lp.gt.0)write(cv_lp,*)
2409 & 'MOD_PROPMAP signalled error to',subname
2410 istat = ierr
2411 GOTO 999
2412 endif
2413 endif
2414 endif
2415 end do
2416 istat = 0
2417 999 CONTINUE
2418 DEALLOCATE(procindex)
2419 return
2420 end subroutine mumps_make_propmap
2421 subroutine mumps_map_layer(layernmb,thislayer,
2422 & nmb_thislayer,map_strat,istat)
2423 implicit none
2424 integer, intent(in)::layernmb,thislayer(:),
2425 & nmb_thislayer,map_strat
2426 integer,intent(out)::istat
2427 integer i,inode,j,k,ierr,nmb,aux_int,nmb_cand_needed
2428 DOUBLE PRECISION aux_flop,aux_mem
2429 INTEGER, ALLOCATABLE, DIMENSION(:) :: candid, sorted_nmb
2430 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) ::
2431 & sorted_costw, sorted_costm, old_workload, old_memused
2432 character (len=48):: err_rep,subname
2433 logical use_propmap
2434 istat=-1
2435 subname='MAP_LAYER'
2436 if((cv_keep(24).eq.8).OR.(cv_keep(24).eq.10)
2437 & .OR.(cv_keep(24).eq.12).OR.(cv_keep(24).eq.14)
2438 & .OR.(cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
2439 use_propmap=.true.
2440 else
2441 use_propmap=.false.
2442 endif
2443 if((layernmb.lt.0).or.(layernmb.gt.cv_maxlayer)) return
2444 if((map_strat.ne.cv_equilib_flops).and.
2445 & (map_strat.ne.cv_equilib_mem)) return
2446 ALLOCATE(candid(cv_slavef), sorted_nmb(2*nmb_thislayer),
2447 & sorted_costw(2*nmb_thislayer), sorted_costm(2*nmb_thislayer),
2448 & old_workload(cv_slavef), old_memused(cv_slavef), stat=allocok)
2449 if (allocok.gt.0) then
2451 cv_info(2) = 7*nmb_thislayer+2*cv_slavef
2452 istat = cv_error_memalloc
2453 if(cv_lp.gt.0)
2454 & write(cv_lp,*)'memory allocation error in ',subname
2455 goto 999
2456 end if
2457 do i=1,nmb_thislayer
2458 inode=thislayer(i)
2459 if (cv_nodetype(inode).eq.3) then
2460 cv_procnode(inode)=1
2461 exit
2462 end if
2463 end do
2464 do i=1,cv_slavef
2465 old_workload(i)=cv_layerworkload(i)
2466 old_memused(i)=cv_layermemused(i)
2467 enddo
2468 nmb=0
2469 do i=1,nmb_thislayer
2470 inode=thislayer(i)
2471 if(cv_nodetype(inode).eq.1) then
2472 nmb=nmb+1
2473 sorted_nmb(nmb)=inode
2474 sorted_costw(nmb)=cv_ncostw(inode)
2475 sorted_costm(nmb)=cv_ncostm(inode)
2476 else if(mumps_is_node_of_type2(inode)) then
2477 nmb=nmb+1
2478 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2479 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.inode)
2480 & then
2481 cycle
2482 else
2483 sorted_costw(nmb)=
2484 & cv_layer_p2node(layernmb)%t2_candcostw(j)
2485 sorted_costm(nmb)=
2486 & cv_layer_p2node(layernmb)%t2_candcostm(j)
2487 endif
2488 enddo
2489 if((sorted_costw(nmb).eq.cv_d_invalid).OR.
2490 & (sorted_costm(nmb).eq.cv_d_invalid)) then
2491 if(cv_lp.gt.0)
2492 & write(cv_lp,*)'Error in ',subname
2493 goto 999
2494 end if
2495 if(sorted_costw(nmb).lt.cv_ncostw(inode))then
2496 sorted_costw(nmb)=cv_ncostw(inode)
2497 sorted_costm(nmb)=cv_ncostm(inode)
2498 sorted_nmb(nmb)=inode
2499 else
2500 sorted_nmb(nmb)=-inode
2501 endif
2502 else if(cv_nodetype(inode).eq.3) then
2503 cycle
2504 else
2505 if(cv_lp.gt.0)
2506 & write(cv_lp,*)'Unknown node type. Error in ',subname
2507 goto 999
2508 end if
2509 end do
2510 if (map_strat.eq.cv_equilib_flops) then
2511 call mumps_sort_msort(ierr,nmb,sorted_nmb(1:nmb),
2512 & sorted_costw(1:nmb),sorted_costm(1:nmb))
2513 elseif(map_strat.eq.cv_equilib_mem) then
2514 call mumps_sort_msort(ierr,nmb,sorted_nmb(1:nmb),
2515 & sorted_costm(1:nmb),sorted_costw(1:nmb))
2516 endif
2517 if(ierr.ne.0) then
2518 if(cv_lp.gt.0)
2519 & write(cv_lp,*)
2520 & 'Error reported by MUMPS_SORT_MSORT in ',subname
2521 istat = ierr
2522 goto 999
2523 endif
2524 do i=1,nmb
2525 aux_int=sorted_nmb(i)
2526 aux_flop=sorted_costw(i)
2527 aux_mem=sorted_costm(i)
2528 k=1
2529 if (aux_int.lt.0) then
2530 inode=-aux_int
2531 err_rep='SORTPROCS'
2532 if(use_propmap) then
2533 call mumps_sortprocs(map_strat,
2535 & inode=inode,istat=ierr)
2536 else
2537 call mumps_sortprocs(map_strat,
2539 & istat=ierr)
2540 end if
2541 if(ierr.ne.0) then
2542 if(cv_lp.gt.0)
2543 & write(cv_lp,*)
2544 & 'Error reported by ',err_rep,' in ',subname
2545 istat = ierr
2546 goto 999
2547 endif
2548 nmb_cand_needed=cv_invalid
2549 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2550 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.inode)
2551 & then
2552 cycle
2553 else
2554 nmb_cand_needed=
2555 & cv_layer_p2node(layernmb)%t2_cand(j,cv_slavef+1)
2556 exit
2557 endif
2558 enddo
2559 if(nmb_cand_needed.eq.cv_invalid) then
2560 if(cv_lp.gt.0)
2561 & write(cv_lp,*)'Error in ',subname
2562 goto 999
2563 endif
2564 do while((k.le.cv_slavef).and.(nmb_cand_needed.gt.0))
2565 if(((.not.cv_constr_work).or.
2566 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2568 & .AND.((.not.cv_constr_mem).or.
2569 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2571 & .AND.
2572 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2573 & then
2575 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2577 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2578 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2579 & =inode
2581 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2583 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2584 nmb_cand_needed=nmb_cand_needed-1
2585 k=k+1
2586 else
2587 k=k+1
2588 if(k.gt.cv_slavef) then
2589 if(cv_lp.gt.0)
2590 & write(cv_lp,*)'Error in ',subname
2591 goto 999
2592 endif
2593 end if
2594 end do
2595 if(nmb_cand_needed.gt.0) then
2596 if(cv_lp.gt.0)
2597 & write(cv_lp,*)'Error in ',subname
2598 goto 999
2599 endif
2600 aux_flop=cv_ncostw(inode)
2601 aux_mem=cv_ncostm(inode)
2602 do while(k.le.cv_slavef)
2603 if(((.not.cv_constr_work).or.
2604 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2606 & .AND.((.not.cv_constr_mem).or.
2607 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2609 & .AND.
2610 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2611 & then
2612 cv_procnode(inode)=cv_proc_sorted(k)
2614 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2616 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2617 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2618 & =-inode
2620 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2622 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2623 exit
2624 else
2625 k=k+1
2626 if(k.gt.cv_slavef) then
2627 if(cv_lp.gt.0)
2628 & write(cv_lp,*)'Error in ',subname
2629 goto 999
2630 endif
2631 end if
2632 end do
2633 else
2634 inode=aux_int
2635 err_rep='SORTPROCS'
2636 if(use_propmap) then
2637 call mumps_sortprocs(map_strat,
2639 & inode=inode,istat=ierr)
2640 else
2641 call mumps_sortprocs(map_strat,
2643 & inode,istat=ierr)
2644 endif
2645 if(ierr.ne.0) then
2646 if(cv_lp.gt.0)
2647 & write(cv_lp,*)
2648 & 'Error reported by ',err_rep,' in ',subname
2649 istat = ierr
2650 goto 999
2651 endif
2652 if (cv_nodetype(inode).eq.1) then
2653 do while(k.le.cv_slavef)
2654 if((.not.cv_constr_work).or.
2655 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2657 & .AND.((.not.cv_constr_mem).or.
2658 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2659 & cv_proc_maxmem(cv_proc_sorted(k))))) then
2660 cv_procnode(inode)=cv_proc_sorted(k)
2662 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2664 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2666 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2668 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2669 exit
2670 else
2671 k=k+1
2672 if(k.gt.cv_slavef) then
2673 if(cv_lp.gt.0)
2674 & write(cv_lp,*)'Inconsist data in ',subname
2675 goto 999
2676 endif
2677 end if
2678 end do
2679 elseif (mumps_is_node_of_type2(inode)) then
2680 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2681 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j).ne.
2682 & inode) then
2683 cycle
2684 else
2685 exit
2686 endif
2687 enddo
2688 do while(k.le.cv_slavef)
2689 if(((.not.cv_constr_work).or.
2690 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2692 & .AND.((.not.cv_constr_mem).or.
2693 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2695 & .AND.
2696 & (cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k)).eq.0))
2697 & then
2698 cv_procnode(inode)=cv_proc_sorted(k)
2700 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2702 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2703 cv_layer_p2node(layernmb)%t2_cand(j,cv_proc_sorted(k))
2704 & =-inode
2706 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2708 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2709 exit
2710 else
2711 k=k+1
2712 if(k.gt.cv_slavef) then
2713 if(cv_lp.gt.0)
2714 & write(cv_lp,*)'Error in ',subname
2715 goto 999
2716 endif
2717 end if
2718 end do
2719 nmb_cand_needed=cv_invalid
2720 do j=1,cv_layer_p2node(layernmb)%nmb_t2s
2721 if(cv_layer_p2node(layernmb)%t2_nodenumbers(j)
2722 & .ne.inode)
2723 & then
2724 cycle
2725 else
2726 nmb_cand_needed=
2727 & cv_layer_p2node(layernmb)%
2728 & t2_cand(j,cv_slavef+1)
2729 exit
2730 endif
2731 enddo
2732 if(nmb_cand_needed.eq.cv_invalid) then
2733 if(cv_lp.gt.0)
2734 & write(cv_lp,*)'Error in ',subname
2735 goto 999
2736 endif
2737 aux_flop=
2738 & cv_layer_p2node(layernmb)%t2_candcostw(j)
2739 aux_mem=
2740 & cv_layer_p2node(layernmb)%t2_candcostm(j)
2741 do while((k.le.cv_slavef).and.(nmb_cand_needed.gt.0))
2742 if(((.not.cv_constr_work).or.
2743 & (cv_proc_workload(cv_proc_sorted(k))+aux_flop.lt.
2745 & .AND.((.not.cv_constr_mem).or.
2746 & (cv_proc_memused(cv_proc_sorted(k))+aux_mem.lt.
2748 & .AND.
2749 & (cv_layer_p2node(layernmb)%
2750 & t2_cand(j,cv_proc_sorted(k)).eq.0))
2751 & then
2753 & cv_proc_workload(cv_proc_sorted(k))+aux_flop
2755 & cv_proc_memused(cv_proc_sorted(k))+aux_mem
2756 cv_layer_p2node(layernmb)%
2757 & t2_cand(j,cv_proc_sorted(k))
2758 & =inode
2760 & cv_layerworkload(cv_proc_sorted(k))+aux_flop
2762 & cv_layermemused(cv_proc_sorted(k))+aux_mem
2763 nmb_cand_needed=nmb_cand_needed-1
2764 k=k+1
2765 else
2766 k=k+1
2767 if(k.gt.cv_slavef) then
2768 if(cv_lp.gt.0)
2769 & write(cv_lp,*)'Error in ',subname
2770 goto 999
2771 endif
2772 end if
2773 end do
2774 if(nmb_cand_needed.gt.0) then
2775 if(cv_lp.gt.0)
2776 & write(cv_lp,*)'Error in ',subname
2777 goto 999
2778 endif
2779 end if
2780 end if
2781 end do
2782 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
2783 nmb_cand_needed=
2784 & cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
2785 candid= cv_layer_p2node(layernmb)%t2_cand(i,1:cv_slavef)
2786 cv_layer_p2node(layernmb)%t2_cand(i,1:cv_slavef)=-1
2787 k=0
2788 do j=1,cv_slavef
2789 if(candid(j).gt.0) then
2790 k=k+1
2791 cv_layer_p2node(layernmb)%t2_cand(i,k)=j-1
2792 end if
2793 end do
2794 if (k.ne.nmb_cand_needed) then
2795 if(cv_lp.gt.0)
2796 & write(cv_lp,*)'Error in ',subname
2797 goto 999
2798 endif
2799 enddo
2800 do i=1,cv_slavef
2801 cv_layerworkload(i)=cv_layerworkload(i)-old_workload(i)
2802 cv_layermemused(i)=cv_layermemused(i)-old_memused(i)
2803 enddo
2804 istat=0
2805 999 continue
2806 DEALLOCATE(candid, sorted_nmb, sorted_costw, sorted_costm,
2807 & old_workload, old_memused)
2808 return
2809 end subroutine mumps_map_layer
2810 recursive subroutine mumps_mapbelow(inode,procnmb,
2811 & procnode)
2812 integer,intent(in)::inode,procnmb
2813 integer,intent(inout)::procnode(:)
2814 integer in
2815 procnode(inode)=procnmb
2816 if (cv_fils(inode).eq.0) return
2817 in=cv_fils(inode)
2818 do while(in>0)
2819 procnode(in)=procnmb
2820 in=cv_fils(in)
2821 end do
2822 in=-in
2823 do while(in>0)
2824 call mumps_mapbelow(in,procnmb,procnode)
2825 in=cv_frere(in)
2826 end do
2827 return
2828 end subroutine mumps_mapbelow
2829 subroutine mumps_mapsubtree(procnode)
2830 implicit none
2831 integer,intent(inout)::procnode(:)
2832 integer i,inode,procnmb
2834 inode=cv_layerl0_array(i)
2835 if(inode.gt.0) then
2836 procnmb=procnode(inode)
2837 call mumps_mapbelow(inode,procnmb,procnode)
2838 endif
2839 enddo
2840 return
2841 end subroutine mumps_mapsubtree
2843 implicit none
2844 integer candid,inode,index,i,j,layernmb,master,nmbcand,swapper,
2845 & totalnmb,node_of_master,node_of_candid,node_of_swapper
2846 DOUBLE PRECISION::mastermem,slavemem,maxmem
2847 logical swapthem,cand_better_master_arch,cand_better_swapper_arch
2848 maxmem=maxval(cv_proc_memused(:))
2849 totalnmb=0
2850 do layernmb=cv_maxlayer,1,-1
2851 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
2852 inode=cv_layer_p2node(layernmb)%t2_nodenumbers(i)
2853 master=cv_procnode(inode)
2854 if(ke69 .gt. 1) then
2855 allowed_nodes = .false.
2856 call mumps_fix_accepted_master(layernmb,i)
2857 node_of_master = mem_distribmpi(master-1)
2858 if (node_of_master .lt. 0 ) then
2859 if(cv_mp.gt.0) write(cv_mp,*)'node_of_master_not found'
2860 endif
2861 node_of_swapper = node_of_master
2862 endif
2863 mastermem=cv_proc_memused(master)
2864 nmbcand=cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
2865 swapper=master
2866 index=0
2867 do j=1,nmbcand
2868 candid=cv_layer_p2node(layernmb)%t2_cand(i,j)+1
2869 slavemem=cv_proc_memused(candid)
2870 if(ke69 .gt. 1) then
2871 node_of_candid = mem_distribmpi(candid-1)
2872 if (node_of_candid .lt. 0 ) then
2873 if(cv_mp.gt.0) write(cv_mp,*)
2874 & 'node_of_candid_not found'
2875 endif
2876 endif
2877 if(ke69 .le. 1) then
2878 if((slavemem.lt.mastermem) .and.
2879 & (slavemem.lt.cv_proc_memused(swapper))) then
2880 swapper=candid
2881 index=j
2882 endif
2883 else
2884 cand_better_master_arch = (
2885 & (
2886 & (slavemem.lt.mastermem) .or.
2887 & (.not. allowed_nodes(node_of_master))
2888 & )
2889 & .and. allowed_nodes(node_of_candid)
2890 & )
2891 cand_better_swapper_arch = (
2892 & (
2893 & (slavemem.lt.cv_proc_memused(swapper)) .or.
2894 & (.not. allowed_nodes(node_of_swapper))
2895 & )
2896 & .and. allowed_nodes(node_of_candid)
2897 & )
2898 if(cand_better_master_arch .and.
2899 & cand_better_swapper_arch ) then
2900 swapper=candid
2901 node_of_swapper = node_of_candid
2902 index=j
2903 endif
2904 endif
2905 enddo
2906 if(swapper.ne.master) then
2907 swapthem = .false.
2908 if(0.75d0*mastermem.ge.cv_proc_memused(swapper))
2909 & swapthem=.true.
2910 if(mastermem.le.mastermem-cv_ncostm(inode)
2911 & +cv_layer_p2node(layernmb)%t2_candcostm(i))
2912 & swapthem=.false.
2913 if(mastermem.le.cv_proc_memused(swapper)
2914 & +cv_ncostm(inode)
2915 & -cv_layer_p2node(layernmb)%t2_candcostm(i))
2916 & swapthem=.false.
2917 if(maxmem.le.mastermem-cv_ncostm(inode)
2918 & +cv_layer_p2node(layernmb)%t2_candcostm(i))
2919 & swapthem=.false.
2920 if(maxmem.le.cv_proc_memused(swapper)+cv_ncostm(inode)
2921 & -cv_layer_p2node(layernmb)%t2_candcostm(i))
2922 & swapthem=.false.
2923 if(ke69 .gt. 1) then
2924 if (.not. allowed_nodes(node_of_master)) then
2925 swapthem=.true.
2926 endif
2927 endif
2928 if(.NOT.swapthem) cycle
2929 cv_proc_workload(master)=cv_proc_workload(master)
2930 & -cv_ncostw(inode)
2931 & +cv_layer_p2node(layernmb)%t2_candcostw(i)
2932 cv_proc_memused(master)=cv_proc_memused(master)
2933 & -cv_ncostm(inode)
2934 & +cv_layer_p2node(layernmb)%t2_candcostm(i)
2935 cv_proc_workload(swapper)=cv_proc_workload(swapper)
2936 & +cv_ncostw(inode)
2937 & -cv_layer_p2node(layernmb)%t2_candcostw(i)
2938 cv_proc_memused(swapper)=cv_proc_memused(swapper)
2939 & +cv_ncostm(inode)
2940 & -cv_layer_p2node(layernmb)%t2_candcostm(i)
2941 cv_layer_p2node(layernmb)%t2_cand(i,index)=master-1
2942 cv_procnode(inode)=swapper
2943 maxmem=maxval(cv_proc_memused(:))
2944 totalnmb = totalnmb+1
2945 endif
2946 enddo
2947 enddo
2948 end subroutine mumps_postprocess_mem
2949 subroutine mumps_procinit(maxwork,maxmem,istat)
2950 implicit none
2951 DOUBLE PRECISION,intent(in),OPTIONAL::maxwork(cv_slavef),
2952 & maxmem(cv_slavef)
2953 integer,intent(out)::istat
2954 integer i,allocok
2955 DOUBLE PRECISION dummy
2956 character (len=48):: subname
2957 istat=-1
2958 subname='PROCINIT'
2959 if(present(maxwork)) then
2960 cv_constr_work=.true.
2961 else
2962 cv_constr_work=.false.
2963 end if
2964 if(present(maxmem)) then
2965 cv_constr_mem=.true.
2966 else
2967 cv_constr_mem=.false.
2968 end if
2969 allocate(cv_proc_workload(cv_slavef),
2970 & cv_proc_maxwork(cv_slavef),
2971 & cv_proc_memused(cv_slavef),
2972 & cv_proc_maxmem(cv_slavef),
2973 & cv_proc_sorted(cv_slavef),
2974 & stat=allocok)
2975 if (allocok.gt.0) then
2977 cv_info(2) = 2*cv_slavef
2978 istat = cv_error_memalloc
2979 if(cv_lp.gt.0)
2980 & write(cv_lp,*)'memory allocation error in ',subname
2981 return
2982 end if
2983 allocate(work_per_proc(cv_slavef),id_son(cv_slavef),stat=allocok)
2984 if (allocok.gt.0) then
2986 cv_info(2) = 2*cv_slavef
2987 istat = cv_error_memalloc
2988 if(cv_lp.gt.0)
2989 & write(cv_lp,*)'memory allocation error in ',subname
2990 return
2991 end if
2992 do i=1,cv_slavef
2993 cv_proc_workload(i)=dble(0)
2994 if(cv_constr_work) then
2995 cv_proc_maxwork(i)=maxwork(i)
2996 else
2997 cv_proc_maxwork(i)=(huge(dummy))
2998 endif
2999 cv_proc_memused(i)=dble(0)
3000 if(cv_constr_mem) then
3001 cv_proc_maxmem(i)=maxmem(i)
3002 else
3003 cv_proc_maxmem(i)=(huge(dummy))
3004 endif
3005 end do
3006 do i=1, cv_slavef
3007 cv_proc_sorted(i)=i
3008 enddo
3009 istat=0
3010 return
3011 end subroutine mumps_procinit
3012 recursive subroutine mumps_mod_propmap
3013 & (inode_entry,ctr_entry,istat)
3014 implicit none
3015 integer, intent(in)::inode_entry,ctr_entry
3016 integer, intent(inout)::istat
3017 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3018 & current,i
3019 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs4son
3020 INTEGER :: allocok
3021 character (len=48):: subname
3022 DOUBLE PRECISION :: relative_weight,costs_sons
3023 DOUBLE PRECISION :: loc_relax
3024 INTEGER :: depth
3025 INTEGER :: inode,ctr
3026 logical force_cand
3027 DOUBLE PRECISION y
3028 integer nmb_propmap_strict,share2,procsrest,current2
3029 integer k69onid
3030 INTEGER, ALLOCATABLE, DIMENSION(:) :: procs_inode
3031 LOGICAL update_ctr
3032 inode = inode_entry
3033 ctr = ctr_entry
3034 1234 CONTINUE
3035 if (ctr.le.0) then
3036 istat = 0
3037 return
3038 endif
3039 istat= -1
3040 if(cv_frere(inode).eq.cv_n+1) return
3041 subname='MOD_PROPMAP'
3042 if(.NOT.associated(cv_prop_map(inode)%ind_proc)) return
3043 nmb_sons_inode = 0
3044 costs_sons = dble(0)
3045 force_cand=(mod(cv_keep(24),2).eq.0)
3046 in = inode
3047 do while (cv_fils(in).gt.0)
3048 in=cv_fils(in)
3049 end do
3050 if (cv_fils(in).eq.0) then
3051 istat = 0
3052 goto 999
3053 endif
3054 in = -cv_fils(in)
3055 son=in
3056 do while(in.gt.0)
3057 nmb_sons_inode = nmb_sons_inode + 1
3058 if(cv_tcostw(in).le.0.0d0) then
3059 if(cv_lp.gt.0)
3060 & write(cv_lp,*)'Subtree costs for ',in,
3061 & ' should be positive in ',subname
3062 goto 999
3063 endif
3064 if (cv_keep(67) .ne. 1) then
3065 costs_sons = costs_sons + cv_tcostw(in)
3066 else
3067 costs_sons = costs_sons + cv_tcostm(in)
3068 end if
3069 in=cv_frere(in)
3070 enddo
3071 if(costs_sons.le.0d0) then
3072 if(cv_lp.gt.0)
3073 & write(cv_lp,*)'Error in ',subname
3074 & ,subname
3075 goto 999
3076 endif
3077 if ((cv_nodelayer(inode).eq.0).AND.
3078 & (cv_frere(inode).ne.cv_n+1)) then
3079 istat = 0
3080 goto 999
3081 endif
3082 IF (nmb_sons_inode.eq.1) THEN
3083 if(.NOT.associated(cv_prop_map(son)%ind_proc)) then
3084 WRITE(6,*) son, " cv_prop_map(son)%ind_proc not associated "
3085 endif
3086 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3087 inode = son
3088 GOTO 1234
3089 ENDIF
3090 ALLOCATE(procs_inode(cv_slavef),
3091 & procs4son(cv_size_ind_proc),stat=allocok)
3092 if (allocok.gt.0) then
3095 istat = cv_error_memalloc
3096 if(cv_lp.gt.0)
3097 & write(cv_lp,*)'memory allocation error in ',subname
3098 return
3099 end if
3100 procs_inode=-1
3101 nmb_procs_inode = 0
3102 do j=1,cv_slavef
3103 if( mumps_bit_get4proc(inode,j))then
3104 nmb_procs_inode = nmb_procs_inode + 1
3105 endif
3106 end do
3107 i=0
3108 do j=1,cv_slavef
3109 if(ke69 .gt.1) then
3110 call mumps_get_idp1_proc(j-1,
3111 & k69onid,ierr)
3112 else
3113 k69onid = j
3114 endif
3115 if(mumps_bit_get4proc(inode,k69onid))then
3116 i = i + 1
3117 procs_inode(i)=k69onid
3118 endif
3119 end do
3120 if(i.ne.nmb_procs_inode)then
3121 if(cv_lp.gt.0)
3122 & write(cv_lp,*)'Error in ',subname
3123 & ,subname
3124 goto 999
3125 endif
3126 if(nmb_procs_inode.eq.0) then
3127 if(cv_lp.gt.0)
3128 & write(cv_lp,*)'Error in ',subname
3129 & ,subname
3130 goto 999
3131 end if
3132 depth= max(cv_mixed_strat_bound - ctr,0)
3133 if ((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
3134 if(depth.ge.cv_mixed_strat_bound) then
3135 loc_relax = dble(1)
3136 else
3137 loc_relax = dble(1) +
3138 & max(dble(cv_keep(77))/dble(100), dble(0))
3139 endif
3140 else
3141 loc_relax = dble(1)
3142 endif
3143 in=son
3144 current = 1
3145 do while(in.gt.0)
3146 update_ctr = .true.
3147 if( ( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3148 & (nmb_procs_inode.LT.4) )
3149 & .OR. ( nmb_sons_inode.EQ.1 )
3150 & ) then
3151 procs4son = cv_prop_map(inode)%ind_proc
3152 IF (nmb_sons_inode.EQ.1) update_ctr=.false.
3153 else
3154 do k=1,cv_size_ind_proc
3155 do j=0,cv_bitsize_of_int-1
3156 procs4son(k)=ibclr(procs4son(k),j)
3157 end do
3158 end do
3159 nmb_propmap_strict=0
3160 do k=1,cv_slavef
3161 if( mumps_bit_get4proc(in,k)) then
3162 nmb_propmap_strict=nmb_propmap_strict+1
3163 call mumps_bit_set(procs4son,k,ierr)
3164 end if
3165 end do
3166 if(costs_sons.gt.0.0d0) then
3167 if (cv_keep(67) .ne. 1) then
3168 relative_weight=cv_tcostw(in)/costs_sons
3169 else
3170 relative_weight=cv_tcostm(in)/costs_sons
3171 endif
3172 else
3173 relative_weight=0.0d0
3174 endif
3175 current = nmb_propmap_strict
3176 share2=
3177 & max(0,nint(relative_weight*(loc_relax-dble(1))*
3178 & dble(nmb_procs_inode)))
3179 procsrest=nmb_procs_inode - nmb_propmap_strict
3180 share2=min(share2,procsrest)
3181 CALL random_number(y)
3182 current2=int(dble(y)*dble(procsrest))
3183 k=1
3184 i=1
3185 do while((share2.gt.0).and.(i.le.2))
3186 do j=1,nmb_procs_inode
3187 if(share2.le.0) exit
3188 k69onid = procs_inode(j)
3189 if(( mumps_bit_get4proc(inode,k69onid)).AND.
3190 & (.NOT.mumps_bit_get(procs4son,k69onid))) then
3191 if(k.ge.current2)then
3192 call mumps_bit_set(procs4son,k69onid,ierr)
3193 if(ierr.ne.0) then
3194 if(cv_lp.gt.0)write(cv_lp,*)
3195 & 'BIT_SET signalled error to',subname
3196 istat = ierr
3197 goto 999
3198 end if
3199 share2 = share2 - 1
3200 endif
3201 k=k+1
3202 end if
3203 enddo
3204 i=i+1
3205 enddo
3206 if(share2.ne.0) then
3207 if(cv_lp.gt.0) write(cv_lp,*)
3208 & 'Error reported in ',subname
3209 goto 999
3210 end if
3211 end if
3212 ierr=0
3213 in1=in
3214 cv_prop_map(in1)%ind_proc=procs4son
3215 IF (update_ctr) THEN
3216 call mumps_mod_propmap(in1,ctr-1,ierr)
3217 ELSE
3218 call mumps_mod_propmap(in1,ctr,ierr)
3219 ENDIF
3220 if(ierr.ne.0) then
3221 if(cv_lp.gt.0) write(cv_lp,*)
3222 & 'Error reported in ',subname
3223 istat=ierr
3224 goto 999
3225 endif
3226 in=cv_frere(in)
3227 end do
3228 istat = 0
3229 999 continue
3230 if (allocated(procs_inode)) DEALLOCATE(procs_inode)
3231 if (allocated(procs4son)) DEALLOCATE(procs4son)
3232 return
3233 end subroutine mumps_mod_propmap
3234 recursive subroutine mumps_propmap(inode_entry, ctr_entry, istat)
3235 implicit none
3236 integer, intent(in)::inode_entry,ctr_entry
3237 integer, intent(inout)::istat
3238 integer::j,k,in,in1,ierr,son,nmb_procs_inode,nmb_sons_inode,
3239 & share,current,offset,
3240 & in_tmp,nfront,npiv,ncb,
3241 & keep48_loc,min_cand_needed
3242 integer, dimension(:), allocatable :: procs4son
3243 character (len=48):: subname
3244 DOUBLE PRECISION :: relative_weight,costs_sons, shtemp
3245 DOUBLE PRECISION :: costs_sons_real
3246 DOUBLE PRECISION :: partofaproc
3247 LOGICAL :: skipsmallnodes
3248 parameter(partofaproc=0.01d0)
3249 DOUBLE PRECISION :: loc_relax
3250 INTEGER :: depth
3251 logical force_cand
3254 DOUBLE PRECISION y
3255 integer nmb_propmap_strict,share2,procsrest,current2
3256 integer k69onid,nb_free_procs,local_son_indice,nb_procs_for_sons,
3257 & ptr_upper_ro_procs
3258 integer :: inode, ctr
3259 INTEGER :: allocok
3260 logical upper_round_off,are_sons_treated
3261 DOUBLE PRECISION tmp_cost
3262 inode = inode_entry
3263 ctr = ctr_entry
3264 1234 CONTINUE
3265 if (ctr.le.0) then
3266 istat = 0
3267 return
3268 endif
3269 istat= -1
3270 if(cv_frere(inode).eq.cv_n+1) return
3271 subname='PROPMAP'
3272 nmb_procs_inode = 0
3273 do j=1,cv_slavef
3274 if( mumps_bit_get4proc(inode,j))
3275 & nmb_procs_inode = nmb_procs_inode + 1
3276 end do
3277 if(nmb_procs_inode.eq.0) then
3278 if(cv_lp.gt.0)
3279 & write(cv_lp,*)'Error in ',subname
3280 & ,subname
3281 return
3282 end if
3283 if ((cv_nodelayer(inode).eq.0).AND.
3284 & (cv_frere(inode).ne.cv_n+1)) then
3285 istat = 0
3286 return
3287 endif
3288 ptr_upper_ro_procs=1
3289 work_per_proc(1:cv_slavef)=0.0d0
3290 id_son(1:cv_slavef)=0
3291 nmb_sons_inode = 0
3292 costs_sons = dble(0)
3293 force_cand=(mod(cv_keep(24),2).eq.0)
3294 min_cand_needed=0
3295 in = inode
3296 do while (cv_fils(in).gt.0)
3297 in=cv_fils(in)
3298 end do
3299 if (cv_fils(in).eq.0) then
3300 istat = 0
3301 return
3302 endif
3303 in = -cv_fils(in)
3304 son=in
3305 do while(in.gt.0)
3306 nmb_sons_inode = nmb_sons_inode + 1
3307 if(cv_tcostw(in).le.0.0d0) then
3308 if(cv_lp.gt.0)
3309 & write(cv_lp,*)'Subtree costs for ',in,
3310 & ' should be positive in ',subname
3311 return
3312 endif
3313 if (cv_keep(67) .ne. 1) then
3314 costs_sons = costs_sons + cv_tcostw(in)
3315 else
3316 costs_sons = costs_sons + cv_tcostm(in)
3317 endif
3318 in=cv_frere(in)
3319 enddo
3320 IF (nmb_sons_inode.eq.1) THEN
3321 if(.NOT.associated(cv_prop_map(son)%ind_proc)) then
3322 call mumps_propmap_init(son,ierr)
3323 if(ierr.ne.0) then
3324 if(cv_lp.gt.0)
3325 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3326 & ,subname
3327 istat = ierr
3328 goto 999
3329 end if
3330 endif
3331 ctr = ctr -1
3332 cv_prop_map(son)%ind_proc = cv_prop_map(inode)%ind_proc
3333 inode = son
3334 GOTO 1234
3335 ENDIF
3336 costs_sons_real = costs_sons
3337 skipsmallnodes = .true.
3338 IF (costs_sons_real.gt.0.0d0) then
3339 in = son
3340 do while (in.gt.0)
3341 if (cv_keep(67) .ne. 1) then
3342 relative_weight=cv_tcostw(in)/costs_sons_real
3343 else
3344 relative_weight=cv_tcostm(in)/costs_sons_real
3345 endif
3346 shtemp = relative_weight*dble(nmb_procs_inode)
3347 IF (shtemp.lt.partofaproc) THEN
3348 if (cv_keep(67) .ne. 1) then
3349 costs_sons = costs_sons - cv_tcostw(in)
3350 else
3351 costs_sons = costs_sons - cv_tcostm(in)
3352 endif
3353 ENDIF
3354 in=cv_frere(in)
3355 enddo
3356 IF (costs_sons.LT. partofaproc*costs_sons_real) THEN
3357 costs_sons = costs_sons_real
3358 skipsmallnodes = .false.
3359 ENDIF
3360 ENDIF
3361 if(costs_sons.le.0.0d0) then
3362 if(cv_lp.gt.0)
3363 & write(cv_lp,*)'Error in ',subname
3364 & ,subname
3365 return
3366 endif
3367 if(cv_relax.le.0.0d0) then
3368 if(cv_lp.gt.0)
3369 & write(cv_lp,*)'Error in ',subname,'. Wrong cv_relax'
3370 return
3371 endif
3372 ALLOCATE(procs4son(cv_size_ind_proc),stat=allocok)
3373 IF (allocok .GT. 0) THEN
3376 istat = cv_error_memalloc
3377 if(cv_lp.gt.0)
3378 & write(cv_lp,*)
3379 & 'Memory allocation error in ',subname
3380 return
3381 ENDIF
3382 depth= max(cv_n - ctr,0)
3383 if(cv_keep(24).eq.8) then
3384 loc_relax = cv_relax
3385 elseif ((cv_keep(24).eq.16).OR.(cv_keep(24).eq.18)) then
3386 loc_relax = cv_relax
3387 elseif (cv_keep(24).eq.10) then
3388 loc_relax = cv_relax
3389 elseif ((cv_keep(24).eq.12).OR.(cv_keep(24).eq.14)) then
3390 if(depth.ge.cv_mixed_strat_bound) then
3391 loc_relax = cv_relax
3392 else
3393 loc_relax = cv_relax +
3394 & max(dble(cv_keep(77))/dble(100), dble(0))
3395 endif
3396 endif
3397 in=son
3398 current = 1
3399 local_son_indice=1
3400 nb_procs_for_sons=0
3401 upper_round_off=.false.
3402 are_sons_treated=.true.
3403 do while(in.gt.0)
3404 if( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3405 & (nmb_procs_inode.LT.4) ) then
3406 procs4son = cv_prop_map(inode)%ind_proc
3407 are_sons_treated=.false.
3408 nb_procs_for_sons=nmb_procs_inode
3409 nmb_propmap_strict=nmb_procs_inode
3410 elseif(nmb_procs_inode .LE. cv_keep(83)) then
3411 procs4son = cv_prop_map(inode)%ind_proc
3412 are_sons_treated=.false.
3413 nb_procs_for_sons=nmb_procs_inode
3414 nmb_propmap_strict=nmb_procs_inode
3415 else
3416 do k=1,cv_size_ind_proc
3417 do j=0,cv_bitsize_of_int-1
3418 procs4son(k)=ibclr(procs4son(k),j)
3419 end do
3420 end do
3421 if(costs_sons.gt.0.0d0) then
3422 if (cv_keep(67) .ne. 1) then
3423 relative_weight=cv_tcostw(in)/costs_sons
3424 else
3425 relative_weight=cv_tcostm(in)/costs_sons
3426 endif
3427 else
3428 relative_weight=dble(0)
3429 endif
3430 shtemp = relative_weight*dble(nmb_procs_inode)
3431 IF ( (shtemp.LT.partofaproc)
3432 & .AND. ( skipsmallnodes ) ) THEN
3433 share = 1
3434 do j=current,cv_slavef
3435 if(ke69 .gt.1) then
3436 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3437 else
3438 k69onid = j
3439 endif
3440 if( mumps_bit_get4proc(inode,k69onid)) then
3441 call mumps_bit_set(procs4son,k69onid,ierr)
3442 if(ierr.ne.0) then
3443 if(cv_lp.gt.0)write(cv_lp,*)
3444 & 'BIT_SET signalled error to',subname
3445 istat = ierr
3446 goto 999
3447 end if
3448 share = share -1
3449 exit
3450 endif
3451 enddo
3452 if (share.gt.0) then
3453 do j=1,current-1
3454 if(ke69 .gt.1) then
3455 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3456 else
3457 k69onid = j
3458 endif
3459 if( mumps_bit_get4proc(inode,k69onid)) then
3460 call mumps_bit_set(procs4son,k69onid,ierr)
3461 if(ierr.ne.0) then
3462 if(cv_lp.gt.0)write(cv_lp,*)
3463 & 'BIT_SET signalled error to',subname
3464 istat = ierr
3465 goto 999
3466 end if
3467 share = share -1
3468 exit
3469 endif
3470 enddo
3471 endif
3472 if(share.ne.0) then
3473 if(cv_lp.gt.0) write(cv_lp,*)
3474 & 'Error reported in ',subname
3475 goto 999
3476 end if
3477 if(.NOT.associated(cv_prop_map(in)%ind_proc)) then
3478 call mumps_propmap_init(in,ierr)
3479 if(ierr.ne.0) then
3480 if(cv_lp.gt.0)
3481 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3482 & ,subname
3483 istat = ierr
3484 goto 999
3485 end if
3486 endif
3487 current = j
3488 cv_prop_map(in)%ind_proc = procs4son
3489 in = cv_frere(in)
3490 cycle
3491 ENDIF
3492 share = max(1,nint(shtemp))
3493 if (dble(share).ge.shtemp) then
3494 upper_round_off=.true.
3495 else
3496 upper_round_off = .false.
3497 endif
3498 share=min(share,nmb_procs_inode)
3499 nmb_propmap_strict=share
3500 nb_procs_for_sons=nb_procs_for_sons+nmb_propmap_strict
3501 offset=1
3502 do j=current,cv_slavef
3503 if(ke69 .gt.1) then
3504 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3505 else
3506 k69onid = j
3507 endif
3508 if( mumps_bit_get4proc(inode,k69onid)) then
3509 call mumps_bit_set(procs4son,k69onid,ierr)
3510 if(ierr.ne.0) then
3511 if(cv_lp.gt.0)write(cv_lp,*)
3512 & 'BIT_SET signalled error to',subname
3513 istat = ierr
3514 goto 999
3515 end if
3516 share = share-1
3517 if(share.le.0) then
3518 current = j + offset
3519 if(current.gt.cv_slavef) current = 1
3520 exit
3521 end if
3522 end if
3523 end do
3524 if(share.gt.0) then
3525 do j=1,current-1
3526 if(ke69 .gt.1) then
3527 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3528 else
3529 k69onid = j
3530 endif
3531 if( mumps_bit_get4proc(inode,k69onid)) then
3532 call mumps_bit_set(procs4son,k69onid,ierr)
3533 if(ierr.ne.0) then
3534 if(cv_lp.gt.0)write(cv_lp,*)
3535 & 'BIT_SET signalled error to',subname
3536 istat = ierr
3537 goto 999
3538 end if
3539 share = share-1
3540 if(share.le.0) then
3541 current = j + offset
3542 if(current.gt.cv_slavef) current = 1
3543 exit
3544 end if
3545 end if
3546 end do
3547 endif
3548 if(share.ne.0) then
3549 if(cv_lp.gt.0) write(cv_lp,*)
3550 & 'Error reported in ',subname
3551 goto 999
3552 end if
3553 if(.not.upper_round_off)then
3554 if(local_son_indice.lt.cv_slavef)then
3555 id_son(local_son_indice)=in
3556 if ( cv_keep(67) .ne. 1 ) then
3557 work_per_proc(local_son_indice)=cv_tcostw(in)/
3558 & dble(nmb_propmap_strict)
3559 else
3560 work_per_proc(local_son_indice)=cv_tcostm(in)/
3561 & dble(nmb_propmap_strict)
3562 endif
3563 local_son_indice=local_son_indice+1
3564 if(local_son_indice.eq.cv_slavef)then
3565 CALL mumps_sort_msort(ierr,cv_slavef,id_son,
3566 & work_per_proc)
3567 if(ierr.ne.0) then
3568 if(cv_lp.gt.0)
3569 & write(cv_lp,*)
3570 & 'Error reported by MUMPS_SORT_MSORT in ',subname
3571 istat = ierr
3572 goto 999
3573 endif
3574 endif
3575 else
3576 current2=cv_slavef
3577 if (cv_keep(67) .ne.1) then
3578 tmp_cost=cv_tcostw(in)/dble(nmb_propmap_strict)
3579 else
3580 tmp_cost=cv_tcostm(in)/dble(nmb_propmap_strict)
3581 endif
3582 do while(current2.ge.1)
3583 if(tmp_cost.lt.work_per_proc(current2))exit
3584 current2=current2-1
3585 enddo
3586 if(current2.ne.cv_slavef)then
3587 if(current2.eq.0)then
3588 current2=1
3589 endif
3590 do j=cv_slavef-1,current2,-1
3591 id_son(j+1)=id_son(j)
3592 work_per_proc(j+1)=work_per_proc(j)
3593 enddo
3594 id_son(current2)=in
3595 work_per_proc(current2)=tmp_cost
3596 endif
3597 endif
3598 endif
3599 upper_round_off=.false.
3600 endif
3601 if(.NOT.associated(cv_prop_map(in)%ind_proc)) then
3602 call mumps_propmap_init(in,ierr)
3603 if(ierr.ne.0) then
3604 if(cv_lp.gt.0)
3605 & write(cv_lp,*)'PROPMAP_INIT signalled error to'
3606 & ,subname
3607 istat = ierr
3608 goto 999
3609 end if
3610 endif
3611 cv_prop_map(in)%ind_proc = procs4son
3612 in=cv_frere(in)
3613 end do
3614 if(are_sons_treated)then
3615 if(nb_procs_for_sons.ne.nmb_procs_inode)then
3616 do j=1,nmb_procs_inode-nb_procs_for_sons
3617 procs4son=cv_prop_map(id_son(j))%ind_proc
3618 do while(current.le.cv_slavef)
3619 if(ke69 .gt.1) then
3620 call mumps_get_idp1_proc(current-1,k69onid,ierr)
3621 else
3622 k69onid = current
3623 endif
3624 if(.NOT.mumps_bit_get4proc(inode,k69onid)) then
3625 current=current+1
3626 else
3627 exit
3628 endif
3629 enddo
3630 call mumps_bit_set(procs4son,k69onid,ierr)
3631 cv_prop_map(id_son(j))%ind_proc=procs4son
3632 enddo
3633 ptr_upper_ro_procs=min(j,nmb_procs_inode-nb_procs_for_sons)
3634 endif
3635 endif
3636 in=son
3637 current = 1
3638 do while(in.gt.0)
3639 if( (nmb_sons_inode.ge.nmb_procs_inode).AND.
3640 & (nmb_procs_inode.LT.4) ) then
3641 procs4son = cv_prop_map(inode)%ind_proc
3642 elseif(nmb_procs_inode .LE. cv_keep(83)) then
3643 procs4son = cv_prop_map(inode)%ind_proc
3644 else
3645 procs4son = cv_prop_map(in)%ind_proc
3646 in_tmp=in
3647 nfront=cv_nfsiz(in_tmp)
3648 npiv=0
3649 in_tmp=in_tmp
3650 do while(in_tmp.gt.0)
3651 if (cv_blkon) then
3652 npiv = npiv + cv_sizeofblocks(in_tmp)
3653 else
3654 npiv=npiv+1
3655 endif
3656 in_tmp=cv_fils(in_tmp)
3657 end do
3658 ncb=nfront-npiv
3659 if (force_cand) then
3660 if (cv_keep(50) == 0) then
3661 keep48_loc=0
3662 else
3663 keep48_loc=3
3664 endif
3665 if (cv_keep(48).EQ.5) keep48_loc = 5
3666 min_cand_needed=
3668 & (cv_slavef, keep48_loc,cv_keep8(21),
3669 & cv_keep(50),
3670 & nfront,ncb,
3671 & cv_keep(375), cv_keep(119))
3672 min_cand_needed=min(cv_slavef,min_cand_needed+1)
3673 else
3674 min_cand_needed = 0
3675 endif
3676 min_cand_needed = max(min_cand_needed, cv_keep(91))
3677 if(costs_sons.gt.0.0d0) then
3678 if (cv_keep(67) .ne.1) then
3679 relative_weight=cv_tcostw(in)/costs_sons
3680 else
3681 relative_weight=cv_tcostm(in)/costs_sons
3682 endif
3683 else
3684 relative_weight=dble(0)
3685 endif
3686 nmb_propmap_strict=0
3687 do k=1,cv_slavef
3688 if( mumps_bit_get(procs4son,k)) then
3689 nmb_propmap_strict=nmb_propmap_strict+1
3690 end if
3691 end do
3692 offset=1
3693 share2=
3694 & max(0,nint(relative_weight*(loc_relax-dble(1))*
3695 & dble(nmb_procs_inode)))
3696 share2 = max(share2, min_cand_needed -nmb_propmap_strict,
3697 & (cv_keep(83)/2) - nmb_propmap_strict)
3698 procsrest=nmb_procs_inode - nmb_propmap_strict
3699 share2=min(share2,procsrest)
3700 share2 = 0
3701 CALL random_number(y)
3702 current2 =int(dble(y)*dble(procsrest))
3703 nb_free_procs=1
3704 do j=1,cv_slavef
3705 if(share2.le.0) exit
3706 if(ke69 .gt.1) then
3707 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3708 else
3709 k69onid = j
3710 endif
3711 if(( mumps_bit_get4proc(inode,k69onid)).AND.
3712 & (.NOT.mumps_bit_get(procs4son,k69onid))) then
3713 if(nb_free_procs.ge.current2)then
3714 call mumps_bit_set(procs4son,k69onid,ierr)
3715 if(ierr.ne.0) then
3716 if(cv_lp.gt.0)write(cv_lp,*)
3717 & 'BIT_SET signalled error to',subname
3718 istat = ierr
3719 goto 999
3720 end if
3721 share2 = share2 - 1
3722 endif
3723 nb_free_procs=nb_free_procs+1
3724 end if
3725 end do
3726 if(share2.gt.0) then
3727 do j=1,cv_slavef
3728 if(share2.le.0) exit
3729 if(ke69 .gt.1) then
3730 call mumps_get_idp1_proc(j-1,k69onid,ierr)
3731 else
3732 k69onid = j
3733 endif
3734 if(( mumps_bit_get4proc(inode,k69onid)).AND.
3735 & (.NOT.mumps_bit_get(procs4son,k69onid))) then
3736 call mumps_bit_set(procs4son,k69onid,ierr)
3737 if(ierr.ne.0) then
3738 if(cv_lp.gt.0)write(cv_lp,*)
3739 & 'BIT_SET signalled error to',subname
3740 istat = ierr
3741 goto 999
3742 end if
3743 share2 = share2 - 1
3744 end if
3745 end do
3746 endif
3747 if(share2.ne.0) then
3748 if(cv_lp.gt.0) write(cv_lp,*)
3749 & 'Error reported in ',subname
3750 goto 999
3751 end if
3752 endif
3753 ierr=0
3754 in1=in
3755 cv_prop_map(in1)%ind_proc = procs4son
3756 IF (nmb_sons_inode.EQ.1) DEALLOCATE(procs4son)
3757 call mumps_propmap(in1,ctr-1,ierr)
3758 if(ierr.ne.0) then
3759 if(cv_lp.gt.0) write(cv_lp,*)
3760 & 'Error reported in ',subname
3761 istat=ierr
3762 goto 999
3763 endif
3764 in=cv_frere(in)
3765 end do
3766 istat = 0
3767 999 CONTINUE
3768 if (allocated(procs4son)) DEALLOCATE(procs4son)
3769 return
3770 end subroutine mumps_propmap
3771 subroutine mumps_propmap_init(inode,istat)
3772 implicit none
3773 integer, intent(in)::inode
3774 integer, intent(out)::istat
3775 integer j,k,allocok
3776 character (len=48):: subname
3777 istat = -1
3778 if(cv_frere(inode).eq.cv_n+1) return
3779 subname='PROPMAP_INIT'
3780 if(.not.associated(
3781 & cv_prop_map(inode)%ind_proc)) then
3782 allocate(cv_prop_map(inode)%ind_proc
3783 & (cv_size_ind_proc),stat=allocok)
3784 if (allocok.gt.0) then
3787 istat = cv_error_memalloc
3788 if(cv_lp.gt.0)
3789 & write(cv_lp,*)
3790 & 'memory allocation error in ',subname
3791 return
3792 end if
3793 end if
3794 do k=1,cv_size_ind_proc
3795 do j=0,cv_bitsize_of_int-1
3796 cv_prop_map(inode)%ind_proc(k)=
3797 & ibclr(cv_prop_map(inode)%ind_proc(k),j)
3798 end do
3799 end do
3800 istat = 0
3801 return
3802 end subroutine mumps_propmap_init
3803 subroutine mumps_propmap_term(inode,istat)
3804 integer,intent(in)::inode
3805 integer,intent(out)::istat
3806 integer ierr
3807 character (len=48):: subname
3808 subname='PROPMAP_TERM'
3809 istat =-1
3810 if(associated(cv_prop_map(inode)%ind_proc)) then
3811 deallocate(cv_prop_map(inode)%ind_proc, stat=ierr)
3812 if(ierr.ne.0) then
3813 if(cv_lp.gt.0)
3814 & write(cv_lp,*)'Memory deallocation error in ', subname
3815 istat = cv_error_memdeloc
3816 return
3817 endif
3818 nullify(cv_prop_map(inode)%ind_proc)
3819 end if
3820 istat =0
3821 return
3822 end subroutine mumps_propmap_term
3823 subroutine mumps_propmap4split(inode,ifather,istat)
3824 implicit none
3825 integer,intent(in)::inode,ifather
3826 integer,intent(out)::istat
3827 character (len=48):: subname
3828 istat= -1
3829 subname='PROPMAP4SPLIT'
3830 if((cv_frere(inode).eq.cv_n+1).OR.(cv_frere(ifather).eq.cv_n+1)
3831 & .OR.(.NOT.associated(cv_prop_map(inode)%ind_proc))) then
3832 if(cv_lp.gt.0)
3833 & write(cv_lp,*)'tototo signalled error to'
3834 & ,subname
3835 return
3836 endif
3837 if(.NOT.associated(cv_prop_map(ifather)%ind_proc)) then
3838 call mumps_propmap_init(ifather,ierr)
3839 if(ierr.ne.0) then
3840 if(cv_lp.gt.0)
3841 & write(cv_lp,*)'PROPMAP_INIT signalled error to '
3842 & ,subname
3843 istat = ierr
3844 return
3845 end if
3846 endif
3847 cv_prop_map(ifather)%ind_proc =
3848 & cv_prop_map(inode)%ind_proc
3849 istat=0
3850 return
3851 end subroutine mumps_propmap4split
3852 subroutine mumps_rootlist(istat)
3853 implicit none
3854 integer,intent(out)::istat
3855 integer i,allocok
3856 character (len=48):: subname
3857 istat=-1
3858 subname='ROOTLIST'
3860 & cv_layerl0_sorted_costw(cv_maxnsteps),stat=allocok)
3861 if (allocok.gt.0) then
3863 cv_info(2) = 12*cv_maxnsteps
3864 istat = cv_error_memalloc
3865 if(cv_lp.gt.0)
3866 & write(cv_lp,*)
3867 & 'memory allocation error in ',subname
3868 return
3869 end if
3870 do i=1,cv_maxnsteps
3871 cv_layerl0_sorted_costw(i)=dble(0)
3872 cv_layerl0_array(i)=0
3873 end do
3875 cv_layerl0_end = 0
3877 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
3878 & then
3879 if(cv_lp.gt.0)
3880 & write(cv_lp,*)'Error:tcost must be allocated in ',subname
3881 return
3882 end if
3883 cv_nbsa=0
3884 do i=1,cv_n
3885 if (cv_frere(i).eq.0) then
3888 IF (cv_tcostw(i).GT.mincostw)
3895 end if
3896 end do
3897 if(cv_nbsa.eq.0) then
3898 if(cv_lp.gt.0)
3899 & write(cv_lp,*)'Error:no root nodes in ',subname
3900 return
3901 end if
3905 IF (ierr .ne.0) then
3906 if(cv_lp.gt.0)
3907 & write(cv_lp,*)
3908 & 'Error reported by MUMPS_SORT_MSORT in ',subname
3909 istat = ierr
3910 return
3911 ENDIF
3914 istat=0
3915 return
3916 end subroutine mumps_rootlist
3917 subroutine mumps_select_type3(istat)
3918 implicit none
3919 integer,intent(out)::istat
3920 character (len=48):: subname
3921 subname='SELECT_TYPE3'
3922 CALL mumps_select_k38k20(cv_n, slavef, cv_mp, cv_icntl(13),
3923 & cv_keep(1), cv_frere(1), cv_nfsiz(1), istat)
3924 IF (istat .NE. 0) THEN
3925 if(cv_lp.gt.0)
3926 & write(cv_lp,*)
3927 & 'Error: Can''t select type 3 node in ',subname
3928 ELSE IF (cv_keep(38) .ne. 0) then
3929 IF(cv_nodelayer(cv_keep(38)).eq.0.and.
3930 & (cv_keep(60).EQ.0)) then
3931 cv_keep(38)=0
3932 ELSE
3933 cv_nodetype(cv_keep(38))=3
3934 ENDIF
3935 ENDIF
3936 RETURN
3937 end subroutine mumps_select_type3
3938 subroutine mumps_setup_cand(istat)
3939 integer,intent(out):: istat
3940 integer :: i,dummy,layernmb,allocok
3941 integer :: montype, nbcand, inode
3942 character (len=48) :: subname
3943 istat=-1
3944 subname='SETUP_CAND'
3945 cv_nb_niv2=0
3946 do i=1,cv_n
3948 end do
3950 nullify(cv_par2_nodes,cv_cand)
3951 if(cv_nb_niv2.GT.0) then
3952 allocate(cv_par2_nodes(cv_nb_niv2),
3953 & cv_cand(cv_nb_niv2,cv_slavef+1),stat=allocok)
3954 if (allocok.gt.0) then
3957 istat = cv_error_memalloc
3958 if(cv_lp.gt.0)
3959 & write(cv_lp,*)
3960 & 'memory allocation error in ',subname
3961 return
3962 end if
3964 cv_cand(:,:)=0
3965 dummy=1
3966 do layernmb=1,cv_maxlayer
3967 do i=1,cv_layer_p2node(layernmb)%nmb_t2s
3968 inode = cv_layer_p2node(layernmb)%t2_nodenumbers(i)
3969 cv_par2_nodes(dummy)= inode
3970 nbcand = cv_layer_p2node(layernmb)%t2_cand(i,cv_slavef+1)
3971 cv_cand(dummy,:)=cv_layer_p2node(layernmb)%t2_cand(i,:)
3972 montype= cv_nodetype(inode)
3973 if (montype.eq.tsplit_beg) then
3975 & cv_frere(1), cv_nodetype(1),
3976 & cv_par2_nodes(1), cv_procnode(1), cv_cand(1,1),
3977 & inode,
3978 & slavef, dummy, nbcand, istat)
3979 endif
3980 dummy=dummy+1
3981 enddo
3982 enddo
3983 if(dummy.ne.cv_nb_niv2+1) then
3984 if(cv_lp.gt.0)
3985 & write(cv_lp,*)'Error in ',subname,
3986 & ' : dummy =',dummy,'nbniv2 =',cv_nb_niv2
3987 return
3988 endif
3989 endif
3990 istat=0
3991 return
3992 end subroutine mumps_setup_cand
3993 subroutine mumps_sortprocs(map_strat,workload,memused,
3994 & inode,istat)
3995 implicit none
3996 integer,intent(in)::map_strat
3997 DOUBLE PRECISION,dimension(:),intent(in)::workload, memused
3998 integer, optional::inode,istat
3999 integer i,j,aux_int,nmb_procs,pos
4000 character (len=48):: subname
4001 logical enforce_prefsort
4002 logical use_propmap
4003 logical,SAVE::init1 = .false.
4004 logical,SAVE::init2 = .false.
4005 subname='SORTPROCS'
4006 enforce_prefsort=.true.
4007 use_propmap=present(inode)
4008 if(present(istat))istat=-1
4009 if((map_strat.ne.cv_equilib_flops).and.
4010 & (map_strat.ne.cv_equilib_mem)) then
4011 if(cv_lp.gt.0)
4012 & write(cv_lp,*)'error in ',subname
4013 return
4014 endif
4015 i=0
4016 do i = 1, cv_slavef
4017 cv_proc_sorted(i)=i
4018 enddo
4019 if (.not.present(inode)) then
4020 if(.NOT.init1) then
4021 init1=.true.
4022 end if
4023 do i=1,cv_slavef-1
4024 do j=i+1,cv_slavef
4025 if(((workload(cv_proc_sorted(j)).lt.
4026 & workload(cv_proc_sorted(i))).AND.
4027 & (map_strat.eq.cv_equilib_flops))
4028 & .OR.
4029 & ((memused(cv_proc_sorted(j)).lt.
4030 & memused(cv_proc_sorted(i))).AND.
4031 & (map_strat.eq.cv_equilib_mem)))then
4032 aux_int=cv_proc_sorted(j)
4034 cv_proc_sorted(i)=aux_int
4035 end if
4036 end do
4037 end do
4038 else if(present(inode)) then
4039 if (use_propmap) then
4040 if(.NOT.init2) then
4041 init2=.true.
4042 end if
4043 nmb_procs=0
4044 do pos=1,cv_slavef
4045 if( mumps_bit_get4proc(inode,pos)) then
4046 if (pos.le.nmb_procs) then
4047 exit
4048 else
4049 nmb_procs=nmb_procs+1
4050 aux_int=cv_proc_sorted(pos)
4051 cv_proc_sorted(pos)=
4052 & cv_proc_sorted(nmb_procs)
4053 cv_proc_sorted(nmb_procs)=aux_int
4054 cycle
4055 end if
4056 end if
4057 end do
4058 end if
4059 do i=1,nmb_procs-1
4060 do j=i+1,nmb_procs
4061 if(((workload(cv_proc_sorted(j)).lt.
4062 & workload(cv_proc_sorted(i))).AND.
4063 & (map_strat.eq.cv_equilib_flops))
4064 & .OR.
4065 & ((memused(cv_proc_sorted(j)).lt.
4066 & memused(cv_proc_sorted(i))).AND.
4067 & (map_strat.eq.cv_equilib_mem)))then
4068 aux_int=cv_proc_sorted(j)
4070 cv_proc_sorted(i)=aux_int
4071 end if
4072 end do
4073 end do
4074 do i=nmb_procs+1,cv_slavef-1
4075 do j=i+1,cv_slavef
4076 if(((workload(cv_proc_sorted(j)).lt.
4077 & workload(cv_proc_sorted(i))).AND.
4078 & (map_strat.eq.cv_equilib_flops))
4079 & .OR.
4080 & ((memused(cv_proc_sorted(j)).lt.
4081 & memused(cv_proc_sorted(i))).AND.
4082 & (map_strat.eq.cv_equilib_mem)))then
4083 aux_int=cv_proc_sorted(j)
4085 cv_proc_sorted(i)=aux_int
4086 end if
4087 end do
4088 end do
4089 if(.NOT.enforce_prefsort) then
4090 if(((2.0d0*workload(cv_proc_sorted(nmb_procs+1)).lt.
4091 & workload(cv_proc_sorted(1))).AND.
4092 & (map_strat.eq.cv_equilib_flops))
4093 & .OR.
4094 & ((2.0d0*memused(cv_proc_sorted(nmb_procs+1)).lt.
4095 & memused(cv_proc_sorted(1))).AND.
4096 & (map_strat.eq.cv_equilib_mem)))then
4097 do i=1,cv_slavef-1
4098 do j=i+1,cv_slavef
4099 if(((workload(cv_proc_sorted(j)).lt.
4100 & workload(cv_proc_sorted(i))).AND.
4101 & (map_strat.eq.cv_equilib_flops))
4102 & .OR.
4103 & ((memused(cv_proc_sorted(j)).lt.
4104 & memused(cv_proc_sorted(i))).AND.
4105 & (map_strat.eq.cv_equilib_mem)))then
4106 aux_int=cv_proc_sorted(j)
4108 cv_proc_sorted(i)=aux_int
4109 end if
4110 end do
4111 end do
4112 endif
4113 end if
4114 endif
4115 if(present(istat))istat=0
4116 return
4117 end subroutine mumps_sortprocs
4118 subroutine mumps_store_globals(ne,nfsiz,frere,fils,keep,KEEP8,
4119 & info,procnode,ssarbr,nbsa)
4120 implicit none
4121 integer,dimension(cv_n),intent(inout)::ne,nfsiz,frere,fils,
4122 & procnode,ssarbr
4123 integer, intent(inout):: keep(500),info(80),nbsa
4124 INTEGER(8) KEEP8(150)
4125 ne=cv_ne
4126 nfsiz=cv_nfsiz
4127 frere=cv_frere
4128 fils=cv_fils
4129 keep(2) =cv_keep(2)
4130 keep(20)=cv_keep(20)
4131 keep(28)=cv_nsteps
4132 keep(38)=cv_keep(38)
4133 keep(56)=cv_keep(56)
4134 keep(61)=cv_keep(61)
4135 info(5)=cv_info(5)
4136 info(6)=cv_nsteps
4137 procnode=cv_procnode
4138 ssarbr=cv_ssarbr
4139 nbsa=cv_nbsa
4140 end subroutine mumps_store_globals
4141 subroutine mumps_termglob(istat)
4142 implicit none
4143 integer,intent(out)::istat
4144 integer i,ierr,layernmb
4145 character (len=48):: subname
4146 istat=-1
4147 subname='TERMGLOB'
4155 & stat=ierr)
4156 if(ierr.ne.0) then
4157 if(cv_lp.gt.0)
4158 & write(cv_lp,*)'Memory deallocation error in ',subname
4159 istat = cv_error_memdeloc
4160 return
4161 end if
4162 deallocate(work_per_proc,id_son,stat=ierr)
4163 if(ierr.ne.0) then
4164 if(cv_lp.gt.0)
4165 & write(cv_lp,*)'Memory deallocation error in ',subname
4166 istat = cv_error_memdeloc
4167 return
4168 end if
4169 do layernmb=1,cv_maxlayer
4170 if(cv_layer_p2node(layernmb)%nmb_t2s.gt.0) then
4171 deallocate(cv_layer_p2node(layernmb)%t2_nodenumbers,
4172 & cv_layer_p2node(layernmb)%t2_cand,
4173 & cv_layer_p2node(layernmb)%t2_candcostw,
4174 & cv_layer_p2node(layernmb)%t2_candcostm,
4175 & stat=ierr)
4176 if(ierr.ne.0) then
4177 if(cv_lp.gt.0)
4178 & write(cv_lp,*)'Memory deallocation error in ',
4179 & subname
4180 istat = cv_error_memdeloc
4181 return
4182 end if
4183 endif
4184 enddo
4185 if(associated(cv_layer_p2node)) then
4186 deallocate(cv_layer_p2node,stat=ierr)
4187 if(ierr.ne.0) then
4188 if(cv_lp.gt.0)
4189 & write(cv_lp,*)'Memory deallocation error in ',subname
4190 istat = cv_error_memdeloc
4191 return
4192 end if
4193 end if
4194 do i=1,cv_n
4195 call mumps_propmap_term(i,ierr)
4196 if(ierr.ne.0) then
4197 if(cv_lp.gt.0)
4198 & write(cv_lp,*)'PROPMAP_TERM signalled error in ',
4199 & subname
4200 istat = ierr
4201 return
4202 end if
4203 end do
4204 if(associated(cv_prop_map))deallocate(cv_prop_map,stat=ierr)
4205 if(ierr.ne.0) then
4206 if(cv_lp.gt.0)
4207 & write(cv_lp,*)'Memory deallocation error in ',subname
4208 istat = cv_error_memdeloc
4209 return
4210 end if
4211 istat=0
4212 return
4213 end subroutine mumps_termglob
4214 recursive subroutine mumps_treecosts(pos)
4215 implicit none
4216 integer,intent(in)::pos
4217 integer i,nfront,npiv,nextpos
4218 if ((.NOT.associated(cv_tcostw)).OR.(.NOT.associated(cv_tcostm)))
4219 & then
4220 call mumps_abort()
4221 end if
4222 nfront=cv_nfsiz(pos)
4223 npiv=1
4224 nextpos=cv_fils(pos)
4225 do while (nextpos.gt.0)
4226 if (cv_blkon) then
4227 npiv = npiv + cv_sizeofblocks(nextpos)
4228 else
4229 npiv=npiv+1
4230 endif
4231 nextpos=cv_fils(nextpos)
4232 end do
4233 call mumps_calcnodecosts(npiv,nfront,
4234 & cv_ncostw(pos), cv_ncostm(pos))
4235 cv_tcostw(pos)=cv_ncostw(pos)
4236 cv_tcostm(pos)=cv_ncostm(pos)
4237 if (cv_ne(pos).ne.0) then
4238 nextpos=cv_fils(pos)
4239 do while(nextpos.gt.0)
4240 nextpos=cv_fils(nextpos)
4241 end do
4242 nextpos=-nextpos
4243 do i=1,cv_ne(pos)
4244 cv_depth(nextpos)=cv_depth(pos)+1
4245 call mumps_treecosts(nextpos)
4246 cv_tcostw(pos)=cv_tcostw(pos)+cv_tcostw(nextpos)
4247 cv_tcostm(pos)=cv_tcostm(pos)+cv_tcostm(nextpos)
4248 nextpos=cv_frere(nextpos)
4249 end do
4250 endif
4251 return
4252 end subroutine mumps_treecosts
4253 recursive subroutine mumps_typeinssarbr(inode)
4254 implicit none
4255 integer, intent(in)::inode
4256 integer in
4257 cv_nodetype(inode)=-1
4258 in=cv_fils(inode)
4259 do while (in>0)
4260 in=cv_fils(in)
4261 end do
4262 in=-in
4263 do while(in.gt.0)
4264 call mumps_typeinssarbr(in)
4265 in=cv_frere(in)
4266 enddo
4267 end subroutine mumps_typeinssarbr
4268 subroutine mumps_workmem_imbalance(workload,memused,
4269 & maxwork,minwork,maxmem,minmem)
4270 implicit none
4271 DOUBLE PRECISION,dimension(:),intent(in)::workload,
4272 & memused
4273 DOUBLE PRECISION,intent(out)::maxwork,minwork,maxmem,minmem
4274 maxwork=maxval(workload)
4275 minwork=minval(workload, mask= workload > dble(0))
4276 maxmem=maxval(memused)
4277 minmem=minval(memused, mask= memused > dble(0))
4278 end subroutine mumps_workmem_imbalance
4279 subroutine mumps_fix_accepted_master(layernumber,nodenumber)
4280 implicit none
4281 integer layernumber,nodenumber
4282 integer i
4283 integer inode
4284 integer current_max,current_proc
4285 current_max = 0
4286 score = 0
4287 allowed_nodes = .false.
4288 inode=cv_layer_p2node(layernumber)%t2_nodenumbers(nodenumber)
4289 do i=1,cv_layer_p2node(layernumber)%t2_cand(nodenumber,
4290 & cv_slavef+1)
4291 current_proc=cv_layer_p2node(layernumber)%t2_cand(nodenumber,i)
4292 if ( current_proc .ge. 0) then
4293 score(mem_distribmpi(current_proc)) =
4294 & score(mem_distribmpi(current_proc)) + 1
4295 endif
4296 enddo
4297 current_proc = cv_procnode(inode) - 1
4298 score(mem_distribmpi(current_proc)) =
4299 & score(mem_distribmpi(current_proc)) + 1
4300 do i=0,nb_arch_nodes - 1
4301 if ( score(i) .gt. current_max ) then
4302 current_max = score(i)
4303 allowed_nodes = .false.
4304 allowed_nodes(i) = .true.
4305 else
4306 if(score(i) .eq. current_max) then
4307 allowed_nodes(i) = .true.
4308 endif
4309 endif
4310 enddo
4311 return
4312 end subroutine mumps_fix_accepted_master
4313 end subroutine mumps_distribute
4314 subroutine mumps_return_candidates(par2_nodes,cand,
4315 & istat)
4316 integer, intent(out) :: par2_nodes(cv_nb_niv2), istat
4317 integer, intent(out) :: cand(:,:)
4318 character (len=48):: subname
4319 integer iloop
4320 istat=-1
4321 subname='MUMPS_RETURN_CANDIDATES'
4322 par2_nodes=cv_par2_nodes
4323 do iloop=1, cv_slavef+1
4324 cand(iloop,:)=cv_cand(:,iloop)
4325 enddo
4326 deallocate(cv_par2_nodes,cv_cand,stat=istat)
4327 if(istat.ne.0) then
4328 if(cv_lp.gt.0)
4329 & write(cv_lp,*)'Memory deallocation error in ',subname
4330 istat = cv_error_memdeloc
4331 return
4332 end if
4333 istat = 0
4334 return
4335 end subroutine mumps_return_candidates
4337 & total_comm,working_comm,keep69,par,
4338 & nbslaves,mem_distrib,informerr)
4339 implicit none
4340 include 'mpif.h'
4341 integer nbslaves
4342 integer, dimension(0:) :: mem_distrib
4343 integer total_comm,working_comm,keep69,par
4344 integer, dimension(:) ::informerr
4345 integer myrank
4346 integer host,i,ierr
4347 integer,dimension(:),allocatable :: buffer_memdistrib
4348 ierr = 0
4349 myrank = -1
4350 host = -1
4351 ke69 = keep69
4352 cv_slavef = nbslaves
4353 if (ke69 .eq. 1) then
4354 return
4355 endif
4356 if ( allocated(mem_distribtmp) ) deallocate(mem_distribtmp )
4357 allocate( mem_distribtmp( 0:cv_slavef-1 ),
4358 & buffer_memdistrib( 0:cv_slavef-1 ), stat=ierr )
4359 if ( ierr .gt. 0 ) then
4360 if(cv_mp.gt.0) write(cv_mp,*) 'pb allocation mem_dist'
4361 informerr(1) = -13
4362 informerr(2) = cv_slavef
4363 return
4364 end if
4365 mem_distribtmp = -1
4366 call mpi_comm_rank( total_comm, host, ierr )
4367 if ((par .eq. 1) .or. (host .ne. 0)) then
4368 call mpi_comm_rank( working_comm, myrank, ierr )
4369 call mumps_compute_distrib(ierr,myrank,
4370 & working_comm,mem_distrib)
4371 if ( ierr .ne. 0 ) then
4372 if(cv_mp.gt.0)
4373 & write(cv_mp,*) 'pb in mumps_init_arch_parameters'
4374 informerr(1) = -13
4375 informerr(2) = cv_slavef
4376 return
4377 end if
4378 mem_distribtmp = mem_distrib
4379 call mumps_fix_node_master(ierr)
4380 if ( ierr .ne. 0 ) then
4381 if(cv_mp.gt.0) write(cv_mp,*)
4383 informerr(1) = -13
4384 informerr(2) = cv_slavef
4385 return
4386 endif
4387 endif
4388.le. if(ke69 0) then
4389 deallocate(mem_distribtmp)
4390 deallocate(buffer_memdistrib)
4391 return
4392 endif
4393 call MPI_ALLREDUCE(mem_distribtmp(0),buffer_memdistrib(0),
4394 & cv_slavef,MPI_INTEGER,
4395 & MPI_MAX,total_comm,ierr)
4396 mem_distribtmp = buffer_memdistrib
4397 deallocate (buffer_memdistrib)
4398 call MUMPS_COMPUTE_NB_ARCH_NODES()
4399.le. if((cv_slavef/nb_arch_nodes) 4) then
4400 do i = 0, cv_slavef-1
4401.NE. if ( mem_distrib(i) 1 ) then
4402 mem_distrib(i)=max(ke69/2,2)
4403 endif
4404 enddo
4405 endif
4406.eq..or. if((nb_arch_nodes 1)
4407.eq. & (nb_arch_nodes cv_slavef)) then
4408 ke69 = 1
4409 keep69 = 1
4410 deallocate(mem_distribtmp)
4411 return
4412 endif
4413.eq. if (host 0) then
4414 if ( allocated(mem_distribmpi) ) deallocate(mem_distribmpi )
4415 allocate( mem_distribmpi( 0:cv_slavef-1 ), stat=ierr )
4416.gt. if ( ierr 0 ) then
4417.gt. if(cv_mp0) write(cv_mp,*) 'pb allocation mem_dist'
4418 informerr(1) = -13
4419 informerr(2) = cv_slavef
4420 return
4421 endif
4422 call MUMPS_ALLOC_ALLOW_MASTER(ierr)
4423.ne. if(ierr 0 ) then
4424 return
4425 endif
4426 mem_distribmpi = mem_distribtmp
4427 call MUMPS_FIX_TABLE_OF_PROCESS(ierr)
4428.ne. if ( ierr 0 ) then
4429.gt. if(cv_mp0)
4430 & write(cv_mp,*) 'pb in mumps_init_arch_parameters'
4431 informerr(1) = -13
4432 informerr(2) = cv_slavef
4433 return
4434 endif
4435 else
4436 deallocate(mem_distribtmp)
4437 endif
4438 return
4439 end subroutine MUMPS_INIT_ARCH_PARAMETERS
4440 subroutine MUMPS_COMPUTE_NB_ARCH_NODES()
4441 implicit none
4442 integer i
4443 nb_arch_nodes = 0
4444 do i=0,cv_slavef-1
4445.eq. if(mem_distribtmp(i) i) then
4446 nb_arch_nodes = nb_arch_nodes + 1
4447 endif
4448 enddo
4449 return
4450 end subroutine MUMPS_COMPUTE_NB_ARCH_NODES
4451 subroutine MUMPS_FIX_TABLE_OF_PROCESS(ierr)
4452 implicit none
4453 external MUMPS_SORT_INT
4454 integer i,precnode,nodecount
4455 integer sizesmp
4456 integer ierr
4457 ierr = 0
4458 sizesmp = 0
4459 if ( allocated(table_of_process) )
4460 & deallocate(table_of_process )
4461 allocate( table_of_process(0:cv_slavef-1), stat=ierr )
4462.gt. if ( ierr 0 ) then
4463.gt. if(cv_mp0) write(cv_mp,*)
4464 & 'pb allocation in mumps_fix_table_of_process'
4465 return
4466 end if
4467 do i=0,cv_slavef - 1
4468 table_of_process(i) = i
4469 enddo
4470 call MUMPS_SORT_INT(cv_slavef,mem_distribtmp(0),
4471 & table_of_process(0))
4472 precnode = 0
4473 nodecount = 0
4474 do i=0,cv_slavef-1
4475.eq. if(mem_distribtmp(i) precnode) then
4476 sizesmp = sizesmp + 1
4477 mem_distribtmp(i) = nodecount
4478 mem_distribmpi(table_of_process(i)) = nodecount
4479 else
4480 score(nodecount) = sizesmp
4481 sizesmp = 1
4482 nodecount = nodecount + 1
4483 precnode = mem_distribtmp(i)
4484 mem_distribtmp(i) = nodecount
4485 mem_distribmpi(table_of_process(i)) = nodecount
4486 endif
4487 enddo
4488 score(nodecount) = sizesmp
4489 do i=0,cv_slavef-1
4490 mem_distribtmp(i) = score(mem_distribtmp(i))
4491 enddo
4492 CALL MUMPS_SORT_INT_DEC(cv_slavef,mem_distribtmp(0),
4493 & table_of_process(0))
4494 ierr = 0
4495 return
4496 end subroutine MUMPS_FIX_TABLE_OF_PROCESS
4497 subroutine MUMPS_FIX_NODE_MASTER(ierr)
4498 implicit none
4499 integer i,j,ierr
4500 integer idmaster
4501 idmaster = -1
4502 ierr = 0
4503 do i=0,cv_slavef-1
4504.eq. if (mem_distribtmp(i) 1) then
4505 idmaster = i
4506 do j=i,cv_slavef-1
4507.eq. if (mem_distribtmp(j) 1) then
4508 mem_distribtmp(j) = idmaster
4509 else
4510 mem_distribtmp(j) = 0
4511 endif
4512 enddo
4513 return
4514 else
4515 mem_distribtmp(i) = 0
4516 endif
4517 enddo
4518.gt. if(cv_mp0) write(cv_mp,*)'problem in mumps_fix_node_master:
4519 & cannot find a master'
4520 ierr = 1
4521 return
4522 end subroutine MUMPS_FIX_NODE_MASTER
4523 subroutine MUMPS_COMPUTE_DISTRIB(ierr,myrank,working_comm,
4524 & mem_distrib)
4525 implicit none
4526 include 'mpif.h'
4527 integer ierr,resultlen,myrank,i,working_comm
4528 integer , dimension(0:) :: mem_distrib
4529 integer allocok
4530 character(len=MPI_MAX_PROCESSOR_NAME) name
4531 integer, dimension(:),allocatable :: namercv
4532 integer, dimension(:),allocatable :: myname
4533 integer lenrcv
4534 external MUMPS_COMPARE_TAB
4535 logical MUMPS_COMPARE_TAB
4536 ierr = 0
4537 call MPI_GET_PROCESSOR_NAME(name,resultlen,ierr)
4538 allocate(myname(resultlen),stat=allocok)
4539.gt. if ( allocok 0 ) then
4540.gt. if(cv_mp0) write(cv_mp,*)
4541 & 'pb allocation in compute_dist for myname'
4542 ierr = 1
4543 return
4544 end if
4545 do i=1, resultlen
4546 myname(i) = ichar(name(i:i))
4547 enddo
4548 do i=0, cv_slavef-1
4549.eq. if(myrank i) then
4550 lenrcv = resultlen
4551 else
4552 lenrcv = 0
4553 endif
4554 call MPI_BCAST(lenrcv,1,MPI_INTEGER,i,
4555 & working_comm,ierr)
4556 allocate(namercv(lenrcv),stat=allocok)
4557.gt. if ( allocok 0 ) then
4558.gt. if(cv_mp0) write(cv_mp,*)
4559 & 'pb allocation in compute_dist for namercv'
4560 ierr = 1
4561 return
4562 end if
4563.eq. if(myrank i) then
4564 namercv = myname
4565 endif
4566 call MPI_BCAST(namercv,lenrcv,MPI_INTEGER,i,
4567 & working_comm,ierr)
4568 if( MUMPS_COMPARE_TAB(myname,namercv,
4569 & resultlen,lenrcv)) then
4570 mem_distrib(i)=1
4571 else
4572 mem_distrib(i)=ke69
4573 endif
4574 deallocate(namercv)
4575 enddo
4576 deallocate(myname)
4577 ierr = 0
4578 return
4579 end subroutine MUMPS_COMPUTE_DISTRIB
4580 subroutine MUMPS_GET_IDP1_PROC(current_proc,idarch,ierr)
4581 implicit none
4582 integer current_proc
4583 integer idarch,ierr
4584 ierr = 0
4585.ge. if (current_proc cv_slavef) then
4586 ierr = -1
4587 return
4588 endif
4589.lt. if (current_proc 0) then
4590 idarch = 1
4591 return
4592 else
4593 idarch = table_of_process(current_proc) + 1
4594 endif
4595 return
4596 end subroutine MUMPS_GET_IDP1_PROC
4597 subroutine MUMPS_END_ARCH_CV()
4598 if (allocated(table_of_process)) deallocate(table_of_process)
4599 if (allocated(allowed_nodes)) deallocate(allowed_nodes)
4600 if (allocated(score)) deallocate(score)
4601 if (allocated(mem_distribtmp)) deallocate(mem_distribtmp)
4602 if (allocated(mem_distribmpi)) deallocate(mem_distribmpi)
4603 return
4604 end subroutine MUMPS_END_ARCH_CV
4605 subroutine MUMPS_ALLOC_ALLOW_MASTER(ierr)
4606 integer ierr
4607 ierr = 0
4608 if (allocated(allowed_nodes)) deallocate(allowed_nodes)
4609 allocate( allowed_nodes(0:nb_arch_nodes-1),stat=ierr)
4610.gt. if ( ierr 0 ) then
4611.gt. if(cv_mp0) write(cv_mp,*)
4612 & 'pb allocation mumps_alloc_allow_master'
4613 ierr = -13
4614 return
4615 end if
4616 allowed_nodes = .FALSE.
4617 if (allocated(score)) deallocate(score)
4618 allocate( score(0:nb_arch_nodes-1),stat=ierr)
4619.gt. if ( ierr 0 ) then
4620.gt. if(cv_mp0) write(cv_mp,*)
4621 & 'pb allocation mumps_alloc_allow_master'
4622 ierr = -13
4623 return
4624 end if
4625 score = 0
4626 ierr = 0
4627 return
4628 end subroutine MUMPS_ALLOC_ALLOW_MASTER
4629 SUBROUTINE MUMPS_SORT_MMERGE(start1st,end1st,dim1,
4630 & start2nd,end2nd,dim2,
4631 & indx,
4632 & val, istat)
4633 implicit none
4634 integer, intent(in):: start1st,end1st,dim1,start2nd,end2nd,dim2
4635 integer, intent(inout):: indx(:)
4636 DOUBLE PRECISION, intent(inout):: val(:)
4637 INTEGER, intent(out) :: istat
4638 INTEGER, ALLOCATABLE, DIMENSION(:) :: index
4639 DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: dummy1
4640 integer :: a,b,c
4641 integer :: allocok
4642 character (len=48):: subname
4643 subname = "MUMPS_SORT_MMERGE"
4644 istat=-1
4645 ALLOCATE(index(dim1+dim2),dummy1(dim1+dim2),stat=allocok)
4646.gt. if ( allocok 0 ) then
4647 cv_info(1) = cv_error_memalloc
4648 cv_info(2) = dim1+dim2+dim1+dim2
4649 istat = cv_error_memalloc
4650.gt. if(cv_lp0)
4651 & write(cv_lp,*)
4652 & 'memory allocation error in ',subname
4653 return
4654 end if
4655 a=start1st
4656 b=start2nd
4657 c=1
4658.LT..AND..LT. do while((aend1st+1)(bend2nd+1))
4659.GT. if(val(a)val(b))then
4660 index(c)=indx(a)
4661 dummy1(c)=val(a)
4662 a=a+1
4663 c=c+1
4664 else
4665 index(c)=indx(b)
4666 dummy1(c)=val(b)
4667 b=b+1
4668 c=c+1
4669 endif
4670 end do
4671.LT. if(aend1st+1) then
4672.LT. do while(aend1st+1)
4673 index(c)=indx(a)
4674 dummy1(c)=val(a)
4675 a=a+1
4676 c=c+1
4677 enddo
4678.LT. elseif(bend2nd+1) then
4679.LT. do while(bend2nd+1)
4680 index(c)=indx(b)
4681 dummy1(c)=val(b)
4682 b=b+1
4683 c=c+1
4684 enddo
4685 endif
4686 indx(start1st:end1st)=index(1:dim1)
4687 val(start1st:end1st)=dummy1(1:dim1)
4688 indx(start2nd:end2nd)=index(dim1+1:dim1+dim2)
4689 val(start2nd:end2nd)=dummy1(dim1+1:dim1+dim2)
4690 DEALLOCATE(index,dummy1)
4691 istat=0
4692 return
4693 end SUBROUTINE MUMPS_SORT_MMERGE
4694 SUBROUTINE MUMPS_SORT_MSORT(istat,dim,indx,val1,val2)
4695 implicit none
4696 integer, intent(in):: dim
4697 integer, intent(inout):: indx(:)
4698 integer, intent(out)::istat
4699 DOUBLE PRECISION, intent(inout):: val1(:)
4700 DOUBLE PRECISION, intent(inout),optional:: val2(:)
4701 INTEGER, ALLOCATABLE, DIMENSION(:) :: index, dummy1
4702 DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:) :: dummy2
4703 integer, parameter :: ss = 35
4704 integer :: a,b,c,i,k,l,r,s,stackl(ss),stackr(ss)
4705 integer :: allocok
4706 character (len=48):: subname
4707 istat=-1
4708 subname = "MUMPS_SORT_MSORT"
4709 ALLOCATE(index(dim),dummy1(dim),dummy2(dim),stat=allocok)
4710.gt. if (allocok0) then
4711 cv_info(1) = cv_error_memalloc
4712 cv_info(2) = 3*dim
4713 istat = cv_error_memalloc
4714.gt. if(cv_lp0)
4715 & write(cv_lp,*)'memory allocation error in ',subname
4716 return
4717 end if
4718 do i=1,dim
4719 index(i)=i
4720 enddo
4721 s = 1
4722 stackl(1) = 1
4723 stackr(1) = dim
4724 5511 CONTINUE
4725 l = stackl(s)
4726 r = stackr(s)
4727 k = (l+r) / 2
4728.LT. if(lk) then
4729.GE. if(sss) stop 'maxsize of stack reached'
4730 s = s + 1
4731 stackl(s) = l
4732 stackr(s) = k
4733 goto 5511
4734 endif
4735 5512 CONTINUE
4736 l = stackl(s)
4737 r = stackr(s)
4738 k = (l+r) / 2
4739.LT. if(k+1r) then
4740.GE. if(sss) stop 'maxsize of stack reached'
4741 s = s + 1
4742 stackl(s) = k+1
4743 stackr(s) = r
4744 goto 5511
4745 endif
4746 5513 CONTINUE
4747 l = stackl(s)
4748 r = stackr(s)
4749 k = (l+r) / 2
4750 a=l
4751 b=k+1
4752 c=1
4753.LT..AND..LT. do while((ak+1)(br+1))
4754.GT. if(val1(index(a))val1(index(b)))then
4755 dummy1(c)=index(a)
4756 a=a+1
4757 c=c+1
4758 else
4759 dummy1(c)=index(b)
4760 b=b+1
4761 c=c+1
4762 endif
4763 end do
4764.LT. if(ak+1) then
4765 dummy1(c:r-l+1)=index(a:k)
4766.LT. elseif(br+1) then
4767 dummy1(c:r-l+1)=index(b:r)
4768 endif
4769 index(l:r)=dummy1(1:r-l+1)
4770.GT. if(s1) then
4771 s = s - 1
4772.EQ. if(lstackl(s)) goto 5512
4773.EQ. if(rstackr(s)) goto 5513
4774 endif
4775 do i=1,dim
4776 dummy1(i)=indx(index(i))
4777 enddo
4778 indx=dummy1
4779 do i=1,dim
4780 dummy2(i)=val1(index(i))
4781 enddo
4782 val1=dummy2
4783 if(present(val2)) then
4784 do i=1,dim
4785 dummy2(i)=val2(index(i))
4786 enddo
4787 val2=dummy2
4788 endif
4789 istat=0
4790 DEALLOCATE(index,dummy1,dummy2)
4791 return
4792 end subroutine MUMPS_SORT_MSORT
4793 END MODULE MUMPS_STATIC_MAPPING
4794 SUBROUTINE MUMPS_SELECT_K38K20(N, SLAVEF, MP,
4795 & ICNTL13, KEEP, FRERE, ND, ISTAT)
4796 IMPLICIT NONE
4797 INTEGER, intent(in) :: N, SLAVEF, ICNTL13, MP
4798 INTEGER KEEP(500)
4799 INTEGER FRERE(N), ND(N)
4800 INTEGER, intent(out) :: ISTAT
4801 INTEGER IROOTTREE, SIZEROOT, NFRONT, I
4802 ISTAT = 0
4803.EQ..or..EQ. IF (KEEP(60)2 KEEP(60)3 ) THEN
4804 ELSE
4805.EQ..OR..GT..OR. IF((SLAVEF1)(ICNTL130)
4806.NE. & (KEEP(60)0)) THEN
4807 KEEP(38) = 0
4808 ELSE
4809 IROOTTREE=-1
4810 SIZEROOT=-1
4811 DO I=1,N
4812.EQ. IF (FRERE(I)0) THEN
4813 NFRONT = ND(I)
4814.GT. IF (NFRONT SIZEROOT) THEN
4815 IROOTTREE = I
4816 SIZEROOT = NFRONT
4817 END IF
4818 END IF
4819 END DO
4820.EQ..OR..EQ. IF ((IROOTTREE-1)(SIZEROOT-1)) THEN
4821 ISTAT = -1
4822 RETURN
4823 ENDIF
4824.LE. IF (SIZEROOTSLAVEF) THEN
4825 KEEP(38) = 0
4826.GT. ELSE IF((SIZEROOTKEEP(37))
4827.AND..EQ. & (KEEP(53)0)
4828 & ) THEN
4829.GT. IF (MP0) WRITE(MP,*) 'a root of estimated size ',
4830 & SIZEROOT,' has been selected for scalapack.'
4831 KEEP(38) = IROOTTREE
4832 ELSE
4833 KEEP(38) = 0
4834.GT. IF (MP0) WRITE(MP,'(a,i9,a)')
4835 & ' warning: largest root node of size ', SIZEROOT,
4836 & ' not selected for parallel execution'
4837 END IF
4838.EQ..AND..NE. IF ((KEEP(38)0)(KEEP(53)0)) THEN
4839 KEEP(20) = IROOTTREE
4840.EQ. ELSE IF (KEEP(60)0) THEN
4841 KEEP(20) = 0
4842 ENDIF
4843 ENDIF
4844 ENDIF
4845 RETURN
4846 END SUBROUTINE MUMPS_SELECT_K38K20
4847 SUBROUTINE MUMPS_SPLITNODE_INTREE(inode,nfront,npiv,k,
4848 & lnpivsplit, npivsplit, keep, n, fils, frere,
4849 & nfsiz, ne, info5_nfrmax, k28_nsteps, nodetype,
4850 & istat
4851 & , SIZEOFBLOCKS, LSIZEOFBLOCKS
4852 & , BLKON
4853 & )
4854 implicit none
4855 integer, intent(in)::nfront,npiv
4856 integer, intent(in):: k
4857 integer, intent(in)::lnpivsplit
4858 integer, intent(in)::npivsplit(lnpivsplit)
4859 integer, intent(in):: inode
4860 integer, intent(out)::istat
4861 integer, intent(inout):: keep(500)
4862 integer, intent(inout):: k28_nsteps
4863 integer, intent(in) :: info5_nfrmax
4864 integer, intent(in) :: n
4865 integer, intent(inout)::frere(n), fils(n), nfsiz(n), ne(n)
4866 integer, intent(inout):: nodetype(n)
4867 integer, intent(in) :: LSIZEOFBLOCKS
4868 integer, intent(in) :: SIZEOFBLOCKS(LSIZEOFBLOCKS)
4869 logical,intent(in) :: BLKON
4870 integer i,lev,in,in_son,in_father,in_grandpa,npiv_father,
4871 & npiv_son,nfrontk,npivk,d1,f1,e1,dk,fk,next_father
4872 integer::ison,ifather
4873 character (len=48):: subname
4874 integer, parameter:: tsplit_beg=4
4875 integer, parameter:: tsplit_mid=5
4876 integer, parameter:: tsplit_last=6
4877 istat=-1
4878 subname='splitnode_intree'
4879 ison=-1
4880 ifather=-1
4881 nfrontk = nfront
4882 npivk = npiv
4883 npiv_son = npivsplit(1)
4884 keep(2)=max(keep(2),nfront-npiv_son)
4885 d1 = inode
4886 f1 = d1
4887 e1 = frere(d1)
4888 if (BLKON) then
4889 i= SIZEOFBLOCKS(f1)
4890.lt. do while (inpiv_son)
4891 f1 = fils(f1)
4892 i = i + SIZEOFBLOCKS(f1)
4893 enddo
4894 else
4895 do i=1,npiv_son-1
4896 f1 = fils(f1)
4897 enddo
4898 endif
4899 ison = d1
4900 in_son = f1
4901 next_father = fils(in_son)
4902 do lev = 1, k-1
4903 ifather = next_father
4904 in_father = ifather
4905 npiv_son= abs(npivsplit(lev))
4906 npiv_father=abs(npivsplit(lev+1))
4907 if (BLKON) then
4908 i= SIZEOFBLOCKS(in_father)
4909.lt. do while (inpiv_father)
4910 in_father=fils(in_father)
4911 i = i + SIZEOFBLOCKS(in_father)
4912 enddo
4913 else
4914 do i=1,npiv_father-1
4915 in_father=fils(in_father)
4916 enddo
4917 endif
4918 frere(ison)=-ifather
4919 next_father = fils(in_father)
4920 fils(in_father)=-ison
4921 nfsiz(ison)=nfrontk
4922 nfsiz(ifather)=nfrontk-npiv_son
4923 ne(ifather)=1
4924 keep(61)=keep(61)+1
4925.EQ. IF (keep(79)0) THEN
4926 if( nfront-npiv_son > keep(9)) then
4927 nodetype(ifather) = 2
4928 else
4929 nodetype(ifather) = 1
4930 endif
4931 ELSE
4932.EQ. if (lev1) then
4933 nodetype(ison) = tsplit_beg
4934 endif
4935.eq. if (levk-1) then
4936 nodetype(ifather) = tsplit_last
4937 else
4938 nodetype(ifather) = tsplit_mid
4939 endif
4940 if (npivsplit(lev+1) < 0) then
4941.eq. if (levk-1) then
4942 nodetype(ifather)=-tsplit_last
4943 else
4944 nodetype(ifather)=-tsplit_mid
4945 endif
4946 endif
4947 ENDIF
4948 nfrontk = nfrontk-npiv_son
4949 npivk = npivk - npiv_son
4950 ison = ifather
4951 in_son = in_father
4952 enddo
4953 dk = ifather
4954 fk = in_father
4955# if (check_mumps_static_mapping >= 3)
4956 write(6,*) ' last(close to root) node in chain :', ifather
4957#endif
4958 fils(f1) = next_father
4959 frere(dk) = e1
4960 in = e1
4961.gt. do while (in0)
4962 in=frere(in)
4963 end do
4964 in = -in
4965.gt. do while(fils(in)0)
4966 in=fils(in)
4967 end do
4968 in_grandpa = in
4969.eq. if(fils(in_grandpa)-d1) then
4970 fils(in_grandpa)=-dk
4971 else
4972 in=-fils(in_grandpa)
4973.ne. do while(frere(in) d1)
4974 in=frere(in)
4975 end do
4976 frere(in) = dk
4977 end if
4978 k28_nsteps = k28_nsteps + k-1
4979 istat = 0
4980 return
4981 END SUBROUTINE MUMPS_SPLITNODE_INTREE
4982 subroutine MUMPS_SETUP_CAND_CHAIN(n, nb_niv2,
4983 & frere, nodetype, par2_nodes,
4984 & procnode, cand, inode_chain, slavef, dummy, nbcand, istat)
4985 implicit none
4986 integer, intent(in) :: n, nb_niv2, slavef
4987 integer,intent(in)::frere(n)
4988 integer, intent(inout) :: par2_nodes(nb_niv2), procnode(n)
4989 integer,intent(inout)::nodetype(n)
4990 integer,intent(inout)::cand(nb_niv2, slavef+1)
4991 integer,intent(in)::inode_chain
4992 integer,intent(inout)::dummy, nbcand
4993 integer,intent(out):: istat
4994 integer, parameter:: tsplit_beg=4
4995 integer, parameter:: tsplit_mid=5
4996 integer, parameter:: tsplit_last=6
4997 integer, parameter:: invalid=-9999
4998 integer :: inode, ifather, k
4999 logical :: last_iteration_reached
5000 istat = -1
5001 inode = inode_chain
5002 k = 1
5003 do
5004.not..lt. if ( (frere(inode) 0) ) then
5005 write(*,*) " Internal error 0 in SETUP_CAND",
5006 & frere(inode), inode
5007 CALL MUMPS_ABORT()
5008 endif
5009 ifather = -frere(inode)
5010.eq. last_iteration_reached = (abs(nodetype(ifather))tsplit_last)
5011 par2_nodes(dummy+1) = ifather
5012 procnode(ifather) = cand(dummy,1) + 1
5013.eq..or. if ( (nodetype(ifather)tsplit_mid)
5014.eq. & (nodetype(ifather)tsplit_last) ) then
5015.lt. if (nbcand2) then
5016 par2_nodes(dummy+1) = ifather
5017 procnode(ifather) = procnode(inode)
5018 cand(dummy+1,:) = cand(dummy,:)
5019 dummy = dummy + 1
5020 write(6,*) ' mapping property',
5021 & ' of procs in chain lost '
5022 CALL MUMPS_ABORT()
5023 endif
5024 cand(dummy+1,1:nbcand-1+k-1) = cand(dummy,2:nbcand+k-1)
5025 cand(dummy+1,nbcand-1+k) = procnode(inode)-1
5026 cand(dummy+1,nbcand-1+k+1:slavef) = invalid
5027 nbcand = nbcand -1
5028 k = k + 1
5029.eq..or. else if ( (nodetype(ifather)-tsplit_mid)
5030.eq. & (nodetype(ifather)-tsplit_last) ) then
5031.eq. if (nodetype(inode)tsplit_beg) then
5032 nodetype(inode)=2
5033 else
5034 nodetype(inode)=tsplit_last
5035 endif
5036.eq. if (nodetype(ifather) -tsplit_last) then
5037 nodetype(ifather) = 2
5038 else
5039 nodetype(ifather) = tsplit_beg
5040 endif
5041 cand(dummy+1,1:nbcand-1+k-1) = cand(dummy,2:nbcand+k-1)
5042 cand(dummy+1,nbcand-1+k) = procnode(inode)-1
5043 nbcand = nbcand+k-1
5044 k = 1
5045 else
5046 write(6,*) ' internal error 2 in setup_cand',
5047 & ' in, ifather =', inode, ifather,
5048 & ' nodetype(ifather) ', nodetype(ifather)
5049 CALL MUMPS_ABORT()
5050 endif
5051 cand(dummy+1,slavef+1)= nbcand
5052 dummy = dummy+1
5053 if (last_iteration_reached) exit
5054 inode = ifather
5055 end do
5056 istat = 0
5057 end subroutine MUMPS_SETUP_CAND_CHAIN
5058 subroutine MUMPS_GET_SPLIT_4_PERF(inode, nfront, npiv, nproc,
5059 & k, lnpivsplit, npivsplit,
5060 & n, frere, keep,
5061 & fils, BLKON, sizeofblocks,
5062 & istat)
5063 implicit none
5064 integer,intent(in)::inode, nfront, npiv, lnpivsplit, n
5065 integer,intent(in)::frere(n)
5066 integer,intent(in) :: fils(n)
5067 logical, intent(in) :: BLKON
5068 integer, intent(in) :: sizeofblocks(*)
5069 integer,intent(in)::keep(500)
5070 double precision, intent(in):: nproc
5071 integer,intent(out)::k, npivsplit(lnpivsplit), istat
5072 logical :: nosplit
5073 integer :: inode_tmp
5074 integer :: kk, optimization_strategy, nass, npiv2
5075 double precision :: nproc2
5076 integer :: npivOld, npivNew
5077 double precision :: timeFacOld, timeFacNew, timeAss
5078 double precision ,parameter :: alpha=8.0D9
5079 double precision ,parameter :: gamma=1.2D9
5080.le. nosplit = npiv npiv4equilibreRows(nfront, nproc)
5081 optimization_strategy = 0
5082.or..eq. nosplit = nosplit (frere(inode) 0)
5083 if ( nosplit ) then
5084 k = 1
5085 npivsplit(1) = npiv
5086 istat = 0
5087 return
5088 endif
5089.le. if (nproc 1.0d0) then
5090 k = 1
5091 npivsplit(1) = npiv
5092 istat = -1
5093 return
5094 endif
5095 nproc2 = nproc
5096 nass = 0
5097 kk = 0
5098 inode_tmp = inode
5099.lt. do while (nass npiv)
5100.eq..or. if ((nproc2 2.0d0)
5101.le. & (nfront - nass 6*keep(9))) then
5102 npiv2 = npiv - nass
5103.gt. else if (nproc2 2) then
5104.eq. if (optimization_strategy 0) then
5105 npiv2 = min(npiv - nass,
5106 & npiv4equilibreRows(nfront - nass, nproc2 ))
5107.eq. else if (optimization_strategy 1) then
5108.eq. if (nproc2 nproc) then
5109 npiv2 = min(npiv - nass,
5110 & npiv4equilibreFlops(nfront - nass, nproc2 ))
5111 else
5112 npiv2 = min(npiv - nass,
5113 & npiv4equilibreRows(nfront - nass, nproc2 ))
5114 endif
5115 else
5116 write(*,*) "Internal error in MUMPS_GET_SPLIT_4_PERF,"
5117 write(*,*) "optimization_strategy not implemented"
5118 call MUMPS_ABORT()
5119 endif
5120 endif
5121 kk = kk + 1
5122 IF (BLKON) THEN
5123 npivsplit(kk) = 0
5124.LT..and..gt. DO WHILE (npivsplit(kk) npiv2 inode_tmp 0)
5125 npivsplit(kk) = npivsplit(kk) + sizeofblocks(inode_tmp)
5126 inode_tmp= fils(inode_tmp)
5127 ENDDO
5128 npiv2 = npivsplit(kk)
5129 ELSE
5130 npivsplit(kk) = npiv2
5131 ENDIF
5132.ge. if (keep(79) 1
5133.and..ne. & kk 1) then
5134.eq. if (optimization_strategy 0) then
5135 npivOld = min(npiv - nass,
5136 & npiv4equilibreRows(nfront - nass, nproc ))
5137 npivNew = min(npiv - nass,
5138 & npiv4equilibreRows(nfront - nass, nproc2 - 1.0d0))
5139.eq. else if (optimization_strategy 1) then
5140 npivOld = min(npiv - nass,
5141 & npiv4equilibreFlops(nfront - nass, nproc ))
5142 npivNew = min(npiv - nass,
5143 & npiv4equilibreRows(nfront - nass, nproc2 - 1.0d0))
5144 else
5145 write(*,*) "Internal error in MUMPS_GET_SPLIT_4_PERF,"
5146 write(*,*) "optimization_strategy not implemented"
5147 call MUMPS_ABORT()
5148 endif
5149 timeAss = timeAssembly(int(nfront-nass,8), nproc2)
5150 timeFacOld = timeFacto(int(nfront-nass,8), int(npivOld,8),
5151 & nproc)
5152 timeFacNew = timeFacto(int(nfront-nass,8),int(npivNew,8),
5153 & nproc2-1)
5154 if ( (flopsFactoPanel(int(npivOld,8),int(nfront-nass,8))+
5155 & flopsUpdate(int(nfront-nass-npivOld,8),
5156 & int(nfront-nass-npivOld,8), int(npivOld,8)))/
5157 & (timeFacOld+timeAss)
5158.gt. & (flopsFactoPanel(int(npivNew,8),int(nfront-nass,8))+
5159 & flopsUpdate(int(nfront-nass-npivNew,8),
5160 & int(nfront-nass-npivNew,8), int(npivNew,8)))/
5161 & timeFacNew ) then
5162 npivsplit(kk) = -npiv2
5163 nproc2 = nproc
5164 else
5165 nproc2 = nproc2 - 1.0d0
5166 npiv2 = npivNew
5167 npivsplit(kk)=npivNew
5168 endif
5169 endif
5170 nass = nass + npiv2
5171 enddo
5172 k = kk
5173 istat=0
5174 return
5175 CONTAINS
5176 function npiv4equilibreRows(nfront, nproc)
5177 implicit none
5178 integer npiv4equilibreRows
5179 integer, intent(in) :: nfront
5180 double precision, intent(in) :: nproc
5181 npiv4equilibreRows = max(1, int(dble(nfront)/nproc))
5182 return
5183 end function npiv4equilibreRows
5184 function npiv4equilibreFlops(nfront, nproc)
5185 implicit none
5186 integer npiv4equilibreFlops
5187 integer, intent(in) :: nfront
5188 double precision, intent(in) :: nproc
5189 double precision::n,s,a,b,c,sdelta,npiv
5190 n = dble(nfront)
5191 s = nproc - 1.0d0
5192 a = s/3.+1.
5193 b = -3.*n - s*n - s/2.
5194 c = 2.*n**2 + s*n + s/6.
5195 sdelta = (b*b) - 4*a*c
5196 if (sdelta < 0.0E0) then
5197 WRITE(*,*) "Delta < 0 in npiv4equilibreFlops"
5198 call MUMPS_ABORT()
5199 endif
5200 sdelta = sqrt(sdelta)
5201 npiv = (-b - sdelta)/(2*a)
5202 npiv4equilibreFlops = max(1, int(npiv))
5203 return
5204 end function npiv4equilibreFlops
5205 function flopsFactoPanel(nbrows, nbcols)
5206 integer(8) :: nbrows, nbcols
5207 double precision :: flopsFactoPanel
5208 flopsFactoPanel = (nbrows*((-1.d0/3.d0)*nbrows**2 +
5209 & (nbcols + 1.d0/2.d0)*nbrows +
5210 & (nbcols + 1.d0/6.d0)))
5211 end function flopsFactoPanel
5212 function flopsUpdate(m, n, k)
5213 integer(8) :: m, n, k
5214 double precision :: flopsUpdate
5215 flopsUpdate = dble(2*m*n*k + m*k**2)
5216 end function flopsUpdate
5217 function timeFacto(nfront, npiv, nproc)
5218 integer(8), intent(in) :: nfront, npiv
5219 double precision, intent(in) :: nproc
5220 double precision :: timeFacto
5221 timeFacto = (max(flopsFactoPanel(npiv,nfront),
5222 & flopsUpdate(nfront-npiv, nfront-npiv, npiv)/
5223 & (nproc-1))/alpha)
5224 end function timeFacto
5225 function timeNIV1(nfront, npiv)
5226 integer(8) :: nfront, npiv
5227 double precision :: timeNIV1
5228 timeNIV1 = ((flopsFactoPanel(npiv, nfront) +
5229 & flopsUpdate(nfront - npiv, nfront - npiv, npiv))/alpha)
5230 end function timeNIV1
5231 function timeAssembly(n, p)
5232 integer(8) :: n
5233 double precision, intent(in) :: p
5234 double precision :: timeAssembly
5235 timeAssembly = ((n*n/p)/(gamma/(log(p)/log(2.0d0))))
5236 end function timeAssembly
5237 end subroutine MUMPS_GET_SPLIT_4_PERF
#define mumps_abort
Definition VE_Metis.h:25
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine mpi_comm_rank(comm, rank, ierr)
Definition mpi.f:254
for(i8=*sizetab-1;i8 >=0;i8--)
subroutine mumps_get_split_4_perf(inode, nfront, npiv, nproc, k, lnpivsplit, npivsplit, n, frere, keep, fils, blkon, sizeofblocks, istat)
subroutine mumps_find_thislayer(nmb, thislayer, nmb_thislayer, istat)
subroutine mumps_setup_cand_chain(n, nb_niv2, frere, nodetype, par2_nodes, procnode, cand, inode_chain, slavef, dummy, nbcand, istat)
subroutine mumps_map_layer(layernmb, thislayer, nmb_thislayer, map_strat, istat)
subroutine mumps_splitnode_intree(inode, nfront, npiv, k, lnpivsplit, npivsplit, keep, n, fils, frere, nfsiz, ne, info5_nfrmax, k28_nsteps, nodetype, istat, sizeofblocks, lsizeofblocks, blkon)
subroutine mumps_initpart2(istat)
subroutine mumps_procinit(maxwork, maxmem, istat)
subroutine mumps_setup_cand(istat)
subroutine mumps_split_during_mapping(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_costs_blr_t2_slave(npiv, nfront, nrow, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_find_best_proc(inode, map_strat, work, mem, workload, memused, proc, istat, respect_prop)
subroutine mumps_postprocess_mem()
subroutine mumps_assign_types(layernmb, thislayer, nmb_thislayer, istat)
subroutine mumps_store_globals(ne, nfsiz, frere, fils, keep, keep8, info, procnode, ssarbr, nbsa)
subroutine mumps_select_k38k20(n, slavef, mp, icntl13, keep, frere, nd, istat)
subroutine mumps_select_type3(istat)
subroutine mumps_splitnode_update(inode, nfront, npiv, k, lnpivsplit, npivsplit, istat)
subroutine mumps_encode_procnode(istat)
logical function mumps_bit_get(procs4node, procnumber)
subroutine mumps_initpart1(n, slavef, frere, fils, nfsiz, ne, keep, keep8, icntl, info, procnode, ssarbr, peak, istat, sizeofblocks, lsizeofblocks)
recursive subroutine mumps_treecosts(pos)
subroutine mumps_accept_l0(map_strat, workload, memused, accepted, istat)
subroutine mumps_fathson_replace(ifather, istat)
subroutine mumps_higher_layer(startlayer, thislayer, nmb_thislayer, cont, istat)
subroutine mumps_bit_set(procs4node, procnumber, istat)
logical function mumps_bit_get4proc(inode, procnumber)
subroutine mumps_rootlist(istat)
subroutine mumps_calcnodecosts(npiv, nfront, costw, costm)
subroutine mumps_propmap_init(inode, istat)
recursive subroutine mumps_typeinssarbr(inode)
subroutine mumps_mapsubtree(procnode)
subroutine mumps_get_memsplit_inkpart(inode, doit, npiv, nfront, npropmap, k2, istat)
recursive subroutine mumps_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_sortprocs(map_strat, workload, memused, inode, istat)
subroutine mumps_get_split_inkpart(inode, doit, npiv, nfront, npropmap, k1, k3, istat)
subroutine mumps_calccosts(istat)
subroutine mumps_costs_blr_t2_master(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
subroutine mumps_list2layer(istat)
subroutine mumps_propmap4split(inode, ifather, istat)
subroutine mumps_arrangel0(map_strat, layerl0end, workload, memused, procnode, istat, respect_prop)
logical function mumps_istype2bysize(nfront, npiv)
recursive subroutine mumps_mapbelow(inode, procnmb, procnode)
subroutine mumps_make_propmap(istat)
subroutine mumps_propmap_term(inode, istat)
logical function mumps_is_node_of_type2(inode)
subroutine mumps_termglob(istat)
recursive subroutine mumps_mod_propmap(inode_entry, ctr_entry, istat)
subroutine mumps_workmem_imbalance(workload, memused, maxwork, minwork, maxmem, minmem)
subroutine mumps_layerl0(istat)
subroutine mumps_fix_accepted_master(layernumber, nodenumber)
subroutine mumps_calcnodecosts_blr(npiv, nfront, costw, costm, k471, k472, k475, k488, sym)
integer function mumps_bloc2_get_nslavesmin(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_bloc2_get_nslavesmax(slavef, k48, k821, k50, nfront, ncb, k375, k119)
integer function mumps_reg_getkmax(keep821, ncb)
subroutine compute_blr_vcs(k472, ibcksz, maxsize, nass)
Definition lr_common.F:18
integer, dimension(:), pointer cv_frere
double precision cv_costw_layer0
integer, dimension(:), allocatable, save mem_distribtmp
subroutine mumps_get_idp1_proc(current_proc, idarch, ierr)
double precision, dimension(:), pointer cv_layerworkload
subroutine mumps_sort_msort(istat, dim, indx, val1, val2)
double precision, dimension(:), pointer cv_proc_workload
double precision, dimension(:), pointer cv_tcostw
integer, dimension(:), allocatable, save score
integer, dimension(:), pointer cv_nodetype
double precision cv_splitthresh
integer, dimension(:), pointer cv_info
subroutine mumps_alloc_allow_master(ierr)
integer, dimension(:), allocatable, save table_of_process
integer, parameter tsplit_beg
type(alloc_arraytype), dimension(:), pointer cv_layer_p2node
logical, dimension(:), allocatable, save allowed_nodes
subroutine mumps_compute_distrib(ierr, myrank, working_comm, mem_distrib)
double precision, dimension(:), pointer cv_ncostw
double precision, dimension(:), pointer cv_layerl0_sorted_costw
double precision cv_costm_upper
double precision cv_costm_total
double precision, dimension(:), pointer cv_ncostm
integer, dimension(:), pointer cv_icntl
double precision cv_costm_layer0
double precision, parameter cv_d_invalid
integer, parameter cv_invalid
subroutine mumps_fix_node_master(ierr)
double precision cv_l0wthresh
integer, parameter tsplit_last
integer, dimension(:), pointer cv_nfsiz
integer, parameter cv_equilib_flops
integer, dimension(:), pointer cv_sizeofblocks
subroutine, public mumps_end_arch_cv()
type(procs4node_t), dimension(:), pointer cv_prop_map
double precision, dimension(:), pointer cv_proc_memused
integer, dimension(:), pointer cv_nodelayer
subroutine, public mumps_init_arch_parameters(total_comm, working_comm, keep69, par, nbslaves, mem_distrib, informerr)
double precision, dimension(:), pointer cv_proc_maxmem
double precision, dimension(:), pointer cv_tcostm
integer, dimension(:), pointer cv_fils
integer, parameter cv_error_memdeloc
integer, parameter cv_error_memalloc
subroutine mumps_sort_mmerge(start1st, end1st, dim1, start2nd, end2nd, dim2, indx, val, istat)
double precision, dimension(:), pointer cv_proc_maxwork
integer, dimension(:,:), pointer, save cv_cand
integer, dimension(:), pointer cv_keep
integer, dimension(:), pointer cv_layerl0_array
double precision, dimension(:), pointer cv_layermemused
integer, parameter cv_equilib_mem
subroutine, public mumps_distribute(n, slavef, icntl, info, ne, nfsiz, frere, fils, keep, keep8, procnode, ssarbr, nbsa, peak, istat, sizeofblocks, lsizeofblocks)
double precision cv_costw_total
integer, dimension(:), pointer, save cv_par2_nodes
integer, dimension(:), pointer cv_proc_sorted
integer, dimension(:), allocatable, save mem_distribmpi
integer(8), dimension(:), pointer cv_keep8
double precision cv_costw_upper
subroutine mumps_fix_table_of_process(ierr)
integer, parameter tsplit_mid
integer, dimension(:), pointer cv_procnode
integer, dimension(:), pointer cv_depth
type(splitting_data) cv_last_splitting
integer, dimension(:), pointer cv_ne
subroutine, public mumps_return_candidates(par2_nodes, cand, istat)
integer, dimension(:), pointer cv_ssarbr
PORD_INT myrank(void)
integer function mumps_encode_tpn_iproc(tpn, iproc, k199)