90 INTEGER,
INTENT(IN) :: NPARTSASS, NPARTSCB
91 INTEGER,
POINTER,
DIMENSION(:) :: CUT
92 INTEGER :: LOC_MIN_ASS, LOC_MIN_CB, LOC_MAX_ASS, LOC_MAX_CB,
93 & LOC_TOT_ASS, LOC_TOT_CB
94 DOUBLE PRECISION :: LOC_AVG_ASS, LOC_AVG_CB
105 loc_avg_ass = ( loc_tot_ass * loc_avg_ass
106 & + cut(i+1) - cut(i) )
107 & / (loc_tot_ass + 1)
108 loc_tot_ass = loc_tot_ass + 1
109 IF (cut(i+1) - cut(i) .LE. loc_min_ass)
THEN
110 loc_min_ass = cut(i+1) - cut(i)
112 IF (cut(i+1) - cut(i) .GE. loc_max_ass)
THEN
113 loc_max_ass = cut(i+1) - cut(i)
116 DO i = npartsass+1,npartsass+npartscb
117 loc_avg_cb = ( loc_tot_cb * loc_avg_cb
118 & + cut(i+1) - cut(i) )
120 loc_tot_cb = loc_tot_cb + 1
121 IF (cut(i+1) - cut(i) .LE. loc_min_cb)
THEN
122 loc_min_cb = cut(i+1) - cut(i)
124 IF (cut(i+1) - cut(i) .GE. loc_max_cb)
THEN
125 loc_max_cb = cut(i+1) - cut(i)
140 DOUBLE PRECISION,
INTENT(IN) :: F
141 LOGICAL,
INTENT(IN) :: CB
153 & CB_COMPRESS, FRSWAP)
156 DOUBLE PRECISION :: HR_COST,BUILDQ_COST,
158 LOGICAL,
OPTIONAL :: REC_ACC, CB_COMPRESS, FRSWAP
162 hr_cost = dble(k*k*k/3_8 + 4_8*k*m*n - (2_8*m+n)*k*k)
164 buildq_cost = dble(2_8*k*k*m - k*k*k)
168 hr_and_buildq_cost = hr_cost + buildq_cost
172 IF (
present(rec_acc))
THEN
180 IF (
present(cb_compress))
THEN
181 IF (cb_compress)
THEN
188 IF (
present(frswap))
THEN
200 INTEGER,
INTENT(IN) :: LorU
201 DOUBLE PRECISION :: LR_COST, FR_COST, LR_GAIN
203 fr_cost = dble(lrb%M*lrb%N*lrb%N)
205 lr_cost = dble(lrb%K*lrb%N*lrb%N)
210 fr_cost = dble(lrb%M-1)*dble(lrb%N*lrb%N)
212 lr_cost = dble(lrb%N-1)*dble(lrb%N*lrb%K)
217 lr_gain = fr_cost - lr_cost
224 & MIDBLK_COMPRESS, RANK_IN, BUILDQ,
225 & IS_SYMDIAG, LUA_ACTIVATED, REC_ACC)
226 TYPE(
lrb_type),
INTENT(IN) :: LRB1,LRB2
227 LOGICAL,
INTENT(IN) :: BUILDQ, IS_SYMDIAG, LUA_ACTIVATED
228 INTEGER,
INTENT(IN) :: RANK_IN,
229 LOGICAL,
INTENT(IN),
OPTIONAL :: REC_ACC
230 DOUBLE PRECISION :: COST_FR, COST_LR, COST_LRLR1, COST_LRLR2,
231 & cost_lrlr3, cost_frlr, cost_frfr,
232 & cost_compress, cost_lr_and_compress, lr_gain
233 DOUBLE PRECISION :: M1,N1,K1,M2,N2,K2,RANK
234 LOGICAL :: REC_ACC_LOC
247 cost_compress = 0.0d0
248 IF (
present(rec_acc))
THEN
249 rec_acc_loc = rec_acc
251 rec_acc_loc = .false.
253 IF ((.NOT.lrb1%ISLR).AND.(.NOT.lrb2%ISLR))
THEN
254 cost_frfr = 2.0d0*m1*m2*n1
255 cost_lr = 2.0d0*m1*m2*n1
256 cost_fr = 2.0d0*m1*m2*n1
257 ELSEIF (lrb1%ISLR.AND.(.NOT.lrb2%ISLR))
THEN
258 cost_frlr = 2.0d0*k1*m2*n1
259 cost_lrlr3 = 2.0d0*m1*m2*k1
260 cost_lr = cost_frlr + cost_lrlr3
261 cost_fr = 2.0d0*m1*m2*n1
262 ELSEIF ((.NOT.lrb1%ISLR).AND.lrb2%ISLR)
THEN
263 cost_frlr = 2.0d0*m1*k2*n1
264 cost_lrlr3 = 2.0d0*m1*m2*k2
265 cost_lr = cost_frlr + cost_lrlr3
266 cost_fr = 2.0d0*m1*m2*n1
268 IF (midblk_compress.GE.1)
THEN
269 cost_compress = rank*rank*rank/3.0d0 +
271 & (2.0d0*k1+k2)*rank*rank
273 cost_compress = cost_compress + 4.0d0*rank*rank*k1
277 cost_lrlr1 = 2.0d0*k1*k2*n1
278 IF ((midblk_compress.GE.1).AND.buildq
THEN
279 cost_lrlr2 = 2.0d0*k1*m1*rank + 2.0d0*k2*m2*rank
280 cost_lrlr3 = 2.0d0*m1*m2*rank
283 cost_lrlr2 = 2.0d0*k1*m1*k2
284 cost_lrlr3 = 2.0d0*m1*m2*k2
286 cost_lrlr2 = 2.0d0*k1*m2*k2
287 cost_lrlr3 = 2.0d0*m1*m2*k1
290 cost_lr = cost_lrlr1 + cost_lrlr2 + cost_lrlr3
291 cost_fr = 2.0d0*m1*m2*n1
294 cost_fr = cost_fr/2.0d0
295 cost_lrlr3 = cost_lrlr3/2.0d0
296 cost_frfr = cost_frfr/2.0d0
297 cost_lr = cost_lr - cost_lrlr3 - cost_frfr
299 IF (lua_activated)
THEN
300 cost_lr = cost_lr - cost_lrlr3
302 IF (rec_acc_loc)
THEN
303 cost_lr_and_compress = cost_lr + cost_compress
309 IF (.NOT.rec_acc_loc)
THEN
313 lr_gain = cost_fr - cost_lr
321 INTEGER,
INTENT(IN) :: NIV
322 DOUBLE PRECISION :: FLOP_COST
323 flop_cost = 2.0d0*dble(lrb%M)*dble(lrb%N)*dble(lrb%K)
330 & NPROW, NPCOL, MYID)
331 INTEGER,
intent(in) :: KEEP50, NFRONT, NPIV,
333 DOUBLE PRECISION :: COST, COST_PER_PROC
334 INTEGER,
PARAMETER :: LEVEL3 = 3
337 cost_per_proc = dble(int( cost,8) / int(nprow * npcol,8))
345 TYPE (CMUMPS_STRUC),
TARGET :: id
410 INTEGER,
INTENT(IN) :: NASS, NCB, SYM, NELIM
411 DOUBLE PRECISION :: MRY
415 mry = dble(npiv)*(dble(npiv)+1.d0)/2.d0
416 & + dble(npiv)*dble(ncb+nelim)
418 mry = dble(npiv)*dble(npiv)
419 & + 2.0d0*dble(npiv)*dble(ncb+nelim)
427 INTEGER,
INTENT(IN) :: NROWS, NCOLS, SYM
428 DOUBLE PRECISION :: MRY
430 mry = dble(ncols)*dble(nrows)
432 mry = dble(ncols-nrows)*dble(nrows) +
433 & dble(nrows)*dble(nrows+1)/2.d0
443 DOUBLE PRECISION :: LRGAIND
444 lrgaind = dble(lrb%M*lrb%N-(lrb%M+lrb%N)*lrb%K)
452 INTEGER,
INTENT(IN) :: NBBLOCKS
453 TYPE(LRB_TYPE),
INTENT(IN) :: BLR_PANEL(:)
454 DOUBLE PRECISION :: MRY
458 IF (blr_panel(i)%ISLR)
THEN
459 mry = mry + dble(blr_panel(i)%M*blr_panel(i)%N
460 & - blr_panel(i)%K*(blr_panel(i)%M + blr_panel(i)%N))
469 INTEGER,
INTENT(IN) :: NFRONT, SYM, NASS, NPIV, NIV
470 DOUBLE PRECISION :: FLOP
471 CALL MUMPS_GET_FLOPS_COST(NFRONT, NPIV, NASS,
478 & NASS1, KEEP50, INODE)
479 INTEGER,
INTENT(IN) :: NROW1, NCOL1, KEEP50, , INODE
480 DOUBLE PRECISION :: NROW2, NCOL2, NASS2
481 DOUBLE PRECISION :: FLOP
485 IF (keep50.EQ.0)
THEN
486 flop = nrow2*nass2*nass2
487 & + 2.0d0*nrow2*nass2*(ncol2-nass2)
491 & + nrow2*nass2*nrow2
492 & + 2.0d0*nrow2*nass2*(ncol2-nass2-nrow2)
501 INTEGER,
INTENT(IN) :: NFRONT, NPIV, NASS, SYM, NIV
502 DOUBLE PRECISION :: FLOP_FAC
504 & sym, niv, flop_fac)
512 INTEGER,
INTENT(IN) :: NROW1, NCOL1, KEEP50, NASS1, INODE
513 DOUBLE PRECISION :: NROW2, NCOL2, NASS2
514 DOUBLE PRECISION :: FLOP
518 IF (keep50.EQ.0)
THEN
519 flop = nrow2*nass2*nass2
520 & + 2.0d0*nrow2*nass2*(ncol2-nass2)
524 & + nrow2*nass2*nrow2
525 & + 2.0d0*nrow2*nass2*(ncol2-nass2-nrow2)
533 & FLOP_NUMBER, NB_ENTRIES_FACTOR_withLR,
535 INTEGER(8),
INTENT(IN) :: NB_ENTRIES_FACTOR
536 INTEGER,
INTENT(IN) :: MPG
537 LOGICAL,
INTENT(IN) ::
538 REAL,
INTENT(IN) :: FLOP_NUMBER
539 INTEGER(8),
INTENT(OUT) ::
540 & NB_ENTRIES_FACTOR_withLR
541 IF (nb_entries_factor < 0)
THEN
542 IF (prokg.AND.mpg.GT.0)
THEN
543 WRITE(mpg,*)
"NEGATIVE NUMBER OF ENTRIES IN FACTOR"
544 WRITE(mpg,*)
"===> OVERFLOW ?"
556 nb_entries_factor_withlr = nb_entries_factor -
558 IF (nb_entries_factor.EQ.0)
THEN
574 & DEPTH, BCKSZ, NASSMIN, NFRONTMIN, SYM, K486,
575 & K472, K475, K478, K480, K481, K483, K484,
577 & NBTREENODES, NPROCS, MPG, PROKG)
578 INTEGER,
INTENT(IN) :: LOCAL,K489,DEPTH, N,
579 & ICNTL36, BCKSZ,NASSMIN,
580 & nfrontmin, k486, nbtreenodes, mpg,
581 & k472, k475, k478, k480, k481, k483, k484,
583 INTEGER(8),
INTENT(IN) :: K8110, K849
584 LOGICAL,
INTENT(IN) :: PROKG
587 prok = (prokg.AND.(mpg.GE.0))
590 &
'-------------- Beginning of BLR statistics -------------------',
593 &
' ICNTL(36) BLR variant = ', icntl36
594 WRITE(mpg,
'(A,ES8.1)')
595 &
' CNTL(7) Dropping parameter controlling accuracy = ',
598 &
' Statistics after BLR factorization :'
600 &
' Number of BLR fronts = ',
603 IF (prok)
WRITE(mpg,
'(A,F8.1,A)')
604 &
' Fraction of factors in BLR fronts =',
608 &
' Statistics on the number of entries in factors :'
609 WRITE(mpg,
'(A,ES10.3,A,F5.1,A)')
610 &
' INFOG(29) Theoretical nb of entries in factors ='
611 & ,real(k8110),
' (100.0%)'
612 WRITE(mpg,
'(A,ES10.3,A,F5.1,A)')
613 &
' INFOG(35) Effective nb of entries (% of INFOG(29)) ='
615 & ,real(100)*(real(k849)/real(
max(k8110,1_8)))
618 IF (prok)
WRITE(mpg,
'(A)')
619 &
' Statistics on operation counts (OPC):'
626 WRITE(mpg,
'(A,ES10.3,A,F5.1,A)')
627 &
' RINFOG(3) Total theoretical operations counts ='
629 WRITE(mpg,
'(A,ES10.3,A,F5.1,A)')
630 &
' RINFOG(14) Total effective OPC (% of RINFOG(3)) ='
635 IF (prok)
WRITE(mpg,
'(A,A)')
636 &
'-------------- End of BLR statistics -------------------------',
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
double precision flop_accum_compress
double precision flop_update_fr
double precision time_cb_compress
double precision factor_processed_fraction
double precision flop_facto_lr
double precision time_update_frlr
double precision time_lrana_gethalo
double precision flop_midblk_compress
double precision flop_update_frlr
double precision time_compress
double precision time_decomp_locasm2
double precision flop_decompress
double precision time_decomp
integer max_blocksize_ass
double precision time_frfronts
double precision avg_flop_facto_lr
subroutine upd_flop_facto_fr(nfront, nass, npiv, sym, niv)
double precision time_decomp_ucfs
double precision flop_trsm
double precision max_flop_facto_lr
double precision flop_panel
double precision flop_update_lr
subroutine upd_mry_cb_fr(nrows, ncols, sym)
double precision time_update
double precision mry_cb_lrgain
double precision flop_frswap_compress
double precision time_update_frfr
double precision flop_cb_compress
double precision time_frswap_compress
double precision time_decomp_asm1
double precision global_mry_lpro_compr
double precision global_mry_ltot_compr
double precision time_upd_nelim
double precision flop_trsm_lr
double precision flop_trsm_fr
double precision time_decomp_asms2m
double precision time_update_lrlr1
double precision mry_lu_lrgain
subroutine stats_compute_flop_slave_type2(nrow1, ncol1, nass1, keep50, inode)
double precision time_midblk_compress
subroutine upd_mry_cb_lrgain(lrb)
double precision time_lr_module
double precision total_flop
integer min_blocksize_ass
integer(kind=8) factor_size
double precision flop_update_lrlr2
subroutine saveandwrite_gains(local, k489, dkeep, n, icntl36, depth, bcksz, nassmin, nfrontmin, sym, k486, k472, k475, k478, k480, k481, k483, k484, k8110, k849, nbtreenodes, nprocs, mpg, prokg)
subroutine upd_flop_compress(lr_b, rec_acc, cb_compress, frswap)
double precision flop_frfronts
double precision time_fac_sq
subroutine upd_mry_lu_fr(nass, ncb, sym, nelim)
double precision time_fac_mq
double precision min_flop_facto_lr
integer total_nblocks_ass
double precision mry_cb_fr
double precision avg_blocksize_cb
double precision time_decomp_maplig1
double precision flop_lrgain
double precision time_lrtrsm
double precision flop_update_lrlr3
subroutine upd_flop_update(lrb1, lrb2, midblk_compress, rank_in, buildq, is_symdiag, lua_activated, rec_acc)
double precision flop_update_lrlr1
double precision time_diagcopy
double precision time_fac_i
double precision time_lrana_sepgrouping
subroutine upd_mry_lu_lrgain(blr_panel, nbblocks)
double precision flop_compress
double precision time_panel
subroutine upd_flop_trsm(lrb, loru)
subroutine compute_global_gains(nb_entries_factor, flop_number, nb_entries_factor_withlr, prokg, mpg)
double precision time_update_lrlr2
double precision time_update_lrlr3
subroutine init_stats_global(id)
double precision flop_update_frfr
subroutine upd_flop_frfronts(nfront, npiv, nass, sym, niv)
double precision time_lrana_gnew
double precision flop_facto_fr
subroutine upd_flop_root(keep50, nfront, npiv, nprow, npcol, myid)
double precision avg_blocksize_ass
subroutine upd_flop_decompress(f, cb)
double precision mry_lu_fr
subroutine collect_blocksizes(cut, npartsass, npartscb)
double precision time_frtrsm
double precision flop_cb_decompress
subroutine upd_flop_frfront_slave(nrow1, ncol1, nass1, keep50, inode)
double precision time_lrana_lrgrouping
double precision time_decomp_asms2s
subroutine upd_flop_update_lrlr3(lrb, niv)