OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
ana_omp_m.F File Reference

Go to the source code of this file.

Modules

module  mumps_ana_omp_m

Functions/Subroutines

subroutine mumps_ana_omp_m::mumps_ana_l0_omp (nb_threads, n, nsteps, sym, slavef, dad, frere, fils, nstk_steps, nd, step, procnode_steps, keep, keep8, myid_nodes, na, lna, arith, lpool_b_l0_omp, ipool_b_l0_omp, lpool_a_l0_omp, ipool_a_l0_omp, l_virt_l0_omp, virt_l0_omp, virt_l0_omp_mapping, l_phys_l0_omp, phys_l0_omp, perm_l0_omp, ptr_leafs_l0_omp, thread_la, info, icntl)
subroutine mumps_ana_initialize_l0_omp ()
subroutine l0_insert_node (dll, inode)
subroutine l0_insert_children (i_father)
subroutine l0_remove_node (inode)
logical function mumps_ana_accept_l0_omp ()
subroutine mumps_ana_finalize_l0_omp ()
subroutine mumps_ana_free_l0_workspace ()
subroutine read_bench (arith, k50)
subroutine cost_bench (npiv, nschur, nb_core, sym, cost)
recursive subroutine mumps_quick_sort_ipool_po (n, step, intlist, taille, lo, hi)
recursive subroutine mumps_quick_sort_phys_l0 (n, step, intlist, invperm, taille, lo, hi)
subroutine mumps_ana_omp_return ()

Function/Subroutine Documentation

◆ cost_bench()

subroutine mumps_ana_l0_omp::cost_bench ( integer, intent(in) npiv,
integer, intent(in) nschur,
integer, intent(in) nb_core,
integer, intent(in) sym,
double precision, intent(out) cost )
private

Definition at line 896 of file ana_omp_m.F.

897 IMPLICIT NONE
898 INTEGER, INTENT(IN) :: NPIV, NSCHUR, NB_CORE, SYM
899 DOUBLE PRECISION, INTENT(OUT) :: COST
900 INTEGER V, VV, S, SS
901 INTEGER LOW_INDEX_NPIV, LOW_INDEX_NSCHUR
902 INTEGER HIGH_INDEX_NPIV, HIGH_INDEX_NSCHUR
903 DOUBLE PRECISION :: APROX_COST_FLOPS, REAL_COST_FLOPS
904 IF (npiv .LE. 10) THEN
905 low_index_npiv = npiv
906 v = npiv
907 vv = npiv + 1
908 ELSEIF (npiv .LE. 100) THEN
909 low_index_npiv = 9 + npiv/10
910 v = (npiv/10)*10
911 vv = (npiv/10+1)*10
912 ELSEIF (npiv .LE. 1000) THEN
913 low_index_npiv = 18 + npiv/100
914 v = (npiv/100)*100
915 vv = (npiv/100+1)*100
916 ELSEIF (npiv .LE. 10000) THEN
917 low_index_npiv = 27 + npiv/1000
918 v = (npiv/1000)*1000
919 vv = (npiv/1000+1)*1000
920 ELSE
921 low_index_npiv = 37
922 v = (npiv/10000)*10000
923 vv = (npiv/10000+1)*10000
924 END IF
925 IF (nschur .LE. 10) THEN
926 low_index_nschur = nschur + 1
927 s = nschur
928 ss = nschur + 1
929 ELSEIF (nschur .LE. 100) THEN
930 low_index_nschur = 10 + nschur/10
931 s = (nschur/10)*10
932 ss = (nschur/10+1)*10
933 ELSEIF (nschur .LE. 1000) THEN
934 low_index_nschur = 19 + nschur/100
935 s = (nschur/100)*100
936 ss = (nschur/100+1)*100
937 ELSEIF (nschur .LE. 10000) THEN
938 low_index_nschur = 28 + nschur/1000
939 s = (nschur/1000)*1000
940 ss = (nschur/1000+1)*1000
941 ELSE
942 low_index_nschur = 38
943 s = (nschur/10000)*10000
944 ss = (nschur/10000+1)*10000
945 END IF
946 IF (v .LT. 10000) THEN
947 IF (s .LT. 10000) THEN
948 high_index_npiv = low_index_npiv + 1
949 high_index_nschur = low_index_nschur + 1
950 cost = (bench(low_index_npiv, low_index_nschur, nb_core)
951 & *(vv - npiv)*(ss - nschur)
952 & +bench(low_index_npiv, high_index_nschur, nb_core)
953 & *(vv - npiv)*(nschur - s)
954 & +bench(high_index_npiv, low_index_nschur, nb_core)
955 & *(npiv - v)*(ss - nschur)
956 & +bench(high_index_npiv, high_index_nschur, nb_core)
957 & *(npiv - v)*(nschur - s))
958 & /((vv - v)*(ss - s))
959 ELSE
960 high_index_npiv = low_index_npiv + 1
961 high_index_nschur = low_index_nschur
962 cost = (bench(low_index_npiv, low_index_nschur, nb_core)
963 & *(vv - npiv)
964 & +bench(high_index_npiv, low_index_nschur, nb_core)
965 & *(npiv - v))
966 & /(vv - v)
967 CALL mumps_get_flops_cost ( npiv+nschur, npiv, npiv,
968 & sym, 1, real_cost_flops )
969 CALL mumps_get_flops_cost ( v+s, v, v,
970 & sym, 1, aprox_cost_flops )
971 cost = cost * (real_cost_flops/aprox_cost_flops)
972 END IF
973 ELSE
974 IF (nschur < 10000) THEN
975 high_index_npiv = low_index_npiv
976 high_index_nschur = low_index_nschur + 1
977 cost = (bench(low_index_npiv, low_index_nschur, nb_core)
978 & *(ss - nschur)
979 & +bench(low_index_npiv, high_index_nschur, nb_core)
980 & *(nschur - s))
981 & /(ss - s)
982 CALL mumps_get_flops_cost ( npiv+nschur, npiv, npiv,
983 & sym, 1, real_cost_flops )
984 CALL mumps_get_flops_cost ( v+s, v, v,
985 & sym, 1, aprox_cost_flops )
986 cost = cost * (real_cost_flops/aprox_cost_flops)
987 ELSE
988 high_index_npiv = low_index_npiv
989 high_index_nschur = low_index_nschur
990 cost = (bench(low_index_npiv, low_index_nschur, nb_core))
991 CALL mumps_get_flops_cost ( npiv+nschur, npiv, npiv,
992 & sym, 1, real_cost_flops )
993 CALL mumps_get_flops_cost ( v+s, v, v,
994 & sym, 1, aprox_cost_flops )
995 cost = cost * (real_cost_flops/aprox_cost_flops)
996 END IF
997 END IF
subroutine mumps_get_flops_cost(nfront, npiv, nass, keep50, level, cost)
Definition estim_flops.F:74

◆ l0_insert_children()

subroutine mumps_ana_l0_omp::l0_insert_children ( integer, intent(in) i_father)
private

Definition at line 463 of file ana_omp_m.F.

464 IMPLICIT NONE
465 INTEGER, INTENT ( IN ) :: I_FATHER
466 INTEGER :: I_SON, IERR
467 TYPE ( IDLL_T ), POINTER :: SON_DLL
468 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
469 ierr = idll_create( son_dll )
470 i_son = i_father
471 DO WHILE ( i_son .GT. 0 )
472 i_son = fils( i_son )
473 END DO
474 i_son = - i_son
475 IF ( i_son .EQ. 0 ) THEN
476 RETURN
477 END IF
478 DO WHILE ( i_son .GT. 0 )
479 CALL l0_insert_node ( son_dll, i_son )
480 i_son = frere( step( i_son ) )
481 END DO
482 ierr = idll_iterator_begin( l0_omp_dll, idll_node )
483 ierr = idll_pop_front( son_dll, i_son )
484 IF ( ierr .NE. 0 ) THEN
485 GOTO 190
486 END IF
487 IF ( .NOT. associated( idll_node ) ) THEN
488 DO
489 ierr = idll_push_back( l0_omp_dll, i_son )
490 ierr = idll_pop_front( son_dll, i_son )
491 IF ( ierr .NE. 0 ) THEN
492 GOTO 190
493 END IF
494 END DO
495 ELSE
496 DO
497 IF ( costs_mono_thread( step( i_son )) .LE.
498 & costs_mono_thread( step( idll_node%ELMT ) ) ) THEN
499 IF ( associated ( idll_node%NEXT ) ) THEN
500 idll_node => idll_node%NEXT
501 ELSE
502 ierr = idll_push_back(l0_omp_dll, i_son)
503 ierr = idll_pop_front( son_dll, i_son )
504 IF ( ierr .NE. 0 ) THEN
505 GOTO 190
506 END IF
507 END IF
508 ELSE
509 ierr = idll_insert_before(l0_omp_dll, idll_node,i_son)
510 ierr = idll_pop_front( son_dll, i_son )
511 IF ( ierr .NE. 0 ) THEN
512 GOTO 190
513 END IF
514 END IF
515 END DO
516 END IF
517190 CONTINUE
518 ierr = idll_destroy( son_dll )
519 RETURN
subroutine l0_insert_node(dll, inode)
Definition ana_omp_m.F:441

◆ l0_insert_node()

subroutine mumps_ana_l0_omp::l0_insert_node ( type ( idll_t ), pointer dll,
integer, intent(in) inode )
private

Definition at line 440 of file ana_omp_m.F.

441 IMPLICIT NONE
442 INTEGER, INTENT ( IN ) :: INODE
443 TYPE ( IDLL_T ), POINTER :: DLL
444 INTEGER :: IERR
445 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
446 ierr = idll_iterator_begin( dll, idll_node )
447 DO WHILE ( associated ( idll_node ) )
448 IF ( costs_mono_thread( step( idll_node%ELMT ) )
449 & .GT.
450 & costs_mono_thread( step( inode ) ) ) THEN
451 idll_node => idll_node%NEXT
452 ELSE
453 EXIT
454 END IF
455 END DO
456 IF ( .NOT. associated ( idll_node ) ) THEN
457 ierr = idll_push_back(dll, inode)
458 ELSE
459 ierr = idll_insert_before(dll, idll_node, inode)
460 ENDIF
461 RETURN

◆ l0_remove_node()

subroutine mumps_ana_l0_omp::l0_remove_node ( integer, intent(out) inode)
private

Definition at line 521 of file ana_omp_m.F.

522 IMPLICIT NONE
523 INTEGER, INTENT ( OUT ) :: INODE
524 INTEGER :: I_SON, IERR, NPIV
525 ierr = idll_pop_front( l0_omp_dll, inode )
526 i_son = inode
527 npiv = 0
528 DO WHILE ( i_son .GT. 0 )
529 npiv = npiv + 1
530 i_son = fils( i_son )
531 END DO
532 i_son = - i_son
533 IF (keep(50) .EQ. 0) THEN
534 factor_size_under_l0 = factor_size_under_l0 -
535 & int(npiv, 8) * int(2 * nd(step(inode)) - npiv, 8)
536 ELSE
537 factor_size_under_l0 = factor_size_under_l0 -
538 & int(npiv, 8) * int(nd(step(inode)), 8)
539 ENDIF
540 IF ( i_son .EQ. 0 ) THEN
541 ierr = idll_push_back( leafs_above_l0_omp_dll, inode )
542 inode = -inode
543 ELSE IF (inode .GT. 0) THEN
544 cost_above = cost_above + costs_multi_thread(step( inode ))
545 END IF
546 RETURN

◆ mumps_ana_accept_l0_omp()

logical function mumps_ana_l0_omp::mumps_ana_accept_l0_omp
private

Definition at line 548 of file ana_omp_m.F.

549 LOGICAL :: MUMPS_ANA_ACCEPT_L0_OMP
550 INTEGER :: I, I_LESS_CHARGED, IERR, NB_IN_L0
551 DOUBLE PRECISION :: LIGHTEST_CHARGE, HEAVIEST_CHARGE
552 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
553 threads_charge = 0.0d0
554 nb_in_l0 = 0
555 ierr = idll_iterator_begin( l0_omp_dll, idll_node )
556 DO WHILE ( associated ( idll_node ) )
557 nb_in_l0 = nb_in_l0 + 1
558 i_less_charged = 1
559 lightest_charge = threads_charge( 1 )
560 DO i = 2, nb_threads
561 IF ( threads_charge( i ) .LT. lightest_charge ) THEN
562 i_less_charged = i
563 lightest_charge = threads_charge( i )
564 END IF
565 END DO
566 threads_charge( i_less_charged ) =
567 & threads_charge( i_less_charged )
568 & +
569 & costs_mono_thread( step( idll_node%ELMT ) )
570 idll_node => idll_node%NEXT
571 END DO
572 nb_max_in_l0_acceptl0 = max(nb_max_in_l0_acceptl0, nb_in_l0)
573 lightest_charge = threads_charge( 1 )
574 heaviest_charge = threads_charge( 1 )
575 DO i = 2, nb_threads
576 IF ( threads_charge( i ) .LT. lightest_charge ) THEN
577 lightest_charge = threads_charge( i )
578 ELSEIF ( threads_charge( i ) .GT. heaviest_charge ) THEN
579 heaviest_charge = threads_charge( i )
580 END IF
581 END DO
582 cost_under = heaviest_charge
583 IF (keep(403) .EQ. 0) THEN
585 & (
586 & dble(lightest_charge)/(dble(heaviest_charge)+1.d-12)
587 & .GT.thresh_equilib .AND.
588 &
589 & factor_size_under_l0 .LE.
590 & factor_size_per_mpi * int(thresh_mem,8) / 100_8
591 &
592 & )
593 & .OR.
594 & ( nb_in_l0 .LT. nb_max_in_l0_acceptl0 .AND.
595 & lightest_charge .EQ. 0.0d0 )
596 & .OR. ( nb_in_l0 .EQ. 0 )
598 IF (associated(phys_l0_omp)) THEN
599 DEALLOCATE(phys_l0_omp)
600 nullify(phys_l0_omp)
601 ENDIF
602 ierr = idll_2_array( l0_omp_dll, phys_l0_omp, l_phys_l0_omp )
603 IF (ierr .EQ. -2) THEN
604 info(1) = -7
605 info(2) = l_phys_l0_omp
606 RETURN
607 ENDIF
608 END IF
609 ELSE
610 IF (cost_under + cost_above .LT. cost_total_best) THEN
611 IF (associated(phys_l0_omp)) THEN
612 DEALLOCATE(phys_l0_omp)
613 nullify(phys_l0_omp)
614 ENDIF
615 ierr = idll_2_array( l0_omp_dll, phys_l0_omp, l_phys_l0_omp )
616 cost_total_best = cost_under + cost_above
617 nb_repeat_acceptl0 = 100
618 END IF
619 nb_repeat_acceptl0 = nb_repeat_acceptl0- 1
620 mumps_ana_accept_l0_omp = (nb_repeat_acceptl0 .EQ. 0)
621 END IF
622 RETURN
logical function mumps_ana_accept_l0_omp()
Definition ana_omp_m.F:549
#define max(a, b)
Definition macros.h:21

◆ mumps_ana_finalize_l0_omp()

subroutine mumps_ana_l0_omp::mumps_ana_finalize_l0_omp
private

Definition at line 624 of file ana_omp_m.F.

625 IMPLICIT NONE
626 INTEGER :: INODE, OLD_INODE, I, J, K, LEAF, IERR
627 DOUBLE PRECISION :: LIGHTEST_CHARGE
628 INTEGER :: I_LESS_CHARGED
629 INTEGER(8) :: SUM_CB, MAX_MEM, MAX_MEM_ALL_THREADS
630 INTEGER :: MAX_TASK_PER_THREAD
631 TYPE ( IDLL_NODE_T ), POINTER :: IDLL_NODE
632 INTEGER, DIMENSION(:,:), ALLOCATABLE :: THREADS_TASKS
633 INTEGER, DIMENSION(:), ALLOCATABLE :: NB_TASK_PER_THREAD
634 INTEGER, DIMENSION(:), ALLOCATABLE :: INV_PERM_L0_OMP
635 EXTERNAL :: mumps_get_pool_length
636 INTEGER :: MUMPS_GET_POOL_LENGTH
637 IF (keep(402) .EQ. 0) THEN
638 l_virt_l0_omp = nb_threads + 1
639 ELSE
640 l_virt_l0_omp = l_phys_l0_omp + 1
641 END IF
642 lpool_a_l0_omp = mumps_get_pool_length(na(1),keep(1),keep8(1))
643 ALLOCATE ( virt_l0_omp( max(l_virt_l0_omp,1) ),
644 & virt_l0_omp_mapping( max(l_virt_l0_omp,1) ),
645 & stat=ierr )
646 IF(ierr.GT.0) THEN
647 info(1)=-7
648 info(2)=2*max(l_virt_l0_omp,1)
649 IF (lpok) WRITE(lp,150) 'id%VIRT_L0_OMP[_MAPPING]'
650 GOTO 300
651 ENDIF
652 ALLOCATE ( perm_l0_omp( max(l_phys_l0_omp,1) ), stat=ierr )
653 IF(ierr.GT.0) THEN
654 info(1)=-7
655 info(2)=max(l_phys_l0_omp,1)
656 IF (lpok) WRITE(lp,150) 'id%PERM_L0_OMP'
657 GOTO 300
658 ENDIF
659 ALLOCATE ( ptr_leafs_l0_omp( l_phys_l0_omp + 1 ), stat=ierr )
660 IF(ierr.GT.0) THEN
661 info(1)=-7
662 info(2)=max(l_phys_l0_omp,1)
663 IF (lpok) WRITE(lp,150) 'id%PTR_LEAFS_L0_OMP'
664 GOTO 300
665 ENDIF
666 ALLOCATE ( ipool_a_l0_omp( lpool_a_l0_omp ), stat=ierr )
667 IF(ierr.GT.0) THEN
668 info(1)=-7
669 info(2)=lpool_a_l0_omp
670 IF (lpok) WRITE(lp,150) 'id%IPOOL_A_L0_OMP'
671 GOTO 300
672 ENDIF
673 ALLOCATE ( nb_task_per_thread( nb_threads ), stat=ierr )
674 IF(ierr.GT.0) THEN
675 info(1)=-7
676 info(2)=nb_threads
677 IF (lpok) WRITE(lp,150) 'NB_TASK_PER_THREAD'
678 GOTO 300
679 ENDIF
680 ALLOCATE ( inv_perm_l0_omp( l_phys_l0_omp ), stat=ierr )
681 IF(ierr.GT.0) THEN
682 WRITE(*,*) "Allocation Error in MUMPS_ANA_FINALIZE_L0_OMP"
683 CALL mumps_abort()
684 ENDIF
685 nb_task_per_thread = 0
686 threads_charge = 0.0d0
687 DO i = 1, l_phys_l0_omp
688 i_less_charged = 1
689 lightest_charge = threads_charge( 1 )
690 DO j = 2, nb_threads
691 IF ( threads_charge( j ) .LT. lightest_charge ) THEN
692 i_less_charged = j
693 lightest_charge = threads_charge( j )
694 IF (threads_charge( j ) .EQ. 0) THEN
695 EXIT
696 ENDIF
697 END IF
698 END DO
699 nb_task_per_thread( i_less_charged ) =
700 & nb_task_per_thread( i_less_charged ) + 1
701 IF (keep(402) .NE. 0) THEN
702 virt_l0_omp_mapping(i) = i_less_charged
703 ENDIF
704 threads_charge( i_less_charged ) =
705 & threads_charge( i_less_charged )
706 & +
707 & costs_mono_thread( step( phys_l0_omp( i ) ) )
708 END DO
709 IF (keep(402) .EQ. 0) THEN
710 DO i = 1, nb_threads
711 virt_l0_omp_mapping(i) = i
712 ENDDO
713 ENDIF
714 virt_l0_omp_mapping(l_virt_l0_omp) = -999999
715 max_task_per_thread = 0
716 DO i = 1, nb_threads
717 max_task_per_thread = max(max_task_per_thread,
718 & nb_task_per_thread( i ) )
719 END DO
720 ALLOCATE ( threads_tasks( nb_threads, max_task_per_thread ),
721 & stat=ierr )
722 IF(ierr.GT.0) THEN
723 info(1)=-7
724 info(2)=nb_threads*max_task_per_thread
725 IF (lpok) WRITE(lp,150) 'THREADS_TASK'
726 GOTO 300
727 ENDIF
728 nb_task_per_thread = 0
729 threads_charge = 0.0d0
730 threads_tasks = 0
731 DO i = 1, l_phys_l0_omp
732 i_less_charged = 1
733 lightest_charge = threads_charge( 1 )
734 DO j = 2, nb_threads
735 IF ( threads_charge( j ) .LT. lightest_charge ) THEN
736 i_less_charged = j
737 lightest_charge = threads_charge( j )
738 END IF
739 END DO
740 nb_task_per_thread( i_less_charged ) =
741 & nb_task_per_thread( i_less_charged ) + 1
742 threads_tasks( i_less_charged, nb_task_per_thread
743 & ( i_less_charged ) ) = phys_l0_omp( i )
744 threads_charge( i_less_charged ) =
745 & threads_charge( i_less_charged )
746 & +
747 & costs_mono_thread( step( phys_l0_omp( i ) ) )
748 END DO
749 max_mem_all_threads = 0_8
750 DO i = 1, nb_threads
751 sum_cb = 0_8
752 max_mem = 0_8
753 DO j = 1, nb_task_per_thread( i )
754 max_mem = max( max_mem, subtree_memory( step(
755 & threads_tasks(i,j) ) ) + sum_cb )
756 sum_cb = sum_cb
757 & +schur_memory(step(threads_tasks(i,j)))
758 & +subtree_factor_memory(
759 & step(threads_tasks(i,j)))
760 END DO
761 max_mem = max( max_mem, sum_cb )
762 IF (keep(402) .EQ. 0) THEN
763 threads_charge( i ) = dble(max_mem)
764 END IF
765 max_mem_all_threads = max( max_mem_all_threads, max_mem )
766 END DO
767 max_mem_all_threads = ( max_mem_all_threads
768 & * int(100 + keep(12),8) ) / 100_8
769 thread_la = max(max_mem_all_threads,6_8)
770 IF (keep(402) .EQ. 0) THEN
771 k = 1
772 DO i = 1, nb_threads
773 virt_l0_omp(i) = k
774 DO j = 1, nb_task_per_thread( i )
775 phys_l0_omp(k) = threads_tasks(i,j)
776 k = k + 1
777 END DO
778 END DO
779 virt_l0_omp(nb_threads+1) = k
780 ELSE
781 DO i = 1, l_virt_l0_omp
782 virt_l0_omp(i) = i
783 END DO
784 END IF
785 DO i = 1, l_phys_l0_omp
786 inv_perm_l0_omp( i ) = i
787 END DO
788 IF ( l_phys_l0_omp .GT. 1 ) THEN
789 CALL mumps_quick_sort_phys_l0( n, step(1), phys_l0_omp(1),
790 & inv_perm_l0_omp, l_phys_l0_omp, 1, l_phys_l0_omp )
791 ENDIF
792 DO i = 1, l_phys_l0_omp
793 perm_l0_omp( inv_perm_l0_omp( i ) ) = i
794 END DO
795 j = nbleaf_myid
796 ptr_leafs_l0_omp ( 1 ) = j
797 DO i = 1, l_phys_l0_omp
798 old_inode = 0
799 inode = phys_l0_omp( i )
800 DO WHILE ( inode .NE. 0 )
801 old_inode = inode
802 DO WHILE ( inode .GT. 0 )
803 inode = fils( inode )
804 END DO
805 inode = - inode
806 END DO
807 DO WHILE ( ipool_b_l0_omp( j ) .NE. old_inode )
808 j = j - 1
809 END DO
810 j = j - 1
811 ptr_leafs_l0_omp( i + 1 ) = j
812 END DO
813 cp_nstk_steps(:) = nstk_steps(:)
814 ipool_a_l0_omp = 0
815 leaf = 1
816 ierr = idll_iterator_begin( leafs_above_l0_omp_dll, idll_node )
817 DO WHILE ( associated( idll_node ) )
818 ipool_a_l0_omp( leaf ) = idll_node%ELMT
819 leaf = leaf + 1
820 idll_node => idll_node%NEXT
821 END DO
822 DO i = 1 , l_phys_l0_omp
823 IF ( dad( step( phys_l0_omp(i) ) ) .NE. 0 ) THEN
824 cp_nstk_steps( step( dad( step( phys_l0_omp(i) ) ) ) ) =
825 & cp_nstk_steps( step( dad( step( phys_l0_omp(i) ) ) ) )-1
826 IF (cp_nstk_steps(step(dad(step(phys_l0_omp(i))))) .EQ. 0)THEN
827 ipool_a_l0_omp( leaf ) = dad(step(phys_l0_omp( i )))
828 leaf = leaf + 1
829 END IF
830 END IF
831 END DO
832 leaf = leaf - 1
833 ipool_a_l0_omp(lpool_a_l0_omp) = leaf
834 ipool_a_l0_omp(lpool_a_l0_omp-1) = 0
835 ipool_a_l0_omp(lpool_a_l0_omp-2) = 0
836 IF (leaf .GT. 1) THEN
837 CALL mumps_quick_sort_ipool_po( n, step(1),
838 & ipool_a_l0_omp(1), leaf, 1, leaf )
839 ENDIF
840 300 CONTINUE
841 IF (allocated(nb_task_per_thread)) DEALLOCATE (nb_task_per_thread)
842 IF (allocated(inv_perm_l0_omp )) DEALLOCATE ( inv_perm_l0_omp )
843 IF (allocated(threads_tasks )) DEALLOCATE (threads_tasks )
844 RETURN
845 150 FORMAT(
846 & /' ** ALLOC FAILURE IN MUMPS_ANA_FINALIZE_L0_OMP FOR ',
847 & a30)
#define mumps_abort
Definition VE_Metis.h:25
recursive subroutine mumps_quick_sort_phys_l0(n, step, intlist, invperm, taille, lo, hi)
Definition ana_omp_m.F:1039
recursive subroutine mumps_quick_sort_ipool_po(n, step, intlist, taille, lo, hi)
Definition ana_omp_m.F:1003
integer function mumps_get_pool_length(max_active_nodes, keep, keep8)

◆ mumps_ana_free_l0_workspace()

subroutine mumps_ana_l0_omp::mumps_ana_free_l0_workspace
private

Definition at line 849 of file ana_omp_m.F.

850 INTEGER :: IERR
851 IF (allocated(threads_charge)) DEALLOCATE(threads_charge )
852 IF (allocated(cp_nstk_steps )) DEALLOCATE(cp_nstk_steps )
853 IF (allocated(costs_mono_thread)) DEALLOCATE(costs_mono_thread )
854 IF (allocated(costs_multi_thread)) DEALLOCATE(costs_multi_thread)
855 IF (allocated(schur_memory)) DEALLOCATE(schur_memory )
856 IF (allocated(subtree_factor_memory))
857 & DEALLOCATE(subtree_factor_memory)
858 IF (allocated(subtree_memory)) DEALLOCATE(subtree_memory )
859 ierr = idll_destroy( leafs_above_l0_omp_dll )
860 ierr = idll_destroy( l0_omp_dll )
861 RETURN

◆ mumps_ana_initialize_l0_omp()

subroutine mumps_ana_l0_omp::mumps_ana_initialize_l0_omp

Definition at line 127 of file ana_omp_m.F.

128 IMPLICIT NONE
129 INTEGER :: INODE, IFATH, IGRANDFATH, SPECIAL_ROOT,
130 & NFRONT, NPIV, LEAF, VARNUM, IERR
131 LOGICAL :: INODE_IS_A_LEAF
132 INTEGER(8) :: NFRONT8, NPIV8
133 INTEGER(8) :: SUM_CB, MAX_MEM
134 DOUBLE PRECISION :: COST_NODE
135 LOGICAL :: IN_L0INIT, SKIP_ABOVE
136 LOGICAL, EXTERNAL :: MUMPS_ROOTSSARBR, MUMPS_IN_OR_ROOT_SSARBR
137 INTEGER, EXTERNAL :: MUMPS_GET_POOL_LENGTH, MUMPS_TYPENODE
138 IF (associated(ipool_b_l0_omp)) THEN
139 WRITE(*,*) " Internal error 1 MUMPS_ANA_INITIALIZE_L0_OMP"
140 CALL mumps_abort()
141 ENDIF
142 IF (associated(ipool_a_l0_omp)) THEN
143 WRITE(*,*) " Internal error 2 MUMPS_ANA_INITIALIZE_L0_OMP"
144 CALL mumps_abort()
145 ENDIF
146 IF (associated(virt_l0_omp)) THEN
147 WRITE(*,*) " Internal error 3 MUMPS_ANA_INITIALIZE_L0_OMP"
148 CALL mumps_abort()
149 ENDIF
150 IF (associated(virt_l0_omp_mapping)) THEN
151 WRITE(*,*) " Internal error 4 MUMPS_ANA_INITIALIZE_L0_OMP"
152 CALL mumps_abort()
153 ENDIF
154 IF (associated(perm_l0_omp)) THEN
155 WRITE(*,*) " Internal error 5 MUMPS_ANA_INITIALIZE_L0_OMP"
156 CALL mumps_abort()
157 ENDIF
158 IF (associated(ptr_leafs_l0_omp)) THEN
159 WRITE(*,*) " Internal error 6 MUMPS_ANA_INITIALIZE_L0_OMP"
160 CALL mumps_abort()
161 ENDIF
162 ierr = idll_create( l0_omp_dll )
163 ierr = idll_create( leafs_above_l0_omp_dll )
164 ALLOCATE( threads_charge( nb_threads ), stat=ierr )
165 IF (ierr .GT. 0) THEN
166 info(1) = -7
167 info(2) = nb_threads
168 IF (lpok) WRITE(lp,150) 'THREADS_CHARGE'
169 GOTO 500
170 ENDIF
171 ALLOCATE( costs_mono_thread( nsteps ), stat=ierr )
172 IF(ierr.GT.0) THEN
173 info(1) = -7
174 info(2) = nsteps
175 IF (lpok) WRITE(lp, 150) ' COSTS_MONO_THREAD'
176 GOTO 500
177 ENDIF
178 ALLOCATE( costs_multi_thread( nsteps ), stat=ierr )
179 IF(ierr.GT.0) THEN
180 info(1) = -7
181 info(2) = nsteps
182 IF (lpok) WRITE(lp, 150) ' COSTS_MULTI_THREAD'
183 GOTO 500
184 ENDIF
185 ALLOCATE( schur_memory( nsteps ), stat=ierr )
186 IF(ierr.GT.0) THEN
187 info(1) = -7
188 info(2) = nsteps
189 IF (lpok) WRITE(lp, 150) ' SCHUR_MEMORY'
190 GOTO 500
191 ENDIF
192 ALLOCATE( subtree_factor_memory( nsteps ), stat=ierr )
193 IF(ierr.GT.0) THEN
194 info(1) = -7
195 info(2) = nsteps
196 IF (lpok) WRITE(lp, 150) ' SCHUR_FACTOR_MEMORY'
197 GOTO 500
198 ENDIF
199 ALLOCATE( subtree_memory( nsteps ), stat=ierr )
200 IF(ierr.GT.0) THEN
201 info(1) = -7
202 info(2) = nsteps
203 IF (lpok) WRITE(lp, 150) ' SUBTREE_MEMORY'
204 GOTO 500
205 ENDIF
206 ALLOCATE( cp_nstk_steps( nsteps ), stat=ierr )
207 IF(ierr.GT.0) THEN
208 info(1) = -7
209 info(2) = nsteps
210 IF (lpok) WRITE(lp, 150) ' CP_NSTK_STEPS'
211 GOTO 500
212 ENDIF
213 lpool_b_l0_omp=mumps_get_pool_length(na(1),keep(1),keep8(1))
214 ALLOCATE( ipool_b_l0_omp( lpool_b_l0_omp) , stat=ierr )
215 IF(ierr.GT.0) THEN
216 info(1) = -7
217 info(2) = nsteps
218 IF (lpok) WRITE(lp, 150) ' id%IPOOL_B_L0_OMP'
219 GOTO 500
220 ENDIF
221 costs_mono_thread = 0.0d0
222 costs_multi_thread = 0.0d0
223 cost_under = 0.0d0
224 cost_above = 0.0d0
225 cost_total_best = huge(cost_total_best)
226 schur_memory = 0_8
227 subtree_factor_memory = 0_8
228 subtree_memory = 0_8
229 factor_size_under_l0 = 0_8
230 cp_nstk_steps(:) = nstk_steps(:)
231 IF (keep(403).NE.0) THEN
232 CALL read_bench( arith, keep(50) )
233 ENDIF
234 CALL mumps_init_pool_dist(n, leaf,
235 & myid_nodes,
236 & keep(199), na(1), lna,
237 & keep(1), keep8(1), step(1),
238 & procnode_steps(1),
239 & ipool_b_l0_omp(1), lpool_b_l0_omp)
240 leaf = leaf - 1
241 nbleaf_myid = leaf
242 IF (nbleaf_myid .EQ. 0) THEN
243 RETURN
244 ENDIF
245 90 CONTINUE
246 inode = ipool_b_l0_omp( leaf )
247 leaf = leaf - 1
248 inode_is_a_leaf=.true.
249 95 CONTINUE
250 nfront = nd( step( inode ) )
251 nfront8= int(nfront,8)
252 npiv = 0
253 varnum = inode
254 DO WHILE (varnum .GT. 0 )
255 npiv = npiv + 1
256 varnum = fils( varnum )
257 END DO
258 npiv8=int(npiv,8)
259 varnum = - varnum
260 IF (keep(50) .EQ. 0) THEN
261 schur_memory( step( inode ) ) =
262 & (nfront8 - npiv8)*(nfront8 - npiv8)
263 IF (keep(251) .EQ. 0) THEN
264 subtree_factor_memory( step( inode ) ) = nfront8 * nfront8
265 & - schur_memory( step( inode ) )
266 ELSE
267 subtree_factor_memory( step( inode ) ) = 0_8
268 END IF
269 ELSE
270 schur_memory( step( inode ) ) =
271 & (nfront8 - npiv8)*(nfront8 + 1_8 - npiv8)/2_8
272 IF (keep(251) .EQ. 0) THEN
273 subtree_factor_memory( step( inode ) ) = nfront8 * npiv8
274 ELSE
275 subtree_factor_memory( step( inode ) ) = 0_8
276 END IF
277 END IF
278 sum_cb = 0_8
279 max_mem = 0_8
280 IF (keep(403) .EQ. 0) THEN
281 CALL mumps_get_flops_cost ( nfront, npiv, npiv,
282 & sym, 1, cost_node )
283 costs_mono_thread( step( inode ) ) = cost_node
284 ELSE
285 CALL cost_bench (npiv, nfront-npiv, 1, keep(50), cost_node)
286 costs_mono_thread( step( inode ) ) = cost_node
287 CALL cost_bench (npiv,nfront-npiv,nb_threads,keep(50),cost_node)
288 costs_multi_thread( step( inode ) ) = cost_node
289 END IF
290 DO WHILE (varnum .GT. 0 )
291 costs_mono_thread( step( inode ) ) =
292 & costs_mono_thread( step( inode ) )
293 & +
294 & costs_mono_thread( step( varnum ) )
295 max_mem = max(max_mem,
296 & subtree_memory( step( varnum ) ) + sum_cb )
297 sum_cb = sum_cb + schur_memory( step( varnum ) ) +
298 & subtree_factor_memory( step( varnum ) )
299 subtree_factor_memory( step( inode ) ) =
300 & subtree_factor_memory( step( inode ) )
301 & + subtree_factor_memory( step( varnum ) )
302 varnum = frere( step( varnum ) )
303 END DO
304 subtree_memory( step( inode ) ) =
305 & max( max_mem, nfront8*nfront8 + sum_cb )
306 ifath = dad( step( inode ) )
307 IF (ifath .NE. 0) THEN
308 igrandfath = dad( step( ifath ) )
309 ELSE
310 igrandfath = 0
311 ENDIF
312 special_root = max(keep(38), keep(20))
313 skip_above = .false.
314 in_l0init = .false.
315 IF ( inode .EQ. special_root ) THEN
316 in_l0init = .false.
317 IF (inode_is_a_leaf) THEN
318 skip_above = .true.
319 GOTO 80
320 ELSE
321 WRITE(*,*) " Internal error 1 in MUMPS_ANA_INITIALIZE_L0_OMP",
322 & inode, special_root
323 CALL mumps_abort()
324 ENDIF
325 ENDIF
326 IF ( ifath .NE. 0 .AND. ifath .EQ. keep(38) ) THEN
327 in_l0init = .false.
328 IF (inode_is_a_leaf) THEN
329 skip_above = .true.
330 GOTO 80
331 ELSE
332 WRITE(*,*) " Internal error 2 in MUMPS_ANA_INITIALIZE_L0_OMP",
333 & inode, ifath, keep(38)
334 CALL mumps_abort()
335 ENDIF
336 ENDIF
337 IF ( slavef_during_mapping > 1 ) THEN
339 & procnode_steps( step( inode ) ), keep(199) )
340 & .OR. .NOT. mumps_in_or_root_ssarbr(
341 & procnode_steps( step( inode ) ), keep(199) )
342 &) THEN
343 in_l0init = .false.
344 IF (inode_is_a_leaf) THEN
345 skip_above = .true.
346 GOTO 80
347 ELSE
348 WRITE(*,*)
349 & " Internal error 3 in MUMPS_ANA_INITIALIZE_L0_OMP",
350 & inode
351 CALL mumps_abort()
352 ENDIF
353 ENDIF
354 ENDIF
355 IF (ifath.NE.0) THEN
356 IF ( mumps_typenode(step(ifath),keep(199)).EQ.2) THEN
357 in_l0init = .false.
358 IF (inode_is_a_leaf) THEN
359 skip_above = .true.
360 GOTO 80
361 ELSE
362 WRITE(*,*)
363 & " Internal error 5 in MUMPS_ANA_INITIALIZE_L0_OMP",
364 & inode, ifath
365 CALL mumps_abort()
366 ENDIF
367 ENDIF
368 ENDIF
369 IF ( mumps_typenode(step(inode),keep(199)).EQ.2) THEN
370 in_l0init = .false.
371 IF (inode_is_a_leaf) THEN
372 skip_above = .true.
373 GOTO 80
374 ELSE
375 WRITE(*,*)
376 & " Internal error 6 in MUMPS_ANA_INITIALIZE_L0_OMP",
377 & inode
378 CALL mumps_abort()
379 ENDIF
380 ENDIF
381 IF ( ifath .EQ. 0 ) THEN
382 in_l0init = .true.
383 GOTO 80
384 ELSE
385 IF ( ifath .EQ. keep(20) ) THEN
386 in_l0init = .true.
387 GOTO 80
388 ENDIF
389 IF ( igrandfath .EQ. keep(38) .AND. keep(38) .NE. 0 ) THEN
390 in_l0init = .true.
391 GOTO 80
392 ENDIF
393 IF ( slavef_during_mapping > 1 ) THEN
395 & procnode_steps( step( ifath ) ), keep(199) )) THEN
396 in_l0init = .true.
397 GOTO 80
398 ENDIF
399 ENDIF
400 ENDIF
401 80 CONTINUE
402 IF (.NOT. skip_above) THEN
403 IF (keep(50).EQ.0) THEN
404 factor_size_under_l0 = factor_size_under_l0 +
405 & npiv8 * ( nfront8 + nfront8 - npiv8 )
406 ELSE
407 factor_size_under_l0 = factor_size_under_l0 +
408 & nfront8 * npiv8
409 ENDIF
410 ENDIF
411 IF ( in_l0init ) THEN
412 CALL l0_insert_node ( l0_omp_dll, inode )
413 ELSE IF ( skip_above ) THEN
414 ierr = idll_push_back( leafs_above_l0_omp_dll, inode )
415 IF ( .NOT. inode_is_a_leaf ) THEN
416 WRITE(*,*)
417 & " Internal error 7 in MUMPS_ANA_INITIALIZE_L0_OMP",
418 & inode
419 CALL mumps_abort()
420 ENDIF
421 ipool_b_l0_omp(leaf+1) = -inode
422 ELSE
423 cp_nstk_steps( step( ifath ) ) =
424 & cp_nstk_steps( step( ifath ) ) - 1
425 IF ( cp_nstk_steps( step( ifath ) ) .EQ. 0 ) THEN
426 inode = ifath
427 inode_is_a_leaf = .false.
428 GOTO 95
429 ENDIF
430 END IF
431 IF ( leaf .GT. 0 ) THEN
432 GOTO 90
433 END IF
434 500 CONTINUE
435 RETURN
436 150 FORMAT(
437 & /' ** ALLOC FAILURE IN MUMPS_ANA_INITIALIZE_L0_OMP FOR ',
438 & a30)
subroutine cost_bench(npiv, nschur, nb_core, sym, cost)
Definition ana_omp_m.F:897
subroutine read_bench(arith, k50)
Definition ana_omp_m.F:864
logical function mumps_in_or_root_ssarbr(procinfo_inode, k199)
subroutine mumps_init_pool_dist(n, leaf, myid_nodes, k199, na, lna, keep, keep8, step, procnode_steps, ipool, lpool)
logical function mumps_rootssarbr(procinfo_inode, k199)
integer function mumps_typenode(procinfo_inode, k199)

◆ mumps_ana_omp_return()

subroutine mumps_ana_omp_return

Definition at line 1078 of file ana_omp_m.F.

1079#if defined(BLR_MT)
1080#if ! defined(_OPENMP)
1081 compilation failure: -dblr_mt requires compilation with openmp
1082 please modify makefile.inc and do 'make clean; make'
1083#endif
1084#endif
1085 RETURN

◆ mumps_quick_sort_ipool_po()

recursive subroutine mumps_quick_sort_ipool_po ( integer n,
integer, dimension( n ) step,
integer, dimension( taille ) intlist,
integer taille,
integer lo,
integer hi )

Definition at line 1001 of file ana_omp_m.F.

1003 IMPLICIT NONE
1004 INTEGER N, TAILLE
1005 INTEGER STEP( N )
1006 INTEGER INTLIST( TAILLE )
1007 INTEGER LO, HI
1008 INTEGER I,J
1009 INTEGER ISWAP, PIVOT
1010 i = lo
1011 j = hi
1012 pivot = step(intlist((i+j)/2))
1013 10 IF (step(intlist(i)) > pivot) THEN
1014 i=i+1
1015 GOTO 10
1016 ENDIF
1017 20 IF (step(intlist(j)) < pivot) THEN
1018 j=j-1
1019 GOTO 20
1020 ENDIF
1021 IF (i < j) THEN
1022 iswap = intlist(i)
1023 intlist(i) = intlist(j)
1024 intlist(j)=iswap
1025 ENDIF
1026 IF ( i <= j) THEN
1027 i = i+1
1028 j = j-1
1029 ENDIF
1030 IF ( i <= j ) GOTO 10
1031 IF ( lo < j ) CALL mumps_quick_sort_ipool_po(n, step,
1032 & intlist, taille, lo, j)
1033 IF ( i < hi ) CALL mumps_quick_sort_ipool_po(n, step,
1034 & intlist, taille, i, hi)
1035 RETURN

◆ mumps_quick_sort_phys_l0()

recursive subroutine mumps_quick_sort_phys_l0 ( integer n,
integer, dimension( n ) step,
integer, dimension( taille ) intlist,
integer, dimension( taille ) invperm,
integer taille,
integer lo,
integer hi )

Definition at line 1037 of file ana_omp_m.F.

1039 IMPLICIT NONE
1040 INTEGER N, TAILLE
1041 INTEGER STEP( N )
1042 INTEGER INTLIST( TAILLE )
1043 INTEGER INVPERM( TAILLE )
1044 INTEGER LO, HI
1045 INTEGER I,J
1046 INTEGER ISWAP, PIVOT
1047 INTEGER dswap
1048 i = lo
1049 j = hi
1050 pivot = step(intlist((i+j)/2))
1051 10 IF (step(intlist(i)) < pivot) THEN
1052 i=i+1
1053 GOTO 10
1054 ENDIF
1055 20 IF (step(intlist(j)) > pivot) THEN
1056 j=j-1
1057 GOTO 20
1058 ENDIF
1059 IF (i < j) THEN
1060 iswap = intlist(i)
1061 intlist(i) = intlist(j)
1062 intlist(j)=iswap
1063 dswap = invperm(i)
1064 invperm(i) = invperm(j)
1065 invperm(j) = dswap
1066 ENDIF
1067 IF ( i <= j) THEN
1068 i = i+1
1069 j = j-1
1070 ENDIF
1071 IF ( i <= j ) GOTO 10
1072 IF ( lo < j ) CALL mumps_quick_sort_phys_l0(n, step,
1073 & intlist, invperm, taille, lo, j)
1074 IF ( i < hi ) CALL mumps_quick_sort_phys_l0(n, step,
1075 & intlist, invperm, taille, i, hi)
1076 RETURN
subroutine dswap(n, dx, incx, dy, incy)
DSWAP
Definition dswap.f:82

◆ read_bench()

subroutine mumps_ana_l0_omp::read_bench ( character(1), intent(in) arith,
integer, intent(in) k50 )
private

Definition at line 863 of file ana_omp_m.F.

864 IMPLICIT NONE
865 INTEGER, INTENT(in) :: K50
866 CHARACTER(1), INTENT(in) :: ARITH
867 INTEGER NLINES, INDEX_NPIV, INDEX_NSCHUR, NB_CORE
868 INTEGER V, S, OLD_V, OLD_S, I
869 parameter(nlines=2812)
870 DOUBLE PRECISION :: AUX
871 CHARACTER(1) :: K50_STR
872 index_npiv = 0
873 index_nschur = 0
874 old_v = -1
875 old_s = -1
876 WRITE(k50_str,'(I1)') k50
877 OPEN(1,file=arith//'benchmark_sym_'//k50_str//'.csv')
878 DO i=1,nlines
879 READ(1,*) v, s, nb_core, aux
880 IF (v .NE. old_v) THEN
881 index_npiv = index_npiv + 1
882 old_v = v
883 END IF
884 IF (s .GT. old_s) THEN
885 index_nschur = index_nschur + 1
886 old_s = s
887 ELSEIF (s .LT. old_s) THEN
888 index_nschur = 1
889 old_s = s
890 END IF
891 bench(index_npiv, index_nschur, nb_core) = aux
892 END DO
893 CLOSE(1)
894 RETURN