1608 IMPLICIT NONE
1609 include 'mpif.h'
1610 include 'mumps_tags.h'
1611 INTEGER :: IERR_MPI, MASTER
1612 parameter( master = 0 )
1613 INTEGER :: STATUS(MPI_STATUS_SIZE)
1614 INTEGER, INTENT(IN) :: MYID, COMM, NPROCS
1615 TYPE(LMATRIX_T) :: LUMAT
1616 INTEGER, INTENT(IN) :: N, NSTEPS, LNA, SYM,
1617 & HALO_DEPTH, SEP_SIZE, GROUP_SIZE
1618 INTEGER, INTENT(IN) :: SIZEMAPCOL
1619 INTEGER, INTENT(IN) :: MAPCOL(SIZEMAPCOL)
1620 INTEGER, INTENT(INOUT) :: IFLAG, IERROR
1621 INTEGER, INTENT(INOUT) :: K38, K20, K264, K265
1622 INTEGER, INTENT(IN) :: K482, K10, MAXFRONT, K60, K54
1623 INTEGER, INTENT(IN) :: LP
1624 LOGICAL, INTENT(IN) :: LPOK
1625 INTEGER, INTENT(OUT) :: K142
1626 INTEGER, INTENT(IN) :: ICNTL(60)
1627 INTEGER, POINTER :: FILS(:), FRERE_STEPS(:), STEP(:),
1628 & NA(:), DAD_STEPS(:), LRGROUPS(:)
1629 INTEGER, INTENT(IN) :: SIZEOFBLOCKS(N)
1630 INTEGER, INTENT(IN) :: K472, K469
1631 INTEGER :: K482_LOC, K469_LOC, K38ou20, K142_GLOB
1632 INTEGER :: I, F, PV, NV, NVEXPANDED, NODE
1633 REAL :: COMPRESS_RATIO
1634 LOGICAL :: PVSCHANGED
1635 INTEGER :: PVSCHANGED_INT, PVSCHANGED_INT_GLOB, IPROC
1636 INTEGER :: NBGROUPS, NBGROUPS_local
1637 INTEGER, ALLOCATABLE, DIMENSION (:) :: PVS, WORK
1638 INTEGER :: NBGROUPS_sent
1639 INTEGER :: NBNODES_LOC, SIZE_SENT, ISHIFT,
1640 & MSGSOU, ILOOP
1641 INTEGER, ALLOCATABLE, DIMENSION (:) :: TRACE, WORKH,
1642 & GEN2HALO
1643 INTEGER, ALLOCATABLE, DIMENSION (:) :: TRACE_PRV, WORKH_PRV,
1644 & GEN2HALO_PRV
1645 INTEGER :: STEP_SCALAPACK_ROOT
1646 INTEGER :: GROUP_SIZE2, IERR, OMP_NUM
1647 INTEGER :: IERR_PRIV
1648 LOGICAL :: MAPCOL_PROVIDED
1649#if defined(ptscotch) || defined(scotch)
1650 INTEGER :: VSCOTCH
1651 LOGICAL :: SCOTCH_IS_THREAD_SAFE
1652 INTEGER :: PTHREAD_NUMBER, NOMP
1653#endif
1654 mapcol_provided = (mapcol(1).GE.0)
1655 k38ou20=
max(k38,k20)
1656 IF (k38ou20.GT.0) THEN
1657 step_scalapack_root = step(k38ou20)
1658 ELSE
1659 step_scalapack_root = 0
1660 ENDIF
1661 IF (mapcol_provided) THEN
1662 CALL mpi_bcast( fils(1), n, mpi_integer,
1663 & master, comm, ierr )
1664 ENDIF
1665 IF((k482.LE.0) .OR. (k482.GT.3)) THEN
1666#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4)
1667 k482_loc = 1
1668#elif defined(ptscotch) || defined(scotch)
1669 k482_loc = 2
1670#else
1671 k482_loc = 3
1672#endif
1673 ELSE IF (k482.EQ.1) THEN
1674#if !defined(parmetis) && !defined(metis) && !defined(parmetis3) && !defined(metis4)
1675#if defined(ptscotch) || defined(scotch)
1676 k482_loc = 2
1677#else
1678 k482_loc = 3
1679#endif
1680#else
1681 k482_loc = 1
1682#endif
1683 ELSE IF (k482.EQ.2) THEN
1684#if !defined(ptscotch) && !defined(scotch)
1685#if defined(parmetis) || defined(metis) || defined(parmetis3) || defined(metis4)
1686 k482_loc = 1
1687#else
1688 k482_loc = 3
1689#endif
1690#else
1691 k482_loc = 2
1692#endif
1693 ELSE IF (k482.EQ.3) THEN
1694 k482_loc = 3
1695 END IF
1696 k469_loc = k469
1697#if defined(ptscotch) || defined(scotch)
1698 scotch_is_thread_safe = .false.
1699 IF (k482_loc.EQ.2) THEN
1700 CALL mumps_scotch_version (vscotch)
1701 IF (vscotch.GE.7) scotch_is_thread_safe=.true.
1702 ENDIF
1703 IF (k482_loc.EQ.2.AND.(.NOT.scotch_is_thread_safe) ) THEN
1704 k469_loc = 1
1705 ENDIF
1706#endif
1707 nbgroups = 0
1708 k142 = 0
1709 ALLOCATE( pvs(nsteps), stat=ierr)
1710 IF (ierr.GT.0) THEN
1711 iflag = -7
1712 ierror = nsteps
1713 IF (lpok) WRITE(lp,*) " Error allocate integer array of ",
1714 * "size: ", ierror
1715 GOTO 491
1716 ENDIF
1717 lrgroups = -1
1718 IF (k469_loc.NE.2) THEN
1719 ALLOCATE(trace(n), workh(n), gen2halo(n),
1720 & stat=ierr)
1721 IF (ierr.GT.0) THEN
1722 IF (lpok) WRITE(lp,*) " Error allocate integer array of ",
1723 * "size: ", 3*n
1724 iflag = -7
1725 ierror = 3*n
1726 GOTO 491
1727 ENDIF
1728 ENDIF
1729491 CONTINUE
1731 & comm, myid )
1732 IF (iflag.LT.0) GOTO 501
1733#if defined(ptscotch) || defined(scotch)
1734 nomp=0
1735 IF (k482_loc.EQ.2) THEN
1736
1737 IF (nomp .GT. 0) THEN
1738 CALL mumps_scotch_get_pthread_number (pthread_number)
1739 nomp =1
1740 CALL mumps_scotch_set_pthread_number (nomp)
1741 ENDIF
1742 ENDIF
1743#endif
1744 k142 = 0
1745 pvschanged = .false.
1746 omp_num = 1
1747
1748 omp_num =
min(omp_num,8)
1749
1750
1751
1752
1753
1754
1755 ALLOCATE(work(2*maxfront+1), stat=ierr_priv)
1756 IF (ierr_priv.GT.0) THEN
1757 IF (lpok) WRITE(lp,*) " Error allocate integer array of ",
1758 * "size: ", 2*maxfront+1
1759
1760 iflag = -7
1761
1762
1763 ierror = 2*maxfront+1
1764
1765 ENDIF
1766 IF (ierr_priv .EQ. 0 .AND. k469_loc.EQ.2) THEN
1767 ALLOCATE(trace_prv(n), workh_prv(n), gen2halo_prv(n),
1768 & stat=ierr_priv)
1769 IF (ierr_priv.GT.0) THEN
1770 IF (lpok) WRITE(lp,*) " Error allocate integer array of ",
1771 * "size: ", 3*n
1772
1773 iflag = -7
1774
1775
1776 ierror = 3*n
1777
1778 ENDIF
1779 ENDIF
1780
1781 IF (iflag .LT. 0 ) THEN
1782 GOTO 498
1783 ENDIF
1784 IF (k469_loc.EQ.2) THEN
1785 trace_prv = 0
1786 ELSE
1787
1788 trace = 0
1789
1790 ENDIF
1791
1792 DO i = 1,n
1793 IF (step(i).GT.0) pvs(step(i)) = i
1794 END DO
1795
1796
1797 DO node=nsteps,1,-1
1798 IF (iflag.LT.0) cycle
1799 IF (mapcol_provided) THEN
1800 iproc = mapcol(node)
1801 IF (iproc.NE.myid) THEN
1802 pvs(node) = -999
1803 cycle
1804 ENDIF
1805 ENDIF
1806 pv = pvs(node)
1807 nv = 0
1808 nvexpanded = 0
1809 f = pv
1810 DO WHILE(f .GT. 0)
1811 nv = nv+1
1812 nvexpanded = nvexpanded+sizeofblocks(f)
1813 work(nv) = f
1814 f = fils(f)
1815 END DO
1816 compress_ratio = real(nvexpanded)/real(nv)
1817 CALL compute_blr_vcs(k472, group_size2, group_size, nvexpanded)
1818 IF (nvexpanded .GE. group_size2) THEN
1819 IF ( (k482_loc.EQ.3)
1820 & .OR.
1821 & ( (k60.NE.0).AND.(work(1).EQ.k38ou20) )
1822 & )
1823 & THEN
1824 group_size2 =
max(int(real(group_size2)/compress_ratio), 1)
1825
1826 DO i=1,nv
1827 lrgroups(work(i))=nbgroups+1+(i-1)/group_size2
1828 END DO
1829 nbgroups = nbgroups + (nv-1)/group_size2 + 1
1830
1831 ELSE
1832 IF (k469_loc .EQ. 2) THEN
1833 CALL sep_grouping_ab(nv, nvexpanded, work(1), n,
1834 & lrgroups, nbgroups, lumat, sizeofblocks,
1835 & group_size, halo_depth, trace_prv, workh_prv,
1836 & node, gen2halo_prv, k482_loc, k472, k469_loc,
1837 & sep_size, k142, k10, lp, lpok, iflag, ierror)
1838 ELSE
1839 CALL sep_grouping_ab(nv, nvexpanded, work(1), n,
1840 & lrgroups, nbgroups, lumat, sizeofblocks,
1841 & group_size, halo_depth, trace, workh,
1842 & node, gen2halo, k482_loc, k472, k469_loc,
1843 & sep_size, k142, k10, lp, lpok, iflag, ierror)
1844 ENDIF
1845 IF (iflag.LT.0) cycle
1846 pvs(node) = work(1)
1847
1848 pvschanged = .true.
1849
1850 step(work(1)) = abs(step(work(1)))
1851 IF (step(work(1)).EQ.step_scalapack_root) THEN
1852 IF (k38.GT.0) THEN
1853 k38 = work(1)
1854 ELSE
1855 k20 = work(1)
1856 ENDIF
1857 ENDIF
1858 DO i=1, nv-1
1859 step(work(i+1)) = -step(work(1))
1860 IF (fils(work(i)).LE.0) THEN
1861 fils(work(nv)) = fils(work(i))
1862 ENDIF
1863 fils(work(i)) = work(i+1)
1864 ENDDO
1865 ENDIF
1866 ELSE
1867
1868 nbgroups = nbgroups + 1
1869 nbgroups_local = nbgroups
1870
1871 IF (nvexpanded .GE. sep_size) THEN
1872 DO i = 1, nv
1873 lrgroups( work(i) ) = nbgroups_local
1874 ENDDO
1875 ELSE
1876 DO i = 1, nv
1877 lrgroups( work(i) ) = -nbgroups_local
1878 ENDDO
1879 ENDIF
1880 ENDIF
1881 ENDDO
1882
1883 498 CONTINUE
1884
1886 & comm, myid )
1887
1888
1889 IF (iflag.LT.0) GOTO 500
1890 IF (k469_loc.EQ.2) THEN
1891 IF (allocated(trace_prv)) DEALLOCATE(trace_prv)
1892 IF (allocated(workh_prv)) DEALLOCATE(workh_prv)
1893 IF (allocated(gen2halo_prv)) DEALLOCATE(gen2halo_prv)
1894 ENDIF
1895
1896 IF (k469_loc.NE.2) THEN
1897 IF (allocated(workh)) DEALLOCATE(workh)
1898 IF (allocated(trace)) DEALLOCATE(trace)
1899 IF (allocated(gen2halo)) DEALLOCATE(gen2halo)
1900 ENDIF
1901
1902 IF (.NOT.mapcol_provided) THEN
1903
1904 IF (pvschanged) THEN
1905 pvschanged_int_glob = 1
1906 ELSE
1907 pvschanged_int_glob = 0
1908 ENDIF
1909
1910 ELSE
1911
1912 IF (pvschanged) THEN
1913 pvschanged_int = 1
1914 ELSE
1915 pvschanged_int = 0
1916 ENDIF
1918 & mpi_integer,
1919 & mpi_max, comm, ierr_mpi )
1920 pvschanged_int_glob = 1
1921 IF (pvschanged_int_glob.NE.0) THEN
1922 IF (nprocs.GT.1) THEN
1923 ALLOCATE(workh(2*n+3*nsteps+1), stat=ierr_priv)
1924 IF (ierr_priv.GT.0) THEN
1925 IF (lpok) WRITE(lp,*)
1926 & " Error allocate integer array of ",
1927 & "size: ", 2*maxfront+1
1928 iflag = -7
1929 ierror = 2*n+3*nsteps+1
1930 ENDIF
1932 & comm, myid )
1933 IF (iflag.LT.0) GOTO 499
1934 IF (myid.EQ.master) THEN
1935 iproc = 0
1936 DO WHILE (iproc.NE.nprocs-1)
1937 iproc = iproc + 1
1938 CALL mpi_recv( nbnodes_loc, 1, mpi_integer,
1939 & mpi_any_source,
1940 & grouping, comm, status, ierr )
1941 msgsou = status( mpi_source )
1942 IF (nbnodes_loc.EQ.0) THEN
1943 cycle
1944 ENDIF
1945 CALL mpi_recv( nbgroups_sent, 1, mpi_integer,
1946 & msgsou, grouping, comm, status, ierr )
1947 CALL mpi_recv( size_sent, 1, mpi_integer,
1948 & msgsou, grouping, comm, status, ierr )
1949 CALL mpi_recv( workh, size_sent, mpi_integer,
1950 & msgsou, grouping, comm, status, ierr )
1951 ishift = 0
1952 DO iloop=1, nbnodes_loc
1953 ishift = ishift+1
1954 node = workh(ishift)
1955 ishift = ishift+1
1956 nv = workh(ishift)
1957 pvs(node) = workh(ishift+1)
1958 step(workh(ishift+1)) = node
1959 IF (step(workh(ishift+1)).EQ.step_scalapack_root) THEN
1960 IF (k38.GT.0) THEN
1961 k38 = workh(ishift+1)
1962 ELSE
1963 k20 = workh(ishift+1)
1964 END IF
1965 END IF
1966 DO i=2, nv
1967 step(workh(i+ishift)) = -node
1968 END DO
1969 DO i=1, nv
1970 fils(workh(i+ishift)) = workh(i+1+ishift)
1971 IF (workh(nv+1+i+ishift).LT.0) THEN
1972 lrgroups(workh(i+ishift)) =
1973 & - nbgroups + workh(nv+1+i+ishift)
1974 ELSE
1975 lrgroups(workh(i+ishift)) =
1976 & nbgroups + workh(nv+1+i+ishift)
1977 END IF
1978 END DO
1979 ishift = ishift + 2*nv +1
1980 END DO
1981 nbgroups = nbgroups + nbgroups_sent
1982 ENDDO
1983 ELSE
1984 nbnodes_loc = 0
1985 size_sent = 0
1986 ishift = 0
1987 DO node = 1,nsteps
1988 iproc = mapcol(node)
1989 IF (iproc.EQ.myid) THEN
1990 nbnodes_loc = nbnodes_loc + 1
1991 ishift = ishift +1
1992 workh(ishift) = node
1993 ishift = ishift +1
1994 nv = 0
1995 f = pvs(node)
1996 DO WHILE (f.GT.0)
1997 nv = nv + 1
1998 workh(nv+ishift) = f
1999 f = fils(f)
2000 ENDDO
2001 workh(ishift) = nv
2002 workh(nv+1+ishift) = f
2003 DO i=1, nv
2004 workh(nv+1+i+ishift) = lrgroups(workh(i+ishift))
2005 ENDDO
2006 ishift = ishift + 2*nv+1
2007 ENDIF
2008 ENDDO
2009 size_sent = ishift
2010 CALL mpi_send( nbnodes_loc, 1, mpi_integer, master,
2011 & grouping, comm, ierr )
2012 IF (nbnodes_loc.GT.0) THEN
2013 CALL mpi_send( nbgroups, 1, mpi_integer, master,
2014 & grouping, comm, ierr )
2015 CALL mpi_send( size_sent, 1, mpi_integer, master,
2016 & grouping, comm, ierr )
2017 CALL mpi_send( workh, size_sent, mpi_integer, master,
2018 & grouping, comm, ierr )
2019 ENDIF
2020 ENDIF
2021 ENDIF
2022 ENDIF
2023 499 CONTINUE
2024
2025 ENDIF
2026
2027 IF (iflag.LT.0) GOTO 500
2028 IF (myid.EQ.master) THEN
2029 IF (pvschanged_int_glob.EQ.0) GOTO 500
2030
2031 DO node = 1,nsteps
2032 IF(frere_steps(node) .GT. 0) THEN
2033 frere_steps(node) = pvs(abs(step(frere_steps(node))))
2034 ELSE IF(frere_steps(node) .LT. 0) THEN
2035 frere_steps(node) = -pvs(abs(step(dad_steps(node))))
2036 ENDIF
2037 IF(dad_steps(node) .NE. 0) THEN
2038 dad_steps(node) = pvs(abs(step(dad_steps(node))))
2039 END IF
2040 ENDDO
2041
2042
2043 DO i=3,lna
2044 na(i) = pvs(abs(step(na(i))))
2045 ENDDO
2046
2047
2048 DO i=1,n
2049 IF (fils(i).LT.0) THEN
2050 fils(i) = -pvs(abs(step(-fils(i))))
2051 ENDIF
2052 ENDDO
2053
2054 ENDIF
2055 500 CONTINUE
2056 IF (allocated(work)) DEALLOCATE(work)
2057 IF (k469_loc.EQ.2) THEN
2058 IF (allocated(trace_prv)) DEALLOCATE(trace_prv)
2059 IF (allocated(workh_prv)) DEALLOCATE(workh_prv)
2060 IF (allocated(gen2halo_prv)) DEALLOCATE(gen2halo_prv)
2061 ENDIF
2062
2063 k142_glob = 0
2065 & mpi_integer,
2066 & mpi_max, master, comm, ierr_mpi )
2067 k142 = k142_glob
2068#if defined(ptscotch) || defined(scotch)
2069 IF (k482_loc.EQ.2.AND.nomp .GT. 0) THEN
2070 CALL mumps_scotch_set_pthread_number (pthread_number)
2071 ENDIF
2072#endif
2073 501 CONTINUE
2074 IF (k469_loc.NE.2) THEN
2075 IF (allocated(trace)) DEALLOCATE(trace)
2076 IF (allocated(workh)) DEALLOCATE(workh)
2077 IF (allocated(gen2halo)) DEALLOCATE(gen2halo)
2078 ENDIF
2079 IF (allocated(pvs)) DEALLOCATE(pvs)
2080 RETURN
subroutine mumps_propinfo(icntl, info, comm, id)
subroutine mpi_recv(buf, cnt, datatype, source, tag, comm, status, ierr)
subroutine mpi_reduce(sendbuf, recvbuf, cnt, datatype, op, root, comm, ierr)
subroutine mpi_send(buf, cnt, datatype, dest, tag, comm, ierr)
subroutine mpi_allreduce(sendbuf, recvbuf, cnt, datatype, operation, comm, ierr)
subroutine mpi_bcast(buffer, cnt, datatype, root, comm, ierr)