OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
clr_core.F File Reference

Go to the source code of this file.

Modules

module  cmumps_lr_core

Functions/Subroutines

subroutine cmumps_lr_core::init_lrb (lrb_out, k, m, n, islr)
subroutine cmumps_lr_core::is_front_blr_candidate (inode, niv, nfront, nass, blron, k489, k490, k491, k492, k20, k60, idad, k38, lrstatus, n, lrgroups)
subroutine cmumps_lr_core::alloc_lrb (lrb_out, k, m, n, islr, iflag, ierror, keep8)
subroutine cmumps_lr_core::alloc_lrb_from_acc (acc_lrb, lrb_out, k, m, n, loru, iflag, ierror, keep8)
subroutine cmumps_lr_core::regrouping2 (cut, npartsass, nass, npartscb, ncb, ibcksz, onlycb, k472)
subroutine cmumps_lr_core::cmumps_lrtrsm (a, la, poselt_local, nfront, lda, lrb, niv, sym, loru, iw, offset_iw)
subroutine cmumps_lr_core::cmumps_lrgemm_scaling (lrb, scaled, a, la, diag, ld_diag, iw2, poseltt, nfront, block, maxi_cluster)
subroutine cmumps_lr_core::cmumps_lrgemm4 (alpha, lrb1, lrb2, beta, a, la, poseltt, nfront, sym, iflag, ierror, midblk_compress, toleps, tol_opt, kpercent, rank, buildq, lua_activated, loru, lrb3, maxi_rank, maxi_cluster, diag, ld_diag, iw2, block)
subroutine cmumps_lr_core::cmumps_decompress_acc (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, loru, count_flops)
subroutine cmumps_lr_core::cmumps_compress_fr_updates (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, toleps, tol_opt, kpercent, buildq, loru, cb_compress)
subroutine cmumps_lr_core::cmumps_recompress_acc (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, new_acc_rank)
recursive subroutine cmumps_lr_core::cmumps_recompress_acc_narytree (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, keep8, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, k478, rank_list, pos_list, nb_nodes, level, acc_tmp)
subroutine cmumps_lr_core::cmumps_recompress_acc_v2 (acc_lrb, maxi_cluster, maxi_rank, a, la, poseltt, nfront, niv, midblk_compress, toleps, tol_opt, kpercent_rmb, kpercent_lua, new_acc_rank)
subroutine cmumps_lr_core::max_cluster (cut, cut_size, maxi_cluster)
subroutine cmumps_lr_core::cmumps_get_lua_order (nb_blocks, order, rank, iwhandler, sym, fs_or_cb, i, j, frfr_updates, lbandslave_in, k474, blr_u_col)
subroutine cmumps_lr_core::cmumps_blr_asm_niv1 (a, la, posel1, nfront, nass1, iwhandler, son_iw, liw, lstk, nelim, k1, k2, sym, keep, keep8, opassw)
subroutine cmumps_truncated_rrqr (m, n, a, lda, jpvt, tau, work, ldw, rwork, toleps, tol_opt, rank, maxrank, info, islr)

Function/Subroutine Documentation

◆ cmumps_truncated_rrqr()

subroutine cmumps_truncated_rrqr ( integer m,
integer n,
complex, dimension(lda,*) a,
integer lda,
integer, dimension(*) jpvt,
complex, dimension(*) tau,
complex, dimension(ldw,*) work,
integer ldw,
real, dimension(*) rwork,
real toleps,
integer tol_opt,
integer rank,
integer maxrank,
integer info,
logical islr )

Definition at line 1608 of file clr_core.F.

1611C This routine computes a Rank-Revealing QR factorization of a dense
1612C matrix A. The factorization is truncated when the absolute value of
1613C a diagonal coefficient of the R factor becomes smaller than a
1614C prescribed threshold TOLEPS. The resulting partial Q and R factors
1615C provide a rank-k approximation of the input matrix A with accuracy
1616C TOLEPS.
1617C
1618C This routine is obtained by merging the LAPACK
1619C (http://www.netlib.org/lapack/) CGEQP3 and CLAQPS routines and by
1620C applying a minor modification to the outer factorization loop in
1621C order to stop computations as soon as possible when the required
1622C accuracy is reached.
1623C
1624C Copyright (c) 1992-2017 The University of Tennessee and The
1625C University of Tennessee Research Foundation. All rights reserved.
1626C Copyright (c) 2000-2017 The University of California Berkeley.
1627C All rights reserved.
1628C Copyright (c) 2006-2017 The University of Colorado Denver.
1629C All rights reserved.
1630C
1631C Redistribution and use in source and binary forms, with or without
1632C modification, are permitted provided that the following conditions
1633C are met:
1634C
1635C - Redistributions of source code must retain the above copyright
1636C notice, this list of conditions and the following disclaimer.
1637C
1638C - Redistributions in binary form must reproduce the above
1639C copyright notice, this list of conditions and the following
1640C disclaimer listed in this license in the documentation and/or
1641C other materials provided with the distribution.
1642C
1643C - Neither the name of the copyright holders nor the names of its
1644C contributors may be used to endorse or promote products derived from
1645C this software without specific prior written permission.
1646C
1647C The copyright holders provide no reassurances that the source code
1648C provided does not infringe any patent, copyright, or any other
1649C intellectual property rights of third parties. The copyright holders
1650C disclaim any liability to any recipient for claims brought against
1651C recipient by any third party for infringement of that parties
1652C intellectual property rights.
1653C
1654C THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1655C "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1656C LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1657C A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1658C OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1659C SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1660C LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1661C DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1662C THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1663C (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1664C OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1665C
1666 IMPLICIT NONE
1667C
1668 INTEGER :: INFO, LDA, LDW, M, N, RANK, MAXRANK
1669C TOL_OPT controls the tolerance option used
1670C >0 => use 2-norm (||.||_X = ||.||_2)
1671C <0 => use Frobenius-norm (||.||_X = ||.||_F)
1672C Furthermore, depending on abs(TOL_OPT):
1673C 1 => absolute: ||B_{I(k+1:end),J(k+1:end)}||_X <= TOLEPS
1674C 2 => relative to 2-norm of the compressed block:
1675C ||B_{I(k+1:end),J(k+1:end)}||_X <= TOLEPS*||B_{I,J}||_2
1676C 3 => relative to the max of the 2-norms of the row and column diagonal blocks
1677C ||B_{I(k+1:end),J{k+1:end}}||_X <= TOLEPS*max(||B_{I,I}||_2,||B_{J,J}||_2)
1678C 4 => relative to the sqrt of product of the 2-norms of the row and column diagonal blocks
1679C ||B_{I(k+1:end),J{k+1:end}}||_X <= TOLEPS*sqrt(||B_{I,I}||_2*||B_{J,J}||_2)
1680 INTEGER :: TOL_OPT
1681 REAL :: TOLEPS
1682 INTEGER :: JPVT(*)
1683 REAL :: RWORK(*)
1684 COMPLEX :: A(LDA,*), TAU(*)
1685 COMPLEX :: WORK(LDW,*)
1686 LOGICAL :: ISLR
1687 REAL :: TOLEPS_EFF, TRUNC_ERR
1688 INTEGER, PARAMETER :: INB=1, inbmin=2
1689 INTEGER :: J, JB, MINMN, NB
1690 INTEGER :: OFFSET, ITEMP
1691 INTEGER :: LSTICC, PVT, K, RK
1692 REAL :: TEMP, TEMP2, TOL3Z
1693 COMPLEX :: AKK
1694 LOGICAL INADMISSIBLE
1695 REAL, PARAMETER :: RZERO=0.0e+0, rone=1.0e+0
1696 COMPLEX :: ZERO
1697 COMPLEX :: ONE
1698 parameter( one = ( 1.0e+0, 0.0e+0 ) )
1699 parameter( zero = ( 0.0e+0, 0.0e+0 ) )
1700 REAL :: slamch
1701 INTEGER :: ilaenv, isamax
1702 EXTERNAL :: isamax, slamch
1703 EXTERNAL cgeqrf, cunmqr, xerbla
1704 EXTERNAL ilaenv
1705 EXTERNAL cgemm, cgemv, clarfg, cswap
1706 REAL, EXTERNAL :: scnrm2
1707 REAL, EXTERNAL :: snrm2
1708 info = 0
1709 islr = .false.
1710 IF( m.LT.0 ) THEN
1711 info = -1
1712 ELSE IF( n.LT.0 ) THEN
1713 info = -2
1714 ELSE IF( lda.LT.max( 1, m ) ) THEN
1715 info = -4
1716 END IF
1717 IF( info.EQ.0 ) THEN
1718 IF( ldw.LT.n ) THEN
1719 info = -8
1720 END IF
1721 END IF
1722 IF( info.NE.0 ) THEN
1723 WRITE(*,999) -info
1724 RETURN
1725 END IF
1726 minmn = min(m,n)
1727 IF( minmn.EQ.0 ) THEN
1728 rank = 0
1729 RETURN
1730 END IF
1731 nb = ilaenv( inb, 'CGEQRF', ' ', m, n, -1, -1 )
1732 SELECT CASE(abs(tol_opt))
1733 CASE(1)
1734 toleps_eff = toleps
1735 CASE(2)
1736C TOLEPS_EFF will be computed at step K=1 below
1737 CASE DEFAULT
1738 write(*,*) 'Internal error in CMUMPS_TRUNCATED_RRQR: TOL_OPT =',
1739 & tol_opt
1740 CALL mumps_abort()
1741 END SELECT
1742 toleps_eff = toleps
1743C
1744C Avoid pointers (and TARGET attribute on RWORK/WORK)
1745C because of implicit interface. An implicit interface
1746C is needed to avoid intermediate array copies
1747C VN1 => RWORK(1:N)
1748C VN2 => RWORK(N+1:2*N)
1749C AUXV => WORK(1:LDW,1:1)
1750C F => WORK(1:LDW,2:NB+1)
1751C LDF = LDW
1752* Initialize partial column norms. The first N elements of work
1753* store the exact column norms.
1754 DO j = 1, n
1755C VN1( J ) = scnrm2( M, A( 1, J ), 1 )
1756 rwork( j ) = scnrm2( m, a( 1, j ), 1 )
1757C VN2( J ) = VN1( J )
1758 rwork( n + j ) = rwork( j )
1759 jpvt(j) = j
1760 END DO
1761 IF (tol_opt.LT.0) THEN
1762C Compute TRUNC_ERR for first step
1763C TRUNC_ERR = snrm2( N, VN1( 1 ), 1 )
1764 trunc_err = snrm2( n, rwork( 1 ), 1 )
1765 ENDIF
1766 offset = 0
1767 tol3z = sqrt(slamch('Epsilon'))
1768 DO
1769 jb = min(nb,minmn-offset)
1770 lsticc = 0
1771 k = 0
1772 DO
1773 IF(k.EQ.jb) EXIT
1774 k = k+1
1775 rk = offset+k
1776C PVT = ( RK-1 ) + ISAMAX( N-RK+1, VN1( RK ), 1 )
1777 pvt = ( rk-1 ) + isamax( n-rk+1, rwork( rk ), 1 )
1778 IF (rk.EQ.1) THEN
1779C IF (abs(TOL_OPT).EQ.2) TOLEPS_EFF = VN1(PVT)*TOLEPS
1780 IF (abs(tol_opt).EQ.2) toleps_eff = rwork(pvt)*toleps
1781 ENDIF
1782 IF (tol_opt.GT.0) THEN
1783C TRUNC_ERR = VN1(PVT)
1784 trunc_err = rwork(pvt)
1785C ELSE
1786C TRUNC_ERR has been already computed at previous step
1787 ENDIF
1788 IF(trunc_err.LT.toleps_eff) THEN
1789 rank = rk-1
1790 islr = .true.
1791 RETURN
1792 ENDIF
1793 inadmissible = (rk.GT.maxrank)
1794 IF (inadmissible) THEN
1795 rank = rk
1796 info = rk
1797 islr = .false.
1798 RETURN
1799 END IF
1800 IF( pvt.NE.rk ) THEN
1801 CALL cswap( m, a( 1, pvt ), 1, a( 1, rk ), 1 )
1802c CALL cswap( K-1, F( PVT-OFFSET, 1 ), LDF,
1803c & F( K, 1 ), LDF )
1804 CALL cswap( k-1, work( pvt-offset, 2 ), ldw,
1805 & work( k, 2 ), ldw )
1806 itemp = jpvt(pvt)
1807 jpvt(pvt) = jpvt(rk)
1808 jpvt(rk) = itemp
1809C VN1(PVT) = VN1(RK)
1810C VN2(PVT) = VN2(RK)
1811 rwork(pvt) = rwork(rk)
1812 rwork(n+pvt) = rwork(n+rk)
1813 END IF
1814* Apply previous Householder reflectors to column K:
1815* A(RK:M,RK) := A(RK:M,RK) - A(RK:M,OFFSET+1:RK-1)*F(K,1:K-1)**H.
1816 IF( k.GT.1 ) THEN
1817 DO j = 1, k-1
1818C F( K, J ) = CONJG( F( K, J ) )
1819 work( k, j+1 ) = conjg( work( k, j+1 ) )
1820 END DO
1821 CALL cgemv( 'No transpose', m-rk+1, k-1, -one,
1822C & A(RK,OFFSET+1), LDA, F(K,1), LDF,
1823 & a(rk,offset+1), lda, work(k,2), ldw,
1824 & one, a(rk,rk), 1 )
1825 DO j = 1, k - 1
1826C F( K, J ) = CONJG( F( K, J ) )
1827 work( k, j + 1 ) = conjg( work( k, j + 1 ) )
1828 END DO
1829 END IF
1830* Generate elementary reflector H(k).
1831 IF( rk.LT.m ) THEN
1832 CALL clarfg( m-rk+1, a(rk,rk), a(rk+1,rk), 1, tau(rk) )
1833 ELSE
1834 CALL clarfg( 1, a(rk,rk), a(rk,rk), 1, tau(rk) )
1835 END IF
1836 akk = a(rk,rk)
1837 a(rk,rk) = one
1838* Compute Kth column of F:
1839* F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K).
1840 IF( rk.LT.n ) THEN
1841 CALL cgemv( 'Conjugate transpose', m-rk+1, n-rk, tau(rk),
1842 & a(rk,rk+1), lda, a(rk,rk), 1, zero,
1843C & F( K+1, K ), 1 )
1844 & work( k+1, k+1 ), 1 )
1845 END IF
1846* Padding F(1:K,K) with zeros.
1847 DO j = 1, k
1848C F( J, K ) = ZERO
1849 work( j, k+1 ) = zero
1850 END DO
1851* Incremental updating of F:
1852* F(1:N,K) := F(1:N-OFFSET,K) -
1853* tau(RK)*F(1:N,1:K-1)*A(RK:M,OFFSET+1:RK-1)**H*A(RK:M,RK).
1854 IF( k.GT.1 ) THEN
1855 CALL cgemv( 'Conjugate transpose', m-rk+1, k-1, -tau(rk),
1856 & a(rk,offset+1), lda, a(rk,rk), 1, zero,
1857 & work(1,1), 1 )
1858C & AUXV(1,1), 1 )
1859 CALL cgemv( 'no transpose', N-OFFSET, K-1, ONE,
1860 & WORK(1,2), LDW, WORK(1,1), 1, ONE, WORK(1,K+1), 1 )
1861C & F(1,1), LDF, AUXV(1,1), 1, ONE, F(1,K), 1 )
1862 END IF
1863* Update the current row of A:
1864* A(RK,RK+1:N) := A(RK,RK+1:N) - A(RK,OFFSET+1:RK)*F(K+1:N,1:K)**H.
1865.LT. IF( RKN ) THEN
1866 CALL cgemm( 'no transpose', 'conjugate transpose',
1867 & 1, N-RK,
1868C & K, -ONE, A( RK, OFFSET+1 ), LDA, F( K+1, 1 ), LDF,
1869 & K, -ONE, A( RK, OFFSET+1 ), LDA, WORK( K+1,2 ), LDW,
1870 & ONE, A( RK, RK+1 ), LDA )
1871 END IF
1872* Update partial column norms.
1873*
1874.LT. IF( RKMINMN ) THEN
1875 DO J = RK + 1, N
1876C IF( VN1( J ).NE.RZERO ) THEN
1877.NE. IF( RWORK( J )RZERO ) THEN
1878*
1879* NOTE: The following 4 lines follow from the analysis in
1880* Lapack Working Note 176.
1881*
1882C TEMP = ABS( A( RK, J ) ) / VN1( J )
1883 TEMP = ABS( A( RK, J ) ) / RWORK( J )
1884 TEMP = MAX( RZERO, ( RONE+TEMP )*( RONE-TEMP ) )
1885C TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2
1886 TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2
1887.LE. IF( TEMP2 TOL3Z ) THEN
1888C VN2( J ) = REAL( LSTICC )
1889 RWORK( N+J ) = REAL( LSTICC )
1890 LSTICC = J
1891 ELSE
1892C VN1( J ) = VN1( J )*SQRT( TEMP )
1893 RWORK( J ) = RWORK( J )*SQRT( TEMP )
1894 END IF
1895 END IF
1896 END DO
1897 END IF
1898 A( RK, RK ) = AKK
1899.NE. IF (LSTICC0) EXIT
1900.LT. IF (TOL_OPT0) THEN
1901C Compute TRUNC_ERR for next step
1902C TRUNC_ERR = snrm2( N-RK, VN1( RK+1 ), 1 )
1903 TRUNC_ERR = snrm2( N-RK, RWORK( RK+1 ), 1 )
1904 ENDIF
1905 END DO
1906* Apply the block reflector to the rest of the matrix:
1907* A(RK+1:M,RK+1:N) := A(RK+1:M,RK+1:N) -
1908* A(RK+1:M,OFFSET+1:RK)*F(K+1:N-OFFSET,1:K)**H.
1909.LT. IF( RKMIN(N,M) ) THEN
1910 CALL cgemm( 'no transpose', 'conjugate transpose', M-RK,
1911 & N-RK, K, -ONE, A(RK+1,OFFSET+1), LDA,
1912C & F(K+1,1), LDF, ONE, A(RK+1,RK+1), LDA )
1913 & WORK(K+1,2), LDW, ONE, A(RK+1,RK+1), LDA )
1914 END IF
1915* Recomputation of difficult columns.
1916.GT. DO WHILE( LSTICC0 )
1917C ITEMP = NINT( VN2( LSTICC ) )
1918 ITEMP = NINT( RWORK( N + LSTICC ) )
1919C VN1( LSTICC ) = scnrm2( M-RK, A( RK+1, LSTICC ), 1 )
1920 RWORK( LSTICC ) = scnrm2( M-RK, A( RK+1, LSTICC ), 1 )
1921*
1922* NOTE: The computation of RWORK( LSTICC ) relies on the fact that
1923* SNRM2 does not fail on vectors with norm below the value of
1924* SQRT(DLAMCH('S'))
1925*
1926C VN2( LSTICC ) = VN1( LSTICC )
1927 RWORK( N + LSTICC ) = RWORK( LSTICC )
1928 LSTICC = ITEMP
1929 END DO
1930.GE. IF(RKMINMN) EXIT
1931 OFFSET = RK
1932.LT. IF (TOL_OPT0) THEN
1933C Compute TRUNC_ERR for next step
1934C TRUNC_ERR = snrm2( N-RK, VN1( RK+1 ), 1 )
1935 TRUNC_ERR = snrm2( N-RK, RWORK( RK+1 ), 1 )
1936 ENDIF
1937 END DO
1938 RANK = RK
1939.NOT..GT. ISLR = (RKMAXRANK)
1940 RETURN
1941 999 FORMAT ('on entry to cmumps_truncated_rrqr, parameter number',
1942 & I2,' had an illegal value')
#define mumps_abort
Definition VE_Metis.h:25
subroutine cmumps_truncated_rrqr(m, n, a, lda, jpvt, tau, work, ldw, rwork, toleps, tol_opt, rank, maxrank, info, islr)
Definition clr_core.F:1611
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine xerbla(srname, info)
XERBLA
Definition xerbla.f:60
integer function isamax(n, sx, incx)
ISAMAX
Definition isamax.f:71
subroutine cgeqrf(m, n, a, lda, tau, work, lwork, info)
CGEQRF
Definition cgeqrf.f:146
subroutine clarfg(n, alpha, x, incx, tau)
CLARFG generates an elementary reflector (Householder matrix).
Definition clarfg.f:106
subroutine cunmqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMQR
Definition cunmqr.f:168
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
subroutine cgemv(trans, m, n, alpha, a, lda, x, incx, beta, y, incy)
CGEMV
Definition cgemv.f:158
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:187
real(wp) function snrm2(n, x, incx)
SNRM2
Definition snrm2.f90:89
real(wp) function scnrm2(n, x, incx)
SCNRM2
Definition scnrm2.f90:90
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21