Functions/Subroutines
subroutine	pbdtrnv (icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
subroutine	pbdtr2a1 (icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
subroutine	pbdtr2b1 (icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)

Function/Subroutine Documentation

◆ pbdtr2a1()

subroutine pbdtr2a1	(	integer	icontxt,
		integer	n,
		integer	nb,
		integer	nz,
		double precision, dimension( * )	x,
		integer	incx,
		double precision	beta,
		double precision, dimension( * )	y,
		integer	incy,
		integer	intv )

Definition at line 646 of file pbdtrnv.f.

*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     .. Scalar Arguments ..
      INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, INTV
      DOUBLE PRECISION     BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION     X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*     y <== x + beta * y   (the update itself is done by PBDVECADD,
*                           called with alpha = ONE and the given BETA)
*
*     x is a condensed vector: its pieces are stored back to back.
*     y is a scattered vector: the start of each piece is INTV
*     elements after the start of the previous one.
*
*  Arguments
*  =========
*
*  ICONTXT (input) INTEGER       passed through to PBDVECADD.
*  N       (input) INTEGER       extent of the scattered vector y that
*                                is still to be filled; it bounds the
*                                length of the last piece.
*  NB      (input) INTEGER       length of a full piece.
*  NZ      (input) INTEGER       number of elements missing from the
*                                first piece (it holds NB-NZ elements).
*  X       (input) DOUBLE PRECISION array, increment INCX.
*  BETA    (input) DOUBLE PRECISION, passed through to PBDVECADD.
*  Y       (input/output) DOUBLE PRECISION array, increment INCY.
*  INTV    (input) INTEGER       distance in y between piece starts.
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION     ONE
      PARAMETER            ( ONE = 1.0D+0 )
*     ..
*     .. Local Variables ..
*     JX, JY : element offsets of the current piece in x and in y
*     NSKIP  : elements missing from the current piece (NZ, then 0)
*     NBLK   : number of pieces, ICEIL( N+NZ, INTV )
      INTEGER              IBLK, JX, JY, NBLK, NSKIP
*     ..
*     .. External Functions ..
      INTEGER              ICEIL
      EXTERNAL             iceil
*     ..
*     .. External Subroutines ..
      EXTERNAL             pbdvecadd
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC            min
*     ..
*     .. Executable Statements ..
*
      jx    = 0
      jy    = 0
      nskip = nz
      nblk  = iceil( n+nz, intv )
*
*     Every piece except the last one.  Only the first piece is
*     shortened by the offset, so NSKIP is cleared after it.
*
      DO iblk = 1, nblk-1
         CALL pbdvecadd( icontxt, 'G', nb-nskip, one, x(jx*incx+1),
     $                   incx, beta, y(jy*incy+1), incy )
         jx    = jx + nb   - nskip
         jy    = jy + intv - nskip
         nskip = 0
      END DO
*
*     Last piece (also the first one when NBLK <= 1): it stops at
*     whichever comes first, the end of y or the end of the piece.
*
      CALL pbdvecadd( icontxt, 'G', min( n-jy, nb-nskip ), one,
     $                x(jx*incx+1), incx, beta, y(jy*incy+1), incy )
*
      RETURN
*
*     End of PBDTR2A1
*

◆ pbdtr2b1()

subroutine pbdtr2b1	(	integer	icontxt,
		character*1	trans,
		integer	n,
		integer	nb,
		integer	nz,
		double precision, dimension( * )	x,
		integer	incx,
		double precision	beta,
		double precision, dimension( * )	y,
		integer	incy,
		integer	jinx,
		integer	jiny )

Definition at line 718 of file pbdtrnv.f.

*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     .. Scalar Arguments ..
      CHARACTER*1          TRANS
      INTEGER              ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
      DOUBLE PRECISION     BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION     X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*     y <== x + beta * y
*
*     Pieces of NB elements are picked out of the scattered vector x
*     and accumulated into y.  Successive pieces start NB*JINX
*     elements apart in x and NB*JINY elements apart in y, so y is
*     condensed when JINY = 1.  When JINX = JINY = 1 both vectors are
*     contiguous and a single PBDVECADD call of length N is made.
*
*  Arguments
*  =========
*
*  ICONTXT (input) INTEGER       passed through to PBDVECADD.
*  TRANS   (input) CHARACTER*1   passed through to PBDVECADD.
*  N       (input) INTEGER       extent of x still to be read; it
*                                bounds the length of the last piece.
*  NB      (input) INTEGER       length of a full piece.
*  NZ      (input) INTEGER       number of elements missing from the
*                                first piece (it holds NB-NZ elements).
*  X       (input) DOUBLE PRECISION array, increment INCX.
*  BETA    (input) DOUBLE PRECISION, passed through to PBDVECADD.
*  Y       (input/output) DOUBLE PRECISION array, increment INCY.
*  JINX    (input) INTEGER       piece interval in x, in units of NB.
*  JINY    (input) INTEGER       piece interval in y, in units of NB.
*
*     ..
*     .. Parameters ..
      DOUBLE PRECISION     ONE
      PARAMETER            ( ONE = 1.0D+0 )
*     ..
*     .. Local Variables ..
*     JX, JY       : element offsets of the current piece in x and y
*     NSKIP        : elements missing from the current piece
*     XSTEP, YSTEP : distance between piece starts in x and in y
*     NBLK         : number of pieces, ICEIL( N+NZ, XSTEP )
      INTEGER              IBLK, JX, JY, NBLK, NSKIP, XSTEP, YSTEP
*     ..
*     .. External Functions ..
      INTEGER              ICEIL
      EXTERNAL             iceil
*     ..
*     .. External Subroutines ..
      EXTERNAL             pbdvecadd
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC            min
*     ..
*     .. Executable Statements ..
*
*     Both vectors contiguous: one call covers everything.
*
      IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
         CALL pbdvecadd( icontxt, trans, n, one, x, incx,
     $                   beta, y, incy )
         RETURN
      END IF
*
      jx    = 0
      jy    = 0
      nskip = nz
      xstep = nb * jinx
      ystep = nb * jiny
      nblk  = iceil( n+nz, xstep )
*
*     Every piece except the last one.  Only the first piece is
*     shortened by the offset, so NSKIP is cleared after it.
*
      DO iblk = 1, nblk-1
         CALL pbdvecadd( icontxt, trans, nb-nskip, one, x(jx*incx+1),
     $                   incx, beta, y(jy*incy+1), incy )
         jx    = jx + xstep - nskip
         jy    = jy + ystep - nskip
         nskip = 0
      END DO
*
*     Last piece (also the first one when NBLK <= 1): it stops at
*     whichever comes first, the end of x or the end of the piece.
*
      CALL pbdvecadd( icontxt, trans, min( n-jx, nb-nskip ), one,
     $                x(jx*incx+1), incx, beta, y(jy*incy+1), incy )
*
      RETURN
*
*     End of PBDTR2B1
*

◆ pbdtrnv()

subroutine pbdtrnv	(	integer	icontxt,
		character*1	xdist,
		character*1	trans,
		integer	n,
		integer	nb,
		integer	nz,
		double precision, dimension( * )	x,
		integer	incx,
		double precision	beta,
		double precision, dimension( * )	y,
		integer	incy,
		integer	ixrow,
		integer	ixcol,
		integer	iyrow,
		integer	iycol,
		double precision, dimension( * )	work )

Definition at line 1 of file pbdtrnv.f.

*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     Jaeyoung Choi, Oak Ridge National Laboratory
*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
*     David Walker,  Oak Ridge National Laboratory
*
*     .. Scalar Arguments ..
      CHARACTER*1        TRANS, XDIST
      INTEGER            ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
     $                   IYROW, N, NB, NZ
      DOUBLE PRECISION   BETA
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   WORK( * ), X( * ), Y( * )
*     ..
*
*  Purpose
*  =======
*
*  PBDTRNV transposes a column vector to row vector, or a row vector to
*  column vector by reallocating data distribution.
*
*     Y := X'
*
*  where X and Y are N vectors.
*
*  NOTE(review): BETA is forwarded to PBDTR2B1, PBDTR2A1, PBDTRST1 and
*  PBDVECADD together with Y, so the operation presumably is
*  Y := X' + BETA * Y -- confirm against those routines.
*
*  Parameters
*  ==========
*
*  ICONTXT (input) INTEGER
*          ICONTXT is the BLACS mechanism for partitioning communication
*          space.  A defining property of a context is that a message in
*          a context cannot be sent or received in another context.  The
*          BLACS context includes the definition of a grid, and each
*          process' coordinates in it.
*
*  XDIST   (input) CHARACTER*1
*          XDIST specifies whether X is a column vector or a row vector,
*
*            XDIST = 'C',  X is a column vector (distributed columnwise)
*            XDIST = 'R',  X is a row vector    (distributed rowwise)
*
*  TRANS   (input) CHARACTER*1
*          TRANS specifies whether the transposed format is transpose
*          or conjugate transpose.  If the vectors X and Y are real,
*          the argument is ignored.
*
*             TRANS = 'T',  transpose
*             TRANS = 'C',  conjugate transpose
*
*  N       (input) INTEGER
*          N specifies the (global) number of the vector X and the
*          vector Y.  N >= 0.
*
*  NB      (input) INTEGER
*          NB specifies the block size of vectors X and Y.  NB >= 1
*          (NB < 1 is rejected with INFO = 5).
*
*  NZ      (input) INTEGER
*          NZ is the column offset to specify the column distance from
*          the beginning of the block to the first element of the
*          vector X, and the row offset to the first element of the
*          vector Y if XDIST = 'C'.
*          Otherwise, it is row offset to specify the row distance
*          from the beginning of the block to the first element of the
*          vector X, and the column offset to the first element of the
*          vector Y.  0 <= NZ < NB  (anything else is rejected with
*          INFO = 6).
*
*  X       (input) DOUBLE PRECISION array of dimension  at least
*          ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
*          ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
*          The incremented array X must contain the vector X.
*
*  INCX    (input) INTEGER
*          INCX specifies the increment for the elements of X.
*          INCX <> 0.
*
*  BETA    (input) DOUBLE PRECISION
*          BETA specifies the scalar beta.
*
*  Y       (input/output) DOUBLE PRECISION array of dimension at least
*          ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
*          ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R'.
*          The incremented array Y must contain the vector Y.
*          Y will not be referenced if beta is zero.
*
*  INCY    (input) INTEGER
*          INCY specifies the increment for the elements of Y.
*          INCY <> 0.
*
*  IXROW   (input) INTEGER
*          IXROW specifies a row of the process template, which holds
*          the first element of the vector X. If X is a row vector and
*          all rows of processes have a copy of X, then set IXROW = -1.
*          IXROW = -1 is rejected (INFO = 12) when XDIST = 'C'.
*
*  IXCOL   (input) INTEGER
*          IXCOL specifies  a column of the process template,
*          which holds the first element of the vector X.  If  X is  a
*          column block and all columns of processes have a copy of X,
*          then set IXCOL = -1.
*          IXCOL = -1 is rejected (INFO = 13) when XDIST = 'R'.
*
*  IYROW   (input) INTEGER
*          IYROW specifies the current row process which holds the
*          first element of the vector Y, which is transposed of X.
*          If X  is a column vector and the transposed  row vector Y is
*          distributed all rows of processes, set IYROW = -1.
*          IYROW = -1 is rejected (INFO = 14) when XDIST = 'R'.
*
*  IYCOL   (input) INTEGER
*          IYCOL specifies  the current column process  which holds
*          the first element of the vector Y, which is transposed of X.
*          If X is a row block and the transposed column vector Y is
*          distributed all columns of processes, set IYCOL = -1.
*          IYCOL = -1 is rejected (INFO = 15) when XDIST = 'C'.
*
*  WORK    (workspace) DOUBLE PRECISION array of dimension Size(WORK).
*          It needs extra working space of x**T or x**H.
*
*  Parameters Details
*  ==================
*
*  Nx      It is a local portion  of N owned by a process, where x is
*          replaced by  either p (=NPROW) or q (=NPCOL).  The value is
*          determined by N, NB, NZ, x, and MI, where NB is a block size,
*          NZ is an offset from the beginning of the block,  and MI is a
*          row or column position  in a process template. Nx is equal
*          to  or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
*
*  Communication Scheme
*  ====================
*
*  The communication scheme of the routine is set to '1-tree', which is
*  fan-out.  (For details, see BLACS user's guide.)
*
*  Memory Requirement of WORK
*  ==========================
*
*  NN   = N + NZ
*  Npb  = CEIL( NN, NB*NPROW )
*  Nqb  = CEIL( NN, NB*NPCOL )
*  LCMP = LCM / NPROW
*  LCMQ = LCM / NPCOL
*
*   (1) XDIST = 'C'
*     (a) IXCOL != -1
*         Size(WORK) = CEIL(Nqb,LCMQ)*NB
*     (b) IXCOL = -1
*         Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
*
*   (2) XDIST = 'R'
*     (a) IXROW != -1
*         Size(WORK) = CEIL(Npb,LCMP)*NB
*     (b) IXROW = -1
*         Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
*
*  Notes
*  -----
*  More precise space can be computed as
*
*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
*  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
*
*  Error handling
*  --------------
*  On an invalid argument PXERBLA is called with the routine name and
*  the position of the offending argument, and the routine returns
*  without touching Y.  There is no INFO output argument.
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ONE, ZERO
      parameter( one = 1.0d+0, zero = 0.0d+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLFORM, ROWFORM
      INTEGER            I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
     $                   LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
     $                   MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
     $                   NQ, NQ0, NQ1
      DOUBLE PRECISION   TBETA
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILCM, ICEIL, NUMROC
      EXTERNAL           lsame, ilcm, iceil, numroc
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
     $                   dgesd2d, pbdtr2a1, pbdtr2b1, pbdtrget,
     $                   pbdtrst1, pbdvecadd, pxerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, mod
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible.
*
*     Note that this happens before the arguments are tested, so a
*     call with N = 0 and otherwise invalid arguments is not reported.
*
      IF( n.EQ.0 ) RETURN
*
      CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
*
      colform = lsame( xdist, 'C' )
      rowform = lsame( xdist, 'R' )
*
*     Test the input parameters.
*
*     INFO is set to the position of the first invalid argument in the
*     argument list.  The value -1 for a process coordinate is only
*     accepted for the orientation in which it is meaningful.
*
      info = 0
      IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
         info = 2
      ELSE IF( n   .LT.0                          ) THEN
         info = 4
      ELSE IF( nb  .LT.1                          ) THEN
         info = 5
      ELSE IF( nz  .LT.0 .OR. nz.GE.nb            ) THEN
         info = 6
      ELSE IF( incx.EQ.0                          ) THEN
         info = 8
      ELSE IF( incy.EQ.0                          ) THEN
         info = 11
      ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
     $       ( ixrow.EQ.-1 .AND. colform )        ) THEN
         info = 12
      ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
     $       ( ixcol.EQ.-1 .AND. rowform )        ) THEN
         info = 13
      ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
     $       ( iyrow.EQ.-1 .AND. rowform )        ) THEN
         info = 14
      ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
     $       ( iycol.EQ.-1 .AND. colform )        ) THEN
         info = 15
      END IF
*
*     Common error exit.  Label 10 is also the target of the GO TO
*     statements that follow the orientation-specific tests below.
*
   10 CONTINUE
      IF( info.NE.0 ) THEN
         CALL pxerbla( icontxt, 'PBDTRNV ', info )
         RETURN
      END IF
*
*     Start the operations.
*
*     LCM : the least common multiple of NPROW and NPCOL
*     LCMP, LCMQ : LCM / NPROW and LCM / NPCOL
*     IGD : NPCOL / LCMP = NPROW*NPCOL / LCM, i.e. the greatest common
*           divisor of NPROW and NPCOL
*     NN  : vector length including the leading offset NZ
*
      lcm  = ilcm( nprow, npcol )
      lcmp = lcm   / nprow
      lcmq = lcm   / npcol
      igd  = npcol / lcmp
      nn   = n + nz
*
*     When x is a column vector
*
      IF( colform ) THEN
*
*       Form  y <== x'  ( x is a column vector )
*
*                                        ||
*                                        ||
*            _____________               ||
*            -----(y)-----      <==     (x)
*                                        ||
*                                        ||
*                                        ||
*
*       These tests repeat, for XDIST = 'C', the range tests already
*       made above; they cannot fail once the tests above have passed.
*
        IF(      ixrow.LT.0  .OR. ixrow.GE.nprow ) THEN
          info = 12
        ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
          info = 13
        ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
          info = 14
        ELSE IF( iycol.LT.0  .OR. iycol.GE.npcol ) THEN
          info = 15
        END IF
        IF( info.NE.0 ) GO TO 10
*
*       MRROW : row relative position in template from IXROW
*       MRCOL : column relative position in template from IYCOL
*       JYROW : process row that assembles y; IXROW is used when y is
*               to be replicated over all process rows (IYROW = -1)
*
        mrrow = mod( nprow+myrow-ixrow, nprow )
        mrcol = mod( npcol+mycol-iycol, npcol )
        jyrow = iyrow
        IF( iyrow.EQ.-1 ) jyrow = ixrow
*
*       NP, NQ : local lengths of x and y on this process; the process
*                at relative position 0 does not own the NZ offset
*                elements
*       NQ0    : length of one message / WORK segment
*
        np  = numroc( nn, nb, myrow, ixrow, nprow )
        IF( mrrow.EQ.0 ) np = np - nz
        nq  = numroc( nn, nb, mycol, iycol, npcol )
        IF( mrcol.EQ.0 ) nq = nq - nz
        nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
*
*       When a column process of IXCOL has a column block A,
*
        IF( ixcol .GE. 0 ) THEN
*
*         TBETA : BETA on the process row that assembles y, ZERO
*                 elsewhere
*         KZ    : offset of the block being handled; NZ for the first
*                 loop iteration only, it is reset at the loop end
*
          tbeta = zero
          IF( myrow.EQ.jyrow ) tbeta = beta
          kz = nz
*
          DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
            mcrow = mod( mod(i, nprow) + ixrow, nprow )
            mccol = mod( mod(i, npcol) + iycol, npcol )
            IF( lcmq.EQ.1 )  nq0 = numroc( nn, nb, i, 0, npcol )
            jdex  = (i/npcol) * nb
            IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
*
*           A source node copies the blocks to WORK, and send it
*
            IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
*
*             The source node is a destination node
*
              idex = (i/nprow) * nb
              IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
              IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
                CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
     $                          x(idex*incx+1), incx, tbeta,
     $                          y(jdex*incy+1), incy, lcmp, lcmq )
*
*             The source node sends blocks to a destination node
*
              ELSE
                CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
     $                         x(idex*incx+1), incx, zero, work, 1,
     $                         lcmp, 1 )
                CALL dgesd2d( icontxt, 1, nq0-kz, work, 1,
     $                        jyrow, mccol )
              END IF
*
*           A destination node receives the copied vector
*
*           With LCMQ = 1 and nothing to accumulate the message is
*           received straight into Y; otherwise it is received into
*           WORK and then spread into Y by PBDTR2A1.
*
            ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
              IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
                CALL dgerv2d( icontxt, 1, nq0-kz, y, incy,
     $                        mcrow, ixcol )
              ELSE
                CALL dgerv2d( icontxt, 1, nq0-kz, work, 1,
     $                        mcrow, ixcol )
                CALL pbdtr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
     $                         y(jdex*incy+1), incy, lcmq*nb )
              END IF
            END IF
            kz = 0
   20     CONTINUE
*
*         Broadcast a row block of WORK in each column of template
*
          IF( iyrow.EQ.-1 ) THEN
            IF( myrow.EQ.jyrow ) THEN
              CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
            ELSE
              CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
     $                     jyrow, mycol )
             END IF
          END IF
*
*       When all column processors have a copy of the column block A,
*
        ELSE
          IF( lcmq.EQ.1 ) nq0 = nq
*
*         Processors, which have diagonal blocks of X, copy them to
*         WORK array in transposed form
*
*         KZ : offset used to locate the block in the local part of x
*         JZ : offset passed to PBDTR2B1; NZ only on the process that
*              is first in both its row and its column of y
*
          kz = 0
          IF( mrrow.EQ.0 ) kz = nz
          jz = 0
          IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
*
          DO 30 i = 0, lcmp - 1
            IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
              idex = max( 0, i*nb-kz )
              IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
                 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
     $                          x(idex*incx+1), incx, beta, y, incy,
     $                          lcmp, 1 )
              ELSE
                 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
     $                          x(idex*incx+1), incx, zero, work, 1,
     $                          lcmp, 1 )
              END IF
            END IF
   30     CONTINUE
*
*         Get diagonal blocks of A for each column of the template
*
*         MCCOL is only assigned here, i.e. when LCMQ > 1; it is used
*         again below in the computation of NQ1, which is also only
*         reached when LCMQ > 1.
*
          mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
          IF( lcmq.GT.1 ) THEN
            mccol = mod( npcol+mycol-iycol, npcol )
            CALL pbdtrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
     $                     work, 1, mcrow, mccol, igd, myrow, mycol,
     $                     nprow, npcol )
          END IF
*
*         Broadcast a row block of WORK in every row of template
*
          IF( iyrow.EQ.-1 ) THEN
            IF( myrow.EQ.mcrow ) THEN
              IF( lcmq.GT.1 ) THEN
                kz = 0
                IF( mycol.EQ.iycol ) kz = nz
                CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
     $                         beta, y, incy, lcmp, lcmq, nq0 )
              END IF
              CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
            ELSE
              CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
     $                      mcrow, mycol )
            END IF
*
*         Send a row block of WORK to the destination row
*
          ELSE
            IF( lcmq.EQ.1 ) THEN
              IF( myrow.EQ.mcrow ) THEN
                IF( myrow.NE.iyrow )
     $            CALL dgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
              ELSE IF( myrow.EQ.iyrow ) THEN
                IF( beta.EQ.zero ) THEN
                  CALL dgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
                ELSE
                  CALL dgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
                  CALL pbdvecadd( icontxt, 'G', nq0, one, work, 1,
     $                            beta, y, incy )
                END IF
              END IF
*
            ELSE
*
*             NQ1 : total length of the WORK segments to transfer
*
              nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
              IF( myrow.EQ.mcrow ) THEN
                IF( myrow.NE.iyrow )
     $            CALL dgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
              ELSE IF( myrow.EQ.iyrow ) THEN
                CALL dgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
              END IF
*
              IF( myrow.EQ.iyrow ) THEN
                kz = 0
                IF( mycol.EQ.iycol ) kz = nz
                CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
     $                         beta, y, incy, lcmp, lcmq, nq0 )
              END IF
            END IF
          END IF
        END IF
*
*     When x is a row vector
*
      ELSE
*
*       Form  y <== x'  ( x is a row block )
*
*           ||
*           ||
*           ||               _____________
*          (y)      <==      -----(x)-----
*           ||
*           ||
*           ||
*
*       These tests repeat, for XDIST = 'R', the range tests already
*       made above; they cannot fail once the tests above have passed.
*
        IF(      ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
          info = 12
        ELSE IF( ixcol.LT.0  .OR. ixcol.GE.npcol ) THEN
          info = 13
        ELSE IF( iyrow.LT.0  .OR. iyrow.GE.nprow ) THEN
          info = 14
        ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
          info = 15
        END IF
        IF( info.NE.0 ) GO TO 10
*
*       MRROW : row relative position in template from IYROW
*       MRCOL : column relative position in template from IXCOL
*       JYCOL : process column that assembles y; IXCOL is used when y
*               is to be replicated over all process columns
*               (IYCOL = -1)
*
        mrrow = mod( nprow+myrow-iyrow, nprow )
        mrcol = mod( npcol+mycol-ixcol, npcol )
        jycol = iycol
        IF( iycol.EQ.-1 ) jycol = ixcol
*
*       NP, NQ : local lengths of y and x on this process; the process
*                at relative position 0 does not own the NZ offset
*                elements
*       NP0    : length of one message / WORK segment
*
        np  = numroc( nn, nb, myrow, iyrow, nprow )
        IF( mrrow.EQ.0 ) np = np - nz
        nq  = numroc( nn, nb, mycol, ixcol, npcol )
        IF( mrcol.EQ.0 ) nq = nq - nz
        np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
*
*       When a row process of IXROW has a row block A,
*
        IF( ixrow .GE. 0 ) THEN
*
*         TBETA : BETA on the process column that assembles y, ZERO
*                 elsewhere
*         KZ    : offset of the block being handled; NZ for the first
*                 loop iteration only, it is reset at the loop end
*
          tbeta = zero
          IF( mycol.EQ.jycol ) tbeta = beta
          kz = nz
*
          DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
            mcrow = mod( mod(i, nprow) + iyrow, nprow )
            mccol = mod( mod(i, npcol) + ixcol, npcol )
            IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
            jdex  = (i/nprow) * nb
            IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
*
*           A source node copies the blocks to WORK, and send it
*
            IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
*
*             The source node is a destination node
*
              idex = (i/npcol) * nb
              IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
              IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
                CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
     $                         x(idex*incx+1), incx, tbeta,
     $                         y(jdex*incy+1), incy, lcmq, lcmp )
*
*             The source node sends blocks to a destination node
*
              ELSE
                CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
     $                         x(idex*incx+1), incx, zero, work, 1,
     $                         lcmq, 1 )
                CALL dgesd2d( icontxt, 1, np0-kz, work, 1,
     $                        mcrow, jycol )
              END IF
*
*           A destination node receives the copied blocks
*
*           With LCMP = 1 and nothing to accumulate the message is
*           received straight into Y; otherwise it is received into
*           WORK and then spread into Y by PBDTR2A1.
*
            ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
              IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
                CALL dgerv2d( icontxt, 1, np0-kz, y, incy,
     $                        ixrow, mccol )
              ELSE
                CALL dgerv2d( icontxt, 1, np0-kz, work, 1,
     $                        ixrow, mccol )
                CALL pbdtr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
     $                         y(jdex*incy+1), incy, lcmp*nb )
              END IF
            END IF
            kz = 0
   40     CONTINUE
*
*         Broadcast a column vector Y in each row of template
*
          IF( iycol.EQ.-1 ) THEN
            IF( mycol.EQ.jycol ) THEN
              CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
            ELSE
              CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
     $                      myrow, jycol )
            END IF
          END IF
*
*       When all row processors have a copy of the row block A,
*
        ELSE
          IF( lcmp.EQ.1 ) np0 = np
*
*         Processors, which have diagonal blocks of A, copy them to
*         WORK array in transposed form
*
*         KZ : offset used to locate the block in the local part of x
*         JZ : offset passed to PBDTR2B1; NZ only on the process that
*              is first in both its column and its row of y
*
          kz = 0
          IF( mrcol.EQ.0 ) kz = nz
          jz = 0
          IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
*
          DO 50 i = 0, lcmq-1
            IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
              idex = max( 0, i*nb-kz )
              IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
                CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
     $                          x(idex*incx+1), incx, beta, y, incy,
     $                          lcmq, 1 )
              ELSE
                CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
     $                         x(idex*incx+1), incx, zero, work, 1,
     $                         lcmq, 1 )
              END IF
            END IF
   50     CONTINUE
*
*         Get diagonal blocks of A for each row of the template
*
*         MCROW is only assigned here, i.e. when LCMP > 1; it is used
*         again below in the computation of NP1, which is also only
*         reached when LCMP > 1.
*
          mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
          IF( lcmp.GT.1 ) THEN
            mcrow = mod( nprow+myrow-iyrow, nprow )
            CALL pbdtrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
     $                     work, 1, mcrow, mccol, igd, myrow, mycol,
     $                     nprow, npcol )
          END IF
*
*         Broadcast a column block of WORK in every column of template
*
          IF( iycol.EQ.-1 ) THEN
            IF( mycol.EQ.mccol ) THEN
              IF( lcmp.GT.1 ) THEN
                kz = 0
                IF( myrow.EQ.iyrow ) kz = nz
                CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
     $                         beta, y, incy, lcmp, lcmq, np0 )
              END IF
              CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
            ELSE
              CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
     $                      myrow, mccol )
            END IF
*
*         Send a column block of WORK to the destination column
*
          ELSE
            IF( lcmp.EQ.1 ) THEN
              IF( mycol.EQ.mccol ) THEN
                IF( mycol.NE.iycol )
     $            CALL dgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
              ELSE IF( mycol.EQ.iycol ) THEN
                IF( beta.EQ.zero ) THEN
                  CALL dgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
                ELSE
                  CALL dgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
                  CALL pbdvecadd( icontxt, 'G', np, one, work, 1, beta,
     $                            y, incy )
                END IF
              END IF
*
            ELSE
*
*             NP1 : total length of the WORK segments to transfer
*
              np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
              IF( mycol.EQ.mccol ) THEN
                IF( mycol.NE.iycol )
     $            CALL dgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
              ELSE IF( mycol.EQ.iycol ) THEN
                CALL dgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
              END IF
*
              IF( mycol.EQ.iycol ) THEN
                kz = 0
                IF( myrow.EQ.iyrow ) kz = nz
                CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
     $                         beta, y, incy, lcmp, lcmq, np0 )
              END IF
            END IF
          END IF
        END IF
      END IF
*
      RETURN
*
*     End of PBDTRNV
*

OpenRadioss 2025.1.11 OpenRadioss project

Functions/Subroutines

Function/Subroutine Documentation

◆ pbdtr2a1()

◆ pbdtr2b1()

◆ pbdtrnv()