OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbstran.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbstran (icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
subroutine pbstr2at (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
subroutine pbstr2bt (icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
subroutine pbstr2af (icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)

Function/Subroutine Documentation

◆ pbstr2af()

subroutine pbstr2af ( integer icontxt,
character*1 adist,
integer m,
integer n,
integer nb,
real, dimension( lda, * ) a,
integer lda,
real beta,
real, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq,
integer nint )

Definition at line 790 of file pbstran.f.

792*
793* -- PB-BLAS routine (version 2.1) --
794* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
795* April 28, 1996
796*
797* .. Scalar Arguments ..
798 CHARACTER*1 ADIST
799 INTEGER ICONTXT, M, N, NB, LDA, LDB, LCMP, LCMQ, NINT
800 REAL BETA
801* ..
802* .. Array Arguments ..
803 REAL A( LDA, * ), B( LDB, * )
804* ..
805*
806* Purpose
807* =======
808*
809* PBSTR2AF forms T <== A + BETA*T, where T is a scattered block
810* row (or column) copied from a (condensed) block column (or row) of A
811*
812* =====================================================================
813*
814* .. Parameters ..
815 REAL ONE
816 parameter( one = 1.0e+0 )
817* ..
818* .. Local Scalars ..
819 INTEGER JA, JB, K, INTV
820* ..
821* .. External Functions ..
822 LOGICAL LSAME
823 INTEGER ICEIL
824 EXTERNAL lsame, iceil
825* ..
826* .. Intrinsic Functions ..
827 INTRINSIC min
828* ..
829* .. Executable Statements ..
830*
831 IF( lsame( adist, 'R' ) ) THEN
832 intv = nb * lcmq
833 ja = 1
834 jb = 1
835 DO 10 k = 1, iceil( nint, nb )
836 CALL pbsmatadd( icontxt, 'G', m, min( n-jb+1, nb ), one,
837 $ a(1,ja), lda, beta, b(1,jb), ldb )
838 ja = ja + nb
839 jb = jb + intv
840 10 CONTINUE
841*
842* if( LSAME( ADIST, 'C' ) ) then
843*
844 ELSE
845 intv = nb * lcmp
846 ja = 1
847 jb = 1
848 DO 20 k = 1, iceil( nint, nb )
849 CALL pbsmatadd( icontxt, 'G', min( m-jb+1, nb ), n, one,
850 $ a(ja,1), lda, beta, b(jb,1), ldb )
851 ja = ja + nb
852 jb = jb + intv
853 20 CONTINUE
854 END IF
855*
856 RETURN
857*
858* End of PBSTR2AF
859*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbsmatadd(icontxt, mode, m, n, alpha, a, lda, beta, b, ldb)
Definition pbsmatadd.f:3

◆ pbstr2at()

subroutine pbstr2at ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
real, dimension( lda, * ) a,
integer lda,
real beta,
real, dimension( ldb, * ) b,
integer ldb,
integer lcmp,
integer lcmq )

Definition at line 612 of file pbstran.f.

614*
615* -- PB-BLAS routine (version 2.1) --
616* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
617* April 28, 1996
618*
619* .. Scalar Arguments ..
620 CHARACTER*1 ADIST, TRANS
621 INTEGER ICONTXT, LCMP, LCMQ, LDA, LDB, M, N, NB
622 REAL BETA
623* ..
624* .. Array Arguments ..
625 REAL A( LDA, * ), B( LDB, * )
626* ..
627*
628* Purpose
629* =======
630*
631* PBSTR2AT forms B <== A^T + beta*B, or A^C + beta*B
632* B is a ((conjugate) transposed) scattered block row (or column),
633* copied from a scattered block column (or row) of A
634*
635* =====================================================================
636*
637* .. Parameters ..
638 REAL ONE
639 parameter( one = 1.0e+0 )
640* ..
641* .. Local Scalars ..
642 INTEGER IA, IB, K, INTV, JNTV
643* ..
644* .. External Subroutines ..
645 EXTERNAL pbsmatadd
646* ..
647* .. External Functions ..
648 LOGICAL LSAME
649 INTEGER ICEIL
650 EXTERNAL lsame, iceil
651* ..
652* .. Intrinsic Functions ..
653 INTRINSIC min
654* ..
655* .. Excutable Statements ..
656*
657 IF( lcmp.EQ.lcmq ) THEN
658 CALL pbsmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
659 $ ldb )
660*
661 ELSE
662*
663* If A is a column block ( ADIST = 'C' ),
664*
665 IF( lsame( adist, 'C' ) ) THEN
666 intv = lcmp * nb
667 jntv = lcmq * nb
668 ia = 1
669 ib = 1
670 DO 10 k = 1, iceil( m, intv )
671 CALL pbsmatadd( icontxt, trans, n, min( m-ia+1, nb ),
672 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
673 ia = ia + intv
674 ib = ib + jntv
675 10 CONTINUE
676*
677* If A is a row block ( ADIST = 'R' ),
678*
679 ELSE
680 intv = lcmp * nb
681 jntv = lcmq * nb
682 ia = 1
683 ib = 1
684 DO 20 k = 1, iceil( n, jntv )
685 CALL pbsmatadd( icontxt, trans, min( n-ia+1, nb ), m,
686 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
687 ia = ia + jntv
688 ib = ib + intv
689 20 CONTINUE
690 END IF
691 END IF
692*
693 RETURN
694*
695* End of PBSTR2AT
696*

◆ pbstr2bt()

subroutine pbstr2bt ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
real, dimension( lda, * ) a,
integer lda,
real beta,
real, dimension( ldb, * ) b,
integer ldb,
integer intv )

Definition at line 703 of file pbstran.f.

705*
706* -- PB-BLAS routine (version 2.1) --
707* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
708* April 28, 1996
709*
710* .. Scalar Arguments ..
711 CHARACTER*1 ADIST, TRANS
712 INTEGER ICONTXT, INTV, LDA, LDB, M, N, NB
713 REAL BETA
714* ..
715* .. Array Arguments ..
716 REAL A( LDA, * ), B( LDB, * )
717* ..
718*
719* Purpose
720* =======
721*
722* PBSTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a
723* ((conjugate) transposed) condensed block row (or column), copied from
724* a scattered block column (or row) of A
725*
726* =====================================================================
727*
728* .. Parameters ..
729 REAL ONE
730 parameter( one = 1.0e+0 )
731* ..
732* .. Local Scalars ..
733 INTEGER IA, IB, K
734* ..
735* .. External Functions ..
736 LOGICAL LSAME
737 INTEGER ICEIL
738 EXTERNAL lsame, iceil
739* ..
740* .. External Subroutines ..
741 EXTERNAL pbsmatadd
742* ..
743* .. Intrinsic Functions ..
744 INTRINSIC min
745* ..
746* .. Excutable Statements ..
747*
748 IF( intv.EQ.nb ) THEN
749 CALL pbsmatadd( icontxt, trans, n, m, one, a, lda, beta, b,
750 $ ldb )
751*
752 ELSE
753*
754* If A is a column block ( ADIST = 'C' ),
755*
756 IF( lsame( adist, 'C' ) ) THEN
757 ia = 1
758 ib = 1
759 DO 10 k = 1, iceil( m, intv )
760 CALL pbsmatadd( icontxt, trans, n, min( m-ia+1, nb ),
761 $ one, a(ia,1), lda, beta, b(1,ib), ldb )
762 ia = ia + intv
763 ib = ib + nb
764 10 CONTINUE
765*
766* If A is a row block (ADIST = 'R'),
767*
768 ELSE
769 ia = 1
770 ib = 1
771 DO 20 k = 1, iceil( n, intv )
772 CALL pbsmatadd( icontxt, trans, min( n-ia+1, nb ), m,
773 $ one, a(1,ia), lda, beta, b(ib,1), ldb )
774 ia = ia + intv
775 ib = ib + nb
776 20 CONTINUE
777 END IF
778 END IF
779*
780 RETURN
781*
782* End of PBSTR2BT
783*

◆ pbstran()

subroutine pbstran ( integer icontxt,
character*1 adist,
character*1 trans,
integer m,
integer n,
integer nb,
real, dimension( lda, * ) a,
integer lda,
real beta,
real, dimension( ldc, * ) c,
integer ldc,
integer iarow,
integer iacol,
integer icrow,
integer iccol,
real, dimension( * ) work )

Definition at line 1 of file pbstran.f.

3*
4* -- PB-BLAS routine (version 2.1) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
6* April 28, 1996
7*
8* Jaeyoung Choi, Oak Ridge National Laboratory
9* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
10* David Walker, Oak Ridge National Laboratory
11*
12* .. Scalar Arguments ..
13 CHARACTER*1 ADIST, TRANS
14 INTEGER IACOL, IAROW, ICCOL, ICONTXT, ICROW, LDA, LDC,
15 $ M, N, NB
16 REAL BETA
17* ..
18* .. Array Arguments ..
19 REAL A( LDA, * ), C( LDC, * ), WORK( * )
20* ..
21*
22* Purpose
23* =======
24*
25* PBSTRAN transposes a column block to row block, or a row block to
26* column block by reallocating data distribution.
27*
28* C := A^T + beta*C, or C := A^C + beta*C
29*
30* where A is an M-by-N matrix and C is an N-by-M matrix, and the size
31* of M or N is limited to its block size NB.
32*
33* The first elements of the matrices A, and C should be located at
34* the beginnings of their first blocks. (not the middle of the blocks.)
35*
36* Parameters
37* ==========
38*
39* ICONTXT (input) INTEGER
40* ICONTXT is the BLACS mechanism for partitioning communication
41* space. A defining property of a context is that a message in
42* a context cannot be sent or received in another context. The
43* BLACS context includes the definition of a grid, and each
44* process' coordinates in it.
45*
46* ADIST - (input) CHARACTER*1
47* ADIST specifies whether A is a column block or a row block.
48*
49* ADIST = 'C', A is a column block
50* ADIST = 'R', A is a row block
51*
52* TRANS - (input) CHARACTER*1
53* TRANS specifies whether the transposed format is transpose
54* or conjugate transpose. If the matrices A and C are real,
55* the argument is ignored.
56*
57* TRANS = 'T', transpose
58* TRANS = 'C', conjugate transpose
59*
60* M - (input) INTEGER
61* M specifies the (global) number of rows of the matrix (block
62* column or block row) A and of columns of the matrix C.
63* M >= 0.
64*
65* N - (input) INTEGER
66* N specifies the (global) number of columns of the matrix
67* (block column or block row) A and of columns of the matrix
68* C. N >= 0.
69*
70* NB - (input) INTEGER
71* NB specifies the column block size of the matrix A and the
72* row block size of the matrix C when ADIST = 'C'. Otherwise,
73* it specifies the row block size of the matrix A and the
74* column block size of the matrix C. NB >= 1.
75*
76* A (input) REAL array of DIMENSION ( LDA, Lx ),
77* where Lx is N when ADIST = 'C', or Nq when ADIST = 'R'.
78* Before entry with ADIST = 'C', the leading Mp by N part of
79* the array A must contain the matrix A, otherwise the leading
80* M by Nq part of the array A must contain the matrix A. See
81* parameter details for the values of Mp and Nq.
82*
83* LDA (input) INTEGER
84* LDA specifies the leading dimension of (local) A as declared
85* in the calling (sub) program. LDA >= MAX(1,Mp) when
86* ADIST = 'C', or LDA >= MAX(1,M) otherwise.
87*
88* BETA (input) REAL
89* BETA specifies scaler beta.
90*
91* C (input/output) REAL array of DIMENSION ( LDC, Lx ),
92* where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'.
93* If ADIST = 'C', the leading N-by-Mq part of the array C
94* contains the (local) matrix C, otherwise the leading
95* Np-by-M part of the array C must contain the (local) matrix
96* C. C will not be referenced if beta is zero.
97*
98* LDC (input) INTEGER
99* LDC specifies the leading dimension of (local) C as declared
100* in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C',
101* or LDC >= MAX(1,Np) otherwise.
102*
103* IAROW (input) INTEGER
104* IAROW specifies a row of the process template,
105* which holds the first block of the matrix A. If A is a row
106* of blocks (ADIST = 'R') and all rows of processes have a copy
107* of A, then set IAROW = -1.
108*
109* IACOL (input) INTEGER
110* IACOL specifies a column of the process template,
111* which holds the first block of the matrix A. If A is a
112* column of blocks (ADIST = 'C') and all columns of processes
113* have a copy of A, then set IACOL = -1.
114*
115* ICROW (input) INTEGER
116* ICROW specifies the current row process which holds
117* the first block of the matrix C, which is transposed of A.
118* If C is a row of blocks (ADIST = 'C') and the transposed
119* row block C is distributed all rows of processes, set
120* ICROW = -1.
121*
122* ICCOL (input) INTEGER
123* ICCOL specifies the current column process which holds
124* the first block of the matrix C, which is transposed of A.
125* If C is a column of blocks (ADIST = 'R') and the transposed
126* column block C is distributed all columns of processes,
127* set ICCOL = -1.
128*
129* WORK (workspace) REAL array of dimension Size(WORK).
130* It needs extra working space of A'.
131*
132* Parameters Details
133* ==================
134*
135* Lx It is a local portion of L owned by a process, (L is
136* replaced by M, or N, and x is replaced by either p (=NPROW)
137* or q (=NPCOL)). The value is determined by L, LB, x, and
138* MI, where LB is a block size and MI is a row or column
139* position in a process template. Lx is equal to or less
140* than Lx0 = CEIL( L, LB*x ) * LB.
141*
142* Communication Scheme
143* ====================
144*
145* The communication scheme of the routine is set to '1-tree', which is
146* fan-out. (For details, see BLACS user's guide.)
147*
148* Memory Requirement of WORK
149* ==========================
150*
151* Mqb = CEIL( M, NB*NPCOL )
152* Npb = CEIL( N, NB*NPROW )
153* LCMQ = LCM / NPCOL
154* LCMP = LCM / NPROW
155*
156* (1) ADIST = 'C'
157* (a) IACOL != -1
158* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB
159* (b) IACOL = -1
160* Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB))
161*
162* (2) ADIST = 'R'
163* (a) IAROW != -1
164* Size(WORK) = M * CEIL(Npb,LCMP)*NB
165* (b) IAROW = -1
166* Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB))
167*
168* Notes
169* -----
170* More precise space can be computed as
171*
172* CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ )
173* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP )
174*
175* =====================================================================
176*
177* ..
178* .. Parameters ..
179 REAL ONE, ZERO
180 parameter( one = 1.0e+0, zero = 0.0e+0 )
181* ..
182* .. Local Scalars ..
183 LOGICAL COLFORM, ROWFORM
184 INTEGER I, IDEX, IGD, INFO, JCCOL, JCROW, JDEX, LCM,
185 $ LCMP, LCMQ, MCCOL, MCROW, ML, MP, MQ, MQ0,
186 $ MRCOL, MRROW, MYCOL, MYROW, NP, NP0, NPCOL,
187 $ NPROW, NQ
188 REAL TBETA
189* ..
190* .. External Functions ..
191 LOGICAL LSAME
192 INTEGER ILCM, ICEIL, NUMROC
193 EXTERNAL ilcm, iceil, lsame, numroc
194* ..
195* .. External Subroutines ..
198 $ sgebs2d, sgerv2d, sgesd2d
199* ..
200* .. Intrinsic Functions ..
201 INTRINSIC max, min, mod
202* ..
203* .. Executable Statements ..
204*
205* Quick return if possible.
206*
207 IF( m.EQ.0 .OR. n.EQ.0 ) RETURN
208*
209 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
210*
211 colform = lsame( adist, 'C' )
212 rowform = lsame( adist, 'R' )
213*
214* Test the input parameters.
215*
216 info = 0
217 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
218 info = 2
219 ELSE IF( m .LT.0 ) THEN
220 info = 4
221 ELSE IF( n .LT.0 ) THEN
222 info = 5
223 ELSE IF( nb.LT.1 ) THEN
224 info = 6
225 ELSE IF( iarow.LT.-1 .OR. iarow.GE.nprow .OR.
226 $ ( iarow.EQ.-1 .AND. colform ) ) THEN
227 info = 12
228 ELSE IF( iacol.LT.-1 .OR. iacol.GE.npcol .OR.
229 $ ( iacol.EQ.-1 .AND. rowform ) ) THEN
230 info = 13
231 ELSE IF( icrow.LT.-1 .OR. icrow.GE.nprow .OR.
232 $ ( icrow.EQ.-1 .AND. rowform ) ) THEN
233 info = 14
234 ELSE IF( iccol.LT.-1 .OR. iccol.GE.npcol .OR.
235 $ ( iccol.EQ.-1 .AND. colform ) ) THEN
236 info = 15
237 END IF
238*
239 10 CONTINUE
240 IF( info .NE. 0 ) THEN
241 CALL pxerbla( icontxt, 'PBSTRAN ', info )
242 RETURN
243 END IF
244*
245* Start the operations.
246*
247* LCM : the least common multiple of NPROW and NPCOL
248*
249 lcm = ilcm( nprow, npcol )
250 lcmp = lcm / nprow
251 lcmq = lcm / npcol
252 igd = npcol / lcmp
253*
254* When A is a column block
255*
256 IF( colform ) THEN
257*
258* Form C <== A' ( A is a column block )
259* _
260* | |
261* | |
262* _____________ | |
263* |______C______| <== |A|
264* | |
265* | |
266* |_|
267*
268* MRROW : row relative position in template from IAROW
269* MRCOL : column relative position in template from ICCOL
270*
271 mrrow = mod( nprow+myrow-iarow, nprow )
272 mrcol = mod( npcol+mycol-iccol, npcol )
273 jcrow = icrow
274 IF( icrow.EQ.-1 ) jcrow = iarow
275*
276 mp = numroc( m, nb, myrow, iarow, nprow )
277 mq = numroc( m, nb, mycol, iccol, npcol )
278 mq0 = numroc( numroc(m, nb, 0, 0, npcol), nb, 0, 0, lcmq )
279*
280 IF( lda.LT.mp .AND.
281 $ ( iacol.EQ.mycol .OR. iacol.EQ.-1 ) ) THEN
282 info = 8
283 ELSE IF( ldc.LT.n .AND.
284 $ ( icrow.EQ.myrow .OR. icrow.EQ.-1 ) ) THEN
285 info = 11
286 END IF
287 IF( info.NE.0 ) GO TO 10
288*
289* When a column process of IACOL has a column block A,
290*
291 IF( iacol.GE.0 ) THEN
292 tbeta = zero
293 IF( myrow.EQ.jcrow ) tbeta = beta
294*
295 DO 20 i = 0, min( lcm, iceil(m,nb) ) - 1
296 mcrow = mod( mod(i, nprow) + iarow, nprow )
297 mccol = mod( mod(i, npcol) + iccol, npcol )
298 IF( lcmq.EQ.1 ) mq0 = numroc( m, nb, i, 0, npcol )
299 jdex = (i/npcol) * nb
300*
301* A source node copies the blocks to WORK, and send it
302*
303 IF( myrow.EQ.mcrow .AND. mycol.EQ.iacol ) THEN
304*
305* The source node is a destination node
306*
307 idex = (i/nprow) * nb
308 IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
309 CALL pbstr2at( icontxt, 'Col', trans, mp-idex, n, nb,
310 $ a(idex+1,1), lda, tbeta, c(1,jdex+1),
311 $ ldc, lcmp, lcmq )
312*
313* The source node sends blocks to a destination node
314*
315 ELSE
316 CALL pbstr2bt( icontxt, 'Col', trans, mp-idex, n, nb,
317 $ a(idex+1,1), lda, zero, work, n,
318 $ lcmp*nb )
319 CALL sgesd2d( icontxt, n, mq0, work, n, jcrow, mccol )
320 END IF
321*
322* A destination node receives the copied blocks
323*
324 ELSE IF( myrow.EQ.jcrow .AND. mycol.EQ.mccol ) THEN
325 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
326 CALL sgerv2d( icontxt, n, mq0, c, ldc, mcrow, iacol )
327 ELSE
328 CALL sgerv2d( icontxt, n, mq0, work, n, mcrow, iacol )
329 CALL pbstr2af( icontxt, 'Row', n, mq-jdex, nb, work, n,
330 $ tbeta, c(1,jdex+1), ldc, lcmp, lcmq,
331 $ mq0 )
332 END IF
333 END IF
334 20 CONTINUE
335*
336* Broadcast a row block of C in each column of template
337*
338 IF( icrow.EQ.-1 ) THEN
339 IF( myrow.EQ.jcrow ) THEN
340 CALL sgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
341 ELSE
342 CALL sgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
343 $ jcrow, mycol )
344 END IF
345 END IF
346*
347* When all column procesors have a copy of the column block A,
348*
349 ELSE
350 IF( lcmq.EQ.1 ) mq0 = mq
351*
352* Processors, which have diagonal blocks of A, copy them to
353* WORK array in transposed form
354*
355 DO 30 i = 0, lcmp-1
356 IF( mrcol.EQ.mod( nprow*i+mrrow, npcol ) ) THEN
357 IF( lcmq.EQ.1.AND.(icrow.EQ.-1.OR.icrow.EQ.myrow) ) THEN
358 CALL pbstr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
359 $ a(i*nb+1,1), lda, beta, c, ldc,
360 $ lcmp*nb )
361 ELSE
362 CALL pbstr2bt( icontxt, 'Col', trans, mp-i*nb, n, nb,
363 $ a(i*nb+1,1), lda, zero, work, n,
364 $ lcmp*nb )
365 END IF
366 END IF
367 30 CONTINUE
368*
369* Get diagonal blocks of A for each column of the template
370*
371 mcrow = mod( mod(mrcol,nprow)+iarow, nprow )
372 IF( lcmq.GT.1 ) THEN
373 mccol = mod( npcol+mycol-iccol, npcol )
374 CALL pbstrget( icontxt, 'Row', n, mq0, iceil(m,nb), work, n,
375 $ mcrow, mccol, igd, myrow, mycol, nprow,
376 $ npcol )
377 END IF
378*
379* Broadcast a row block of WORK in every row of template
380*
381 IF( icrow.EQ.-1 ) THEN
382 IF( myrow.EQ.mcrow ) THEN
383 IF( lcmq.GT.1 )
384 $ CALL pbstrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
385 $ c, ldc, lcmp, lcmq, mq0 )
386 CALL sgebs2d( icontxt, 'Col', '1-tree', n, mq, c, ldc )
387 ELSE
388 CALL sgebr2d( icontxt, 'Col', '1-tree', n, mq, c, ldc,
389 $ mcrow, mycol )
390 END IF
391*
392* Send a row block of WORK to the destination row
393*
394 ELSE
395 IF( lcmq.EQ.1 ) THEN
396 IF( myrow.EQ.mcrow ) THEN
397 IF( myrow.NE.icrow )
398 $ CALL sgesd2d( icontxt, n, mq, work, n, icrow, mycol )
399 ELSE IF( myrow.EQ.icrow ) THEN
400 IF( beta.EQ.zero ) THEN
401 CALL sgerv2d( icontxt, n, mq, c, ldc, mcrow, mycol )
402 ELSE
403 CALL sgerv2d( icontxt, n, mq, work, n, mcrow, mycol )
404 CALL pbsmatadd( icontxt, 'G', n, mq, one, work, n,
405 $ beta, c, ldc )
406 END IF
407 END IF
408*
409 ELSE
410 ml = mq0 * min( lcmq, max(0,iceil(m,nb)-mccol) )
411 IF( myrow.EQ.mcrow ) THEN
412 IF( myrow.NE.icrow )
413 $ CALL sgesd2d( icontxt, n, ml, work, n, icrow, mycol )
414 ELSE IF( myrow.EQ.icrow ) THEN
415 CALL sgerv2d( icontxt, n, ml, work, n, mcrow, mycol )
416 END IF
417*
418 IF( myrow.EQ.icrow )
419 $ CALL pbstrsrt( icontxt, 'Row', n, mq, nb, work, n, beta,
420 $ c, ldc, lcmp, lcmq, mq0 )
421 END IF
422 END IF
423*
424 END IF
425*
426* When A is a row block
427*
428 ELSE
429*
430* Form C <== A' ( A is a row block )
431* _
432* | |
433* | |
434* | | _____________
435* |C| <== |______A______|
436* | |
437* | |
438* |_|
439*
440* MRROW : row relative position in template from ICROW
441* MRCOL : column relative position in template from IACOL
442*
443 mrrow = mod( nprow+myrow-icrow, nprow )
444 mrcol = mod( npcol+mycol-iacol, npcol )
445 jccol = iccol
446 IF( iccol.EQ.-1 ) jccol = iacol
447*
448 np = numroc( n, nb, myrow, icrow, nprow )
449 nq = numroc( n, nb, mycol, iacol, npcol )
450 np0 = numroc( numroc(n, nb, 0, 0, nprow), nb, 0, 0, lcmp )
451*
452 IF( lda.LT.m .AND.
453 $ ( iarow.EQ.myrow .OR. iarow.EQ.-1 ) ) THEN
454 info = 8
455 ELSE IF( ldc.LT.np .AND.
456 $ ( iccol.EQ.mycol .OR. iccol.EQ.-1 ) ) THEN
457 info = 11
458 END IF
459 IF( info.NE.0 ) GO TO 10
460*
461* When a row process of IAROW has a row block A,
462*
463 IF( iarow.GE.0 ) THEN
464 tbeta = zero
465 IF( mycol.EQ.jccol ) tbeta = beta
466*
467 DO 40 i = 0, min( lcm, iceil(n,nb) ) - 1
468 mcrow = mod( mod(i, nprow) + icrow, nprow )
469 mccol = mod( mod(i, npcol) + iacol, npcol )
470 IF( lcmp.EQ.1 ) np0 = numroc( n, nb, i, 0, nprow )
471 idex = (i/nprow) * nb
472*
473* A source node copies the blocks to WORK, and send it
474*
475 IF( myrow.EQ.iarow .AND. mycol.EQ.mccol ) THEN
476*
477* The source node is a destination node
478*
479 jdex = (i/npcol) * nb
480 IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
481 CALL pbstr2at( icontxt, 'Row', trans, m, nq-jdex, nb,
482 $ a(1,jdex+1), lda, tbeta, c(idex+1,1),
483 $ ldc, lcmp, lcmq )
484*
485* The source node sends blocks to a destination node
486*
487 ELSE
488 CALL pbstr2bt( icontxt, 'Row', trans, m, nq-jdex, nb,
489 $ a(1,jdex+1), lda, zero, work, np0,
490 $ lcmq*nb )
491 CALL sgesd2d( icontxt, np0, m, work, np0,
492 $ mcrow, jccol )
493 END IF
494*
495* A destination node receives the copied blocks
496*
497 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jccol ) THEN
498 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
499 CALL sgerv2d( icontxt, np0, m, c, ldc, iarow, mccol )
500 ELSE
501 CALL sgerv2d( icontxt, np0, m, work, np0, iarow, mccol )
502 CALL pbstr2af( icontxt, 'Col', np-idex, m, nb, work,
503 $ np0, tbeta, c(idex+1,1), ldc, lcmp, lcmq,
504 $ np0 )
505 END IF
506 END IF
507 40 CONTINUE
508*
509* Broadcast a column block of WORK in each row of template
510*
511 IF( iccol.EQ.-1 ) THEN
512 IF( mycol.EQ.jccol ) THEN
513 CALL sgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
514 ELSE
515 CALL sgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
516 $ myrow, jccol )
517 END IF
518 END IF
519*
520* When all row procesors have a copy of the row block A,
521*
522 ELSE
523 IF( lcmp.EQ.1 ) np0 = np
524*
525* Processors, which have diagonal blocks of A, copy them to
526* WORK array in transposed form
527*
528 DO 50 i = 0, lcmq-1
529 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
530 IF( lcmp.EQ.1.AND.(iccol.EQ.-1.OR.iccol.EQ.mycol) ) THEN
531 CALL pbstr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
532 $ a(1,i*nb+1), lda, beta, c, ldc,
533 $ lcmq*nb )
534 ELSE
535 CALL pbstr2bt( icontxt, 'Row', trans, m, nq-i*nb, nb,
536 $ a(1,i*nb+1), lda, zero, work, np0,
537 $ lcmq*nb )
538 END IF
539 END IF
540 50 CONTINUE
541*
542* Get diagonal blocks of A for each row of the template
543*
544 mccol = mod( mod(mrrow, npcol)+iacol, npcol )
545 IF( lcmp.GT.1 ) THEN
546 mcrow = mod( nprow+myrow-icrow, nprow )
547 CALL pbstrget( icontxt, 'Col', np0, m, iceil(n,nb), work,
548 $ np0, mcrow, mccol, igd, myrow, mycol, nprow,
549 $ npcol )
550 END IF
551*
552* Broadcast a column block of WORK in every column of template
553*
554 IF( iccol.EQ.-1 ) THEN
555 IF( mycol.EQ.mccol ) THEN
556 IF( lcmp.GT.1 )
557 $ CALL pbstrsrt( icontxt, 'Col', np, m, nb, work, np0,
558 $ beta, c, ldc, lcmp, lcmq, np0 )
559 CALL sgebs2d( icontxt, 'Row', '1-tree', np, m, c, ldc )
560 ELSE
561 CALL sgebr2d( icontxt, 'Row', '1-tree', np, m, c, ldc,
562 $ myrow, mccol )
563 END IF
564*
565* Send a column block of WORK to the destination column
566*
567 ELSE
568 IF( lcmp.EQ.1 ) THEN
569 IF( mycol.EQ.mccol ) THEN
570 IF( mycol.NE.iccol )
571 $ CALL sgesd2d( icontxt, np, m, work, np, myrow, iccol )
572 ELSE IF( mycol.EQ.iccol ) THEN
573 IF( beta.EQ.zero ) THEN
574 CALL sgerv2d( icontxt, np, m, c, ldc, myrow, mccol )
575 ELSE
576 CALL sgerv2d( icontxt, np, m, work, np, myrow, mccol )
577 CALL pbsmatadd( icontxt, 'G', np, m, one, work, np,
578 $ beta, c, ldc )
579 END IF
580 END IF
581*
582 ELSE
583 ml = m * min( lcmp, max( 0, iceil(n,nb) - mcrow ) )
584 IF( mycol.EQ.mccol ) THEN
585 IF( mycol.NE.iccol )
586 $ CALL sgesd2d( icontxt, np0, ml, work, np0,
587 $ myrow, iccol )
588 ELSE IF( mycol.EQ.iccol ) THEN
589 CALL sgerv2d( icontxt, np0, ml, work, np0,
590 $ myrow, mccol )
591 END IF
592*
593 IF( mycol.EQ.iccol )
594 $ CALL pbstrsrt( icontxt, 'Col', np, m, nb, work, np0,
595 $ beta, c, ldc, lcmp, lcmq, np0 )
596 END IF
597 END IF
598*
599 END IF
600 END IF
601*
602 RETURN
603*
604* End of PBSTRAN
605*
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine sgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1072
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine sgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1113
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbstr2at(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq)
Definition pbstran.f:614
subroutine pbstr2bt(icontxt, adist, trans, m, n, nb, a, lda, beta, b, ldb, intv)
Definition pbstran.f:705
subroutine pbstr2af(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbstran.f:792
subroutine pbstrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbstrget.f:3
subroutine pbstrsrt(icontxt, adist, m, n, nb, a, lda, beta, b, ldb, lcmp, lcmq, nint)
Definition pbstrsrt.f:3