OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbctrnv.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbctrnv (icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
subroutine pbctr2a1 (icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
subroutine pbctr2b1 (icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)

Function/Subroutine Documentation

◆ pbctr2a1()

subroutine pbctr2a1 ( integer icontxt,
integer n,
integer nb,
integer nz,
complex, dimension( * ) x,
integer incx,
complex beta,
complex, dimension( * ) y,
integer incy,
integer intv )

Definition at line 647 of file pbctrnv.f.

649*
650* -- PB-BLAS routine (version 2.1) --
651* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
652* April 28, 1996
653*
654* .. Scalar Arguments ..
655 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
656 COMPLEX BETA
657* ..
658* .. Array Arguments ..
659 COMPLEX X( * ), Y( * )
660* ..
661*
662* Purpose
663* =======
664*
665* y <== x
666* y is a scattered vector, copied from a condensed vector x.
667*
668* ..
669* .. Intrinsic Functions ..
670 INTRINSIC min
671* ..
672* .. External Functions ..
673 INTEGER ICEIL
674 EXTERNAL iceil
675* ..
676* .. External Subroutines ..
677 EXTERNAL pbcvecadd
678* ..
679* .. Parameters ..
680 COMPLEX ONE
681 parameter( one = ( 1.0e+0, 0.0e+0 ) )
682* ..
683* .. Local Variables ..
684 INTEGER IX, IY, JZ, K, ITER
685*
686 ix = 0
687 iy = 0
688 jz = nz
689 iter = iceil( n+nz, intv )
690*
691 IF( iter.GT.1 ) THEN
692 CALL pbcvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
693 $ beta, y(iy*incy+1), incy )
694 ix = ix + nb - jz
695 iy = iy + intv - jz
696 jz = 0
697*
698 DO 10 k = 2, iter-1
699 CALL pbcvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
700 $ beta, y(iy*incy+1), incy )
701 ix = ix + nb
702 iy = iy + intv
703 10 CONTINUE
704 END IF
705*
706 CALL pbcvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
707 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
708*
709 RETURN
710*
711* End of PBCTR2A1
712*
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbcvecadd(icontxt, mode, n, alpha, x, incx, beta, y, incy)
Definition pbcvecadd.f:3

◆ pbctr2b1()

subroutine pbctr2b1 ( integer icontxt,
character*1 trans,
integer n,
integer nb,
integer nz,
complex, dimension( * ) x,
integer incx,
complex beta,
complex, dimension( * ) y,
integer incy,
integer jinx,
integer jiny )

Definition at line 719 of file pbctrnv.f.

721*
722* -- PB-BLAS routine (version 2.1) --
723* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
724* April 28, 1996
725*
726* .. Scalar Arguments ..
727 CHARACTER*1 TRANS
728 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
729 COMPLEX BETA
730* ..
731* .. Array Arguments ..
732 COMPLEX X( * ), Y( * )
733* ..
734*
735* Purpose
736* =======
737*
738* y <== x + beta * y
739* y is a condensed vector, copied from a scattered vector x
740*
741* ..
742* .. Intrinsic Functions ..
743 INTRINSIC min
744* ..
745* .. External Functions ..
746 INTEGER ICEIL
747 EXTERNAL iceil
748* ..
749* .. External Subroutines ..
750 EXTERNAL pbcvecadd
751* ..
752* .. Parameters ..
753 COMPLEX ONE
754 parameter( one = ( 1.0e+0, 0.0e+0 ) )
755* ..
756* .. Local Variables ..
757 INTEGER IX, IY, JZ, K, ITER, LENX, LENY
758*
759 IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
760 CALL pbcvecadd( icontxt, trans, n, one, x, incx, beta,
761 $ y, incy )
762*
763 ELSE
764 ix = 0
765 iy = 0
766 jz = nz
767 lenx = nb * jinx
768 leny = nb * jiny
769 iter = iceil( n+nz, lenx )
770*
771 IF( iter.GT.1 ) THEN
772 CALL pbcvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
773 $ incx, beta, y(iy*incy+1), incy )
774 ix = ix + lenx - jz
775 iy = iy + leny - jz
776 jz = 0
777*
778 DO 10 k = 2, iter-1
779 CALL pbcvecadd( icontxt, trans, nb, one, x(ix*incx+1),
780 $ incx, beta, y(iy*incy+1), incy )
781 ix = ix + lenx
782 iy = iy + leny
783 10 CONTINUE
784 END IF
785*
786 CALL pbcvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
787 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
788 END IF
789*
790 RETURN
791*
792* End of PBCTR2B1
793*

◆ pbctrnv()

subroutine pbctrnv ( integer icontxt,
character*1 xdist,
character*1 trans,
integer n,
integer nb,
integer nz,
complex, dimension( * ) x,
integer incx,
complex beta,
complex, dimension( * ) y,
integer incy,
integer ixrow,
integer ixcol,
integer iyrow,
integer iycol,
complex, dimension( * ) work )

Definition at line 1 of file pbctrnv.f.

4*
5* -- PB-BLAS routine (version 2.1) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7* April 28, 1996
8*
9* Jaeyoung Choi, Oak Ridge National Laboratory
10* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11* David Walker, Oak Ridge National Laboratory
12*
13* .. Scalar Arguments ..
14 CHARACTER*1 TRANS, XDIST
15 INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16 $ IYROW, N, NB, NZ
17 COMPLEX BETA
18* ..
19* .. Array Arguments ..
20 COMPLEX WORK( * ), X( * ), Y( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PBCTRNV transposes a column vector to row vector, or a row vector to
27* column vector by reallocating data distribution.
28*
29* Y := X'
30*
31* where X and Y are N vectors.
32*
33* Parameters
34* ==========
35*
36* ICONTXT (input) INTEGER
37* ICONTXT is the BLACS mechanism for partitioning communication
38* space. A defining property of a context is that a message in
39* a context cannot be sent or received in another context. The
40* BLACS context includes the definition of a grid, and each
41* process' coordinates in it.
42*
43* XDIST (input) CHARACTER*1
44* XDIST specifies whether X is a column vector or a row vector,
45*
46* XDIST = 'C', X is a column vector (distributed columnwise)
47* XDIST = 'R', X is a row vector (distributed rowwise)
48*
49* TRANS (input) CHARACTER*1
50* TRANS specifies whether the transposed format is transpose
51* or conjugate transpose. If the vectors X and Y are real,
52* the argument is ignored.
53*
54* TRANS = 'T', transpose
55* TRANS = 'C', conjugate transpose
56*
57* N (input) INTEGER
58* N specifies the (global) number of the vector X and the
59* vector Y. N >= 0.
60*
61* NB (input) INTEGER
62* NB specifies the block size of vectors X and Y. NB >= 0.
63*
64* NZ (input) INTEGER
65* NZ is the column offset to specify the column distance from
66* the beginning of the block to the first element of the
67* vector X, and the row offset to the first element of the
68* vector Y if XDIST = 'C'.
69* Otherwise, it is row offset to specify the row distance
70* from the beginning of the block to the first element of the
71* vector X, and the column offset to the first element of the
72* vector Y. 0 < NZ <= NB.
73*
74* X (input) COMPLEX array of dimension at least
75* ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76* ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77* The incremented array X must contain the vector X.
78*
79* INCX (input) INTEGER
80* INCX specifies the increment for the elements of X.
81* INCX <> 0.
82*
83* BETA (input) COMPLEX
84* BETA specifies scaler beta.
85*
86* Y (input/output) COMPLEX array of dimension at least
87* ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88* ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89* The incremented array Y must contain the vector Y.
90* Y will not be referenced if beta is zero.
91*
92* INCY (input) INTEGER
93* INCY specifies the increment for the elements of Y.
94* INCY <> 0.
95*
96* IXROW (input) INTEGER
97* IXROW specifies a row of the process template, which holds
98* the first element of the vector X. If X is a row vector and
99* all rows of processes have a copy of X, then set IXROW = -1.
100*
101* IXCOL (input) INTEGER
102* IXCOL specifies a column of the process template,
103* which holds the first element of the vector X. If X is a
104* column block and all columns of processes have a copy of X,
105* then set IXCOL = -1.
106*
107* IYROW (input) INTEGER
108* IYROW specifies the current row process which holds the
109* first element of the vector Y, which is transposed of X.
110* If X is a column vector and the transposed row vector Y is
111* distributed all rows of processes, set IYROW = -1.
112*
113* IYCOL (input) INTEGER
114* IYCOL specifies the current column process which holds
115* the first element of the vector Y, which is transposed of Y.
116* If X is a row block and the transposed column vector Y is
117* distributed all columns of processes, set IYCOL = -1.
118*
119* WORK (workspace) COMPLEX array of dimension Size(WORK).
120* It needs extra working space of x**T or x**H.
121*
122* Parameters Details
123* ==================
124*
125* Nx It is a local portion of N owned by a process, where x is
126* replaced by either p (=NPROW) or q (=NPCOL)). The value is
127* determined by N, NB, NZ, x, and MI, where NB is a block size,
128* NZ is a offset from the beginning of the block, and MI is a
129* row or column position in a process template. Nx is equal
130* to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132* Communication Scheme
133* ====================
134*
135* The communication scheme of the routine is set to '1-tree', which is
136* fan-out. (For details, see BLACS user's guide.)
137*
138* Memory Requirement of WORK
139* ==========================
140*
141* NN = N + NZ
142* Npb = CEIL( NN, NB*NPROW )
143* Nqb = CEIL( NN, NB*NPCOL )
144* LCMP = LCM / NPROW
145* LCMQ = LCM / NPCOL
146*
147* (1) XDIST = 'C'
148* (a) IXCOL != -1
149* Size(WORK) = CEIL(Nqb,LCMQ)*NB
150* (b) IXCOL = -1
151* Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153* (2) XDIST = 'R'
154* (a) IXROW != -1
155* Size(WORK) = CEIL(Npb,LCMP)*NB
156* (b) IXROW = -1
157* Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159* Notes
160* -----
161* More precise space can be computed as
162*
163* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164* CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166* =====================================================================
167*
168* .. Parameters ..
169 COMPLEX ONE, ZERO
170 parameter( one = ( 1.0e+0, 0.0e+0 ),
171 $ zero = ( 0.0e+0, 0.0e+0 ) )
172* ..
173* .. Local Scalars ..
174 LOGICAL COLFORM, ROWFORM
175 INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
176 $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
177 $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
178 $ NQ, NQ0, NQ1
179 COMPLEX TBETA
180* ..
181* .. External Functions ..
182 LOGICAL LSAME
183 INTEGER ILCM, ICEIL, NUMROC
184 EXTERNAL lsame, ilcm, iceil, numroc
185* ..
186* .. External Subroutines ..
187 EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, cgerv2d,
188 $ cgesd2d, pbctr2a1, pbctr2b1, pbctrget,
190* ..
191* .. Intrinsic Functions ..
192 INTRINSIC max, min, mod
193* ..
194* .. Executable Statements ..
195*
196* Quick return if possible.
197*
198 IF( n.EQ.0 ) RETURN
199*
200 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
201*
202 colform = lsame( xdist, 'C' )
203 rowform = lsame( xdist, 'R' )
204*
205* Test the input parameters.
206*
207 info = 0
208 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
209 info = 2
210 ELSE IF( n .LT.0 ) THEN
211 info = 4
212 ELSE IF( nb .LT.1 ) THEN
213 info = 5
214 ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
215 info = 6
216 ELSE IF( incx.EQ.0 ) THEN
217 info = 8
218 ELSE IF( incy.EQ.0 ) THEN
219 info = 11
220 ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
221 $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
222 info = 12
223 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
224 $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
225 info = 13
226 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
227 $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
228 info = 14
229 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
230 $ ( iycol.EQ.-1 .AND. colform ) ) THEN
231 info = 15
232 END IF
233*
234 10 CONTINUE
235 IF( info.NE.0 ) THEN
236 CALL pxerbla( icontxt, 'PBCTRNV ', info )
237 RETURN
238 END IF
239*
240* Start the operations.
241*
242* LCM : the least common multiple of NPROW and NPCOL
243*
244 lcm = ilcm( nprow, npcol )
245 lcmp = lcm / nprow
246 lcmq = lcm / npcol
247 igd = npcol / lcmp
248 nn = n + nz
249*
250* When x is a column vector
251*
252 IF( colform ) THEN
253*
254* Form y <== x' ( x is a column vector )
255*
256* ||
257* ||
258* _____________ ||
259* -----(y)----- <== (x)
260* ||
261* ||
262* ||
263*
264 IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
265 info = 12
266 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
267 info = 13
268 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
269 info = 14
270 ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
271 info = 15
272 END IF
273 IF( info.NE.0 ) GO TO 10
274*
275* MRROW : row relative position in template from IXROW
276* MRCOL : column relative position in template from IYCOL
277*
278 mrrow = mod( nprow+myrow-ixrow, nprow )
279 mrcol = mod( npcol+mycol-iycol, npcol )
280 jyrow = iyrow
281 IF( iyrow.EQ.-1 ) jyrow = ixrow
282*
283 np = numroc( nn, nb, myrow, ixrow, nprow )
284 IF( mrrow.EQ.0 ) np = np - nz
285 nq = numroc( nn, nb, mycol, iycol, npcol )
286 IF( mrcol.EQ.0 ) nq = nq - nz
287 nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
288*
289* When a column process of IXCOL has a column block A,
290*
291 IF( ixcol .GE. 0 ) THEN
292 tbeta = zero
293 IF( myrow.EQ.jyrow ) tbeta = beta
294 kz = nz
295*
296 DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
297 mcrow = mod( mod(i, nprow) + ixrow, nprow )
298 mccol = mod( mod(i, npcol) + iycol, npcol )
299 IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
300 jdex = (i/npcol) * nb
301 IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
302*
303* A source node copies the blocks to WORK, and send it
304*
305 IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
306*
307* The source node is a destination node
308*
309 idex = (i/nprow) * nb
310 IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
311 IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
312 CALL pbctr2b1( icontxt, trans, np-idex, nb, kz,
313 $ x(idex*incx+1), incx, tbeta,
314 $ y(jdex*incy+1), incy, lcmp, lcmq )
315*
316* The source node sends blocks to a destination node
317*
318 ELSE
319 CALL pbctr2b1( icontxt, trans, np-idex, nb, kz,
320 $ x(idex*incx+1), incx, zero, work, 1,
321 $ lcmp, 1 )
322 CALL cgesd2d( icontxt, 1, nq0-kz, work, 1,
323 $ jyrow, mccol )
324 END IF
325*
326* A destination node receives the copied vector
327*
328 ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
329 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
330 CALL cgerv2d( icontxt, 1, nq0-kz, y, incy,
331 $ mcrow, ixcol )
332 ELSE
333 CALL cgerv2d( icontxt, 1, nq0-kz, work, 1,
334 $ mcrow, ixcol )
335 CALL pbctr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
336 $ y(jdex*incy+1), incy, lcmq*nb )
337 END IF
338 END IF
339 kz = 0
340 20 CONTINUE
341*
342* Broadcast a row block of WORK in each column of template
343*
344 IF( iyrow.EQ.-1 ) THEN
345 IF( myrow.EQ.jyrow ) THEN
346 CALL cgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
347 ELSE
348 CALL cgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
349 $ jyrow, mycol )
350 END IF
351 END IF
352*
353* When all column procesors have a copy of the column block A,
354*
355 ELSE
356 IF( lcmq.EQ.1 ) nq0 = nq
357*
358* Processors, which have diagonal blocks of X, copy them to
359* WORK array in transposed form
360*
361 kz = 0
362 IF( mrrow.EQ.0 ) kz = nz
363 jz = 0
364 IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
365*
366 DO 30 i = 0, lcmp - 1
367 IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
368 idex = max( 0, i*nb-kz )
369 IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
370 CALL pbctr2b1( icontxt, trans, np-idex, nb, jz,
371 $ x(idex*incx+1), incx, beta, y, incy,
372 $ lcmp, 1 )
373 ELSE
374 CALL pbctr2b1( icontxt, trans, np-idex, nb, jz,
375 $ x(idex*incx+1), incx, zero, work, 1,
376 $ lcmp, 1 )
377 END IF
378 END IF
379 30 CONTINUE
380*
381* Get diagonal blocks of A for each column of the template
382*
383 mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
384 IF( lcmq.GT.1 ) THEN
385 mccol = mod( npcol+mycol-iycol, npcol )
386 CALL pbctrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
387 $ work, 1, mcrow, mccol, igd, myrow, mycol,
388 $ nprow, npcol )
389 END IF
390*
391* Broadcast a row block of WORK in every row of template
392*
393 IF( iyrow.EQ.-1 ) THEN
394 IF( myrow.EQ.mcrow ) THEN
395 IF( lcmq.GT.1 ) THEN
396 kz = 0
397 IF( mycol.EQ.iycol ) kz = nz
398 CALL pbctrst1( icontxt, 'Row', nq, nb, kz, work, 1,
399 $ beta, y, incy, lcmp, lcmq, nq0 )
400 END IF
401 CALL cgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
402 ELSE
403 CALL cgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
404 $ mcrow, mycol )
405 END IF
406*
407* Send a row block of WORK to the destination row
408*
409 ELSE
410 IF( lcmq.EQ.1 ) THEN
411 IF( myrow.EQ.mcrow ) THEN
412 IF( myrow.NE.iyrow )
413 $ CALL cgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
414 ELSE IF( myrow.EQ.iyrow ) THEN
415 IF( beta.EQ.zero ) THEN
416 CALL cgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
417 ELSE
418 CALL cgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
419 CALL pbcvecadd( icontxt, 'G', nq0, one, work, 1,
420 $ beta, y, incy )
421 END IF
422 END IF
423*
424 ELSE
425 nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
426 IF( myrow.EQ.mcrow ) THEN
427 IF( myrow.NE.iyrow )
428 $ CALL cgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
429 ELSE IF( myrow.EQ.iyrow ) THEN
430 CALL cgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
431 END IF
432*
433 IF( myrow.EQ.iyrow ) THEN
434 kz = 0
435 IF( mycol.EQ.iycol ) kz = nz
436 CALL pbctrst1( icontxt, 'Row', nq, nb, kz, work, 1,
437 $ beta, y, incy, lcmp, lcmq, nq0 )
438 END IF
439 END IF
440 END IF
441 END IF
442*
443* When x is a row vector
444*
445 ELSE
446*
447* Form y <== x' ( x is a row block )
448*
449* ||
450* ||
451* || _____________
452* (y) <== -----(x)-----
453* ||
454* ||
455* ||
456*
457 IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
458 info = 12
459 ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
460 info = 13
461 ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
462 info = 14
463 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
464 info = 15
465 END IF
466 IF( info.NE.0 ) GO TO 10
467*
468* MRROW : row relative position in template from IYROW
469* MRCOL : column relative position in template from IXCOL
470*
471 mrrow = mod( nprow+myrow-iyrow, nprow )
472 mrcol = mod( npcol+mycol-ixcol, npcol )
473 jycol = iycol
474 IF( iycol.EQ.-1 ) jycol = ixcol
475*
476 np = numroc( nn, nb, myrow, iyrow, nprow )
477 IF( mrrow.EQ.0 ) np = np - nz
478 nq = numroc( nn, nb, mycol, ixcol, npcol )
479 IF( mrcol.EQ.0 ) nq = nq - nz
480 np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
481*
482* When a row process of IXROW has a row block A,
483*
484 IF( ixrow .GE. 0 ) THEN
485 tbeta = zero
486 IF( mycol.EQ.jycol ) tbeta = beta
487 kz = nz
488*
489 DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
490 mcrow = mod( mod(i, nprow) + iyrow, nprow )
491 mccol = mod( mod(i, npcol) + ixcol, npcol )
492 IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
493 jdex = (i/nprow) * nb
494 IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
495*
496* A source node copies the blocks to WORK, and send it
497*
498 IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
499*
500* The source node is a destination node
501*
502 idex = (i/npcol) * nb
503 IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
504 IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
505 CALL pbctr2b1( icontxt, trans, nq-idex, nb, kz,
506 $ x(idex*incx+1), incx, tbeta,
507 $ y(jdex*incy+1), incy, lcmq, lcmp )
508*
509* The source node sends blocks to a destination node
510*
511 ELSE
512 CALL pbctr2b1( icontxt, trans, nq-idex, nb, kz,
513 $ x(idex*incx+1), incx, zero, work, 1,
514 $ lcmq, 1 )
515 CALL cgesd2d( icontxt, 1, np0-kz, work, 1,
516 $ mcrow, jycol )
517 END IF
518*
519* A destination node receives the copied blocks
520*
521 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
522 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
523 CALL cgerv2d( icontxt, 1, np0-kz, y, incy,
524 $ ixrow, mccol )
525 ELSE
526 CALL cgerv2d( icontxt, 1, np0-kz, work, 1,
527 $ ixrow, mccol )
528 CALL pbctr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
529 $ y(jdex*incy+1), incy, lcmp*nb )
530 END IF
531 END IF
532 kz = 0
533 40 CONTINUE
534*
535* Broadcast a column vector Y in each row of template
536*
537 IF( iycol.EQ.-1 ) THEN
538 IF( mycol.EQ.jycol ) THEN
539 CALL cgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
540 ELSE
541 CALL cgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
542 $ myrow, jycol )
543 END IF
544 END IF
545*
546* When all row procesors have a copy of the row block A,
547*
548 ELSE
549 IF( lcmp.EQ.1 ) np0 = np
550*
551* Processors, which have diagonal blocks of A, copy them to
552* WORK array in transposed form
553*
554 kz = 0
555 IF( mrcol.EQ.0 ) kz = nz
556 jz = 0
557 IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
558*
559 DO 50 i = 0, lcmq-1
560 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
561 idex = max( 0, i*nb-kz )
562 IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
563 CALL pbctr2b1( icontxt, trans, nq-idex, nb, jz,
564 $ x(idex*incx+1), incx, beta, y, incy,
565 $ lcmq, 1 )
566 ELSE
567 CALL pbctr2b1( icontxt, trans, nq-idex, nb, jz,
568 $ x(idex*incx+1), incx, zero, work, 1,
569 $ lcmq, 1 )
570 END IF
571 END IF
572 50 CONTINUE
573*
574* Get diagonal blocks of A for each row of the template
575*
576 mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
577 IF( lcmp.GT.1 ) THEN
578 mcrow = mod( nprow+myrow-iyrow, nprow )
579 CALL pbctrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
580 $ work, 1, mcrow, mccol, igd, myrow, mycol,
581 $ nprow, npcol )
582 END IF
583*
584* Broadcast a column block of WORK in every column of template
585*
586 IF( iycol.EQ.-1 ) THEN
587 IF( mycol.EQ.mccol ) THEN
588 IF( lcmp.GT.1 ) THEN
589 kz = 0
590 IF( myrow.EQ.iyrow ) kz = nz
591 CALL pbctrst1( icontxt, 'Col', np, nb, kz, work, 1,
592 $ beta, y, incy, lcmp, lcmq, np0 )
593 END IF
594 CALL cgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
595 ELSE
596 CALL cgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
597 $ myrow, mccol )
598 END IF
599*
600* Send a column block of WORK to the destination column
601*
602 ELSE
603 IF( lcmp.EQ.1 ) THEN
604 IF( mycol.EQ.mccol ) THEN
605 IF( mycol.NE.iycol )
606 $ CALL cgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
607 ELSE IF( mycol.EQ.iycol ) THEN
608 IF( beta.EQ.zero ) THEN
609 CALL cgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
610 ELSE
611 CALL cgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
612 CALL pbcvecadd( icontxt, 'G', np, one, work, 1, beta,
613 $ y, incy )
614 END IF
615 END IF
616*
617 ELSE
618 np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
619 IF( mycol.EQ.mccol ) THEN
620 IF( mycol.NE.iycol )
621 $ CALL cgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
622 ELSE IF( mycol.EQ.iycol ) THEN
623 CALL cgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
624 END IF
625*
626 IF( mycol.EQ.iycol ) THEN
627 kz = 0
628 IF( myrow.EQ.iyrow ) kz = nz
629 CALL pbctrst1( icontxt, 'Col', np, nb, kz, work, 1,
630 $ beta, y, incy, lcmp, lcmq, np0 )
631 END IF
632 END IF
633 END IF
634 END IF
635 END IF
636*
637 RETURN
638*
639* End of PBCTRNV
640*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine cgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1062
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine cgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1103
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbctrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbctrget.f:3
subroutine pbctr2a1(icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
Definition pbctrnv.f:649
subroutine pbctr2b1(icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)
Definition pbctrnv.f:721
subroutine pbctrst1(icontxt, xdist, n, nb, nz, x, incx, beta, y, incy, lcmp, lcmq, nint)
Definition pbctrst1.f:3