OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbztrnv.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbztrnv (icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
subroutine pbztr2a1 (icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
subroutine pbztr2b1 (icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)

Function/Subroutine Documentation

◆ pbztr2a1()

subroutine pbztr2a1 ( integer icontxt,
integer n,
integer nb,
integer nz,
complex*16, dimension( * ) x,
integer incx,
complex*16 beta,
complex*16, dimension( * ) y,
integer incy,
integer intv )

Definition at line 647 of file pbztrnv.f.

649*
650* -- PB-BLAS routine (version 2.1) --
651* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
652* April 28, 1996
653*
654* .. Scalar Arguments ..
655 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
656 COMPLEX*16 BETA
657* ..
658* .. Array Arguments ..
659 COMPLEX*16 X( * ), Y( * )
660* ..
661*
662* Purpose
663* =======
664*
665* y <== x
666* y is a scattered vector, copied from a condensed vector x.
667*
668* ..
669* .. Intrinsic Functions ..
670 INTRINSIC min
671* ..
672* .. External Functions ..
673 INTEGER ICEIL
674 EXTERNAL iceil
675* ..
676* .. External Subroutines ..
677 EXTERNAL pbzvecadd
678* ..
679* .. Parameters ..
680 COMPLEX*16 ONE
681 parameter( one = ( 1.0d+0, 0.0d+0 ) )
682* ..
683* .. Local Variables ..
684 INTEGER IX, IY, JZ, K, ITER
685*
686 ix = 0
687 iy = 0
688 jz = nz
689 iter = iceil( n+nz, intv )
690*
691 IF( iter.GT.1 ) THEN
692 CALL pbzvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
693 $ beta, y(iy*incy+1), incy )
694 ix = ix + nb - jz
695 iy = iy + intv - jz
696 jz = 0
697*
698 DO 10 k = 2, iter-1
699 CALL pbzvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
700 $ beta, y(iy*incy+1), incy )
701 ix = ix + nb
702 iy = iy + intv
703 10 CONTINUE
704 END IF
705*
706 CALL pbzvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
707 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
708*
709 RETURN
710*
711* End of PBZTR2A1
712*
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbzvecadd(icontxt, mode, n, alpha, x, incx, beta, y, incy)
Definition pbzvecadd.f:3

◆ pbztr2b1()

subroutine pbztr2b1 ( integer icontxt,
character*1 trans,
integer n,
integer nb,
integer nz,
complex*16, dimension( * ) x,
integer incx,
complex*16 beta,
complex*16, dimension( * ) y,
integer incy,
integer jinx,
integer jiny )

Definition at line 719 of file pbztrnv.f.

721*
722* -- PB-BLAS routine (version 2.1) --
723* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
724* April 28, 1996
725*
726* .. Scalar Arguments ..
727 CHARACTER*1 TRANS
728 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
729 COMPLEX*16 BETA
730* ..
731* .. Array Arguments ..
732 COMPLEX*16 X( * ), Y( * )
733* ..
734*
735* Purpose
736* =======
737*
738* y <== x + beta * y
739* y is a condensed vector, copied from a scattered vector x
740*
741* ..
742* .. Intrinsic Functions ..
743 INTRINSIC min
744* ..
745* .. External Functions ..
746 INTEGER ICEIL
747 EXTERNAL iceil
748* ..
749* .. External Subroutines ..
750 EXTERNAL pbzvecadd
751* ..
752* .. Parameters ..
753 COMPLEX*16 ONE
754 parameter( one = ( 1.0d+0, 0.0d+0 ) )
755* ..
756* .. Local Variables ..
757 INTEGER IX, IY, JZ, K, ITER, LENX, LENY
758*
759 IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
760 CALL pbzvecadd( icontxt, trans, n, one, x, incx, beta,
761 $ y, incy )
762*
763 ELSE
764 ix = 0
765 iy = 0
766 jz = nz
767 lenx = nb * jinx
768 leny = nb * jiny
769 iter = iceil( n+nz, lenx )
770*
771 IF( iter.GT.1 ) THEN
772 CALL pbzvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
773 $ incx, beta, y(iy*incy+1), incy )
774 ix = ix + lenx - jz
775 iy = iy + leny - jz
776 jz = 0
777*
778 DO 10 k = 2, iter-1
779 CALL pbzvecadd( icontxt, trans, nb, one, x(ix*incx+1),
780 $ incx, beta, y(iy*incy+1), incy )
781 ix = ix + lenx
782 iy = iy + leny
783 10 CONTINUE
784 END IF
785*
786 CALL pbzvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
787 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
788 END IF
789*
790 RETURN
791*
792* End of PBZTR2B1
793*

◆ pbztrnv()

subroutine pbztrnv ( integer icontxt,
character*1 xdist,
character*1 trans,
integer n,
integer nb,
integer nz,
complex*16, dimension( * ) x,
integer incx,
complex*16 beta,
complex*16, dimension( * ) y,
integer incy,
integer ixrow,
integer ixcol,
integer iyrow,
integer iycol,
complex*16, dimension( * ) work )

Definition at line 1 of file pbztrnv.f.

4*
5* -- PB-BLAS routine (version 2.1) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7* April 28, 1996
8*
9* Jaeyoung Choi, Oak Ridge National Laboratory
10* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11* David Walker, Oak Ridge National Laboratory
12*
13* .. Scalar Arguments ..
14 CHARACTER*1 TRANS, XDIST
15 INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16 $ IYROW, N, NB, NZ
17 COMPLEX*16 BETA
18* ..
19* .. Array Arguments ..
20 COMPLEX*16 WORK( * ), X( * ), Y( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PBZTRNV transposes a column vector to row vector, or a row vector to
27* column vector by reallocating data distribution.
28*
29* Y := X'
30*
31* where X and Y are N vectors.
32*
33* Parameters
34* ==========
35*
36* ICONTXT (input) INTEGER
37* ICONTXT is the BLACS mechanism for partitioning communication
38* space. A defining property of a context is that a message in
39* a context cannot be sent or received in another context. The
40* BLACS context includes the definition of a grid, and each
41* process' coordinates in it.
42*
43* XDIST (input) CHARACTER*1
44* XDIST specifies whether X is a column vector or a row vector,
45*
46* XDIST = 'C', X is a column vector (distributed columnwise)
47* XDIST = 'R', X is a row vector (distributed rowwise)
48*
49* TRANS (input) CHARACTER*1
50* TRANS specifies whether the transposed format is transpose
51* or conjugate transpose. If the vectors X and Y are real,
52* the argument is ignored.
53*
54* TRANS = 'T', transpose
55* TRANS = 'C', conjugate transpose
56*
57* N (input) INTEGER
58* N specifies the (global) number of the vector X and the
59* vector Y. N >= 0.
60*
61* NB (input) INTEGER
62* NB specifies the block size of vectors X and Y. NB >= 0.
63*
64* NZ (input) INTEGER
65* NZ is the column offset to specify the column distance from
66* the beginning of the block to the first element of the
67* vector X, and the row offset to the first element of the
68* vector Y if XDIST = 'C'.
69* Otherwise, it is row offset to specify the row distance
70* from the beginning of the block to the first element of the
71* vector X, and the column offset to the first element of the
72* vector Y. 0 < NZ <= NB.
73*
74* X (input) COMPLEX*16 array of dimension at least
75* ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76* ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77* The incremented array X must contain the vector X.
78*
79* INCX (input) INTEGER
80* INCX specifies the increment for the elements of X.
81* INCX <> 0.
82*
83* BETA (input) COMPLEX*16
84* BETA specifies scaler beta.
85*
86* Y (input/output) COMPLEX*16 array of dimension at least
87* ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88* ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89* The incremented array Y must contain the vector Y.
90* Y will not be referenced if beta is zero.
91*
92* INCY (input) INTEGER
93* INCY specifies the increment for the elements of Y.
94* INCY <> 0.
95*
96* IXROW (input) INTEGER
97* IXROW specifies a row of the process template, which holds
98* the first element of the vector X. If X is a row vector and
99* all rows of processes have a copy of X, then set IXROW = -1.
100*
101* IXCOL (input) INTEGER
102* IXCOL specifies a column of the process template,
103* which holds the first element of the vector X. If X is a
104* column block and all columns of processes have a copy of X,
105* then set IXCOL = -1.
106*
107* IYROW (input) INTEGER
108* IYROW specifies the current row process which holds the
109* first element of the vector Y, which is transposed of X.
110* If X is a column vector and the transposed row vector Y is
111* distributed all rows of processes, set IYROW = -1.
112*
113* IYCOL (input) INTEGER
114* IYCOL specifies the current column process which holds
115* the first element of the vector Y, which is transposed of Y.
116* If X is a row block and the transposed column vector Y is
117* distributed all columns of processes, set IYCOL = -1.
118*
119* WORK (workspace) COMPLEX*16 array of dimension Size(WORK).
120* It needs extra working space of x**T or x**H.
121*
122* Parameters Details
123* ==================
124*
125* Nx It is a local portion of N owned by a process, where x is
126* replaced by either p (=NPROW) or q (=NPCOL)). The value is
127* determined by N, NB, NZ, x, and MI, where NB is a block size,
128* NZ is a offset from the beginning of the block, and MI is a
129* row or column position in a process template. Nx is equal
130* to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132* Communication Scheme
133* ====================
134*
135* The communication scheme of the routine is set to '1-tree', which is
136* fan-out. (For details, see BLACS user's guide.)
137*
138* Memory Requirement of WORK
139* ==========================
140*
141* NN = N + NZ
142* Npb = CEIL( NN, NB*NPROW )
143* Nqb = CEIL( NN, NB*NPCOL )
144* LCMP = LCM / NPROW
145* LCMQ = LCM / NPCOL
146*
147* (1) XDIST = 'C'
148* (a) IXCOL != -1
149* Size(WORK) = CEIL(Nqb,LCMQ)*NB
150* (b) IXCOL = -1
151* Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153* (2) XDIST = 'R'
154* (a) IXROW != -1
155* Size(WORK) = CEIL(Npb,LCMP)*NB
156* (b) IXROW = -1
157* Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159* Notes
160* -----
161* More precise space can be computed as
162*
163* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164* CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166* =====================================================================
167*
168* .. Parameters ..
169 COMPLEX*16 ONE, ZERO
170 parameter( one = ( 1.0d+0, 0.0d+0 ),
171 $ zero = ( 0.0d+0, 0.0d+0 ) )
172* ..
173* .. Local Scalars ..
174 LOGICAL COLFORM, ROWFORM
175 INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
176 $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
177 $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
178 $ NQ, NQ0, NQ1
179 COMPLEX*16 TBETA
180* ..
181* .. External Functions ..
182 LOGICAL LSAME
183 INTEGER ILCM, ICEIL, NUMROC
184 EXTERNAL lsame, ilcm, iceil, numroc
185* ..
186* .. External Subroutines ..
189 $ zgerv2d, zgesd2d
190* ..
191* .. Intrinsic Functions ..
192 INTRINSIC max, min, mod
193* ..
194* .. Executable Statements ..
195*
196* Quick return if possible.
197*
198 IF( n.EQ.0 ) RETURN
199*
200 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
201*
202 colform = lsame( xdist, 'C' )
203 rowform = lsame( xdist, 'R' )
204*
205* Test the input parameters.
206*
207 info = 0
208 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
209 info = 2
210 ELSE IF( n .LT.0 ) THEN
211 info = 4
212 ELSE IF( nb .LT.1 ) THEN
213 info = 5
214 ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
215 info = 6
216 ELSE IF( incx.EQ.0 ) THEN
217 info = 8
218 ELSE IF( incy.EQ.0 ) THEN
219 info = 11
220 ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
221 $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
222 info = 12
223 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
224 $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
225 info = 13
226 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
227 $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
228 info = 14
229 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
230 $ ( iycol.EQ.-1 .AND. colform ) ) THEN
231 info = 15
232 END IF
233*
234 10 CONTINUE
235 IF( info.NE.0 ) THEN
236 CALL pxerbla( icontxt, 'PBZTRNV ', info )
237 RETURN
238 END IF
239*
240* Start the operations.
241*
242* LCM : the least common multiple of NPROW and NPCOL
243*
244 lcm = ilcm( nprow, npcol )
245 lcmp = lcm / nprow
246 lcmq = lcm / npcol
247 igd = npcol / lcmp
248 nn = n + nz
249*
250* When x is a column vector
251*
252 IF( colform ) THEN
253*
254* Form y <== x' ( x is a column vector )
255*
256* ||
257* ||
258* _____________ ||
259* -----(y)----- <== (x)
260* ||
261* ||
262* ||
263*
264 IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
265 info = 12
266 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
267 info = 13
268 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
269 info = 14
270 ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
271 info = 15
272 END IF
273 IF( info.NE.0 ) GO TO 10
274*
275* MRROW : row relative position in template from IXROW
276* MRCOL : column relative position in template from IYCOL
277*
278 mrrow = mod( nprow+myrow-ixrow, nprow )
279 mrcol = mod( npcol+mycol-iycol, npcol )
280 jyrow = iyrow
281 IF( iyrow.EQ.-1 ) jyrow = ixrow
282*
283 np = numroc( nn, nb, myrow, ixrow, nprow )
284 IF( mrrow.EQ.0 ) np = np - nz
285 nq = numroc( nn, nb, mycol, iycol, npcol )
286 IF( mrcol.EQ.0 ) nq = nq - nz
287 nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
288*
289* When a column process of IXCOL has a column block A,
290*
291 IF( ixcol .GE. 0 ) THEN
292 tbeta = zero
293 IF( myrow.EQ.jyrow ) tbeta = beta
294 kz = nz
295*
296 DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
297 mcrow = mod( mod(i, nprow) + ixrow, nprow )
298 mccol = mod( mod(i, npcol) + iycol, npcol )
299 IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
300 jdex = (i/npcol) * nb
301 IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
302*
303* A source node copies the blocks to WORK, and send it
304*
305 IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
306*
307* The source node is a destination node
308*
309 idex = (i/nprow) * nb
310 IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
311 IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
312 CALL pbztr2b1( icontxt, trans, np-idex, nb, kz,
313 $ x(idex*incx+1), incx, tbeta,
314 $ y(jdex*incy+1), incy, lcmp, lcmq )
315*
316* The source node sends blocks to a destination node
317*
318 ELSE
319 CALL pbztr2b1( icontxt, trans, np-idex, nb, kz,
320 $ x(idex*incx+1), incx, zero, work, 1,
321 $ lcmp, 1 )
322 CALL zgesd2d( icontxt, 1, nq0-kz, work, 1,
323 $ jyrow, mccol )
324 END IF
325*
326* A destination node receives the copied vector
327*
328 ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
329 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
330 CALL zgerv2d( icontxt, 1, nq0-kz, y, incy,
331 $ mcrow, ixcol )
332 ELSE
333 CALL zgerv2d( icontxt, 1, nq0-kz, work, 1,
334 $ mcrow, ixcol )
335 CALL pbztr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
336 $ y(jdex*incy+1), incy, lcmq*nb )
337 END IF
338 END IF
339 kz = 0
340 20 CONTINUE
341*
342* Broadcast a row block of WORK in each column of template
343*
344 IF( iyrow.EQ.-1 ) THEN
345 IF( myrow.EQ.jyrow ) THEN
346 CALL zgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
347 ELSE
348 CALL zgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
349 $ jyrow, mycol )
350 END IF
351 END IF
352*
353* When all column procesors have a copy of the column block A,
354*
355 ELSE
356 IF( lcmq.EQ.1 ) nq0 = nq
357*
358* Processors, which have diagonal blocks of X, copy them to
359* WORK array in transposed form
360*
361 kz = 0
362 IF( mrrow.EQ.0 ) kz = nz
363 jz = 0
364 IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
365*
366 DO 30 i = 0, lcmp - 1
367 IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
368 idex = max( 0, i*nb-kz )
369 IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
370 CALL pbztr2b1( icontxt, trans, np-idex, nb, jz,
371 $ x(idex*incx+1), incx, beta, y, incy,
372 $ lcmp, 1 )
373 ELSE
374 CALL pbztr2b1( icontxt, trans, np-idex, nb, jz,
375 $ x(idex*incx+1), incx, zero, work, 1,
376 $ lcmp, 1 )
377 END IF
378 END IF
379 30 CONTINUE
380*
381* Get diagonal blocks of A for each column of the template
382*
383 mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
384 IF( lcmq.GT.1 ) THEN
385 mccol = mod( npcol+mycol-iycol, npcol )
386 CALL pbztrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
387 $ work, 1, mcrow, mccol, igd, myrow, mycol,
388 $ nprow, npcol )
389 END IF
390*
391* Broadcast a row block of WORK in every row of template
392*
393 IF( iyrow.EQ.-1 ) THEN
394 IF( myrow.EQ.mcrow ) THEN
395 IF( lcmq.GT.1 ) THEN
396 kz = 0
397 IF( mycol.EQ.iycol ) kz = nz
398 CALL pbztrst1( icontxt, 'Row', nq, nb, kz, work, 1,
399 $ beta, y, incy, lcmp, lcmq, nq0 )
400 END IF
401 CALL zgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
402 ELSE
403 CALL zgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
404 $ mcrow, mycol )
405 END IF
406*
407* Send a row block of WORK to the destination row
408*
409 ELSE
410 IF( lcmq.EQ.1 ) THEN
411 IF( myrow.EQ.mcrow ) THEN
412 IF( myrow.NE.iyrow )
413 $ CALL zgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
414 ELSE IF( myrow.EQ.iyrow ) THEN
415 IF( beta.EQ.zero ) THEN
416 CALL zgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
417 ELSE
418 CALL zgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
419 CALL pbzvecadd( icontxt, 'G', nq0, one, work, 1,
420 $ beta, y, incy )
421 END IF
422 END IF
423*
424 ELSE
425 nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
426 IF( myrow.EQ.mcrow ) THEN
427 IF( myrow.NE.iyrow )
428 $ CALL zgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
429 ELSE IF( myrow.EQ.iyrow ) THEN
430 CALL zgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
431 END IF
432*
433 IF( myrow.EQ.iyrow ) THEN
434 kz = 0
435 IF( mycol.EQ.iycol ) kz = nz
436 CALL pbztrst1( icontxt, 'Row', nq, nb, kz, work, 1,
437 $ beta, y, incy, lcmp, lcmq, nq0 )
438 END IF
439 END IF
440 END IF
441 END IF
442*
443* When x is a row vector
444*
445 ELSE
446*
447* Form y <== x' ( x is a row block )
448*
449* ||
450* ||
451* || _____________
452* (y) <== -----(x)-----
453* ||
454* ||
455* ||
456*
457 IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
458 info = 12
459 ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
460 info = 13
461 ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
462 info = 14
463 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
464 info = 15
465 END IF
466 IF( info.NE.0 ) GO TO 10
467*
468* MRROW : row relative position in template from IYROW
469* MRCOL : column relative position in template from IXCOL
470*
471 mrrow = mod( nprow+myrow-iyrow, nprow )
472 mrcol = mod( npcol+mycol-ixcol, npcol )
473 jycol = iycol
474 IF( iycol.EQ.-1 ) jycol = ixcol
475*
476 np = numroc( nn, nb, myrow, iyrow, nprow )
477 IF( mrrow.EQ.0 ) np = np - nz
478 nq = numroc( nn, nb, mycol, ixcol, npcol )
479 IF( mrcol.EQ.0 ) nq = nq - nz
480 np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
481*
482* When a row process of IXROW has a row block A,
483*
484 IF( ixrow .GE. 0 ) THEN
485 tbeta = zero
486 IF( mycol.EQ.jycol ) tbeta = beta
487 kz = nz
488*
489 DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
490 mcrow = mod( mod(i, nprow) + iyrow, nprow )
491 mccol = mod( mod(i, npcol) + ixcol, npcol )
492 IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
493 jdex = (i/nprow) * nb
494 IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
495*
496* A source node copies the blocks to WORK, and send it
497*
498 IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
499*
500* The source node is a destination node
501*
502 idex = (i/npcol) * nb
503 IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
504 IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
505 CALL pbztr2b1( icontxt, trans, nq-idex, nb, kz,
506 $ x(idex*incx+1), incx, tbeta,
507 $ y(jdex*incy+1), incy, lcmq, lcmp )
508*
509* The source node sends blocks to a destination node
510*
511 ELSE
512 CALL pbztr2b1( icontxt, trans, nq-idex, nb, kz,
513 $ x(idex*incx+1), incx, zero, work, 1,
514 $ lcmq, 1 )
515 CALL zgesd2d( icontxt, 1, np0-kz, work, 1,
516 $ mcrow, jycol )
517 END IF
518*
519* A destination node receives the copied blocks
520*
521 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
522 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
523 CALL zgerv2d( icontxt, 1, np0-kz, y, incy,
524 $ ixrow, mccol )
525 ELSE
526 CALL zgerv2d( icontxt, 1, np0-kz, work, 1,
527 $ ixrow, mccol )
528 CALL pbztr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
529 $ y(jdex*incy+1), incy, lcmp*nb )
530 END IF
531 END IF
532 kz = 0
533 40 CONTINUE
534*
535* Broadcast a column vector Y in each row of template
536*
537 IF( iycol.EQ.-1 ) THEN
538 IF( mycol.EQ.jycol ) THEN
539 CALL zgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
540 ELSE
541 CALL zgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
542 $ myrow, jycol )
543 END IF
544 END IF
545*
546* When all row procesors have a copy of the row block A,
547*
548 ELSE
549 IF( lcmp.EQ.1 ) np0 = np
550*
551* Processors, which have diagonal blocks of A, copy them to
552* WORK array in transposed form
553*
554 kz = 0
555 IF( mrcol.EQ.0 ) kz = nz
556 jz = 0
557 IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
558*
559 DO 50 i = 0, lcmq-1
560 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
561 idex = max( 0, i*nb-kz )
562 IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
563 CALL pbztr2b1( icontxt, trans, nq-idex, nb, jz,
564 $ x(idex*incx+1), incx, beta, y, incy,
565 $ lcmq, 1 )
566 ELSE
567 CALL pbztr2b1( icontxt, trans, nq-idex, nb, jz,
568 $ x(idex*incx+1), incx, zero, work, 1,
569 $ lcmq, 1 )
570 END IF
571 END IF
572 50 CONTINUE
573*
574* Get diagonal blocks of A for each row of the template
575*
576 mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
577 IF( lcmp.GT.1 ) THEN
578 mcrow = mod( nprow+myrow-iyrow, nprow )
579 CALL pbztrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
580 $ work, 1, mcrow, mccol, igd, myrow, mycol,
581 $ nprow, npcol )
582 END IF
583*
584* Broadcast a column block of WORK in every column of template
585*
586 IF( iycol.EQ.-1 ) THEN
587 IF( mycol.EQ.mccol ) THEN
588 IF( lcmp.GT.1 ) THEN
589 kz = 0
590 IF( myrow.EQ.iyrow ) kz = nz
591 CALL pbztrst1( icontxt, 'Col', np, nb, kz, work, 1,
592 $ beta, y, incy, lcmp, lcmq, np0 )
593 END IF
594 CALL zgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
595 ELSE
596 CALL zgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
597 $ myrow, mccol )
598 END IF
599*
600* Send a column block of WORK to the destination column
601*
602 ELSE
603 IF( lcmp.EQ.1 ) THEN
604 IF( mycol.EQ.mccol ) THEN
605 IF( mycol.NE.iycol )
606 $ CALL zgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
607 ELSE IF( mycol.EQ.iycol ) THEN
608 IF( beta.EQ.zero ) THEN
609 CALL zgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
610 ELSE
611 CALL zgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
612 CALL pbzvecadd( icontxt, 'G', np, one, work, 1, beta,
613 $ y, incy )
614 END IF
615 END IF
616*
617 ELSE
618 np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
619 IF( mycol.EQ.mccol ) THEN
620 IF( mycol.NE.iycol )
621 $ CALL zgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
622 ELSE IF( mycol.EQ.iycol ) THEN
623 CALL zgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
624 END IF
625*
626 IF( mycol.EQ.iycol ) THEN
627 kz = 0
628 IF( myrow.EQ.iyrow ) kz = nz
629 CALL pbztrst1( icontxt, 'Col', np, nb, kz, work, 1,
630 $ beta, y, incy, lcmp, lcmq, np0 )
631 END IF
632 END IF
633 END IF
634 END IF
635 END IF
636*
637 RETURN
638*
639* End of PBZTRNV
640*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine zgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1092
subroutine zgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1051
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbztrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbztrget.f:3
subroutine pbztr2b1(icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)
Definition pbztrnv.f:721
subroutine pbztr2a1(icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
Definition pbztrnv.f:649
subroutine pbztrst1(icontxt, xdist, n, nb, nz, x, incx, beta, y, incy, lcmp, lcmq, nint)
Definition pbztrst1.f:3