OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pbdtrnv.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pbdtrnv (icontxt, xdist, trans, n, nb, nz, x, incx, beta, y, incy, ixrow, ixcol, iyrow, iycol, work)
subroutine pbdtr2a1 (icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
subroutine pbdtr2b1 (icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)

Function/Subroutine Documentation

◆ pbdtr2a1()

subroutine pbdtr2a1 ( integer icontxt,
integer n,
integer nb,
integer nz,
double precision, dimension( * ) x,
integer incx,
double precision beta,
double precision, dimension( * ) y,
integer incy,
integer intv )

Definition at line 646 of file pbdtrnv.f.

648*
649* -- PB-BLAS routine (version 2.1) --
650* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
651* April 28, 1996
652*
653* .. Scalar Arguments ..
654 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, INTV
655 DOUBLE PRECISION BETA
656* ..
657* .. Array Arguments ..
658 DOUBLE PRECISION X( * ), Y( * )
659* ..
660*
661* Purpose
662* =======
663*
664* y <== x
665* y is a scattered vector, copied from a condensed vector x.
666*
667* ..
668* .. Intrinsic Functions ..
669 INTRINSIC min
670* ..
671* .. External Functions ..
672 INTEGER ICEIL
673 EXTERNAL iceil
674* ..
675* .. External Subroutines ..
676 EXTERNAL pbdvecadd
677* ..
678* .. Parameters ..
679 DOUBLE PRECISION ONE
680 parameter( one = 1.0d+0 )
681* ..
682* .. Local Variables ..
683 INTEGER IX, IY, JZ, K, ITER
684*
685 ix = 0
686 iy = 0
687 jz = nz
688 iter = iceil( n+nz, intv )
689*
690 IF( iter.GT.1 ) THEN
691 CALL pbdvecadd( icontxt, 'G', nb-jz, one, x(ix*incx+1), incx,
692 $ beta, y(iy*incy+1), incy )
693 ix = ix + nb - jz
694 iy = iy + intv - jz
695 jz = 0
696*
697 DO 10 k = 2, iter-1
698 CALL pbdvecadd( icontxt, 'G', nb, one, x(ix*incx+1), incx,
699 $ beta, y(iy*incy+1), incy )
700 ix = ix + nb
701 iy = iy + intv
702 10 CONTINUE
703 END IF
704*
705 CALL pbdvecadd( icontxt, 'G', min( n-iy, nb-jz ), one,
706 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
707*
708 RETURN
709*
710* End of PBDTR2A1
711*
integer function iceil(inum, idenom)
Definition iceil.f:2
#define min(a, b)
Definition macros.h:20
subroutine pbdvecadd(icontxt, mode, n, alpha, x, incx, beta, y, incy)
Definition pbdvecadd.f:3

◆ pbdtr2b1()

subroutine pbdtr2b1 ( integer icontxt,
character*1 trans,
integer n,
integer nb,
integer nz,
double precision, dimension( * ) x,
integer incx,
double precision beta,
double precision, dimension( * ) y,
integer incy,
integer jinx,
integer jiny )

Definition at line 718 of file pbdtrnv.f.

720*
721* -- PB-BLAS routine (version 2.1) --
722* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
723* April 28, 1996
724*
725* .. Scalar Arguments ..
726 CHARACTER*1 TRANS
727 INTEGER ICONTXT, N, NB, NZ, INCX, INCY, JINX, JINY
728 DOUBLE PRECISION BETA
729* ..
730* .. Array Arguments ..
731 DOUBLE PRECISION X( * ), Y( * )
732* ..
733*
734* Purpose
735* =======
736*
737* y <== x + beta * y
738* y is a condensed vector, copied from a scattered vector x
739*
740* ..
741* .. Intrinsic Functions ..
742 INTRINSIC min
743* ..
744* .. External Functions ..
745 INTEGER ICEIL
746 EXTERNAL iceil
747* ..
748* .. External Subroutines ..
749 EXTERNAL pbdvecadd
750* ..
751* .. Parameters ..
752 DOUBLE PRECISION ONE
753 parameter( one = 1.0d+0 )
754* ..
755* .. Local Variables ..
756 INTEGER IX, IY, JZ, K, ITER, LENX, LENY
757*
758 IF( jinx.EQ.1 .AND. jiny.EQ.1 ) THEN
759 CALL pbdvecadd( icontxt, trans, n, one, x, incx, beta,
760 $ y, incy )
761*
762 ELSE
763 ix = 0
764 iy = 0
765 jz = nz
766 lenx = nb * jinx
767 leny = nb * jiny
768 iter = iceil( n+nz, lenx )
769*
770 IF( iter.GT.1 ) THEN
771 CALL pbdvecadd( icontxt, trans, nb-jz, one, x(ix*incx+1),
772 $ incx, beta, y(iy*incy+1), incy )
773 ix = ix + lenx - jz
774 iy = iy + leny - jz
775 jz = 0
776*
777 DO 10 k = 2, iter-1
778 CALL pbdvecadd( icontxt, trans, nb, one, x(ix*incx+1),
779 $ incx, beta, y(iy*incy+1), incy )
780 ix = ix + lenx
781 iy = iy + leny
782 10 CONTINUE
783 END IF
784*
785 CALL pbdvecadd( icontxt, trans, min( n-ix, nb-jz ), one,
786 $ x(ix*incx+1), incx, beta, y(iy*incy+1), incy )
787 END IF
788*
789 RETURN
790*
791* End of PBDTR2B1
792*

◆ pbdtrnv()

subroutine pbdtrnv ( integer icontxt,
character*1 xdist,
character*1 trans,
integer n,
integer nb,
integer nz,
double precision, dimension( * ) x,
integer incx,
double precision beta,
double precision, dimension( * ) y,
integer incy,
integer ixrow,
integer ixcol,
integer iyrow,
integer iycol,
double precision, dimension( * ) work )

Definition at line 1 of file pbdtrnv.f.

4*
5* -- PB-BLAS routine (version 2.1) --
6* University of Tennessee, Knoxville, Oak Ridge National Laboratory.
7* April 28, 1996
8*
9* Jaeyoung Choi, Oak Ridge National Laboratory
10* Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
11* David Walker, Oak Ridge National Laboratory
12*
13* .. Scalar Arguments ..
14 CHARACTER*1 TRANS, XDIST
15 INTEGER ICONTXT, INCX, INCY, IXCOL, IXROW, IYCOL,
16 $ IYROW, N, NB, NZ
17 DOUBLE PRECISION BETA
18* ..
19* .. Array Arguments ..
20 DOUBLE PRECISION WORK( * ), X( * ), Y( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PBDTRNV transposes a column vector to row vector, or a row vector to
27* column vector by reallocating data distribution.
28*
29* Y := X'
30*
31* where X and Y are N vectors.
32*
33* Parameters
34* ==========
35*
36* ICONTXT (input) INTEGER
37* ICONTXT is the BLACS mechanism for partitioning communication
38* space. A defining property of a context is that a message in
39* a context cannot be sent or received in another context. The
40* BLACS context includes the definition of a grid, and each
41* process' coordinates in it.
42*
43* XDIST (input) CHARACTER*1
44* XDIST specifies whether X is a column vector or a row vector,
45*
46* XDIST = 'C', X is a column vector (distributed columnwise)
47* XDIST = 'R', X is a row vector (distributed rowwise)
48*
49* TRANS (input) CHARACTER*1
50* TRANS specifies whether the transposed format is transpose
51* or conjugate transpose. If the vectors X and Y are real,
52* the argument is ignored.
53*
54* TRANS = 'T', transpose
55* TRANS = 'C', conjugate transpose
56*
57* N (input) INTEGER
58* N specifies the (global) number of the vector X and the
59* vector Y. N >= 0.
60*
61* NB (input) INTEGER
62* NB specifies the block size of vectors X and Y. NB >= 0.
63*
64* NZ (input) INTEGER
65* NZ is the column offset to specify the column distance from
66* the beginning of the block to the first element of the
67* vector X, and the row offset to the first element of the
68* vector Y if XDIST = 'C'.
69* Otherwise, it is row offset to specify the row distance
70* from the beginning of the block to the first element of the
71* vector X, and the column offset to the first element of the
72* vector Y. 0 < NZ <= NB.
73*
74* X (input) DOUBLE PRECISION array of dimension at least
75* ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or
76* ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'.
77* The incremented array X must contain the vector X.
78*
79* INCX (input) INTEGER
80* INCX specifies the increment for the elements of X.
81* INCX <> 0.
82*
83* BETA (input) DOUBLE PRECISION
84* BETA specifies scaler beta.
85*
86* Y (input/output) DOUBLE PRECISION array of dimension at least
87* ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or
88* ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or
89* The incremented array Y must contain the vector Y.
90* Y will not be referenced if beta is zero.
91*
92* INCY (input) INTEGER
93* INCY specifies the increment for the elements of Y.
94* INCY <> 0.
95*
96* IXROW (input) INTEGER
97* IXROW specifies a row of the process template, which holds
98* the first element of the vector X. If X is a row vector and
99* all rows of processes have a copy of X, then set IXROW = -1.
100*
101* IXCOL (input) INTEGER
102* IXCOL specifies a column of the process template,
103* which holds the first element of the vector X. If X is a
104* column block and all columns of processes have a copy of X,
105* then set IXCOL = -1.
106*
107* IYROW (input) INTEGER
108* IYROW specifies the current row process which holds the
109* first element of the vector Y, which is transposed of X.
110* If X is a column vector and the transposed row vector Y is
111* distributed all rows of processes, set IYROW = -1.
112*
113* IYCOL (input) INTEGER
114* IYCOL specifies the current column process which holds
115* the first element of the vector Y, which is transposed of Y.
116* If X is a row block and the transposed column vector Y is
117* distributed all columns of processes, set IYCOL = -1.
118*
119* WORK (workspace) DOUBLE PRECISION array of dimension Size(WORK).
120* It needs extra working space of x**T or x**H.
121*
122* Parameters Details
123* ==================
124*
125* Nx It is a local portion of N owned by a process, where x is
126* replaced by either p (=NPROW) or q (=NPCOL)). The value is
127* determined by N, NB, NZ, x, and MI, where NB is a block size,
128* NZ is a offset from the beginning of the block, and MI is a
129* row or column position in a process template. Nx is equal
130* to or less than Nx0 = CEIL( N+NZ, NB*x ) * NB.
131*
132* Communication Scheme
133* ====================
134*
135* The communication scheme of the routine is set to '1-tree', which is
136* fan-out. (For details, see BLACS user's guide.)
137*
138* Memory Requirement of WORK
139* ==========================
140*
141* NN = N + NZ
142* Npb = CEIL( NN, NB*NPROW )
143* Nqb = CEIL( NN, NB*NPCOL )
144* LCMP = LCM / NPROW
145* LCMQ = LCM / NPCOL
146*
147* (1) XDIST = 'C'
148* (a) IXCOL != -1
149* Size(WORK) = CEIL(Nqb,LCMQ)*NB
150* (b) IXCOL = -1
151* Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB))
152*
153* (2) XDIST = 'R'
154* (a) IXROW != -1
155* Size(WORK) = CEIL(Npb,LCMP)*NB
156* (b) IXROW = -1
157* Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB))
158*
159* Notes
160* -----
161* More precise space can be computed as
162*
163* CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
164* CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
165*
166* =====================================================================
167*
168* .. Parameters ..
169 DOUBLE PRECISION ONE, ZERO
170 parameter( one = 1.0d+0, zero = 0.0d+0 )
171* ..
172* .. Local Scalars ..
173 LOGICAL COLFORM, ROWFORM
174 INTEGER I, IDEX, IGD, INFO, JDEX, JYCOL, JYROW, JZ, KZ,
175 $ LCM, LCMP, LCMQ, MCCOL, MCROW, MRCOL, MRROW,
176 $ MYCOL, MYROW, NN, NP, NP0, NP1, NPCOL, NPROW,
177 $ NQ, NQ0, NQ1
178 DOUBLE PRECISION TBETA
179* ..
180* .. External Functions ..
181 LOGICAL LSAME
182 INTEGER ILCM, ICEIL, NUMROC
183 EXTERNAL lsame, ilcm, iceil, numroc
184* ..
185* .. External Subroutines ..
186 EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d, dgerv2d,
187 $ dgesd2d, pbdtr2a1, pbdtr2b1, pbdtrget,
189* ..
190* .. Intrinsic Functions ..
191 INTRINSIC max, min, mod
192* ..
193* .. Executable Statements ..
194*
195* Quick return if possible.
196*
197 IF( n.EQ.0 ) RETURN
198*
199 CALL blacs_gridinfo( icontxt, nprow, npcol, myrow, mycol )
200*
201 colform = lsame( xdist, 'C' )
202 rowform = lsame( xdist, 'R' )
203*
204* Test the input parameters.
205*
206 info = 0
207 IF( ( .NOT.colform ) .AND. ( .NOT.rowform ) ) THEN
208 info = 2
209 ELSE IF( n .LT.0 ) THEN
210 info = 4
211 ELSE IF( nb .LT.1 ) THEN
212 info = 5
213 ELSE IF( nz .LT.0 .OR. nz.GE.nb ) THEN
214 info = 6
215 ELSE IF( incx.EQ.0 ) THEN
216 info = 8
217 ELSE IF( incy.EQ.0 ) THEN
218 info = 11
219 ELSE IF( ixrow.LT.-1 .OR. ixrow.GE.nprow .OR.
220 $ ( ixrow.EQ.-1 .AND. colform ) ) THEN
221 info = 12
222 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol .OR.
223 $ ( ixcol.EQ.-1 .AND. rowform ) ) THEN
224 info = 13
225 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow .OR.
226 $ ( iyrow.EQ.-1 .AND. rowform ) ) THEN
227 info = 14
228 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol .OR.
229 $ ( iycol.EQ.-1 .AND. colform ) ) THEN
230 info = 15
231 END IF
232*
233 10 CONTINUE
234 IF( info.NE.0 ) THEN
235 CALL pxerbla( icontxt, 'PBDTRNV ', info )
236 RETURN
237 END IF
238*
239* Start the operations.
240*
241* LCM : the least common multiple of NPROW and NPCOL
242*
243 lcm = ilcm( nprow, npcol )
244 lcmp = lcm / nprow
245 lcmq = lcm / npcol
246 igd = npcol / lcmp
247 nn = n + nz
248*
249* When x is a column vector
250*
251 IF( colform ) THEN
252*
253* Form y <== x' ( x is a column vector )
254*
255* ||
256* ||
257* _____________ ||
258* -----(y)----- <== (x)
259* ||
260* ||
261* ||
262*
263 IF( ixrow.LT.0 .OR. ixrow.GE.nprow ) THEN
264 info = 12
265 ELSE IF( ixcol.LT.-1 .OR. ixcol.GE.npcol ) THEN
266 info = 13
267 ELSE IF( iyrow.LT.-1 .OR. iyrow.GE.nprow ) THEN
268 info = 14
269 ELSE IF( iycol.LT.0 .OR. iycol.GE.npcol ) THEN
270 info = 15
271 END IF
272 IF( info.NE.0 ) GO TO 10
273*
274* MRROW : row relative position in template from IXROW
275* MRCOL : column relative position in template from IYCOL
276*
277 mrrow = mod( nprow+myrow-ixrow, nprow )
278 mrcol = mod( npcol+mycol-iycol, npcol )
279 jyrow = iyrow
280 IF( iyrow.EQ.-1 ) jyrow = ixrow
281*
282 np = numroc( nn, nb, myrow, ixrow, nprow )
283 IF( mrrow.EQ.0 ) np = np - nz
284 nq = numroc( nn, nb, mycol, iycol, npcol )
285 IF( mrcol.EQ.0 ) nq = nq - nz
286 nq0 = numroc( numroc(nn, nb, 0, 0, npcol), nb, 0, 0, lcmq )
287*
288* When a column process of IXCOL has a column block A,
289*
290 IF( ixcol .GE. 0 ) THEN
291 tbeta = zero
292 IF( myrow.EQ.jyrow ) tbeta = beta
293 kz = nz
294*
295 DO 20 i = 0, min( lcm, iceil(nn,nb) ) - 1
296 mcrow = mod( mod(i, nprow) + ixrow, nprow )
297 mccol = mod( mod(i, npcol) + iycol, npcol )
298 IF( lcmq.EQ.1 ) nq0 = numroc( nn, nb, i, 0, npcol )
299 jdex = (i/npcol) * nb
300 IF( mrcol.EQ.0 ) jdex = max(0, jdex-nz)
301*
302* A source node copies the blocks to WORK, and send it
303*
304 IF( myrow.EQ.mcrow .AND. mycol.EQ.ixcol ) THEN
305*
306* The source node is a destination node
307*
308 idex = (i/nprow) * nb
309 IF( mrrow.EQ.0 ) idex = max( 0, idex-nz )
310 IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
311 CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
312 $ x(idex*incx+1), incx, tbeta,
313 $ y(jdex*incy+1), incy, lcmp, lcmq )
314*
315* The source node sends blocks to a destination node
316*
317 ELSE
318 CALL pbdtr2b1( icontxt, trans, np-idex, nb, kz,
319 $ x(idex*incx+1), incx, zero, work, 1,
320 $ lcmp, 1 )
321 CALL dgesd2d( icontxt, 1, nq0-kz, work, 1,
322 $ jyrow, mccol )
323 END IF
324*
325* A destination node receives the copied vector
326*
327 ELSE IF( myrow.EQ.jyrow .AND. mycol.EQ.mccol ) THEN
328 IF( lcmq.EQ.1 .AND. tbeta.EQ.zero ) THEN
329 CALL dgerv2d( icontxt, 1, nq0-kz, y, incy,
330 $ mcrow, ixcol )
331 ELSE
332 CALL dgerv2d( icontxt, 1, nq0-kz, work, 1,
333 $ mcrow, ixcol )
334 CALL pbdtr2a1( icontxt, nq-jdex, nb, kz, work, 1, tbeta,
335 $ y(jdex*incy+1), incy, lcmq*nb )
336 END IF
337 END IF
338 kz = 0
339 20 CONTINUE
340*
341* Broadcast a row block of WORK in each column of template
342*
343 IF( iyrow.EQ.-1 ) THEN
344 IF( myrow.EQ.jyrow ) THEN
345 CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
346 ELSE
347 CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
348 $ jyrow, mycol )
349 END IF
350 END IF
351*
352* When all column procesors have a copy of the column block A,
353*
354 ELSE
355 IF( lcmq.EQ.1 ) nq0 = nq
356*
357* Processors, which have diagonal blocks of X, copy them to
358* WORK array in transposed form
359*
360 kz = 0
361 IF( mrrow.EQ.0 ) kz = nz
362 jz = 0
363 IF( mrrow.EQ.0 .AND. mycol.EQ.iycol ) jz = nz
364*
365 DO 30 i = 0, lcmp - 1
366 IF( mrcol.EQ.mod(nprow*i+mrrow, npcol) ) THEN
367 idex = max( 0, i*nb-kz )
368 IF( lcmq.EQ.1 .AND. (iyrow.EQ.-1.OR.iyrow.EQ.myrow) ) THEN
369 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
370 $ x(idex*incx+1), incx, beta, y, incy,
371 $ lcmp, 1 )
372 ELSE
373 CALL pbdtr2b1( icontxt, trans, np-idex, nb, jz,
374 $ x(idex*incx+1), incx, zero, work, 1,
375 $ lcmp, 1 )
376 END IF
377 END IF
378 30 CONTINUE
379*
380* Get diagonal blocks of A for each column of the template
381*
382 mcrow = mod( mod(mrcol, nprow) + ixrow, nprow )
383 IF( lcmq.GT.1 ) THEN
384 mccol = mod( npcol+mycol-iycol, npcol )
385 CALL pbdtrget( icontxt, 'Row', 1, nq0, iceil( nn, nb ),
386 $ work, 1, mcrow, mccol, igd, myrow, mycol,
387 $ nprow, npcol )
388 END IF
389*
390* Broadcast a row block of WORK in every row of template
391*
392 IF( iyrow.EQ.-1 ) THEN
393 IF( myrow.EQ.mcrow ) THEN
394 IF( lcmq.GT.1 ) THEN
395 kz = 0
396 IF( mycol.EQ.iycol ) kz = nz
397 CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
398 $ beta, y, incy, lcmp, lcmq, nq0 )
399 END IF
400 CALL dgebs2d( icontxt, 'Col', '1-tree', 1, nq, y, incy )
401 ELSE
402 CALL dgebr2d( icontxt, 'Col', '1-tree', 1, nq, y, incy,
403 $ mcrow, mycol )
404 END IF
405*
406* Send a row block of WORK to the destination row
407*
408 ELSE
409 IF( lcmq.EQ.1 ) THEN
410 IF( myrow.EQ.mcrow ) THEN
411 IF( myrow.NE.iyrow )
412 $ CALL dgesd2d( icontxt, 1, nq0, work, 1, iyrow, mycol )
413 ELSE IF( myrow.EQ.iyrow ) THEN
414 IF( beta.EQ.zero ) THEN
415 CALL dgerv2d( icontxt, 1, nq0, y, incy, mcrow, mycol )
416 ELSE
417 CALL dgerv2d( icontxt, 1, nq0, work, 1, mcrow, mycol )
418 CALL pbdvecadd( icontxt, 'G', nq0, one, work, 1,
419 $ beta, y, incy )
420 END IF
421 END IF
422*
423 ELSE
424 nq1 = nq0 * min( lcmq, max( 0, iceil(nn,nb)-mccol ) )
425 IF( myrow.EQ.mcrow ) THEN
426 IF( myrow.NE.iyrow )
427 $ CALL dgesd2d( icontxt, 1, nq1, work, 1, iyrow, mycol )
428 ELSE IF( myrow.EQ.iyrow ) THEN
429 CALL dgerv2d( icontxt, 1, nq1, work, 1, mcrow, mycol )
430 END IF
431*
432 IF( myrow.EQ.iyrow ) THEN
433 kz = 0
434 IF( mycol.EQ.iycol ) kz = nz
435 CALL pbdtrst1( icontxt, 'Row', nq, nb, kz, work, 1,
436 $ beta, y, incy, lcmp, lcmq, nq0 )
437 END IF
438 END IF
439 END IF
440 END IF
441*
442* When x is a row vector
443*
444 ELSE
445*
446* Form y <== x' ( x is a row block )
447*
448* ||
449* ||
450* || _____________
451* (y) <== -----(x)-----
452* ||
453* ||
454* ||
455*
456 IF( ixrow.LT.-1 .OR. ixrow.GE.nprow ) THEN
457 info = 12
458 ELSE IF( ixcol.LT.0 .OR. ixcol.GE.npcol ) THEN
459 info = 13
460 ELSE IF( iyrow.LT.0 .OR. iyrow.GE.nprow ) THEN
461 info = 14
462 ELSE IF( iycol.LT.-1 .OR. iycol.GE.npcol ) THEN
463 info = 15
464 END IF
465 IF( info.NE.0 ) GO TO 10
466*
467* MRROW : row relative position in template from IYROW
468* MRCOL : column relative position in template from IXCOL
469*
470 mrrow = mod( nprow+myrow-iyrow, nprow )
471 mrcol = mod( npcol+mycol-ixcol, npcol )
472 jycol = iycol
473 IF( iycol.EQ.-1 ) jycol = ixcol
474*
475 np = numroc( nn, nb, myrow, iyrow, nprow )
476 IF( mrrow.EQ.0 ) np = np - nz
477 nq = numroc( nn, nb, mycol, ixcol, npcol )
478 IF( mrcol.EQ.0 ) nq = nq - nz
479 np0 = numroc( numroc(nn, nb, 0, 0, nprow), nb, 0, 0, lcmp )
480*
481* When a row process of IXROW has a row block A,
482*
483 IF( ixrow .GE. 0 ) THEN
484 tbeta = zero
485 IF( mycol.EQ.jycol ) tbeta = beta
486 kz = nz
487*
488 DO 40 i = 0, min( lcm, iceil(nn,nb) ) - 1
489 mcrow = mod( mod(i, nprow) + iyrow, nprow )
490 mccol = mod( mod(i, npcol) + ixcol, npcol )
491 IF( lcmp.EQ.1 ) np0 = numroc( nn, nb, i, 0, nprow )
492 jdex = (i/nprow) * nb
493 IF( mrrow.EQ.0 ) jdex = max(0, jdex-nz)
494*
495* A source node copies the blocks to WORK, and send it
496*
497 IF( myrow.EQ.ixrow .AND. mycol.EQ.mccol ) THEN
498*
499* The source node is a destination node
500*
501 idex = (i/npcol) * nb
502 IF( mrcol.EQ.0 ) idex = max( 0, idex-nz )
503 IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
504 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
505 $ x(idex*incx+1), incx, tbeta,
506 $ y(jdex*incy+1), incy, lcmq, lcmp )
507*
508* The source node sends blocks to a destination node
509*
510 ELSE
511 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, kz,
512 $ x(idex*incx+1), incx, zero, work, 1,
513 $ lcmq, 1 )
514 CALL dgesd2d( icontxt, 1, np0-kz, work, 1,
515 $ mcrow, jycol )
516 END IF
517*
518* A destination node receives the copied blocks
519*
520 ELSE IF( myrow.EQ.mcrow .AND. mycol.EQ.jycol ) THEN
521 IF( lcmp.EQ.1 .AND. tbeta.EQ.zero ) THEN
522 CALL dgerv2d( icontxt, 1, np0-kz, y, incy,
523 $ ixrow, mccol )
524 ELSE
525 CALL dgerv2d( icontxt, 1, np0-kz, work, 1,
526 $ ixrow, mccol )
527 CALL pbdtr2a1( icontxt, np-jdex, nb, kz, work, 1, tbeta,
528 $ y(jdex*incy+1), incy, lcmp*nb )
529 END IF
530 END IF
531 kz = 0
532 40 CONTINUE
533*
534* Broadcast a column vector Y in each row of template
535*
536 IF( iycol.EQ.-1 ) THEN
537 IF( mycol.EQ.jycol ) THEN
538 CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
539 ELSE
540 CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
541 $ myrow, jycol )
542 END IF
543 END IF
544*
545* When all row procesors have a copy of the row block A,
546*
547 ELSE
548 IF( lcmp.EQ.1 ) np0 = np
549*
550* Processors, which have diagonal blocks of A, copy them to
551* WORK array in transposed form
552*
553 kz = 0
554 IF( mrcol.EQ.0 ) kz = nz
555 jz = 0
556 IF( mrcol.EQ.0 .AND. myrow.EQ.iyrow ) jz = nz
557*
558 DO 50 i = 0, lcmq-1
559 IF( mrrow.EQ.mod(npcol*i+mrcol, nprow) ) THEN
560 idex = max( 0, i*nb-kz )
561 IF( lcmp.EQ.1 .AND. (iycol.EQ.-1.OR.iycol.EQ.mycol) ) THEN
562 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
563 $ x(idex*incx+1), incx, beta, y, incy,
564 $ lcmq, 1 )
565 ELSE
566 CALL pbdtr2b1( icontxt, trans, nq-idex, nb, jz,
567 $ x(idex*incx+1), incx, zero, work, 1,
568 $ lcmq, 1 )
569 END IF
570 END IF
571 50 CONTINUE
572*
573* Get diagonal blocks of A for each row of the template
574*
575 mccol = mod( mod(mrrow, npcol) + ixcol, npcol )
576 IF( lcmp.GT.1 ) THEN
577 mcrow = mod( nprow+myrow-iyrow, nprow )
578 CALL pbdtrget( icontxt, 'Col', 1, np0, iceil( nn, nb ),
579 $ work, 1, mcrow, mccol, igd, myrow, mycol,
580 $ nprow, npcol )
581 END IF
582*
583* Broadcast a column block of WORK in every column of template
584*
585 IF( iycol.EQ.-1 ) THEN
586 IF( mycol.EQ.mccol ) THEN
587 IF( lcmp.GT.1 ) THEN
588 kz = 0
589 IF( myrow.EQ.iyrow ) kz = nz
590 CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
591 $ beta, y, incy, lcmp, lcmq, np0 )
592 END IF
593 CALL dgebs2d( icontxt, 'Row', '1-tree', 1, np, y, incy )
594 ELSE
595 CALL dgebr2d( icontxt, 'Row', '1-tree', 1, np, y, incy,
596 $ myrow, mccol )
597 END IF
598*
599* Send a column block of WORK to the destination column
600*
601 ELSE
602 IF( lcmp.EQ.1 ) THEN
603 IF( mycol.EQ.mccol ) THEN
604 IF( mycol.NE.iycol )
605 $ CALL dgesd2d( icontxt, 1, np, work, 1, myrow, iycol )
606 ELSE IF( mycol.EQ.iycol ) THEN
607 IF( beta.EQ.zero ) THEN
608 CALL dgerv2d( icontxt, 1, np, y, incy, myrow, mccol )
609 ELSE
610 CALL dgerv2d( icontxt, 1, np, work, 1, myrow, mccol )
611 CALL pbdvecadd( icontxt, 'G', np, one, work, 1, beta,
612 $ y, incy )
613 END IF
614 END IF
615*
616 ELSE
617 np1 = np0 * min( lcmp, max( 0, iceil(nn,nb)-mcrow ) )
618 IF( mycol.EQ.mccol ) THEN
619 IF( mycol.NE.iycol )
620 $ CALL dgesd2d( icontxt, 1, np1, work, 1, myrow, iycol )
621 ELSE IF( mycol.EQ.iycol ) THEN
622 CALL dgerv2d( icontxt, 1, np1, work, 1, myrow, mccol )
623 END IF
624*
625 IF( mycol.EQ.iycol ) THEN
626 kz = 0
627 IF( myrow.EQ.iyrow ) kz = nz
628 CALL pbdtrst1( icontxt, 'Col', np, nb, kz, work, 1,
629 $ beta, y, incy, lcmp, lcmq, np0 )
630 END IF
631 END IF
632 END IF
633 END IF
634 END IF
635*
636 RETURN
637*
638* End of PBDTRNV
639*
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
integer function ilcm(m, n)
Definition ilcm.f:2
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine pxerbla(contxt, srname, info)
Definition mpi.f:1600
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbdtrget(icontxt, adist, m, n, mnb, a, lda, mcrow, mccol, igd, myrow, mycol, nprow, npcol)
Definition pbdtrget.f:3
subroutine pbdtr2a1(icontxt, n, nb, nz, x, incx, beta, y, incy, intv)
Definition pbdtrnv.f:648
subroutine pbdtr2b1(icontxt, trans, n, nb, nz, x, incx, beta, y, incy, jinx, jiny)
Definition pbdtrnv.f:720
subroutine pbdtrst1(icontxt, xdist, n, nb, nz, x, incx, beta, y, incy, lcmp, lcmq, nint)
Definition pbdtrst1.f:3