OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pdlarfb.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine pdlarfb (side, trans, direct, storev, m, n, k, v, iv, jv, descv, t, c, ic, jc, descc, work)

Function/Subroutine Documentation

◆ pdlarfb()

subroutine pdlarfb ( character side,
character trans,
character direct,
character storev,
integer m,
integer n,
integer k,
double precision, dimension( * ) v,
integer iv,
integer jv,
integer, dimension( * ) descv,
double precision, dimension( * ) t,
double precision, dimension( * ) c,
integer ic,
integer jc,
integer, dimension( * ) descc,
double precision, dimension( * ) work )

Definition at line 1 of file pdlarfb.f.

3*
4* -- ScaLAPACK auxiliary routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER SIDE, TRANS, DIRECT, STOREV
10 INTEGER IC, IV, JC, JV, K, M, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PDLARFB applies a real block reflector Q or its transpose Q**T to a
21* real distributed M-by-N matrix sub( C ) = C(IC:IC+M-1,JC:JC+N-1)
22* from the left or the right.
23*
24* Notes
25* =====
26*
27* Each global data object is described by an associated description
28* vector. This vector stores the information required to establish
29* the mapping between an object element and its corresponding process
30* and memory location.
31*
32* Let A be a generic term for any 2D block cyclicly distributed array.
33* Such a global array has an associated description vector DESCA.
34* In the following comments, the character _ should be read as
35* "of the global array".
36*
37* NOTATION STORED IN EXPLANATION
38* --------------- -------------- --------------------------------------
39* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
40* DTYPE_A = 1.
41* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
42* the BLACS process grid A is distribu-
43* ted over. The context itself is glo-
44* bal, but the handle (the integer
45* value) may vary.
46* M_A (global) DESCA( M_ ) The number of rows in the global
47* array A.
48* N_A (global) DESCA( N_ ) The number of columns in the global
49* array A.
50* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
51* the rows of the array.
52* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
53* the columns of the array.
54* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
55* row of the array A is distributed.
56* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
57* first column of the array A is
58* distributed.
59* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
60* array. LLD_A >= MAX(1,LOCr(M_A)).
61*
62* Let K be the number of rows or columns of a distributed matrix,
63* and assume that its process grid has dimension p x q.
64* LOCr( K ) denotes the number of elements of K that a process
65* would receive if K were distributed over the p processes of its
66* process column.
67* Similarly, LOCc( K ) denotes the number of elements of K that a
68* process would receive if K were distributed over the q processes of
69* its process row.
70* The values of LOCr() and LOCc() may be determined via a call to the
71* ScaLAPACK tool function, NUMROC:
72* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
73* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
74* An upper bound for these quantities may be computed by:
75* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
76* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
77*
78* Arguments
79* =========
80*
81* SIDE (global input) CHARACTER
82* = 'L': apply Q or Q**T from the Left;
83* = 'R': apply Q or Q**T from the Right.
84*
85* TRANS (global input) CHARACTER
86* = 'N': No transpose, apply Q;
87* = 'T': Transpose, apply Q**T.
88*
89* DIRECT (global input) CHARACTER
90* Indicates how Q is formed from a product of elementary
91* reflectors
92* = 'F': Q = H(1) H(2) . . . H(k) (Forward)
93* = 'B': Q = H(k) . . . H(2) H(1) (Backward)
94*
95* STOREV (global input) CHARACTER
96* Indicates how the vectors which define the elementary
97* reflectors are stored:
98* = 'C': Columnwise
99* = 'R': Rowwise
100*
101* M (global input) INTEGER
102* The number of rows to be operated on i.e the number of rows
103* of the distributed submatrix sub( C ). M >= 0.
104*
105* N (global input) INTEGER
106* The number of columns to be operated on i.e the number of
107* columns of the distributed submatrix sub( C ). N >= 0.
108*
109* K (global input) INTEGER
110* The order of the matrix T (= the number of elementary
111* reflectors whose product defines the block reflector).
112*
113* V (local input) DOUBLE PRECISION pointer into the local memory
114* to an array of dimension ( LLD_V, LOCc(JV+K-1) ) if
115* STOREV = 'C', ( LLD_V, LOCc(JV+M-1)) if STOREV = 'R' and
116* SIDE = 'L', ( LLD_V, LOCc(JV+N-1) ) if STOREV = 'R' and
117* SIDE = 'R'. It contains the local pieces of the distributed
118* vectors V representing the Householder transformation.
119* See further details.
120* If STOREV = 'C' and SIDE = 'L', LLD_V >= MAX(1,LOCr(IV+M-1));
121* if STOREV = 'C' and SIDE = 'R', LLD_V >= MAX(1,LOCr(IV+N-1));
122* if STOREV = 'R', LLD_V >= LOCr(IV+K-1).
123*
124* IV (global input) INTEGER
125* The row index in the global array V indicating the first
126* row of sub( V ).
127*
128* JV (global input) INTEGER
129* The column index in the global array V indicating the
130* first column of sub( V ).
131*
132* DESCV (global and local input) INTEGER array of dimension DLEN_.
133* The array descriptor for the distributed matrix V.
134*
135* T (local input) DOUBLE PRECISION array, dimension MB_V by MB_V
136* if STOREV = 'R' and NB_V by NB_V if STOREV = 'C'. The trian-
137* gular matrix T in the representation of the block reflector.
138*
139* C (local input/local output) DOUBLE PRECISION pointer into the
140* local memory to an array of dimension (LLD_C,LOCc(JC+N-1)).
141* On entry, the M-by-N distributed matrix sub( C ). On exit,
142* sub( C ) is overwritten by Q*sub( C ) or Q'*sub( C ) or
143* sub( C )*Q or sub( C )*Q'.
144*
145* IC (global input) INTEGER
146* The row index in the global array C indicating the first
147* row of sub( C ).
148*
149* JC (global input) INTEGER
150* The column index in the global array C indicating the
151* first column of sub( C ).
152*
153* DESCC (global and local input) INTEGER array of dimension DLEN_.
154* The array descriptor for the distributed matrix C.
155*
156* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
157* If STOREV = 'C',
158* if SIDE = 'L',
159* LWORK >= ( NqC0 + MpC0 ) * K
160* else if SIDE = 'R',
161* LWORK >= ( NqC0 + MAX( NpV0 + NUMROC( NUMROC( N+ICOFFC,
162* NB_V, 0, 0, NPCOL ), NB_V, 0, 0, LCMQ ),
163* MpC0 ) ) * K
164* end if
165* else if STOREV = 'R',
166* if SIDE = 'L',
167* LWORK >= ( MpC0 + MAX( MqV0 + NUMROC( NUMROC( M+IROFFC,
168* MB_V, 0, 0, NPROW ), MB_V, 0, 0, LCMP ),
169* NqC0 ) ) * K
170* else if SIDE = 'R',
171* LWORK >= ( MpC0 + NqC0 ) * K
172* end if
173* end if
174*
175* where LCMQ = LCM / NPCOL with LCM = ICLM( NPROW, NPCOL ),
176*
177* IROFFV = MOD( IV-1, MB_V ), ICOFFV = MOD( JV-1, NB_V ),
178* IVROW = INDXG2P( IV, MB_V, MYROW, RSRC_V, NPROW ),
179* IVCOL = INDXG2P( JV, NB_V, MYCOL, CSRC_V, NPCOL ),
180* MqV0 = NUMROC( M+ICOFFV, NB_V, MYCOL, IVCOL, NPCOL ),
181* NpV0 = NUMROC( N+IROFFV, MB_V, MYROW, IVROW, NPROW ),
182*
183* IROFFC = MOD( IC-1, MB_C ), ICOFFC = MOD( JC-1, NB_C ),
184* ICROW = INDXG2P( IC, MB_C, MYROW, RSRC_C, NPROW ),
185* ICCOL = INDXG2P( JC, NB_C, MYCOL, CSRC_C, NPCOL ),
186* MpC0 = NUMROC( M+IROFFC, MB_C, MYROW, ICROW, NPROW ),
187* NpC0 = NUMROC( N+ICOFFC, MB_C, MYROW, ICROW, NPROW ),
188* NqC0 = NUMROC( N+ICOFFC, NB_C, MYCOL, ICCOL, NPCOL ),
189*
190* ILCM, INDXG2P and NUMROC are ScaLAPACK tool functions;
191* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
192* the subroutine BLACS_GRIDINFO.
193*
194* Alignment requirements
195* ======================
196*
197* The distributed submatrices V(IV:*, JV:*) and C(IC:IC+M-1,JC:JC+N-1)
198* must verify some alignment properties, namely the following
199* expressions should be true:
200*
201* If STOREV = 'Columnwise'
202* If SIDE = 'Left',
203* ( MB_V.EQ.MB_C .AND. IROFFV.EQ.IROFFC .AND. IVROW.EQ.ICROW )
204* If SIDE = 'Right',
205* ( MB_V.EQ.NB_C .AND. IROFFV.EQ.ICOFFC )
206* else if STOREV = 'Rowwise'
207* If SIDE = 'Left',
208* ( NB_V.EQ.MB_C .AND. ICOFFV.EQ.IROFFC )
209* If SIDE = 'Right',
210* ( NB_V.EQ.NB_C .AND. ICOFFV.EQ.ICOFFC .AND. IVCOL.EQ.ICCOL )
211* end if
212*
213* =====================================================================
214*
215* .. Parameters ..
216 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
217 $ LLD_, MB_, M_, NB_, N_, RSRC_
218 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
219 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
220 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
221 DOUBLE PRECISION ONE, ZERO
222 parameter( one = 1.0d+0, zero = 0.0d+0 )
223* ..
224* .. Local Scalars ..
225 LOGICAL FORWARD
226 CHARACTER COLBTOP, ROWBTOP, TRANST, UPLO
227 INTEGER HEIGHT, IBASE, ICCOL, ICOFFC, ICOFFV, ICROW,
228 $ ICTXT, II, IIBEG, IIC, IIEND, IINXT, IIV,
229 $ ILASTCOL, ILASTROW, ILEFT, IOFF, IOFFC, IOFFV,
230 $ IPT, IPV, IPW, IPW1, IRIGHT, IROFFC, IROFFV,
231 $ ITOP, IVCOL, IVROW, JJ, JJBEG, JJC, JJEND,
232 $ JJNXT, JJV, KP, KQ, LDC, LDV, LV, LW, MBV, MPC,
233 $ MPC0, MQV, MQV0, MYCOL, MYDIST, MYROW, NBV,
234 $ NPV, NPV0, NPCOL, NPROW, NQC, NQC0, WIDE
235* ..
236* .. External Subroutines ..
238 $ dgsum2d, dlamov, dlaset, dtrbr2d,
239 $ dtrbs2d, dtrmm, infog1l, infog2l, pb_topget,
240 $ pbdtran
241* ..
242* .. Intrinsic Functions ..
243 INTRINSIC max, min, mod
244* ..
245* .. External Functions ..
246 LOGICAL LSAME
247 INTEGER ICEIL, NUMROC
248 EXTERNAL iceil, lsame, numroc
249* ..
250* .. Executable Statements ..
251*
252* Quick return if possible
253*
254 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
255 $ RETURN
256*
257* Get grid parameters
258*
259 ictxt = descc( ctxt_ )
260 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
261*
262 IF( lsame( trans, 'N' ) ) THEN
263 transt = 'T'
264 ELSE
265 transt = 'N'
266 END IF
267 forward = lsame( direct, 'F' )
268 IF( forward ) THEN
269 uplo = 'U'
270 ELSE
271 uplo = 'L'
272 END IF
273*
274 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
275 $ ivrow, ivcol )
276 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic, jjc,
277 $ icrow, iccol )
278 ldc = descc( lld_ )
279 ldv = descv( lld_ )
280 iic = min( iic, ldc )
281 iiv = min( iiv, ldv )
282 iroffc = mod( ic-1, descc( mb_ ) )
283 icoffc = mod( jc-1, descc( nb_ ) )
284 mbv = descv( mb_ )
285 nbv = descv( nb_ )
286 iroffv = mod( iv-1, mbv )
287 icoffv = mod( jv-1, nbv )
288 mpc = numroc( m+iroffc, descc( mb_ ), myrow, icrow, nprow )
289 nqc = numroc( n+icoffc, descc( nb_ ), mycol, iccol, npcol )
290 IF( mycol.EQ.iccol )
291 $ nqc = nqc - icoffc
292 IF( myrow.EQ.icrow )
293 $ mpc = mpc - iroffc
294 jjc = min( jjc, max( 1, jjc+nqc-1 ) )
295 jjv = min( jjv, max( 1, numroc( descv( n_ ), nbv, mycol,
296 $ descv( csrc_ ), npcol ) ) )
297 ioffc = iic + ( jjc-1 ) * ldc
298 ioffv = iiv + ( jjv-1 ) * ldv
299*
300 IF( lsame( storev, 'C' ) ) THEN
301*
302* V is stored columnwise
303*
304 IF( lsame( side, 'L' ) ) THEN
305*
306* Form Q*sub( C ) or Q'*sub( C )
307*
308* Locally V( IOFFV ) is MPV x K, C( IOFFC ) is MPC x NQC
309* WORK( IPV ) is MPC x K = V( IOFFV ), MPC = MPV
310* WORK( IPW ) is NQC x K = C( IOFFC )' * V( IOFFV )
311*
312 ipv = 1
313 ipw = ipv + mpc * k
314 lv = max( 1, mpc )
315 lw = max( 1, nqc )
316*
317* Broadcast V to the other process columns.
318*
319 CALL pb_topget( ictxt, 'broadcast', 'rowwise', ROWBTOP )
320.EQ. IF( MYCOLIVCOL ) THEN
321 CALL DGEBS2D( ICTXT, 'rowwise', ROWBTOP, MPC, K,
322 $ V( IOFFV ), LDV )
323.EQ. IF( MYROWIVROW )
324 $ CALL DTRBS2D( ICTXT, 'rowwise', ROWBTOP, UPLO,
325 $ 'non unit', K, K, T, NBV )
326 CALL DLAMOV( 'all', MPC, K, V( IOFFV ), LDV, WORK( IPV ),
327 $ LV )
328 ELSE
329 CALL DGEBR2D( ICTXT, 'rowwise', ROWBTOP, MPC, K,
330 $ WORK( IPV ), LV, MYROW, IVCOL )
331.EQ. IF( MYROWIVROW )
332 $ CALL DTRBR2D( ICTXT, 'rowwise', ROWBTOP, UPLO,
333 $ 'non unit', K, K, T, NBV, MYROW, IVCOL )
334 END IF
335*
336 IF( FORWARD ) THEN
337*
338* WORK(IPV) = ( V1 ) where V1 is unit lower triangular,
339* ( V2 ) zeroes upper triangular part of V1
340*
341 MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
342 ITOP = MAX( 0, MYDIST*MBV - IROFFV )
343 IIBEG = IIV
344 IIEND = IIBEG + MPC - 1
345 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
346*
347 10 CONTINUE
348.GT. IF( K-ITOP 0 ) THEN
349 CALL DLASET( 'upper', IINXT-IIBEG+1, K-ITOP, ZERO,
350 $ ONE, WORK( IPV+IIBEG-IIV+ITOP*LV ), LV )
351 MYDIST = MYDIST + NPROW
352 ITOP = MYDIST * MBV - IROFFV
353 IIBEG = IINXT + 1
354 IINXT = MIN( IINXT+MBV, IIEND )
355 GO TO 10
356 END IF
357*
358 ELSE
359*
360* WORK(IPV) = ( V1 ) where V2 is unit upper triangular,
361* ( V2 ) zeroes lower triangular part of V2
362*
363 JJ = JJV
364 IOFF = MOD( IV+M-K-1, MBV )
365 CALL INFOG1L( IV+M-K, MBV, NPROW, MYROW, DESCV( RSRC_ ),
366 $ II, ILASTROW )
367 KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
368.EQ. IF( MYROWILASTROW )
369 $ KP = KP - IOFF
370 MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
371 ITOP = MYDIST * MBV - IOFF
372 IBASE = MIN( ITOP+MBV, K )
373 ITOP = MIN( MAX( 0, ITOP ), K )
374*
375 20 CONTINUE
376.LE. IF( JJ( JJV+K-1 ) ) THEN
377 HEIGHT = IBASE - ITOP
378 CALL DLASET( 'all', KP, ITOP-JJ+JJV, ZERO, ZERO,
379 $ WORK( IPV+II-IIV+(JJ-JJV)*LV ), LV )
380 CALL DLASET( 'lower', KP, HEIGHT, ZERO, ONE,
381 $ WORK( IPV+II-IIV+ITOP*LV ), LV )
382 KP = MAX( 0, KP - HEIGHT )
383 II = II + HEIGHT
384 JJ = JJV + IBASE
385 MYDIST = MYDIST + NPROW
386 ITOP = MYDIST * MBV - IOFF
387 IBASE = MIN( ITOP + MBV, K )
388 ITOP = MIN( ITOP, K )
389 GO TO 20
390 END IF
391*
392 END IF
393*
394* WORK( IPW ) = C( IOFFC )' * V (NQC x MPC x K) -> NQC x K
395*
396.GT. IF( MPC0 ) THEN
397 CALL DGEMM( 'transpose', 'no transpose', NQC, K, MPC,
398 $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
399 $ WORK( IPW ), LW )
400 ELSE
401 CALL DLASET( 'all', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
402 END IF
403*
404 CALL DGSUM2D( ICTXT, 'columnwise', ' ', NQC, K, WORK( IPW ),
405 $ LW, IVROW, MYCOL )
406*
407.EQ. IF( MYROWIVROW ) THEN
408*
409* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
410*
411 CALL DTRMM( 'right', UPLO, TRANST, 'non unit', NQC, K,
412 $ ONE, T, NBV, WORK( IPW ), LW )
413 CALL DGEBS2D( ICTXT, 'columnwise', ' ', NQC, K,
414 $ WORK( IPW ), LW )
415 ELSE
416 CALL DGEBR2D( ICTXT, 'columnwise', ' ', NQC, K,
417 $ WORK( IPW ), LW, IVROW, MYCOL )
418 END IF
419*
420* C C - V * W'
421* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
422* MPC x NQC MPC x K K x NQC
423*
424 CALL DGEMM( 'no transpose', 'transpose', MPC, NQC, K, -ONE,
425 $ WORK( IPV ), LV, WORK( IPW ), LW, ONE,
426 $ C( IOFFC ), LDC )
427*
428 ELSE
429*
430* Form sub( C )*Q or sub( C )*Q'
431*
432* ICOFFC = IROFFV is required by the current transposition
433* routine PBDTRAN
434*
435 NPV0 = NUMROC( N+IROFFV, MBV, MYROW, IVROW, NPROW )
436.EQ. IF( MYROWIVROW ) THEN
437 NPV = NPV0 - IROFFV
438 ELSE
439 NPV = NPV0
440 END IF
441.EQ. IF( MYCOLICCOL ) THEN
442 NQC0 = NQC + ICOFFC
443 ELSE
444 NQC0 = NQC
445 END IF
446*
447* Locally V( IOFFV ) is NPV x K C( IOFFC ) is MPC x NQC
448* WORK( IPV ) is K x NQC0 = [ . V( IOFFV ) ]'
449* WORK( IPW ) is NPV0 x K = [ . V( IOFFV )' ]'
450* WORK( IPT ) is the workspace for PBDTRAN
451*
452 IPV = 1
453 IPW = IPV + K * NQC0
454 IPT = IPW + NPV0 * K
455 LV = MAX( 1, K )
456 LW = MAX( 1, NPV0 )
457*
458.EQ. IF( MYCOLIVCOL ) THEN
459.EQ. IF( MYROWIVROW ) THEN
460 CALL DLASET( 'all', IROFFV, K, ZERO, ZERO,
461 $ WORK( IPW ), LW )
462 IPW1 = IPW + IROFFV
463 CALL DLAMOV( 'all', NPV, K, V( IOFFV ), LDV,
464 $ WORK( IPW1 ), LW )
465 ELSE
466 IPW1 = IPW
467 CALL DLAMOV( 'all', NPV, K, V( IOFFV ), LDV,
468 $ WORK( IPW1 ), LW )
469 END IF
470*
471 IF( FORWARD ) THEN
472*
473* WORK(IPW) = ( . V1' V2' )' where V1 is unit lower
474* triangular, zeroes upper triangular part of V1
475*
476 MYDIST = MOD( MYROW-IVROW+NPROW, NPROW )
477 ITOP = MAX( 0, MYDIST*MBV - IROFFV )
478 IIBEG = IIV
479 IIEND = IIBEG + NPV - 1
480 IINXT = MIN( ICEIL( IIBEG, MBV )*MBV, IIEND )
481*
482 30 CONTINUE
483.GT. IF( ( K-ITOP )0 ) THEN
484 CALL DLASET( 'upper', IINXT-IIBEG+1, K-ITOP, ZERO,
485 $ ONE, WORK( IPW1+IIBEG-IIV+ITOP*LW ),
486 $ LW )
487 MYDIST = MYDIST + NPROW
488 ITOP = MYDIST * MBV - IROFFV
489 IIBEG = IINXT + 1
490 IINXT = MIN( IINXT+MBV, IIEND )
491 GO TO 30
492 END IF
493*
494 ELSE
495*
496* WORK( IPW ) = ( . V1' V2' )' where V2 is unit upper
497* triangular, zeroes lower triangular part of V2.
498*
499 JJ = JJV
500 CALL INFOG1L( IV+N-K, MBV, NPROW, MYROW,
501 $ DESCV( RSRC_ ), II, ILASTROW )
502 IOFF = MOD( IV+N-K-1, MBV )
503 KP = NUMROC( K+IOFF, MBV, MYROW, ILASTROW, NPROW )
504.EQ. IF( MYROWILASTROW )
505 $ KP = KP - IOFF
506 MYDIST = MOD( MYROW-ILASTROW+NPROW, NPROW )
507 ITOP = MYDIST * MBV - IOFF
508 IBASE = MIN( ITOP+MBV, K )
509 ITOP = MIN( MAX( 0, ITOP ), K )
510*
511 40 CONTINUE
512.LE. IF( JJ( JJV+K-1 ) ) THEN
513 HEIGHT = IBASE - ITOP
514 CALL DLASET( 'all', KP, ITOP-JJ+JJV, ZERO, ZERO,
515 $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
516 CALL DLASET( 'lower', KP, HEIGHT, ZERO, ONE,
517 $ WORK( IPW1+II-IIV+ITOP*LW ), LW )
518 KP = MAX( 0, KP - HEIGHT )
519 II = II + HEIGHT
520 JJ = JJV + IBASE
521 MYDIST = MYDIST + NPROW
522 ITOP = MYDIST * MBV - IOFF
523 IBASE = MIN( ITOP + MBV, K )
524 ITOP = MIN( ITOP, K )
525 GO TO 40
526 END IF
527 END IF
528 END IF
529*
530 CALL PBDTRAN( ICTXT, 'columnwise', 'transpose', N+IROFFV, K,
531 $ MBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV,
532 $ IVROW, IVCOL, -1, ICCOL, WORK( IPT ) )
533*
534* WORK( IPV ) = ( . V' ) -> WORK( IPV ) = V' is K x NQC
535*
536.EQ. IF( MYCOLICCOL )
537 $ IPV = IPV + ICOFFC * LV
538*
539* WORK( IPW ) becomes MPC x K = C( IOFFC ) * V
540* WORK( IPW ) = C( IOFFC ) * V (MPC x NQC x K) -> MPC x K
541*
542 LW = MAX( 1, MPC )
543*
544.GT. IF( NQC0 ) THEN
545 CALL DGEMM( 'no transpose', 'transpose', MPC, K, NQC,
546 $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
547 $ WORK( IPW ), LW )
548 ELSE
549 CALL DLASET( 'all', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
550 END IF
551*
552 CALL DGSUM2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
553 $ LW, MYROW, IVCOL )
554*
555* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
556*
557.EQ. IF( MYCOLIVCOL ) THEN
558.EQ. IF( MYROWIVROW ) THEN
559*
560* Broadcast the block reflector to the other rows.
561*
562 CALL DTRBS2D( ICTXT, 'columnwise', ' ', UPLO,
563 $ 'non unit', K, K, T, NBV )
564 ELSE
565 CALL DTRBR2D( ICTXT, 'columnwise', ' ', UPLO,
566 $ 'non unit', K, K, T, NBV, IVROW, MYCOL )
567 END IF
568 CALL DTRMM( 'right', UPLO, TRANS, 'non unit', MPC, K,
569 $ ONE, T, NBV, WORK( IPW ), LW )
570*
571 CALL DGEBS2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
572 $ LW )
573 ELSE
574 CALL DGEBR2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
575 $ LW, MYROW, IVCOL )
576 END IF
577*
578* C C - W * V'
579* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
580* MPC x NQC MPC x K K x NQC
581*
582 CALL DGEMM( 'no transpose', 'no transpose', MPC, NQC, K,
583 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
584 $ C( IOFFC ), LDC )
585 END IF
586*
587 ELSE
588*
589* V is stored rowwise
590*
591 IF( LSAME( SIDE, 'l' ) ) THEN
592*
593* Form Q*sub( C ) or Q'*sub( C )
594*
595* IROFFC = ICOFFV is required by the current transposition
596* routine PBDTRAN
597*
598 MQV0 = NUMROC( M+ICOFFV, NBV, MYCOL, IVCOL, NPCOL )
599.EQ. IF( MYCOLIVCOL ) THEN
600 MQV = MQV0 - ICOFFV
601 ELSE
602 MQV = MQV0
603 END IF
604.EQ. IF( MYROWICROW ) THEN
605 MPC0 = MPC + IROFFC
606 ELSE
607 MPC0 = MPC
608 END IF
609*
610* Locally V( IOFFV ) is K x MQV, C( IOFFC ) is MPC x NQC
611* WORK( IPV ) is MPC0 x K = [ . V( IOFFV ) ]'
612* WORK( IPW ) is K x MQV0 = [ . V( IOFFV ) ]
613* WORK( IPT ) is the workspace for PBDTRAN
614*
615 IPV = 1
616 IPW = IPV + MPC0 * K
617 IPT = IPW + K * MQV0
618 LV = MAX( 1, MPC0 )
619 LW = MAX( 1, K )
620*
621.EQ. IF( MYROWIVROW ) THEN
622.EQ. IF( MYCOLIVCOL ) THEN
623 CALL DLASET( 'all', K, ICOFFV, ZERO, ZERO,
624 $ WORK( IPW ), LW )
625 IPW1 = IPW + ICOFFV * LW
626 CALL DLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
627 $ WORK( IPW1 ), LW )
628 ELSE
629 IPW1 = IPW
630 CALL DLAMOV( 'all', K, MQV, V( IOFFV ), LDV,
631 $ WORK( IPW1 ), LW )
632 END IF
633*
634 IF( FORWARD ) THEN
635*
636* WORK( IPW ) = ( . V1 V2 ) where V1 is unit upper
637* triangular, zeroes lower triangular part of V1
638*
639 MYDIST = MOD( MYCOL-IVCOL+NPCOL, NPCOL )
640 ILEFT = MAX( 0, MYDIST * NBV - ICOFFV )
641 JJBEG = JJV
642 JJEND = JJV + MQV - 1
643 JJNXT = MIN( ICEIL( JJBEG, NBV ) * NBV, JJEND )
644*
645 50 CONTINUE
646.GT. IF( ( K-ILEFT )0 ) THEN
647 CALL DLASET( 'lower', K-ILEFT, JJNXT-JJBEG+1, ZERO,
648 $ ONE,
649 $ WORK( IPW1+ILEFT+(JJBEG-JJV)*LW ),
650 $ LW )
651 MYDIST = MYDIST + NPCOL
652 ILEFT = MYDIST * NBV - ICOFFV
653 JJBEG = JJNXT + 1
654 JJNXT = MIN( JJNXT+NBV, JJEND )
655 GO TO 50
656 END IF
657*
658 ELSE
659*
660* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
661* triangular, zeroes upper triangular part of V2.
662*
663 II = IIV
664 CALL INFOG1L( JV+M-K, NBV, NPCOL, MYCOL,
665 $ DESCV( CSRC_ ), JJ, ILASTCOL )
666 IOFF = MOD( JV+M-K-1, NBV )
667 KQ = NUMROC( K+IOFF, NBV, MYCOL, ILASTCOL, NPCOL )
668.EQ. IF( MYCOLILASTCOL )
669 $ KQ = KQ - IOFF
670 MYDIST = MOD( MYCOL-ILASTCOL+NPCOL, NPCOL )
671 ILEFT = MYDIST * NBV - IOFF
672 IRIGHT = MIN( ILEFT+NBV, K )
673 ILEFT = MIN( MAX( 0, ILEFT ), K )
674*
675 60 CONTINUE
676.LE. IF( II( IIV+K-1 ) ) THEN
677 WIDE = IRIGHT - ILEFT
678 CALL DLASET( 'all', ILEFT-II+IIV, KQ, ZERO, ZERO,
679 $ WORK( IPW1+II-IIV+(JJ-JJV)*LW ), LW )
680 CALL DLASET( 'upper', WIDE, KQ, ZERO, ONE,
681 $ WORK( IPW1+ILEFT+(JJ-JJV)*LW ), LW )
682 KQ = MAX( 0, KQ - WIDE )
683 II = IIV + IRIGHT
684 JJ = JJ + WIDE
685 MYDIST = MYDIST + NPCOL
686 ILEFT = MYDIST * NBV - IOFF
687 IRIGHT = MIN( ILEFT + NBV, K )
688 ILEFT = MIN( ILEFT, K )
689 GO TO 60
690 END IF
691 END IF
692 END IF
693*
694* WORK( IPV ) = WORK( IPW )' (replicated) is MPC0 x K
695*
696 CALL PBDTRAN( ICTXT, 'rowwise', 'transpose', K, M+ICOFFV,
697 $ NBV, WORK( IPW ), LW, ZERO, WORK( IPV ), LV,
698 $ IVROW, IVCOL, ICROW, -1, WORK( IPT ) )
699*
700* WORK( IPV ) = ( . V )' -> WORK( IPV ) = V' is MPC x K
701*
702.EQ. IF( MYROWICROW )
703 $ IPV = IPV + IROFFC
704*
705* WORK( IPW ) becomes NQC x K = C( IOFFC )' * V'
706* WORK( IPW ) = C( IOFFC )' * V' (NQC x MPC x K) -> NQC x K
707*
708 LW = MAX( 1, NQC )
709*
710.GT. IF( MPC0 ) THEN
711 CALL DGEMM( 'transpose', 'no transpose', NQC, K, MPC,
712 $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
713 $ WORK( IPW ), LW )
714 ELSE
715 CALL DLASET( 'all', NQC, K, ZERO, ZERO, WORK( IPW ), LW )
716 END IF
717*
718 CALL DGSUM2D( ICTXT, 'columnwise', ' ', NQC, K, WORK( IPW ),
719 $ LW, IVROW, MYCOL )
720*
721* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
722*
723.EQ. IF( MYROWIVROW ) THEN
724.EQ. IF( MYCOLIVCOL ) THEN
725*
726* Broadcast the block reflector to the other columns.
727*
728 CALL DTRBS2D( ICTXT, 'rowwise', ' ', UPLO, 'non unit',
729 $ k, k, t, mbv )
730 ELSE
731 CALL dtrbr2d( ictxt, 'Rowwise', ' ', UPLO, 'non unit',
732 $ K, K, T, MBV, MYROW, IVCOL )
733 END IF
734 CALL DTRMM( 'right', UPLO, TRANST, 'non unit', nqc, k,
735 $ one, t, mbv, work( ipw ), lw )
736*
737 CALL dgebs2d( ictxt, 'Columnwise', ' ', nqc, k,
738 $ work( ipw ), lw )
739 ELSE
740 CALL dgebr2d( ictxt, 'Columnwise', ' ', nqc, k,
741 $ work( ipw ), lw, ivrow, mycol )
742 END IF
743*
744* C C - V' * W'
745* C( IOFFC ) = C( IOFFC ) - WORK( IPV ) * WORK( IPW )'
746* MPC x NQC MPC x K K x NQC
747*
748 CALL dgemm( 'No transpose', 'Transpose', mpc, nqc, k, -one,
749 $ work( ipv ), lv, work( ipw ), lw, one,
750 $ c( ioffc ), ldc )
751*
752 ELSE
753*
754* Form Q*sub( C ) or Q'*sub( C )
755*
756* Locally V( IOFFV ) is K x NQV, C( IOFFC ) is MPC x NQC
757* WORK( IPV ) is K x NQV = V( IOFFV ), NQV = NQC
758* WORK( IPW ) is MPC x K = C( IOFFC ) * V( IOFFV )'
759*
760 ipv = 1
761 ipw = ipv + k * nqc
762 lv = max( 1, k )
763 lw = max( 1, mpc )
764*
765* Broadcast V to the other process rows.
766*
767 CALL pb_topget( ictxt, 'Broadcast', 'Columnwise', colbtop )
768 IF( myrow.EQ.ivrow ) THEN
769 CALL dgebs2d( ictxt, 'Columnwise', colbtop, k, nqc,
770 $ v( ioffv ), ldv )
771 IF( mycol.EQ.ivcol )
772 $ CALL dtrbs2d( ictxt, 'Columnwise', colbtop, uplo,
773 $ 'Non unit', k, k, t, mbv )
774 CALL dlamov( 'All', k, nqc, v( ioffv ), ldv, work( ipv ),
775 $ lv )
776 ELSE
777 CALL dgebr2d( ictxt, 'Columnwise', colbtop, k, nqc,
778 $ work( ipv ), lv, ivrow, mycol )
779 IF( mycol.EQ.ivcol )
780 $ CALL dtrbr2d( ictxt, 'Columnwise', colbtop, uplo,
781 $ 'Non unit', k, k, t, mbv, ivrow, mycol )
782 END IF
783*
784 IF( forward ) THEN
785*
786* WORK(IPW) = ( V1 V2 ) where V1 is unit upper
787* triangular, zeroes lower triangular part of V1
788*
789 mydist = mod( mycol-ivcol+npcol, npcol )
790 ileft = max( 0, mydist * nbv - icoffv )
791 jjbeg = jjv
792 jjend = jjv + nqc - 1
793 jjnxt = min( iceil( jjbeg, nbv ) * nbv, jjend )
794*
795 70 CONTINUE
796 IF( ( k-ileft ).GT.0 ) THEN
797 CALL dlaset( 'Lower', k-ileft, jjnxt-jjbeg+1, zero,
798 $ one, work( ipv+ileft+(jjbeg-jjv)*lv ),
799 $ lv )
800 mydist = mydist + npcol
801 ileft = mydist * nbv - icoffv
802 jjbeg = jjnxt + 1
803 jjnxt = min( jjnxt+nbv, jjend )
804 GO TO 70
805 END IF
806*
807 ELSE
808*
809* WORK( IPW ) = ( . V1 V2 ) where V2 is unit lower
810* triangular, zeroes upper triangular part of V2.
811*
812 ii = iiv
813 CALL infog1l( jv+n-k, nbv, npcol, mycol, descv( csrc_ ),
814 $ jj, ilastcol )
815 ioff = mod( jv+n-k-1, nbv )
816 kq = numroc( k+ioff, nbv, mycol, ilastcol, npcol )
817 IF( mycol.EQ.ilastcol )
818 $ kq = kq - ioff
819 mydist = mod( mycol-ilastcol+npcol, npcol )
820 ileft = mydist * nbv - ioff
821 iright = min( ileft+nbv, k )
822 ileft = min( max( 0, ileft ), k )
823*
824 80 CONTINUE
825 IF( ii.LE.( iiv+k-1 ) ) THEN
826 wide = iright - ileft
827 CALL dlaset( 'All', ileft-ii+iiv, kq, zero, zero,
828 $ work( ipv+ii-iiv+(jj-jjv)*lv ), lv )
829 CALL dlaset( 'Upper', wide, kq, zero, one,
830 $ work( ipv+ileft+(jj-jjv)*lv ), lv )
831 kq = max( 0, kq - wide )
832 ii = iiv + iright
833 jj = jj + wide
834 mydist = mydist + npcol
835 ileft = mydist * nbv - ioff
836 iright = min( ileft + nbv, k )
837 ileft = min( ileft, k )
838 GO TO 80
839 END IF
840*
841 END IF
842*
843* WORK( IPV ) is K x NQC = V = V( IOFFV )
844* WORK( IPW ) = C( IOFFC ) * V' (MPC x NQC x K) -> MPC x K
845*
846 IF( nqc.GT.0 ) THEN
847 CALL dgemm( 'no transpose', 'transpose', MPC, K, NQC,
848 $ ONE, C( IOFFC ), LDC, WORK( IPV ), LV, ZERO,
849 $ WORK( IPW ), LW )
850 ELSE
851 CALL DLASET( 'all', MPC, K, ZERO, ZERO, WORK( IPW ), LW )
852 END IF
853*
854 CALL DGSUM2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
855 $ LW, MYROW, IVCOL )
856*
857* WORK( IPW ) = WORK( IPW ) * T' or WORK( IPW ) * T
858*
859.EQ. IF( MYCOLIVCOL ) THEN
860 CALL DTRMM( 'right', UPLO, TRANS, 'non unit', MPC, K,
861 $ ONE, T, MBV, WORK( IPW ), LW )
862 CALL DGEBS2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
863 $ LW )
864 ELSE
865 CALL DGEBR2D( ICTXT, 'rowwise', ' ', MPC, K, WORK( IPW ),
866 $ LW, MYROW, IVCOL )
867 END IF
868*
869* C C - W * V
870* C( IOFFC ) = C( IOFFC ) - WORK( IPW ) * WORK( IPV )
871* MPC x NQC MPC x K K x NQC
872*
873 CALL DGEMM( 'no transpose', 'no transpose', MPC, NQC, K,
874 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,
875 $ C( IOFFC ), LDC )
876*
877 END IF
878*
879 END IF
880*
881 RETURN
882*
883* End of PDLARFB
884*
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
logical function lsame(ca, cb)
LSAME
Definition lsame.f:53
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:187
subroutine dtrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
DTRMM
Definition dtrmm.f:177
integer function iceil(inum, idenom)
Definition iceil.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
#define min(a, b)
Definition macros.h:20
#define max(a, b)
Definition macros.h:21
subroutine dgebs2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1082
subroutine dgebr2d(contxt, scope, top, m, n, a, lda)
Definition mpi.f:1123
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition mpi.f:786
subroutine pbdtran(icontxt, adist, trans, m, n, nb, a, lda, beta, c, ldc, iarow, iacol, icrow, iccol, work)
Definition pbdtran.f:3
subroutine jc(p, t, a, b, cm, cn, tref, tm, epsm, sigmam, jc_yield, tan_jc)
Definition sigeps106.F:339