OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
pclahrd.f
Go to the documentation of this file.
1 SUBROUTINE pclahrd( N, K, NB, A, IA, JA, DESCA, TAU, T, Y, IY, JY,
2 $ DESCY, WORK )
3*
4* -- ScaLAPACK auxiliary routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IY, JA, JY, K, N, NB
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * ), DESCY( * )
14 COMPLEX A( * ), T( * ), TAU( * ), WORK( * ), Y( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCLAHRD reduces the first NB columns of a complex general
21* N-by-(N-K+1) distributed matrix A(IA:IA+N-1,JA:JA+N-K) so that
22* elements below the k-th subdiagonal are zero. The reduction is
23* performed by an unitary similarity transformation Q' * A * Q. The
24* routine returns the matrices V and T which determine Q as a block
25* reflector I - V*T*V', and also the matrix Y = A * V * T.
26*
27* This is an auxiliary routine called by PCGEHRD. In the following
28* comments sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1).
29*
30* Arguments
31* =========
32*
33* N (global input) INTEGER
34* The number of rows and columns to be operated on, i.e. the
35* order of the distributed submatrix sub( A ).
36* N >= 0.
37*
38* K (global input) INTEGER
39* The offset for the reduction. Elements below the k-th
40* subdiagonal in the first NB columns are reduced to zero.
41*
42* NB (global input) INTEGER
43* The number of columns to be reduced.
44*
45* A (local input/local output) COMPLEX pointer into
46* the local memory to an array of dimension (LLD_A,
47* LOCc(JA+N-K)). On entry, this array contains the the local
48* pieces of the N-by-(N-K+1) general distributed matrix
49* A(IA:IA+N-1,JA:JA+N-K). On exit, the elements on and above
50* the k-th subdiagonal in the first NB columns are overwritten
51* with the corresponding elements of the reduced distributed
52* matrix; the elements below the k-th subdiagonal, with the
53* array TAU, represent the matrix Q as a product of elementary
54* reflectors. The other columns of A(IA:IA+N-1,JA:JA+N-K) are
55* unchanged. See Further Details.
56*
57* IA (global input) INTEGER
58* The row index in the global array A indicating the first
59* row of sub( A ).
60*
61* JA (global input) INTEGER
62* The column index in the global array A indicating the
63* first column of sub( A ).
64*
65* DESCA (global and local input) INTEGER array of dimension DLEN_.
66* The array descriptor for the distributed matrix A.
67*
68* TAU (local output) COMPLEX array, dimension LOCc(JA+N-2)
69* The scalar factors of the elementary reflectors (see Further
70* Details). TAU is tied to the distributed matrix A.
71*
72* T (local output) COMPLEX array, dimension (NB_A,NB_A)
73* The upper triangular matrix T.
74*
75* Y (local output) COMPLEX pointer into the local memory
76* to an array of dimension (LLD_Y,NB_A). On exit, this array
77* contains the local pieces of the N-by-NB distributed
78* matrix Y. LLD_Y >= LOCr(IA+N-1).
79*
80* IY (global input) INTEGER
81* The row index in the global array Y indicating the first
82* row of sub( Y ).
83*
84* JY (global input) INTEGER
85* The column index in the global array Y indicating the
86* first column of sub( Y ).
87*
88* DESCY (global and local input) INTEGER array of dimension DLEN_.
89* The array descriptor for the distributed matrix Y.
90*
91* WORK (local workspace) COMPLEX array, dimension (NB)
92*
93* Further Details
94* ===============
95*
96* The matrix Q is represented as a product of nb elementary reflectors
97*
98* Q = H(1) H(2) . . . H(nb).
99*
100* Each H(i) has the form
101*
102* H(i) = I - tau * v * v'
103*
104* where tau is a complex scalar, and v is a complex vector with
105* v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
106* A(ia+i+k:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
107*
108* The elements of the vectors v together form the (n-k+1)-by-nb matrix
109* V which is needed, with T and Y, to apply the transformation to the
110* unreduced part of the matrix, using an update of the form:
111* A(ia:ia+n-1,ja:ja+n-k) := (I-V*T*V')*(A(ia:ia+n-1,ja:ja+n-k)-Y*V').
112*
113* The contents of A(ia:ia+n-1,ja:ja+n-k) on exit are illustrated by the
114* following example with n = 7, k = 3 and nb = 2:
115*
116* ( a h a a a )
117* ( a h a a a )
118* ( a h a a a )
119* ( h h a a a )
120* ( v1 h a a a )
121* ( v1 v2 a a a )
122* ( v1 v2 a a a )
123*
124* where a denotes an element of the original matrix
125* A(ia:ia+n-1,ja:ja+n-k), h denotes a modified element of the upper
126* Hessenberg matrix H, and vi denotes an element of the vector
127* defining H(i).
128*
129* =====================================================================
130*
131* .. Parameters ..
132 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
133 $ lld_, mb_, m_, nb_, n_, rsrc_
134 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
135 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
136 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
137 COMPLEX ONE, ZERO
138 parameter( one = ( 1.0e+0, 0.0e+0 ),
139 $ zero = ( 0.0e+0, 0.0e+0 ) )
140* ..
141* .. Local Scalars ..
142 LOGICAL IPROC
143 INTEGER I, IACOL, IAROW, ICTXT, IOFF, II, J, JJ, JL,
144 $ jt, jw, l, myrow, mycol, npcol, nprow, nq
145 COMPLEX EI
146* ..
147* .. Local Arrays ..
148 INTEGER DESCW( DLEN_ )
149* ..
150* .. External Functions ..
151 INTEGER NUMROC
152 EXTERNAL numroc
153* ..
154* .. External Subroutines ..
155 EXTERNAL blacs_gridinfo, caxpy, ccopy, cscal,
157 $ pcgemv, pclacgv, pclarfg, pcscal
158* ..
159* .. Intrinsic Functions ..
160 INTRINSIC min, mod
161* ..
162* .. Executable Statements ..
163*
164* Quick return if possible
165*
166 IF( n.LE.1 )
167 $ RETURN
168*
169 ictxt = desca( ctxt_ )
170 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
171*
172 ioff = mod( ja-1, desca( nb_ ) )
173 CALL infog2l( ia+k, ja, desca, nprow, npcol, myrow, mycol, ii,
174 $ jj, iarow, iacol )
175*
176 iproc = ( myrow.EQ.iarow .AND. mycol.EQ.iacol )
177 nq = numroc( n+ja-1, desca( nb_ ), mycol, iacol, npcol )
178 IF( mycol.EQ.iacol )
179 $ nq = nq - ioff
180*
181 ei = zero
182
183 jw = ioff + 1
184 CALL descset( descw, 1, desca( mb_ ), 1, desca( mb_ ), iarow,
185 $ iacol, ictxt, 1 )
186*
187 DO 10 l = 1, nb
188 i = ia + k + l - 2
189 j = ja + l - 1
190*
191 IF( l.GT.1 ) THEN
192*
193* Update A(ia:ia+n-1,j)
194*
195* Compute i-th column of A - Y * V'
196*
197 CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
198 CALL pcgemv( 'No transpose', n, l-1, -one, y, iy, jy, descy,
199 $ a, i, ja, desca, desca( m_ ), one, a, ia, j,
200 $ desca, 1 )
201 CALL pclacgv( l-1, a, i, ja, desca, desca( m_ ) )
202*
203* Apply I - V * T' * V' to this column (call it b) from the
204* left, using the last column of T as workspace
205*
206* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows)
207* ( V2 ) ( b2 )
208*
209* where V1 is unit lower triangular
210*
211* w := V1' * b1
212*
213 IF( iproc ) THEN
214 CALL ccopy( l-1, a( (jj+l-2)*desca( lld_ )+ii ), 1,
215 $ work( jw ), 1 )
216 CALL ctrmv( 'Lower', 'Conjugate transpose', 'unit', L-1,
217 $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
218 $ WORK( JW ), 1 )
219 END IF
220*
221* w := w + V2'*b2
222*
223 CALL PCGEMV( 'conjugate transpose', N-K-L+1, L-1, ONE, A,
224 $ I+1, JA, DESCA, A, I+1, J, DESCA, 1, ONE, WORK,
225 $ 1, JW, DESCW, DESCW( M_ ) )
226*
227* w := T'*w
228*
229 IF( IPROC )
230 $ CALL CTRMV( 'upper', 'conjugate transpose', 'non-unit',
231 $ L-1, T, DESCA( NB_ ), WORK( JW ), 1 )
232*
233* b2 := b2 - V2*w
234*
235 CALL PCGEMV( 'no transpose', N-K-L+1, L-1, -ONE, A, I+1, JA,
236 $ DESCA, WORK, 1, JW, DESCW, DESCW( M_ ), ONE,
237 $ A, I+1, J, DESCA, 1 )
238*
239* b1 := b1 - V1*w
240*
241 IF( IPROC ) THEN
242 CALL CTRMV( 'lower', 'no transpose', 'unit', L-1,
243 $ A( (JJ-1)*DESCA( LLD_ )+II ), DESCA( LLD_ ),
244 $ WORK( JW ), 1 )
245 CALL CAXPY( L-1, -ONE, WORK( JW ), 1,
246 $ A( ( JJ+L-2 )*DESCA( LLD_ )+II ), 1 )
247 END IF
248 CALL PCELSET( A, I, J-1, DESCA, EI )
249 END IF
250*
251* Generate the elementary reflector H(i) to annihilate
252* A(ia+k+i:ia+n-1,j)
253*
254 CALL PCLARFG( N-K-L+1, EI, I+1, J, A, MIN( I+2, N+IA-1 ), J,
255 $ DESCA, 1, TAU )
256 CALL PCELSET( A, I+1, J, DESCA, ONE )
257*
258* Compute Y(iy:y+n-1,jy+l-1)
259*
260 CALL PCGEMV( 'no transpose', N, N-K-L+1, ONE, A, IA, J+1,
261 $ DESCA, A, I+1, J, DESCA, 1, ZERO, Y, IY, JY+L-1,
262 $ DESCY, 1 )
263 CALL PCGEMV( 'conjugate transpose', N-K-L+1, L-1, ONE, A, I+1,
264 $ JA, DESCA, A, I+1, J, DESCA, 1, ZERO, WORK, 1, JW,
265 $ DESCW, DESCW( M_ ) )
266 CALL PCGEMV( 'no transpose', N, L-1, -ONE, Y, IY, JY, DESCY,
267 $ WORK, 1, JW, DESCW, DESCW( M_ ), ONE, Y, IY,
268 $ JY+L-1, DESCY, 1 )
269 JL = MIN( JJ+L-1, JA+NQ-1 )
270 CALL PCSCAL( N, TAU( JL ), Y, IY, JY+L-1, DESCY, 1 )
271*
272* Compute T(1:i,i)
273*
274 IF( IPROC ) THEN
275 JT = ( L-1 ) * DESCA( NB_ )
276 CALL CSCAL( L-1, -TAU( JL ), WORK( JW ), 1 )
277 CALL CCOPY( L-1, WORK( JW ), 1, T( JT+1 ), 1 )
278 CALL CTRMV( 'upper', 'no transpose', 'non-unit', L-1, T,
279 $ DESCA( NB_ ), T( JT+1 ), 1 )
280 T( JT+L ) = TAU( JL )
281 END IF
282 10 CONTINUE
283*
284 CALL PCELSET( A, K+NB+IA-1, J, DESCA, EI )
285*
286 RETURN
287*
288* End of PCLAHRD
289*
290 END
subroutine caxpy(n, ca, cx, incx, cy, incy)
CAXPY
Definition caxpy.f:88
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78
subroutine ctrmv(uplo, trans, diag, n, a, lda, x, incx)
CTRMV
Definition ctrmv.f:147
#define min(a, b)
Definition macros.h:20
subroutine pcscal(n, alpha, x, ix, jx, descx, incx)
Definition mpi.f:955
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition mpi.f:1610
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition mpi.f:937
subroutine blacs_gridinfo(cntxt, nprow, npcol, myrow, mycol)
Definition mpi.f:754
subroutine pcelset(a, ia, ja, desca, alpha)
Definition pcelset.f:2
subroutine pclacgv(n, x, ix, jx, descx, incx)
Definition pclacgv.f:2
subroutine pclahrd(n, k, nb, a, ia, ja, desca, tau, t, y, iy, jy, descy, work)
Definition pclahrd.f:3
subroutine pclarfg(n, alpha, iax, jax, x, ix, jx, descx, incx, tau)
Definition pclarfg.f:3