1 SUBROUTINE pdlarzb( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
2 $ IV, JV, DESCV, T, C, IC, JC, DESCC, WORK )
9 CHARACTER DIRECT, SIDE, STOREV, TRANS
10 INTEGER IC, IV, JC, JV, K, L, M, N
13 INTEGER DESCC( * ), DESCV( * )
14 DOUBLE PRECISION C( * ), T( * ), V( * ), WORK( * )
221 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
222 $ lld_, mb_, m_, nb_, n_, rsrc_
223 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
224 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
225 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
226 DOUBLE PRECISION ONE, ZERO
227 parameter( one = 1.0d+0, zero = 0.0d+0 )
231 CHARACTER COLBTOP, TRANST
232 INTEGER ICCOL1, ICCOL2, ICOFFC1, ICOFFC2, ICOFFV,
233 $ icrow1, icrow2, ictxt, iibeg, iic1, iic2,
234 $ iiend, iinxt, iiv, ileft, info, ioffc2, ioffv,
235 $ ipt, ipv, ipw, iroffc1, iroffc2, itop, ivcol,
236 $ ivrow, jjbeg, jjend, jjnxt, jjc1, jjc2, jjv,
237 $ ldc, ldv, lv, lw, mbc, mbv, mpc1, mpc2, mpc20,
238 $ mqv, mqv0, mycol, mydist, myrow, nbc, nbv,
239 $ npcol, nprow, nqc1, nqc2, nqcall, nqv
252 INTEGER ICEIL, NUMROC
253 EXTERNAL iceil, lsame, numroc
259 IF( m.LE.0 .OR. n.LE.0 .OR. k.LE.0 )
264 ictxt = descc( ctxt_ )
270 IF( .NOT.lsame( direct,
'B' ) )
THEN
272 ELSE IF( .NOT.lsame( storev,
'R' ) )
THEN
276 CALL pxerbla( ictxt,
'PDLARZB', -info )
277 CALL blacs_abort( ictxt, 1 )
281 left = lsame( side,
'L' )
282 IF( lsame( trans,
'N' ) )
THEN
288 CALL infog2l( iv, jv, descv, nprow, npcol, myrow, mycol, iiv, jjv,
292 icoffv = mod( jv-1, nbv )
293 nqv = numroc( l+icoffv, nbv, mycol, ivcol, npcol )
297 iiv =
min( iiv, ldv )
298 jjv =
min( jjv,
max( 1, numroc( descv( n_ ), nbv, mycol,
299 $ descv( csrc_ ), npcol ) ) )
300 ioffv = iiv + ( jjv-1 ) * ldv
303 nqcall = numroc( descc( n_ ), nbc, mycol, descc( csrc_ ), npcol )
304 CALL infog2l( ic, jc, descc, nprow, npcol, myrow, mycol, iic1,
305 $ jjc1, icrow1, iccol1 )
307 iic1 =
min( iic1, ldc )
308 jjc1 =
min( jjc1,
max( 1, nqcall ) )
311 iroffc1 = mod( ic-1, mbc )
312 mpc1 = numroc( k+iroffc1, mbc, myrow, icrow1, nprow )
313 IF( myrow.EQ.icrow1 )
314 $ mpc1 = mpc1 - iroffc1
315 icoffc1 = mod( jc-1, nbc )
316 nqc1 = numroc( n+icoffc1, nbc, mycol, iccol1, npcol )
317 IF( mycol.EQ.iccol1 )
318 $ nqc1 = nqc1 - icoffc1
319 CALL infog2l( ic+m-l, jc, descc, nprow, npcol, myrow, mycol,
320 $ iic2, jjc2, icrow2, iccol2 )
321 iroffc2 = mod( ic+m-l-1, mbc )
322 mpc2 = numroc( l+iroffc2, mbc, myrow, icrow2, nprow )
323 IF( myrow.EQ.icrow2 )
324 $ mpc2 = mpc2 - iroffc2
328 iroffc1 = mod( ic-1, mbc )
329 mpc1 = numroc( m+iroffc1, mbc, myrow, icrow1, nprow )
330 IF( myrow.EQ.icrow1 )
331 $ mpc1 = mpc1 - iroffc1
332 icoffc1 = mod( jc-1, nbc )
333 nqc1 = numroc( k+icoffc1, nbc, mycol, iccol1, npcol )
334 IF( mycol.EQ.iccol1 )
335 $ nqc1 = nqc1 - icoffc1
336 CALL infog2l( ic, jc+n-l, descc, nprow, npcol, myrow, mycol,
337 $ iic2, jjc2, icrow2, iccol2 )
340 icoffc2 = mod( jc+n-l-1, nbc )
341 nqc2 = numroc( l+icoffc2, nbc, mycol, iccol2, npcol )
342 IF( mycol.EQ.iccol2 )
343 $ nqc2 = nqc2 - icoffc2
345 iic2 =
min( iic2, ldc )
346 jjc2 =
min( jjc2, nqcall )
347 ioffc2 = iic2 + ( jjc2-1 ) * ldc
349 IF( lsame( side,
'L' ) )
THEN
356 mqv0 = numroc( m+icoffv, nbv, mycol, ivcol, npcol )
357 IF( mycol.EQ.ivcol )
THEN
362 IF( myrow.EQ.icrow2 )
THEN
363 mpc20 = mpc2 + iroffc2
374 ipw = ipv + mpc20 * k
379 IF( myrow.EQ.ivrow )
THEN
380 IF( mycol.EQ.ivcol )
THEN
381 CALL dlamov(
'All', k, mqv, v( ioffv ), ldv,
382 $ work( ipw+icoffv*lw ), lw )
384 CALL dlamov(
'All', k, mqv, v( ioffv ), ldv,
391 CALL pbdtran( ictxt,
'Rowwise',
'Transpose', k, m+icoffv,
392 $ descv( nb_ ), work( ipw ), lw, zero,
393 $ work( ipv ), lv, ivrow, ivcol, icrow2, -1,
398 IF( myrow.EQ.icrow2 )
399 $ ipv = ipv + iroffc2
407 CALL dgemm(
'Transpose',
'No transpose', nqc2, k, mpc2,
408 $ one, c( ioffc2 ), ldc, work( ipv ), lv, zero,
411 CALL dlaset(
'All', nqc2, k, zero, zero, work( ipw ), lw )
417 mydist = mod( myrow-icrow1+nprow, nprow )
418 itop =
max( 0, mydist * mbc - iroffc1 )
420 iiend = iic1 + mpc1 - 1
421 iinxt =
min( iceil( iibeg, mbc ) * mbc, iiend )
424 IF( iibeg.LE.iinxt )
THEN
425 CALL pbdmatadd( ictxt,
'Transpose', nqc2, iinxt-iibeg+1,
426 $ one, c( iibeg+(jjc1-1)*ldc ), ldc, one,
427 $ work( ipw+itop ), lw )
428 mydist = mydist + nprow
429 itop = mydist * mbc - iroffc1
431 iinxt =
min( iinxt+mbc, iiend )
436 CALL dgsum2d( ictxt,
'Columnwise',
' ', nqc2, k, work( ipw ),
441 IF( myrow.EQ.ivrow )
THEN
446 CALL dtrbs2d( ictxt,
'Rowwise',
' ',
'Lower',
'Non unit',
449 CALL dtrbr2d( ictxt,
'Rowwise',
' ',
'Lower',
'Non unit',
450 $ k, k, t, mbv, myrow, ivcol )
452 CALL dtrmm(
'Right',
'Lower', transt,
'Non unit', nqc2, k,
455 CALL dgebs2d( ictxt, 'columnwise
', ' ', NQC2, K,
458 CALL DGEBR2D( ICTXT, 'columnwise
', ' ', nqc2, k,
459 $ work( ipw ), lw, ivrow, mycol )
465 mydist = mod( myrow-icrow1+nprow, nprow )
466 itop =
max( 0, mydist * mbc - iroffc1 )
468 iiend = iic1 + mpc1 - 1
469 iinxt =
min( iceil( iibeg, mbc ) * mbc, iiend )
472 IF( iibeg.LE.iinxt )
THEN
473 CALL pbdmatadd( ictxt,
'Transpose', iinxt-iibeg+1, nqc2,
474 $ -one, work( ipw+itop ), lw, one,
475 $ c( iibeg+(jjc1-1)*ldc ), ldc )
476 mydist = mydist + nprow
477 itop = mydist * mbc - iroffc1
479 iinxt =
min( iinxt+mbc, iiend )
488 CALL dgemm(
'No transpose',
'Transpose', mpc2, nqc2, k, -one,
489 $ work( ipv ), lv, work( ipw ), lw, one,
507 CALL pb_topget( ictxt,
'Broadcast',
'Columnwise', colbtop )
508 IF( myrow.EQ.ivrow )
THEN
509 CALL dgebs2d( ictxt,
'Columnwise', colbtop, k, nqc2,
512 $
CALL dtrbs2d( ictxt,
'Columnwise', colbtop,
'Lower',
513 $
'Non unit', k, k, t, mbv )
514 CALL dlamov(
'All', k, nqc2, v( ioffv ), ldv, work( ipv ),
517 CALL dgebr2d( ictxt,
'Columnwise', colbtop, k, nqc2,
518 $ work( ipv ), lv, ivrow, mycol )
520 $
CALL dtrbr2d( ictxt,
'Columnwise', colbtop,
',
521 $ 'non unit
', K, K, T, MBV, IVROW, MYCOL )
528 CALL DGEMM( 'no transpose
', 'transpose
', MPC2, K, NQC2,
529 $ ONE, C( IOFFC2 ), LDC, WORK( IPV ), LV, ZERO,
532 CALL DLASET( 'all
', MPC2, K, ZERO, ZERO, WORK( IPW ), LW )
538 MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL )
539 ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 )
541 JJEND = JJC1 + NQC1 - 1
542 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND )
545.LE.
IF( JJBEGJJNXT ) THEN
546 CALL PBDMATADD( ICTXT, 'no
', MPC2,
547 $ JJNXT-JJBEG+1, ONE,
548 $ C( IIC1+(JJBEG-1)*LDC ), LDC, ONE,
549 $ WORK( IPW+ILEFT*LW ), LW )
550 MYDIST = MYDIST + NPCOL
551 ILEFT = MYDIST * NBC - ICOFFC1
553 JJNXT = MIN( JJNXT+NBC, JJEND )
558 CALL DGSUM2D( ICTXT, 'rowwise
', ' ', MPC2, K, WORK( IPW ),
563.EQ.
IF( MYCOLIVCOL ) THEN
564 CALL DTRMM( 'right
', 'lower
', TRANS, 'non unit
', MPC2, K,
565 $ ONE, T, MBV, WORK( IPW ), LW )
566 CALL DGEBS2D( ICTXT, 'rowwise
', ' ', MPC2, K, WORK( IPW ),
569 CALL DGEBR2D( ICTXT, 'rowwise
', ' ', MPC2, K, WORK( IPW ),
576 MYDIST = MOD( MYCOL-ICCOL1+NPCOL, NPCOL )
577 ILEFT = MAX( 0, MYDIST * NBC - ICOFFC1 )
579 JJEND = JJC1 + NQC1 - 1
580 JJNXT = MIN( ICEIL( JJBEG, NBC ) * NBC, JJEND )
583.LE.
IF( JJBEGJJNXT ) THEN
584 CALL PBDMATADD( ICTXT, 'no transpose
', MPC2,
585 $ JJNXT-JJBEG+1, -ONE,
586 $ WORK( IPW+ILEFT*LW ), LW, ONE,
587 $ C( IIC1+(JJBEG-1)*LDC ), LDC )
588 MYDIST = MYDIST + NPCOL
589 ILEFT = MYDIST * NBC - ICOFFC1
591 JJNXT = MIN( JJNXT+NBC, JJEND )
601 $ CALL DGEMM( 'no transpose
', 'no transpose
', MPC2, NQC2, K,
602 $ -ONE, WORK( IPW ), LW, WORK( IPV ), LV, ONE,