1 SUBROUTINE pdgbtrf( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
12 INTEGER DESCA( * ), IPIV( * )
13 DOUBLE PRECISION A( * ), AF( * ), WORK( * )
357 parameter( one = 1.0d+0 )
358 DOUBLE PRECISION ZERO
359 parameter( zero = 0.0d+0 )
361 parameter( int_one = 1 )
362 INTEGER DESCMULT, BIGNUM
363 parameter( descmult = 100, bignum = descmult*descmult )
364 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
365 $ lld_, mb_, m_, nb_, n_, rsrc_
366 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
367 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
368 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
371 INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, BMN, BN, BW,
372 $ csrc, dbptr, first_proc, i, i1, i2, ictxt,
373 $ ictxt_new, ictxt_save, idum3, j, ja_new, jptr,
374 $ l, laf_min, lbwl, lbwu, ldb, ldbb, llda, lm,
375 $ lmj, ln, lnj, lptr, mycol, myrow, my_num_cols,
376 $ nb, neicol, np, npact, npcol, nprow, npstr,
377 $ np_save, nrhs, odd_n, odd_size, odptr, ofst,
378 $ part_offset, part_size, return_code, store_n_a,
382 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
412 IF( return_code.NE.0 )
THEN
418 ictxt = desca_1xp( 2 )
419 csrc = desca_1xp( 5 )
421 llda = desca_1xp( 6 )
422 store_n_a = desca_1xp( 3 )
432 IF( lwork.LT.-1 )
THEN
434 ELSE IF( lwork.EQ.-1 )
THEN
444 IF( n+ja-1.GT.store_n_a )
THEN
448 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
452 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
456 IF( llda.LT.( 2*bwl+2*bwu+1 ) )
THEN
468 IF( nprow.NE.1 )
THEN
472 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
474 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: only 1 block per proc'
479 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.( bwl+bwu+1 ) ) )
THEN
481 CALL pxerbla( ictxt,
'PDGBTRF, D&C alg.: NB too small', -info )
488 laf_min = ( nb+bwu )*( bwl+bwu ) + 6*( bwl+bwu )*( bwl+2*bwu )
490 IF( laf.LT.laf_min )
THEN
494 CALL pxerbla( ictxt,
'PDGBTRF: auxiliary storage error ',
503 work( 1 ) = work_size_min
505 IF( lwork.LT.work_size_min )
THEN
506 IF( lwork.NE.-1 )
THEN
509 work( 1 ) = work_size_min
510 CALL pxerbla( ictxt,
'PDGBTRF: worksize error ', -info )
517 param_check( 9, 1 ) = desca( 5 )
518 param_check( 8, 1 ) = desca( 4 )
519 param_check( 7, 1 ) = desca( 3 )
520 param_check( 6, 1 ) = desca( 1 )
521 param_check( 5, 1 ) = ja
522 param_check( 4, 1 ) = bwu
523 param_check( 3, 1 ) = bwl
524 param_check( 2, 1 ) = n
525 param_check( 1, 1 ) = idum3
527 param_check( 9, 2 ) = 605
528 param_check( 8, 2 ) = 604
529 param_check( 7, 2 ) = 603
530 param_check( 6, 2 ) = 601
531 param_check( 5, 2 ) = 5
532 param_check( 4, 2 ) = 3
533 param_check( 3, 2 ) = 2
534 param_check( 2, 2 ) = 1
535 param_check( 1, 2 ) = 11
543 ELSE IF( info.LT.-descmult )
THEN
546 info = -info*descmult
551 CALL globchk( ictxt, 9, param_check, 9, param_check( 1, 3 ),
557 IF( info.EQ.bignum )
THEN
559 ELSE IF( mod( info, descmult ).EQ.0 )
THEN
560 info = -info / descmult
566 CALL pxerbla( ictxt,
'PDGBTRF', -info )
579 part_offset = nb*( ( ja-1 ) / ( npcol*nb ) )
581 IF( ( mycol-csrc ).LT.( ja-part_offset-1 ) / nb )
THEN
582 part_offset = part_offset + nb
585 IF( mycol.LT.csrc )
THEN
586 part_offset = part_offset - nb
595 first_proc = mod( ( ja-1 ) / nb+csrc, npcol )
599 ja_new = mod( ja-1, nb ) + 1
604 np = ( ja_new+n-2 ) / nb + 1
608 CALL reshape( ictxt, int_one, ictxt_new, int_one, first_proc,
615 desca_1xp( 2 ) = ictxt_new
623 IF( myrow.LT.0 )
THEN
636 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
640 IF( mycol.EQ.0 )
THEN
641 part_offset = part_offset + mod( ja_new-1, part_size )
642 my_num_cols = my_num_cols - mod( ja_new-1, part_size )
647 ofst = part_offset*llda
651 odd_size = numroc( n, part_size, mycol, 0, npcol )
660 DO 30 j = 1, odd_size
662 a( i+( j-1 )*llda ) = zero
676 IF( mycol.LE.npcol-2 )
THEN
680 biptr = ( nb-bw )*llda + 2*bw + 1
682 CALL dtrsd2d( ictxt, 'u
', 'n
',
683 $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0,
684 $ NPCOL ) ), BW, A( BIPTR ), LLDA-1, 0, MYCOL+1 )
699.NE.
IF( MYCOL0 ) THEN
709.NE.
IF( MYCOLNPCOL-1 ) THEN
712.NE.
ELSE IF( MYCOL0 ) THEN
714 LN = MAX( ODD_SIZE-BW, 0 )
722 CALL DGBTRF( LM, LN, LBWL, LBWU, A( APTR ), LLDA, IPIV, INFO )
725 INFO = INFO + NB*MYCOL
737 DO 40 J = MAX( LN-BW+1, 1 ), LN
739 LMJ = MIN( LBWL, LM-J )
740 LNJ = MIN( BW, J+BW-LN+APTR-1 )
744 JPTR = J - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA
753 LPTR = L - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA
755 CALL DSWAP( LNJ, A( LPTR ), LDB, A( JPTR ), LDB )
762 LPTR = BW + 1 + APTR + ( J-1 )*LLDA
764 CALL DGER( LMJ, LNJ, -ONE, A( LPTR ), 1, A( JPTR ), LDB,
774.GT.
IF( MYCOL0 ) THEN
775 CALL DTRRV2D( ICTXT, 'u
', 'n
', MIN( BW, LM ), BW, AF( 1 ), BW,
780 DO 60 I2 = 1, MIN( BW, LM )
781 DO 50 I1 = I2 + 1, BW
782 AF( I1+( I2-1 )*BW ) = AF( I2+( I1-1 )*BW )
783 AF( I2+( I1-1 )*BW ) = ZERO
791 LMJ = MIN( LBWL, LM-J )
795 CALL DSWAP( BW, AF( ( L-1 )*BW+1 ), 1,
796 $ AF( ( J-1 )*BW+1 ), 1 )
799 LPTR = BW + 1 + APTR + ( J-1 )*LLDA
801 CALL DGER( NRHS, LMJ, -ONE, AF( ( J-1 )*BW+1 ), 1,
802 $ A( LPTR ), 1, AF( J*BW+1 ), BW )
817.NE.
IF( MYCOLNPCOL-1 ) THEN
821 BM = MIN( BW, ODD_SIZE ) + BWU
822 BN = MIN( BW, ODD_SIZE )
828 BBPTR = ( NB+BWU )*BW + 1
834 DBPTR = BW + 1 + LBWU + LN*LLDA
836 CALL DLAMOV( 'g
', BM, BN, A( DBPTR ), LLDA-1, AF( BBPTR+BW*LDBB ),
842 DO 90 I = J + LBWL, BM - 1
843 AF( BBPTR+BW*LDBB+( J-1 )*LDBB+I ) = ZERO
847.NE.
IF( MYCOL0 ) THEN
851 ODPTR = ( LM-BM )*BW + 1
852 CALL DLATCPY( 'g
', BW, BM, AF( ODPTR ), BW,
853 $ AF( BBPTR+2*BW*LDBB ), LDBB )
856.EQ.
IF( NPCOL1 ) THEN
860 CALL DGETRF( N-LN, N-LN, AF( BBPTR+BW*LDBB ), LDBB,
861 $ IPIV( LN+1 ), INFO )
882.EQ.
IF( MOD( MYCOL, NPSTR )0 ) THEN
887.EQ.
IF( MOD( MYCOL, 2*NPSTR )0 ) THEN
891 NEICOL = MYCOL + NPSTR
893.LT.
IF( NEICOL / NPSTRNPACT-1 ) THEN
895.EQ.
ELSE IF( NEICOL / NPSTRNPACT-1 ) THEN
896 ODD_N = NUMROC( N, NB, NPCOL-1, 0, NPCOL )
897 BMN = MIN( BW, ODD_N ) + BWU
909.LE.
IF( NEICOL / NPSTRNPACT-1 ) THEN
911 CALL DGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
914 CALL DGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BM ), LDBB, 0,
917.EQ.
IF( NPACT2 ) THEN
921 CALL DLAMOV( 'g
', BMN, BW, AF( BBPTR+BM ), LDBB,
922 $ AF( BBPTR+2*BW*LDBB+BM ), LDBB )
933 NEICOL = MYCOL - NPSTR
935.EQ.
IF( NEICOL0 ) THEN
944 CALL DGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, 0,
947 CALL DLAMOV( 'g
', BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
948 $ AF( BBPTR+BMN ), LDBB )
950 DO 130 J = BBPTR + 2*BW*LDBB, BBPTR + 3*BW*LDBB - 1, LDBB
951 DO 120 I = 0, LDBB - 1
956 CALL DGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
959.EQ.
IF( NPACT2 ) THEN
963 CALL DLAMOV( 'g
', BM, BW, AF( BBPTR+BMN ), LDBB,
964 $ AF( BBPTR+2*BW*LDBB+BMN ), LDBB )
971.NE.
IF( NPACT2 ) THEN
973 CALL DGETRF( BM+BMN, BW, AF( BBPTR+BW*LDBB ), LDBB,
974 $ IPIV( LN+1 ), INFO )
978 DO 150 J = BBPTR, BBPTR + BW*LDBB - 1, LDBB
979 DO 140 I = 0, BM1 - 1
984 CALL DLASWP( BW, AF( BBPTR ), LDBB, 1, BW, IPIV( LN+1 ), 1 )
986 CALL DTRSM( 'l
', 'l
', 'n
', 'u
', BW, BW, ONE,
987 $ AF( BBPTR+BW*LDBB ), LDBB, AF( BBPTR ), LDBB )
991 CALL DGEMM( 'n
', 'n
', BM+BMN-BW, BW, BW, -ONE,
992 $ AF( BBPTR+BW*LDBB+BW ), LDBB, AF( BBPTR ), LDBB,
993 $ ONE, AF( BBPTR+BW ), LDBB )
999 CALL DLASWP( NRHS, AF( BBPTR+2*BW*LDBB ), LDBB, 1, BW,
1002 CALL DTRSM( 'l
', 'l
', 'n
', 'u
', BW, NRHS, ONE,
1003 $ AF( BBPTR+BW*LDBB ), LDBB,
1004 $ AF( BBPTR+2*BW*LDBB ), LDBB )
1008 CALL DGEMM( 'n
', 'n', bm+bmn-bw, nrhs, bw, -one,
1009 $ af( bbptr+bw*ldbb+bw ), ldbb,
1010 $ af( bbptr+2*bw*ldbb ), ldbb, one,
1011 $ af( bbptr+2*bw*ldbb+bw ), ldbb )
1016 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
1025 CALL dlamov(
'G', bm, bw, af( bbptr+bw ), ldbb,
1026 $ af( bbptr+bw*ldbb ), ldbb )
1027 CALL dlamov(
'G', bm, bw, af( bbptr+2*bw*ldbb+bw ), ldbb,
1028 $ af( bbptr+2*bw*ldbb ), ldbb )
1032 DO 170 j = 0, bw - 1
1033 DO 160 i = 0, bm - 1
1034 af( bbptr+2*bw*ldbb+bw+j*ldbb+i ) = zero
1044 CALL dgetrf( bm+bmn, bm+bmn, af( bbptr+bw*ldbb ), ldbb,
1045 $ ipiv( ln+1 ), info )
1054 npact = ( npact+1 ) / 2
1067 IF( ictxt.NE.ictxt_new )
THEN
1082 work( 1 ) = work_size_min
1086 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
1089 IF( mycol.EQ.0 )
THEN
1090 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1092 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )