1 SUBROUTINE psgbtrf( N, BWL, BWU, A, JA, DESCA, IPIV, AF, LAF,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
12 INTEGER DESCA( * ), IPIV( * )
13 REAL A( * ), AF( * ), WORK( * )
357 parameter( one = 1.0e+0 )
359 parameter( zero = 0.0e+0 )
361 parameter( int_one = 1 )
362 INTEGER DESCMULT, BIGNUM
363 parameter( descmult = 100, bignum = descmult*descmult )
364 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
365 $ lld_, mb_, m_, nb_, n_, rsrc_
366 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
367 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
368 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
371 INTEGER APTR, BBPTR, BIPTR, BM, BM1, BM2, , BN, BW,
372 $ csrc, dbptr, first_proc, i, i1, i2, ictxt,
373 $ ictxt_new, ictxt_save, idum3, j, ja_new, jptr,
374 $ l, laf_min, lbwl, lbwu, ldb, ldbb, llda, lm,
375 $ lmj, ln, lnj, lptr, mycol, myrow, my_num_cols,
376 $ nb, neicol, np, npact, npcol, nprow, npstr,
377 $ np_save, nrhs, odd_n, odd_size, odptr, ofst,
382 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
412 IF( return_code.NE.0 )
THEN
418 ictxt = desca_1xp( 2 )
419 csrc = desca_1xp( 5 )
421 llda = desca_1xp( 6 )
422 store_n_a = desca_1xp( 3 )
432 IF( lwork.LT.-1 )
THEN
434 ELSE IF( lwork.EQ.-1 )
THEN
444 IF( n+ja-1.GT.store_n_a )
THEN
448 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
452 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
456 IF( llda.LT.( 2*bwl+2*bwu+1 ) )
THEN
468 IF( nprow.NE.1 )
THEN
472 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
474 CALL pxerbla( ictxt,
'PSGBTRF, D&C alg.: only 1 block per proc'
479 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.( bwl+bwu+1 ) ) )
THEN
481 CALL pxerbla( ictxt,
'PSGBTRF, D&C alg.: NB too small', -info )
488 laf_min = ( nb+bwu )*( bwl+bwu ) + 6*( bwl+bwu )*( bwl+2*bwu )
490 IF( laf.LT.laf_min )
THEN
494 CALL pxerbla( ictxt,
'PSGBTRF: auxiliary storage error ',
503 work( 1 ) = work_size_min
505 IF( lwork.LT.work_size_min )
THEN
506 IF( lwork.NE.-1 )
THEN
509 work( 1 ) = work_size_min
517 PARAM_CHECK( 9, 1 ) = DESCA( 5 )
518 PARAM_CHECK( 8, 1 ) = DESCA( 4 )
519 PARAM_CHECK( 7, 1 ) = DESCA( 3 )
520 PARAM_CHECK( 6, 1 ) = DESCA( 1 )
521 PARAM_CHECK( 5, 1 ) = JA
522 PARAM_CHECK( 4, 1 ) = BWU
523 PARAM_CHECK( 3, 1 ) = BWL
524 PARAM_CHECK( 2, 1 ) = N
525 PARAM_CHECK( 1, 1 ) = IDUM3
527 PARAM_CHECK( 9, 2 ) = 605
528 PARAM_CHECK( 8, 2 ) = 604
529 PARAM_CHECK( 7, 2 ) = 603
530 PARAM_CHECK( 6, 2 ) = 601
531 PARAM_CHECK( 5, 2 ) = 5
532 PARAM_CHECK( 4, 2 ) = 3
533 PARAM_CHECK( 3, 2 ) = 2
534 PARAM_CHECK( 2, 2 ) = 1
535 PARAM_CHECK( 1, 2 ) = 11
543.LT.
ELSE IF( INFO-DESCMULT ) THEN
546 INFO = -INFO*DESCMULT
551 CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ),
557.EQ.
IF( INFOBIGNUM ) THEN
559.EQ.
ELSE IF( MOD( INFO, DESCMULT )0 ) THEN
560 INFO = -INFO / DESCMULT
566 CALL PXERBLA( ICTXT, 'psgbtrf', -INFO )
579 PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) )
581.LT.
IF( ( MYCOL-CSRC )( JA-PART_OFFSET-1 ) / NB ) THEN
582 PART_OFFSET = PART_OFFSET + NB
585.LT.
IF( MYCOLCSRC ) THEN
586 PART_OFFSET = PART_OFFSET - NB
595 FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL )
599 JA_NEW = MOD( JA-1, NB ) + 1
604 NP = ( JA_NEW+N-2 ) / NB + 1
608 CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC,
615 DESCA_1XP( 2 ) = ICTXT_NEW
619 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
623.LT.
IF( MYROW0 ) THEN
636 MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL )
640.EQ.
IF( MYCOL0 ) THEN
641 PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE )
642 MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE )
647 OFST = PART_OFFSET*LLDA
651 ODD_SIZE = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL )
660 DO 30 J = 1, ODD_SIZE
662 A( I+( J-1 )*LLDA ) = ZERO
676.LE.
IF( MYCOLNPCOL-2 ) THEN
680 BIPTR = ( NB-BW )*LLDA + 2*BW + 1
682 CALL STRSD2D( ICTXT, 'u
', 'n
',
683 $ MIN( BW, BWU+NUMROC( N, NB, MYCOL+1, 0,
684 $ NPCOL ) ), BW, A( BIPTR ), LLDA-1, 0, MYCOL+1 )
699.NE.
IF( MYCOL0 ) THEN
709.NE.
IF( MYCOLNPCOL-1 ) THEN
712.NE.
ELSE IF( MYCOL0 ) THEN
714 LN = MAX( ODD_SIZE-BW, 0 )
722 CALL SGBTRF( LM, LN, LBWL, LBWU, A( APTR ), LLDA, IPIV, INFO )
725 INFO = INFO + NB*MYCOL
737 DO 40 J = MAX( LN-BW+1, 1 ), LN
739 LMJ = MIN( LBWL, LM-J )
740 LNJ = MIN( BW, J+BW-LN+APTR-1 )
744 JPTR = J - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA
753 LPTR = L - ( LN+1 ) + 2*BW + 1 - LBWL + LN*LLDA
755 CALL SSWAP( LNJ, A( LPTR ), LDB, A( JPTR ), LDB )
762 LPTR = BW + 1 + APTR + ( J-1 )*LLDA
764 CALL SGER( LMJ, LNJ, -ONE, A( LPTR ), 1, A( JPTR ), LDB,
774.GT.
IF( MYCOL0 ) THEN
775 CALL STRRV2D( ICTXT, 'u
', 'n
', MIN( BW, LM ), BW, AF( 1 ), BW,
780 DO 60 I2 = 1, MIN( BW, LM )
781 DO 50 I1 = I2 + 1, BW
782 AF( I1+( I2-1 )*BW ) = AF( I2+( I1-1 )*BW )
783 AF( I2+( I1-1 )*BW ) = ZERO
791 LMJ = MIN( LBWL, LM-J )
795 CALL SSWAP( BW, AF( ( L-1 )*BW+1 ), 1,
796 $ AF( ( J-1 )*BW+1 ), 1 )
799 LPTR = BW + 1 + APTR + ( J-1 )*LLDA
801 CALL SGER( NRHS, LMJ, -ONE, AF( ( J-1 )*BW+1 ), 1,
802 $ A( LPTR ), 1, AF( J*BW+1 ), BW )
817.NE.
IF( MYCOLNPCOL-1 ) THEN
821 BM = MIN( BW, ODD_SIZE ) + BWU
822 BN = MIN( BW, ODD_SIZE )
828 BBPTR = ( NB+BWU )*BW + 1
834 DBPTR = BW + 1 + LBWU + LN*LLDA
836 CALL SLAMOV( 'g
', BM, BN, A( DBPTR ), LLDA-1, AF( BBPTR+BW*LDBB ),
842 DO 90 I = J + LBWL, BM - 1
843 AF( BBPTR+BW*LDBB+( J-1 )*LDBB+I ) = ZERO
847.NE.
IF( MYCOL0 ) THEN
851 ODPTR = ( LM-BM )*BW + 1
852 CALL SLATCPY( 'g
', BW, BM, AF( ODPTR ), BW,
853 $ AF( BBPTR+2*BW*LDBB ), LDBB )
856.EQ.
IF( NPCOL1 ) THEN
860 CALL SGETRF( N-LN, N-LN, AF( BBPTR+BW*LDBB ), LDBB,
861 $ IPIV( LN+1 ), INFO )
882.EQ.
IF( MOD( MYCOL, NPSTR )0 ) THEN
887.EQ.
IF( MOD( MYCOL, 2*NPSTR )0 ) THEN
891 NEICOL = MYCOL + NPSTR
893.LT.
IF( NEICOL / NPSTRNPACT-1 ) THEN
895.EQ.
ELSE IF( NEICOL / NPSTRNPACT-1 ) THEN
896 ODD_N = NUMROC( N, NB, NPCOL-1, 0, NPCOL )
897 BMN = MIN( BW, ODD_N ) + BWU
909.LE.
IF( NEICOL / NPSTRNPACT-1 ) THEN
911 CALL SGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
914 CALL SGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BM ), LDBB, 0,
917.EQ.
IF( NPACT2 ) THEN
921 CALL SLAMOV( 'g
', BMN, BW, AF( BBPTR+BM ), LDBB,
922 $ AF( BBPTR+2*BW*LDBB+BM ), LDBB )
933 NEICOL = MYCOL - NPSTR
935.EQ.
IF( NEICOL0 ) THEN
944 CALL SGESD2D( ICTXT, BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB, 0,
947 CALL SLAMOV( 'g
', BM, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
948 $ AF( BBPTR+BMN ), LDBB )
950 DO 130 J = BBPTR + 2*BW*LDBB, BBPTR + 3*BW*LDBB - 1, LDBB
951 DO 120 I = 0, LDBB - 1
956 CALL SGERV2D( ICTXT, BMN, 2*BW, AF( BBPTR+BW*LDBB ), LDBB,
959.EQ.
IF( NPACT2 ) THEN
963 CALL SLAMOV( 'g
', BM, BW, AF( BBPTR+BMN ), LDBB,
964 $ AF( BBPTR+2*BW*LDBB+BMN ), LDBB )
971.NE.
IF( NPACT2 ) THEN
973 CALL SGETRF( BM+BMN, BW, AF( BBPTR+BW*LDBB ), LDBB,
974 $ IPIV( LN+1 ), INFO )
978 DO 150 J = BBPTR, BBPTR + BW*LDBB - 1, LDBB
979 DO 140 I = 0, BM1 - 1
984 CALL SLASWP( BW, AF( BBPTR ), LDBB, 1, BW, IPIV( LN+1 ), 1 )
986 CALL STRSM( 'l
', 'l
', 'n
', 'u
', BW, BW, ONE,
987 $ AF( BBPTR+BW*LDBB ), LDBB, AF( BBPTR ), LDBB )
991 CALL SGEMM( 'n
', 'n', bm+bmn-bw, bw, bw, -one,
992 $ af( bbptr+bw*ldbb+bw ), ldbb, af( bbptr ), ldbb,
993 $ one, af( bbptr+bw ), ldbb )
999 CALL slaswp( nrhs, af( bbptr+2*bw*ldbb ), ldbb, 1, bw,
1002 CALL strsm(
'L',
'L',
'N',
'U', bw, nrhs, one,
1004 $ af( bbptr+2*bw*ldbb ), ldbb )
1008 CALL sgemm(
'N',
'N', bm+bmn-bw, nrhs, bw, -one,
1009 $ af( bbptr+bw*ldbb+bw ), ldbb,
1010 $ af( bbptr+2*bw*ldbb ), ldbb, one,
1011 $ af( bbptr+2*bw*ldbb+bw ), ldbb )
1016 IF( mod( mycol, 2*npstr ).EQ.0 )
THEN
1025 CALL slamov(
'G', bm, bw, af( bbptr+bw ), ldbb,
1026 $ af( bbptr+bw*ldbb ), ldbb )
1027 CALL slamov(
'G', bm, bw, af( bbptr+2*bw*ldbb+bw ), ldbb,
1028 $ af( bbptr+2*bw*ldbb ), ldbb )
1032 DO 170 j = 0, bw - 1
1033 DO 160 i = 0, bm - 1
1034 af( bbptr+2*bw*ldbb+bw+j*ldbb+i ) = zero
1044 CALL sgetrf( bm+bmn, bm+bmn, af( bbptr+bw*ldbb ), ldbb,
1045 $ ipiv( ln+1 ), info )
1054 npact = ( npact+1 ) / 2
1067 IF( ictxt.NE.ictxt_new )
THEN
1082 work( 1 ) = work_size_min
1086 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info,
1089 IF( mycol.EQ.0 )
THEN
1090 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1092 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )