1 SUBROUTINE psdbtrf( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK,
9 INTEGER BWL, BWU, INFO, JA, LAF, LWORK, N
13 REAL A( * ), ( * ), WORK( * )
343 parameter( one = 1.0e+0 )
345 parameter( zero = 0.0e+0 )
347 parameter( int_one = 1 )
348 INTEGER DESCMULT, BIGNUM
349 parameter( descmult = 100, bignum = descmult*descmult )
350 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
351 $ lld_, mb_, m_, nb_, n_, rsrc_
352 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
353 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
354 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
357 INTEGER , CSRC, FIRST_PROC, I, I1, I2, ICTXT,
358 $ ictxt_new, ictxt_save, idum3, ja_new, laf_min,
359 $ level_dist, llda, max_bw, mbw2, mycol, myrow,
360 $ my_num_cols, nb, next_tri_size_m,
361 $ next_tri_size_n, np, npcol, nprow, np_save,
362 $ odd_size, ofst, part_offset, part_size,
363 $ prev_tri_size_m, prev_tri_size_n, return_code,
364 $ store_n_a, up_prev_tri_size_m,
365 $ up_prev_tri_size_n, work_size_min, work_u
368 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
374 $ strsd2d,
globchk, igamx2d, igebr2d, igebs2d,
397 IF( return_code.NE.0 )
THEN
403 ictxt = desca_1xp( 2 )
404 csrc = desca_1xp( 5 )
406 llda = desca_1xp( 6 )
407 store_n_a = desca_1xp( 3 )
414 max_bw =
max( bwl, bwu )
422 IF( lwork.LT.-1 )
THEN
424 ELSE IF( lwork.EQ.-1 )
THEN
434 IF( n+ja-1.GT.store_n_a )
THEN
438 IF( ( bwl.GT.n-1 ) .OR. ( bwl.LT.0 ) )
THEN
442 IF( ( bwu.GT.n-1 ) .OR. ( bwu.LT.0 ) )
THEN
446 IF( llda.LT.( bwl+bwu+1 ) )
THEN
456 IF( nprow.NE.1 )
THEN
460 IF( n.GT.np*nb-mod( ja-1, nb ) )
THEN
462 CALL pxerbla( ictxt,
'PSDBTRF, D&C alg.: only 1 block per proc'
467 IF( ( ja+n-1.GT.nb ) .AND. ( nb.LT.2*
max( bwl, bwu ) ) )
THEN
476 LAF_MIN = NB*( BWL+BWU ) + 6*MAX( BWL, BWU )*MAX( BWL, BWU )
478.LT.
IF( LAFLAF_MIN ) THEN
482 CALL PXERBLA( ICTXT, 'psdbtrf: auxiliary storage error
',
489 WORK_SIZE_MIN = MAX( BWL, BWU )*MAX( BWL, BWU )
491 WORK( 1 ) = WORK_SIZE_MIN
493.LT.
IF( LWORKWORK_SIZE_MIN ) THEN
494.NE.
IF( LWORK-1 ) THEN
496 CALL PXERBLA( ICTXT, 'psdbtrf: worksize error
', -INFO )
503 PARAM_CHECK( 9, 1 ) = DESCA( 5 )
504 PARAM_CHECK( 8, 1 ) = DESCA( 4 )
505 PARAM_CHECK( 7, 1 ) = DESCA( 3 )
506 PARAM_CHECK( 6, 1 ) = DESCA( 1 )
507 PARAM_CHECK( 5, 1 ) = JA
508 PARAM_CHECK( 4, 1 ) = BWU
509 PARAM_CHECK( 3, 1 ) = BWL
510 PARAM_CHECK( 2, 1 ) = N
511 PARAM_CHECK( 1, 1 ) = IDUM3
513 PARAM_CHECK( 9, 2 ) = 605
514 PARAM_CHECK( 8, 2 ) = 604
515 PARAM_CHECK( 7, 2 ) = 603
516 PARAM_CHECK( 6, 2 ) = 601
517 PARAM_CHECK( 5, 2 ) = 5
518 PARAM_CHECK( 4, 2 ) = 3
519 PARAM_CHECK( 3, 2 ) = 2
520 PARAM_CHECK( 2, 2 ) = 1
521 PARAM_CHECK( 1, 2 ) = 10
529.LT.
ELSE IF( INFO-DESCMULT ) THEN
532 INFO = -INFO*DESCMULT
537 CALL GLOBCHK( ICTXT, 9, PARAM_CHECK, 9, PARAM_CHECK( 1, 3 ),
543.EQ.
IF( INFOBIGNUM ) THEN
545.EQ.
ELSE IF( MOD( INFO, DESCMULT )0 ) THEN
546 INFO = -INFO / DESCMULT
552 CALL PXERBLA( ICTXT, 'psdbtrf', -INFO )
565 PART_OFFSET = NB*( ( JA-1 ) / ( NPCOL*NB ) )
567.LT.
IF( ( MYCOL-CSRC )( JA-PART_OFFSET-1 ) / NB ) THEN
568 PART_OFFSET = PART_OFFSET + NB
571.LT.
IF( MYCOLCSRC ) THEN
572 PART_OFFSET = PART_OFFSET - NB
581 FIRST_PROC = MOD( ( JA-1 ) / NB+CSRC, NPCOL )
585 JA_NEW = MOD( JA-1, NB ) + 1
590 NP = ( JA_NEW+N-2 ) / NB + 1
594 CALL RESHAPE( ICTXT, INT_ONE, ICTXT_NEW, INT_ONE, FIRST_PROC,
601 DESCA_1XP( 2 ) = ICTXT_NEW
605 CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
609.LT.
IF( MYROW0 ) THEN
622 MY_NUM_COLS = NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL )
626.EQ.
IF( MYCOL0 ) THEN
627 PART_OFFSET = PART_OFFSET + MOD( JA_NEW-1, PART_SIZE )
628 MY_NUM_COLS = MY_NUM_COLS - MOD( JA_NEW-1, PART_SIZE )
633 OFST = PART_OFFSET*LLDA
637 ODD_SIZE = MY_NUM_COLS
638.LT.
IF( MYCOLNP-1 ) THEN
639 ODD_SIZE = ODD_SIZE - MAX_BW
644 WORK_U = BWU*ODD_SIZE + 3*MBW2
655 DO 20 I = 1, WORK_SIZE_MIN
669.GT.
IF( MYCOL0 ) THEN
670 PREV_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0,
672 PREV_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL-1, 0,
676.GT.
IF( MYCOL0 ) THEN
677 UP_PREV_TRI_SIZE_M = MIN( BWU,
678 $ NUMROC( N, PART_SIZE, MYCOL, 0, NPCOL ) )
679 UP_PREV_TRI_SIZE_N = MIN( BWU,
680 $ NUMROC( N, PART_SIZE, MYCOL-1, 0,
684.LT.
IF( MYCOLNPCOL-1 ) THEN
685 NEXT_TRI_SIZE_M = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL+1, 0,
687 NEXT_TRI_SIZE_N = MIN( BWL, NUMROC( N, PART_SIZE, MYCOL, 0,
691.LT.
IF( MYCOLNP-1 ) THEN
697 CALL STRSD2D( ICTXT, 'u
', 'n
', NEXT_TRI_SIZE_M,
698 $ NEXT_TRI_SIZE_N, A( OFST+( MY_NUM_COLS-BWL )*
699 $ LLDA+( BWL+BWU+1 ) ), LLDA-1, 0, MYCOL+1 )
706 CALL SDBTRF( ODD_SIZE, ODD_SIZE, BWL, BWU, A( OFST+1 ), LLDA,
715.LT.
IF( MYCOLNP-1 ) THEN
722 CALL SLATCPY( 'u
', BWL, BWL, A( ( OFST+( BWL+BWU+1 )+
723 $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1,
724 $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW )
725 CALL SLAMOV( 'l
', BWU, BWU, A( ( OFST+1+ODD_SIZE*LLDA ) ),
726 $ LLDA-1, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-
731 CALL STBTRS( 'l
', 'n
', 'u
', BWU, BWL, BWU,
732 $ A( OFST+BWU+1+( ODD_SIZE-BWU )*LLDA ), LLDA,
733 $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+MAX_BW-BWU ),
738 CALL STBTRS( 'u
', 't
', 'n
', BWL, BWU, BWL,
739 $ A( OFST+1+( ODD_SIZE-BWL )*LLDA ), LLDA,
740 $ AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-BWL ), MAX_BW,
746 CALL SLATCPY( 'l
', BWL, BWL, AF( ODD_SIZE*BWU+2*MBW2+1+MAX_BW-
747 $ BWL ), MAX_BW, A( ( OFST+( BWL+BWU+1 )+
748 $ ( ODD_SIZE-BWL )*LLDA ) ), LLDA-1 )
752 CALL SLAMOV( 'l
', BWU, BWU, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1+
753 $ MAX_BW-BWU ), MAX_BW, A( ( OFST+1+ODD_SIZE*
764 CALL SGEMM( 't',
'N', max_bw, max_bw, max_bw, -one,
765 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
767 $ a( ofst+odd_size*llda+1+bwu ), llda-1 )
776 IF( mycol.NE.0 )
THEN
786 CALL strrv2d( ictxt,
'U',
'N', prev_tri_size_m,
787 $ prev_tri_size_n, af( work_u+1 ), bwl, 0,
797 DO 40 i2 = i1 + 1, bwl
798 af( work_u+i2+( i1-1 )*bwl ) = af( work_u+i1+( i2-1 )*
800 af( work_u+i1+( i2-1 )*bwl ) = zero
804 DO 60 i1 = 2, odd_size
805 i2 =
min( i1-1, bwl )
806 CALL sgemv(
'N', bwl, i2, -one,
807 $ af( work_u+1+( i1-1-i2 )*bwl ), bwl,
808 $ a( ofst+bwu+1+i2+( i1-1-i2 )*llda ), llda-1,
809 $ one, af( work_u+1+( i1-1 )*bwl ), 1 )
818 CALL slamov(
'L', up_prev_tri_size_n, up_prev_tri_size_m,
819 $ a( ofst+1 ), llda-1, af( 1 ), bwu )
821 DO 80 i1 = 1, odd_size
822 i2 =
min( bwu, i1-1 )
823 CALL sgemv(
'N', bwu, i2, -one, af( ( i1-1-i2 )*bwu+1 ),
824 $ bwu, a( ofst+bwu+1-i2+( i1-1 )*llda ), 1,
825 $ one, af( ( i1-1 )*bwu+1 ), 1 )
828 af( ( i1-1 )*bwu+i ) = af( ( i1-1 )*bwu+i ) /
829 $ a( ( i1-1 )*llda+bwu+1 )
839 af( odd_size*bwu+2*mbw2+i ) = zero
842 CALL sgemm(
'N',
'T', bwu, bwl, odd_size, -one, af( 1 ),
843 $ bwu, af( work_u+1 ), bwl, zero,
844 $ af( 1+
max( 0, bwl-bwu )+odd_size*bwu+( 2*max_bw+
845 $
max( 0, bwu-bwl ) )*max_bw ), max_bw )
851 CALL sgesd2d( ictxt, max_bw, max_bw,
852 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0,
856 IF( mycol.LT.np-1 )
THEN
867 CALL slamov(
'N', bwl, bwl,
868 $ af( work_u+( odd_size-bwl )*bwl+1 ), bwl,
869 $ af( ( odd_size )*bwu+1+( max_bw-bwl ) ),
872 CALL strmm(
'R',
'U',
'T',
'N', bwl, bwl, -one,
873 $ a( ( ofst+( bwl+bwu+1 )+( odd_size-bwl )*
874 $ llda ) ), llda-1, af( ( odd_size )*bwu+1+
875 $ ( max_bw-bwl ) ), max_bw )
883 CALL slamov(
'N', bwu, bwu, af( ( odd_size-bwu )*bwu+1 ),
884 $ bwu, af( work_u+( odd_size )*bwl+1+max_bw-
887 CALL strmm(
'R',
'L',
'N',
'N', bwu, bwu, -one,
888 $ a( ( ofst+1+odd_size*llda ) ), llda-1,
889 $ af( work_u+( odd_size )*bwl+1+max_bw-bwu ),
903 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
906 IF( mycol.EQ.0 )
THEN
907 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
909 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )
927 IF( mycol.EQ.npcol-1 )
THEN
935 IF( ( mod( mycol+1, 2 ).EQ.0 ) .AND. ( mycol.GT.0 ) )
THEN
937 CALL sgesd2d( ictxt, max_bw, max_bw, af( odd_size*bwu+1 ),
938 $ max_bw, 0, mycol-1 )
940 CALL sgesd2d( ictxt, max_bw, max_bw,
941 $ af( work_u+odd_size*bwl+1 ), max_bw, 0, mycol-1 )
948 CALL slamov(
'N', max_bw, max_bw, a( ofst+odd_size*llda+bwu+1 ),
949 $ llda-1, af( odd_size*bwu+mbw2+1 ), max_bw )
953 IF( mycol.LT.npcol-1 )
THEN
955 CALL sgerv2d( ictxt, max_bw, max_bw,
956 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0, mycol+1 )
960 CALL saxpy( mbw2, one, af( odd_size*bwu+2*mbw2+1 ), 1,
961 $ af( odd_size*bwu+mbw2+1 ), 1 )
976 IF( mod( ( mycol+1 ) / level_dist, 2 ).NE.0 )
981 IF( mycol-level_dist.GE.0 )
THEN
982 CALL sgerv2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
985 CALL saxpy( mbw2, one, work( 1 ), 1, af( odd_size*bwu+mbw2+1 ),
992 IF( mycol+level_dist.LT.npcol-1 )
THEN
993 CALL sgerv2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
996 CALL saxpy( mbw2, one, work( 1 ), 1, af( odd_size*bwu+mbw2+1 ),
1001 level_dist = level_dist*2
1013 CALL sdbtrf( max_bw, max_bw,
min( max_bw-1, bwl ),
1014 $
min( max_bw-1, bwu ), af( odd_size*bwu+mbw2+1-
1015 $ (
min( max_bw-1, bwu ) ) ), max_bw+1, info )
1017 IF( info.NE.0 )
THEN
1018 info = npcol + mycol
1026 IF( level_dist.EQ.1 )
THEN
1027 comm_proc = mycol + 1
1032 CALL slamov(
'N', max_bw, max_bw, af( odd_size*bwu+1 ), max_bw,
1033 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw )
1035 CALL slamov(
'N', max_bw, max_bw, af( work_u+odd_size*bwl+1 ),
1036 $ max_bw, af( odd_size*bwu+2*mbw2+1 ), max_bw )
1039 comm_proc = mycol + level_dist / 2
1042 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1044 CALL sgerv2d( ictxt, max_bw, max_bw, af( odd_size*bwu+1 ),
1045 $ max_bw, 0, comm_proc )
1047 CALL sgerv2d( ictxt, max_bw, max_bw,
1048 $ af( work_u+odd_size*bwl+1 ), max_bw, 0,
1051 IF( info.EQ.0 )
THEN
1057 CALL stbtrs(
'L',
'N',
'U', bwu,
min( bwl, bwu-1 ), bwu,
1058 $ af( odd_size*bwu+mbw2+1+( max_bw+1 )*( max_bw-
1059 $ bwu ) ), max_bw+1, af( work_u+odd_size*bwl+1+
1060 $ max_bw-bwu ), max_bw, info )
1065 CALL stbtrs(
'U',
'T',
'N', bwl,
min( bwu, bwl-1 ), bwl,
1066 $ af( odd_size*bwu+mbw2+1-
min( bwu,
1067 $ bwl-1 )+( max_bw+1 )*( max_bw-bwl ) ),
1068 $ max_bw+1, af( odd_size*bwu+1+max_bw-bwl ),
1076 CALL sgemm(
'T',
'N', max_bw, max_bw, max_bw, -one,
1077 $ af( ( odd_size )*bwu+1 ), max_bw,
1078 $ af( work_u+( odd_size )*bwl+1 ), max_bw, zero,
1079 $ work( 1 ), max_bw )
1083 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1084 $ mycol+level_dist )
1094 IF( ( mycol / level_dist.GT.0 ) .AND.
1095 $ ( mycol / level_dist.LE.( npcol-1 ) / level_dist-1 ) )
THEN
1097 IF( level_dist.GT.1 )
THEN
1102 CALL sgerv2d( ictxt, max_bw, max_bw,
1103 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw, 0,
1104 $ mycol-level_dist / 2 )
1109 CALL sgerv2d( ictxt, max_bw, max_bw,
1110 $ af( odd_size*bwu+2*mbw2+1 ), max_bw, 0,
1111 $ mycol-level_dist / 2 )
1116 IF( info.EQ.0 )
THEN
1123 CALL slatcpy(
'N', max_bw, max_bw,
1124 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1125 $ work( 1 ), max_bw )
1127 CALL stbtrs(
'L',
'N',
'U', max_bw,
min( bwl, max_bw-1 ),
1128 $ bwl, af( odd_size*bwu+mbw2+1 ), max_bw+1,
1129 $ work( 1+max_bw*( max_bw-bwl ) ), max_bw, info )
1133 CALL slatcpy(
'N', max_bw, max_bw, work( 1 ), max_bw,
1134 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw )
1140 CALL slatcpy(
'N', max_bw, max_bw,
1141 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
1142 $ work( 1 ), max_bw )
1144 CALL stbtrs(
'U',
'T',
'N', max_bw,
min( bwu, max_bw-1 ),
1145 $ bwu, af( odd_size*bwu+mbw2+1-
min( bwu,
1146 $ max_bw-1 ) ), max_bw+1,
1147 $ work( 1+max_bw*( max_bw-bwu ) ), max_bw, info )
1151 CALL slatcpy(
'N', max_bw, max_bw, work( 1 ), max_bw,
1152 $ af( odd_size*bwu+2*mbw2+1 ), max_bw )
1161 CALL sgemm(
'N',
'T', max_bw, max_bw, max_bw, -one,
1162 $ af( ( odd_size )*bwu+2*mbw2+1 ), max_bw,
1163 $ af( work_u+( odd_size )*bwl+2*mbw2+1 ), max_bw,
1164 $ zero, work( 1 ), max_bw )
1168 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1169 $ mycol-level_dist )
1173 IF( mycol / level_dist.LE.( npcol-1 ) / level_dist-2 )
THEN
1177 IF( ( mod( mycol / ( 2*level_dist ), 2 ) ).EQ.0 )
THEN
1178 comm_proc = mycol + level_dist
1180 comm_proc = mycol - level_dist
1187 CALL sgemm(
'N',
'N', max_bw, max_bw, max_bw, -one,
1188 $ af( work_u+odd_size*bwl+2*mbw2+1 ), max_bw,
1189 $ af( odd_size*bwu+1 ), max_bw, zero, work( 1 ),
1194 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1197 CALL sgemm(
'N',
'N', max_bw, max_bw, max_bw, -one,
1198 $ af( odd_size*bwu+2*mbw2+1 ), max_bw,
1199 $ af( work_u+odd_size*bwl+1 ), max_bw, zero,
1200 $ work( 1 ), max_bw )
1204 CALL sgesd2d( ictxt, max_bw, max_bw, work( 1 ), max_bw, 0,
1220 IF( ictxt_save.NE.ictxt_new )
THEN
1233 work( 1 ) = work_size_min
1237 CALL igamx2d( ictxt,
'A',
' ', 1, 1, info, 1, info, info, -1, 0,
1240 IF( mycol.EQ.0 )
THEN
1241 CALL igebs2d( ictxt,
'A',
' ', 1, 1, info, 1 )
1243 CALL igebr2d( ictxt,
'A',
' ', 1, 1, info, 1, 0, 0 )