1 SUBROUTINE pzdbtrf( N, BWL, BWU, A, JA, DESCA, AF, LAF, WORK,
9 INTEGER BWL, , INFO, JA, LAF, LWORK, N
13 COMPLEX*16 A( * ), AF( * ), WORK( * )
348 DOUBLE PRECISION ONE, ZERO
349 parameter( one = 1.0d+0 )
350 parameter( zero = 0.0d+0 )
351 COMPLEX*16 CONE, CZERO
352 parameter( cone = ( 1.0d+0, 0.0d+0 ) )
353 parameter( czero = ( 0.0d+0, 0.0d+0 ) )
355 parameter( int_one = 1 )
356 INTEGER DESCMULT, BIGNUM
357 parameter(descmult = 100, bignum = descmult * descmult)
358 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
359 $ lld_, mb_, m_, nb_, n_, rsrc_
360 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
361 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
362 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
365 INTEGER COMM_PROC, CSRC, FIRST_PROC, I, ICTXT,
366 $ ictxt_new, ictxt_save, idum3, ja_new, laf_min,
367 $ level_dist, llda, max_bw, mbw2, mycol, myrow,
368 $ my_num_cols, nb, next_tri_size_m,
369 $ next_tri_size_n, np, npcol, nprow, np_save,
370 $ odd_size, ofst, part_offset, part_size,
371 $ prev_tri_size_m, prev_tri_size_n, return_code,
372 $ store_n_a, up_prev_tri_size_m,
373 $ up_prev_tri_size_n, work_size_min, work_u
376 INTEGER DESCA_1XP( 7 ), PARAM_CHECK( 9, 3 )
388 EXTERNAL lsame, numroc
391 INTRINSIC ichar,
min, mod
406 IF( return_code .NE. 0)
THEN
407 info = -( 6*100 + 2 )
412 ictxt = desca_1xp( 2 )
413 csrc = desca_1xp( 5 )
415 llda = desca_1xp( 6 )
416 store_n_a = desca_1xp( 3 )
423 max_bw =
max(bwl,bwu)
424 mbw2 = max_bw * max_bw
431 IF( lwork .LT. -1)
THEN
433 ELSE IF ( lwork .EQ. -1 )
THEN
443 IF( n+ja-1 .GT. store_n_a )
THEN
447 IF(( bwl .GT. n-1 ) .OR.
448 $ ( bwl .LT. 0 ) )
THEN
452 IF(( bwu .GT. n-1 ) .OR.
453 $ ( bwu .LT. 0 ) )
THEN
457 IF( llda .LT. (bwl+bwu+1) )
THEN
458 info = -( 6*100 + 6 )
462 info = -( 6*100 + 4 )
467 IF( nprow .NE. 1 )
THEN
471 IF( n .GT. np*nb-mod( ja-1, nb ))
THEN
474 $
'PZDBTRF, D&C alg.: only 1 block per proc',
479 IF((ja+n-1.GT.nb) .AND. ( nb.LT.2*
max(bwl,bwu) ))
THEN
482 $
'PZDBTRF, D&C alg.: NB too small',
490 laf_min = nb*(bwl+bwu
492 IF( laf .LT. laf_min )
THEN
497 $
'PZDBTRF: auxiliary storage error ',
504 work_size_min =
max(bwl,bwu)*
max(bwl,bwu)
506 work( 1 ) = work_size_min
508 IF( lwork .LT. work_size_min )
THEN
509 IF( lwork .NE. -1 )
THEN
512 $
'PZDBTRF: worksize error ',
520 param_check( 9, 1 ) = desca(5)
521 param_check( 8, 1 ) = desca(4)
522 param_check( 7, 1 ) = desca(3)
523 param_check( 6, 1 ) = desca(1)
524 param_check( 5, 1 ) = ja
525 param_check( 4, 1 ) = bwu
526 param_check( 3, 1 ) = bwl
527 param_check( 2, 1 ) = n
528 param_check( 1, 1 ) = idum3
530 param_check( 9, 2 ) = 605
531 param_check( 8, 2 ) = 604
532 param_check( 7, 2 ) = 603
533 param_check( 6, 2 ) = 601
534 param_check( 5, 2 ) = 5
535 param_check( 4, 2 ) = 3
537 param_check( 2, 2 ) = 1
538 param_check( 1, 2 ) = 10
546 ELSE IF( info.LT.-descmult )
THEN
549 info = -info * descmult
554 CALL globchk( ictxt, 9, param_check, 9,
555 $ param_check( 1, 3 ), info )
560 IF( info.EQ.bignum )
THEN
562 ELSE IF( mod( info, descmult ) .EQ. 0 )
THEN
563 info = -info / descmult
569 CALL pxerbla( ictxt,
'PZDBTRF', -info )
582 part_offset = nb*( (ja-1)/(npcol*nb) )
584 IF ( (mycol-csrc) .LT. (ja-part_offset-1)/nb
THEN
585 part_offset = part_offset + nb
588 IF ( mycol .LT. csrc )
THEN
589 part_offset = part_offset - nb
598 first_proc = mod( ( ja-1 )/nb+csrc, npcol )
602 ja_new = mod( ja-1, nb ) + 1
607 np = ( ja_new+n-2 )/nb + 1
611 CALL reshape( ictxt, int_one, ictxt_new, int_one,
612 $ first_proc, int_one, np )
618 desca_1xp( 2 ) = ictxt_new
626 IF( myrow .LT. 0 )
THEN
639 my_num_cols = numroc( n, part_size, mycol, 0, npcol )
643 IF ( mycol .EQ. 0 )
THEN
644 part_offset = part_offset+mod( ja_new-1, part_size )
645 my_num_cols = my_num_cols - mod(ja_new-1, part_size )
650 ofst = part_offset*llda
654 odd_size = my_num_cols
655 IF ( mycol .LT. np-1 )
THEN
656 odd_size = odd_size - max_bw
661 work_u = bwu*odd_size + 3*mbw2
672 DO 20 i=1, work_size_min
686 IF( mycol .GT. 0 )
THEN
687 prev_tri_size_m=
min( bwl,
688 $ numroc( n, part_size, mycol, 0, npcol ) )
689 prev_tri_size_n=
min( bwl,
690 $ numroc( n, part_size, mycol-1, 0, npcol ) )
693 IF( mycol .GT. 0 )
THEN
694 up_prev_tri_size_m=
min( bwu,
695 $ numroc( n, part_size, mycol, 0, npcol ) )
696 up_prev_tri_size_n=
min( bwu,
697 $ numroc( n, part_size, mycol-1, 0, npcol ) )
700 IF( mycol .LT. npcol-1 )
THEN
701 next_tri_size_m=
min( bwl,
702 $ numroc( n, part_size, mycol+1, 0, npcol ) )
703 next_tri_size_n=
min( bwl,
704 $ numroc( n, part_size, mycol, 0, npcol ) )
707 IF ( mycol .LT. np-1 )
THEN
713 CALL ztrsd2d( ictxt,
'U',
'N', next_tri_size_m,
715 $ a( ofst+(my_num_cols-bwl)*llda+(bwl+bwu+1) ),
716 $ llda-1, 0, mycol+1 )
723 CALL zdbtrf( odd_size, odd_size, bwl, bwu, a( ofst + 1),
732 IF ( mycol .LT. np-1 )
THEN
740 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
741 $ llda-1, af( odd_size*bwu+2*mbw2+1+max_bw-bwl ),
743 CALL zlamov(
'L', bwu, bwu, a( ( ofst+1+odd_size*llda ) ),
745 $ af( work_u+odd_size*bwl+2*mbw2+1+max_bw-bwu ),
750 CALL ztbtrs(
'L',
'N',
'U', bwu, bwl, bwu,
751 $ a( ofst+bwu+1+(odd_size-bwu )*llda ), llda,
757 CALL ztbtrs(
'U',
'C',
'N', bwl, bwu, bwl,
758 $ a( ofst+1+(odd_size-bwl)*llda ), llda,
759 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw,
766 $ af( odd_size*bwu+2*mbw2+1+max_bw-bwl ), max_bw,
767 $ a(( ofst+(bwl+bwu+1)+(odd_size-bwl)*llda )),
772 CALL zlamov(
'L', bwu, bwu,
774 $ max_bw, a(( ofst+1+odd_size*llda )), llda-1 )
784 CALL zgemm(
'C',
'N', max_bw, max_bw, max_bw, -cone ,
785 $ af( odd_size*bwu+2*mbw2+1), max_bw,
786 $ af( work_u+odd_size*bwl+2*mbw2+1), max_bw, cone,
787 $ a( ofst+odd_size*llda+1+bwu ), llda-1 )
796 IF ( mycol .NE. 0 )
THEN
806 CALL ztrrv2d( ictxt,
'U',
'N', prev_tri_size_m,
807 $ prev_tri_size_n, af( work_u+1 ), odd_size, 0,
814 CALL ztbtrs(
'L',
'N', 'u
', ODD_SIZE, BWL, BWL,
815 $ A( OFST + BWU+1 ), LLDA, AF( WORK_U+1 ),
824 CALL ZLATCPY( 'l
', UP_PREV_TRI_SIZE_N, UP_PREV_TRI_SIZE_M,
825 $ A( OFST+1 ), LLDA-1, AF( 1 ), ODD_SIZE )
827 CALL ZTBTRS( 'u
', 'c
', 'n
', ODD_SIZE, BWU, BWU,
828 $ A( OFST + 1 ), LLDA,
829 $ AF( 1 ), ODD_SIZE, INFO )
838 AF( ODD_SIZE*BWU+2*MBW2+I ) = CZERO
841 CALL ZGEMM( 'c
', 'n
', BWU, BWL, ODD_SIZE,
842 $ -CONE, AF( 1 ), ODD_SIZE,
843 $ AF( WORK_U+1 ), ODD_SIZE, CZERO,
844 $ AF( 1+MAX(0,BWL-BWU)+ODD_SIZE*BWU+
845 $ (2*MAX_BW+MAX(0,BWU-BWL))*MAX_BW),
852 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW,
853 $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW, 0,
857.LT.
IF ( MYCOL NP-1 ) THEN
868 CALL ZLATCPY( 'n
', BWL, BWL,
869 $ AF( WORK_U+ODD_SIZE-BWL+1 ), ODD_SIZE,
870 $ AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ),
873 CALL ZTRMM( 'r
', 'u
', 'c
', 'n
', BWL, BWL, -CONE,
874 $ A( ( OFST+(BWL+BWU+1)+(ODD_SIZE-BWL)*LLDA ) ),
875 $ LLDA-1, AF( (ODD_SIZE)*BWU+1+(MAX_BW-BWL) ),
884 CALL ZLATCPY( 'n
', BWU, BWU,
885 $ AF( ODD_SIZE-BWU+1 ), ODD_SIZE,
886 $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ),
889 CALL ZTRMM( 'r
', 'l
', 'n
', 'n
', BWU, BWU, -CONE,
890 $ A( ( OFST+1+ODD_SIZE*LLDA ) ), LLDA-1,
891 $ AF( WORK_U+(ODD_SIZE)*BWL+1+MAX_BW-BWU ),
905 CALL IGAMX2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1, INFO, INFO,
908.EQ.
IF( MYCOL0 ) THEN
909 CALL IGEBS2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1 )
911 CALL IGEBR2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1, 0, 0 )
914.NE.
IF ( INFO0 ) THEN
929.EQ.
IF( MYCOL NPCOL-1 ) THEN
937.EQ..AND..GT.
IF( (MOD( MYCOL+1, 2 ) 0) ( MYCOL 0 ) ) THEN
939 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW,
940 $ AF( ODD_SIZE*BWU+1 ),
941 $ MAX_BW, 0, MYCOL-1 )
943 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW,
944 $ AF( WORK_U+ODD_SIZE*BWL+1 ),
945 $ MAX_BW, 0, MYCOL-1 )
952 CALL ZLAMOV( 'n
', MAX_BW, MAX_BW,
953 $ A( OFST+ODD_SIZE*LLDA+BWU+1 ),
954 $ LLDA-1, AF( ODD_SIZE*BWU+MBW2+1 ),
959.LT.
IF( MYCOL NPCOL-1 ) THEN
961 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW,
962 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
968 CALL ZAXPY( MBW2, CONE,
969 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
970 $ 1, AF( ODD_SIZE*BWU+MBW2+1 ), 1 )
985.NE.
IF( MOD( (MYCOL+1)/LEVEL_DIST, 2) 0 ) GOTO 11
989.GE.
IF( MYCOL-LEVEL_DIST 0 ) THEN
990 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ),
991 $ MAX_BW, 0, MYCOL-LEVEL_DIST )
993 CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1,
994 $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 )
1000.LT.
IF( MYCOL+LEVEL_DIST NPCOL-1 ) THEN
1001 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ),
1002 $ MAX_BW, 0, MYCOL+LEVEL_DIST )
1004 CALL ZAXPY( MBW2, CONE, WORK( 1 ), 1,
1005 $ AF( ODD_SIZE*BWU+MBW2+1 ), 1 )
1009 LEVEL_DIST = LEVEL_DIST*2
1021 CALL ZDBTRF( MAX_BW, MAX_BW, MIN( MAX_BW-1, BWL ),
1022 $ MIN( MAX_BW-1, BWU ), AF( ODD_SIZE*BWU+MBW2+1
1023 $ -( MIN( MAX_BW-1, BWU ))), MAX_BW+1, INFO )
1025.NE.
IF( INFO0 ) THEN
1026 INFO = NPCOL + MYCOL
1034.EQ.
IF( LEVEL_DIST 1 )THEN
1035 COMM_PROC = MYCOL + 1
1040 CALL ZLAMOV( 'n
', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+1 ),
1041 $ MAX_BW, AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ),
1044 CALL ZLAMOV( 'n
', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+1 ),
1045 $ MAX_BW, AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW )
1048 COMM_PROC = MYCOL + LEVEL_DIST/2
1051.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 )THEN
1053 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW,
1054 $ AF( ODD_SIZE*BWU+1 ),
1055 $ MAX_BW, 0, COMM_PROC )
1057 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW,
1058 $ AF( WORK_U+ODD_SIZE*BWL+1 ),
1059 $ MAX_BW, 0, COMM_PROC )
1061.EQ.
IF( INFO 0 ) THEN
1068 $ 'l
', 'n
', 'u
', BWU, MIN( BWL, BWU-1 ), BWU,
1070 $ MBW2+1+(MAX_BW+1)*(MAX_BW-BWU)), MAX_BW+1,
1071 $ AF( WORK_U+ODD_SIZE*BWL+1+MAX_BW-BWU), MAX_BW, INFO )
1077 $ 'u
', 'c
', 'n
', BWL, MIN( BWU, BWL-1 ), BWL,
1079 $ MBW2+1-MIN( BWU, BWL-1 )+(MAX_BW+1)*(MAX_BW-BWL)), MAX_BW+1,
1080 $ AF( ODD_SIZE*BWU+1+MAX_BW-BWL), MAX_BW, INFO )
1087 CALL ZGEMM( 'c
', 'n
', MAX_BW, MAX_BW, MAX_BW, -CONE,
1088 $ AF( (ODD_SIZE)*BWU+1 ), MAX_BW,
1089 $ AF( WORK_U+(ODD_SIZE)*BWL+1 ), MAX_BW, CZERO,
1090 $ WORK( 1 ), MAX_BW )
1094 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1095 $ 0, MYCOL+LEVEL_DIST )
1105.GT..AND.
IF( (MYCOL/LEVEL_DIST 0 )
1106.LE.
$ ( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-1 ) ) THEN
1108.GT.
IF( LEVEL_DIST 1)THEN
1113 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW,
1114 $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ),
1115 $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 )
1120 CALL ZGERV2D( ICTXT, MAX_BW, MAX_BW,
1121 $ AF( ODD_SIZE*BWU+2*MBW2+1 ),
1122 $ MAX_BW, 0, MYCOL-LEVEL_DIST/2 )
1127.EQ.
IF( INFO0 ) THEN
1134 CALL ZLATCPY( 'n
', MAX_BW, MAX_BW, AF( WORK_U+ODD_SIZE*BWL+
1135 $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW )
1138 $ 'l
', 'n
', 'u
', MAX_BW, MIN( BWL, MAX_BW-1 ), BWL,
1139 $ AF( ODD_SIZE*BWU+MBW2+1), MAX_BW+1,
1140 $ WORK( 1+MAX_BW*(MAX_BW-BWL) ), MAX_BW, INFO )
1145 $ 'n
', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1146 $ AF( WORK_U+ODD_SIZE*BWL+
1147 $ 2*MBW2+1), MAX_BW )
1153 CALL ZLATCPY( 'n
', MAX_BW, MAX_BW, AF( ODD_SIZE*BWU+
1154 $ 2*MBW2+1), MAX_BW, WORK( 1 ), MAX_BW )
1157 $ 'u
', 'c
', 'n
', MAX_BW, MIN( BWU, MAX_BW-1 ), BWU,
1158 $ AF( ODD_SIZE*BWU+MBW2+1-MIN( BWU, MAX_BW-1 )), MAX_BW+1,
1159 $ WORK( 1+MAX_BW*(MAX_BW-BWU) ), MAX_BW, INFO )
1164 $ 'n
', MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1166 $ 2*MBW2+1), MAX_BW )
1175 CALL ZGEMM( 'n
', 'c
', MAX_BW, MAX_BW, MAX_BW, -CONE,
1176 $ AF( (ODD_SIZE)*BWU+2*MBW2+1 ), MAX_BW,
1177 $ AF( WORK_U+(ODD_SIZE)*BWL+2*MBW2+1 ), MAX_BW,
1178 $ CZERO, WORK( 1 ), MAX_BW )
1182 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1183 $ 0, MYCOL-LEVEL_DIST )
1187.LE.
IF( MYCOL/LEVEL_DIST (NPCOL-1)/LEVEL_DIST-2 ) THEN
1191.EQ.
IF( ( MOD( MYCOL/( 2*LEVEL_DIST ),2 )) 0 ) THEN
1192 COMM_PROC = MYCOL + LEVEL_DIST
1194 COMM_PROC = MYCOL - LEVEL_DIST
1201 CALL ZGEMM( 'n
', 'n
', MAX_BW, MAX_BW, MAX_BW, -CONE,
1202 $ AF( WORK_U+ODD_SIZE*BWL+2*MBW2+1 ), MAX_BW,
1203 $ AF( ODD_SIZE*BWU+1 ), MAX_BW, CZERO,
1204 $ WORK( 1 ), MAX_BW )
1208 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1211 CALL ZGEMM( 'n
', 'n
', MAX_BW, MAX_BW, MAX_BW, -CONE,
1212 $ AF( ODD_SIZE*BWU+2*MBW2+1 ), MAX_BW,
1213 $ AF( WORK_U+ODD_SIZE*BWL+1 ), MAX_BW, CZERO,
1214 $ WORK( 1 ), MAX_BW )
1218 CALL ZGESD2D( ICTXT, MAX_BW, MAX_BW, WORK( 1 ), MAX_BW,
1234.NE.
IF( ICTXT_SAVE ICTXT_NEW ) THEN
1235 CALL BLACS_GRIDEXIT( ICTXT_NEW )
1247 WORK( 1 ) = WORK_SIZE_MIN
1251 CALL IGAMX2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1, INFO, INFO,
1254.EQ.
IF( MYCOL0 ) THEN
1255 CALL IGEBS2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1 )
1257 CALL IGEBR2D( ICTXT, 'a
', ' ', 1, 1, INFO, 1, 0, 0 )