Functions/Subroutines
subroutine	pclahqr (wantt, wantz, n, ilo, ihi, a, desca, w, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Function/Subroutine Documentation

◆ pclahqr()

subroutine pclahqr	(	logical	wantt,
		logical	wantz,
		integer	n,
		integer	ilo,
		integer	ihi,
		complex, dimension( * )	a,
		integer, dimension( * )	desca,
		complex, dimension( * )	w,
		integer	iloz,
		integer	ihiz,
		complex, dimension( * )	z,
		integer, dimension( * )	descz,
		complex, dimension( * )	work,
		integer	lwork,
		integer, dimension( * )	iwork,
		integer	ilwork,
		integer	info )
Definition at line 1 of file pclahqr.f.
*
*  -- ScaLAPACK routine (version 1.7.3) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     1.7.3: March   22, 2006
*            modification suggested by Mark Fahey and Greg Henry
*     1.7.0: July    31, 2001
*
*     .. Scalar Arguments ..
      LOGICAL            WANTT, WANTZ
      INTEGER            IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCZ( * ), IWORK( * )
      COMPLEX            A( * ), W( * ), WORK( * ), Z( * )
*     ..
*
*  Purpose
*  =======
*
*  PCLAHQR is an auxiliary routine used to find the Schur decomposition
*    and/or eigenvalues of a matrix already in Hessenberg form from
*    cols ILO to IHI.
*  If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ,
*    with Z'Z=I, and H in Schur form.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCp(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCp( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCq( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCp() and LOCq() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          = .TRUE. : the full Schur form T is required;
*          = .FALSE.: only eigenvalues are required.
*
*  WANTZ   (global input) LOGICAL
*          = .TRUE. : the matrix of Schur vectors Z is required;
*          = .FALSE.: Schur vectors are not required.
*
*  N       (global input) INTEGER
*          The order of the Hessenberg matrix A (and Z if WANTZ).
*          N >= 0.
*
*  ILO     (global input) INTEGER
*  IHI     (global input) INTEGER
*          It is assumed that A is already upper quasi-triangular in
*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless
*          ILO = 1). PCLAHQR works primarily with the Hessenberg
*          submatrix in rows and columns ILO to IHI, but applies
*          transformations to all of H if WANTT is .TRUE..
*          1 <= ILO <= max(1,IHI); IHI <= N.
*
*  A       (global input/output) COMPLEX array, dimension
*          (DESCA(LLD_),*)
*          On entry, the upper Hessenberg matrix A.
*          On exit, if WANTT is .TRUE., A is upper triangular in rows
*          and columns ILO:IHI.  If WANTT is .FALSE., the contents of
*          A are unspecified on exit.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  W       (global replicated output) COMPLEX array, dimension (N)
*          The computed eigenvalues ILO to IHI are stored in the
*          corresponding elements of W.  If WANTT is .TRUE., the
*          eigenvalues are stored in the same order as on the diagonal
*          of the Schur form returned in A.  A may be returned with
*          larger diagonal blocks until the next release.
*
*  ILOZ    (global input) INTEGER
*  IHIZ    (global input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE..
*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
*
*  Z       (global input/output) COMPLEX array.
*          If WANTZ is .TRUE., on entry Z must contain the current
*          matrix Z of transformations accumulated by PCHSEQR, and on
*          exit Z has been updated; transformations are applied only to
*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
*          If WANTZ is .FALSE., Z is not referenced.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  WORK    (local output) COMPLEX array of size LWORK
*          (Unless LWORK=-1, in which case WORK must be at least size 1)
*
*  LWORK   (local input) INTEGER
*          WORK(LWORK) is a local array and LWORK is assumed big enough
*          so that LWORK >= 3*N +
*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N),
*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) +
*                MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 )
*          If LWORK=-1, then WORK(1) gets set to the above number and
*          the code returns immediately.
*
*  IWORK   (global and local input) INTEGER array of size ILWORK
*          This will hold some of the IBLK integer arrays.
*          This is held as a place holder for a future release.
*          Currently unreferenced.
*
*  ILWORK  (local input) INTEGER
*          This will hold the size of the IWORK array.
*          This is held as a place holder for a future release.
*          Currently unreferenced.
*
*  INFO    (global output) INTEGER
*          < 0: parameter number -INFO incorrect or inconsistent
*          = 0: successful exit
*          > 0: PCLAHQR failed to compute all the eigenvalues ILO to IHI
*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
*               elements i+1:ihi of W contain those eigenvalues
*               which have been successfully computed.
*
*  Logic:
*       This algorithm is very similar to SLAHQR.  Unlike SLAHQR,
*       instead of sending one double shift through the largest
*       unreduced submatrix, this algorithm sends multiple double shifts
*       and spaces them apart so that there can be parallelism across
*       several processor row/columns.  Another critical difference is
*       that this algorithm aggregates multiple transforms together in
*       order to apply them in a block fashion.
*
*  Important Local Variables:
*       IBLK = The maximum number of bulges that can be computed.
*           Currently fixed.  In future releases this won't be fixed.
*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_))
*       ROTN = The number of transforms to block together
*       NBULGE = The number of bulges that will be attempted on the
*           current submatrix.
*       IBULGE = The current number of bulges started.
*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).
*
*  Subroutines:
*       From LAPACK, this routine calls:
*           CLAHQR     -> Serial QR used to determine shifts and
*                         eigenvalues
*           CLARFG     -> Determine the Householder transforms
*
*       From ScaLAPACK, this routine calls:
*           PCLACONSB  -> To determine where to start each iteration
*           CLAMSH     -> Sends multiple shifts through a small
*                         submatrix to see how the consecutive
*                         subdiagonals change (if PCLACONSB indicates
*                         we can start a run in the middle)
*           PCLAWIL    -> Given the shift, get the transformation
*           PCLACP3    -> Parallel array to local replicated array copy
*                         & back.
*           CLAREF     -> Row/column reflector applier.  Core routine
*                         here.
*           PCLASMSUB  -> Finds negligible subdiagonal elements.
*
*  Current Notes and/or Restrictions:
*       1.) This code requires the distributed block size to be square
*           and at least six (6); unlike simpler codes like LU, this
*           algorithm is extremely sensitive to block size.  Unwise
*           choices of too small a block size can lead to bad
*           performance.
*       2.) This code requires A and Z to be distributed identically
*           and have identical contexts.  A future version may allow Z
*           to have a different context to 1D row map it to all nodes
*           (so no communication on Z is necessary.)
*       3.) This code does not currently block the initial transforms
*           so that none of the rows or columns for any bulge are
*           completed until all are started.  To offset pipeline
*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL)
*           bulges are used (if possible)
*       4.) The maximum number of bulges currently supported is fixed at
*           32.  In future versions this will be limited only by the
*           incoming WORK and IWORK array.
*       5.) The matrix A must be in upper Hessenberg form.  If elements
*           below the subdiagonal are nonzero, the resulting transforms
*           may be nonsimilar.  This is also true with the LAPACK
*           routine CLAHQR.
*       6.) For this release, this code has only been tested for
*           RSRC_=CSRC_=0, but it has been written for the general case.
*       7.) Currently, all the eigenvalues are distributed to all the
*           nodes.  Future releases will probably distribute the
*           eigenvalues by the column partitioning.
*       8.) The internals of this routine are subject to change.
*       9.) To optimize this for your architecture, try tuning CLAREF.
*       10.) This code has only been tested for WANTZ = .TRUE. and may
*           behave unpredictably for WANTZ set to .FALSE.
*
*  Further Details
*  ===============
*
*  Contributed by Mark Fahey, June, 2000.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      parameter( block_cyclic_2d = 1, dlen_ = 9, dt_ = 1,
     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )
      REAL               RONE
      parameter( rone = 1.0e+0 )
      COMPLEX            ZERO, ONE
      parameter( zero = ( 0.0e+0, 0.0e+0 ),
     $                   one = ( 1.0e+0, 0.0e+0 ) )
      REAL               CONST
      parameter( const = 1.50e+0 )
      INTEGER            IBLK
      parameter( iblk = 32 )
*     ..
*     .. Local Scalars ..
      LOGICAL            SKIP
      INTEGER            CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,
     $                   ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II,
     $                   IRBUF, IROW, IROW1, IROW2, ISPEC, ISTART,
     $                   ISTARTCOL, ISTARTROW, ISTOP, ISUB, ISUP,
     $                   ITERMAX, ITMP1, ITMP2, ITN, ITS, IZBUF, J,
     $                   JAFIRST, JBLK, JJ, K, KI, L, LCMRC, LDA, LDZ,
     $                   LEFT, LIHIH, LIHIZ, LILOH, LILOZ, LOCALI1,
     $                   LOCALI2, LOCALK, LOCALM, M, MODKM1, MYCOL,
     $                   MYROW, NBULGE, NH, NODE, NPCOL, NPROW, NQ, NR,
     $                   NUM, NZ, RIGHT, ROTN, UP, VECSIDX
      REAL               CS, OVFL, S, SMLNUM, ULP, UNFL
      COMPLEX            CDUM, H10, H11, H22, H33, H43H34, H44, SN, SUM,
     $                   T1, T1COPY, T2, T3, V1SAVE, V2, V2SAVE, V3,
     $                   V3SAVE
*     ..
*     .. Local Arrays ..
      INTEGER            ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),
     $                   K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),
     $                   KP2ROW( IBLK ), KROW( IBLK )
      COMPLEX            S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),
     $                   VCOPY( 3 )
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      REAL               PSLAMCH
      EXTERNAL           ilcm, numroc, pslamch
*     ..
*     .. External Subroutines ..
      EXTERNAL           blacs_gridinfo, igamn2d, igebr2d, igebs2d,
     $                   infog1l, infog2l, pslabad, pxerbla, pclaconsb,
     $                   pclacp3, pclasmsub, pclawil, pcrot, ccopy,
     $                   cgebr2d, cgebs2d, cgerv2d, cgesd2d, cgsum2d,
     $                   clahqr2, clamsh, clanv2, claref, clarfg
*     ..
*     .. Intrinsic Functions ..
*
      INTRINSIC          abs, real, conjg, aimag, max, min, mod
*     ..
*     .. Statement Functions ..
      REAL               CABS1
*     ..
*     .. Statement Function definitions ..
      cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
*     ..
*     .. Executable Statements ..
*
      info = 0
*
      itermax = 30*( ihi-ilo+1 )
      IF( n.EQ.0 )
     $   RETURN
*
*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)
*
      hbl = desca( mb_ )
      contxt = desca( ctxt_ )
      lda = desca( lld_ )
      iafirst = desca( rsrc_ )
      jafirst = desca( csrc_ )
      ldz = descz( lld_ )
      CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
      node = myrow*npcol + mycol
      num = nprow*npcol
      left = mod( mycol+npcol-1, npcol )
      right = mod( mycol+1, npcol )
      up = mod( myrow+nprow-1, nprow )
      down = mod( myrow+1, nprow )
      lcmrc = ilcm( nprow, npcol )
      IF( ( nprow.LE.3 ) .OR. ( npcol.LE.3 ) ) THEN
         skip = .true.
      ELSE
         skip = .false.
      END IF
*
*     Determine the number of columns we have so we can check workspace
*
      nq = numroc( n, hbl, mycol, jafirst, npcol )
      jj = n / hbl
      IF( jj*hbl.LT.n )
     $   jj = jj + 1
      jj = 7*jj / lcmrc
      jj = 3*n + max( 2*max( lda, ldz )+2*nq, jj )
      jj = jj + max( 2*n, ( 8*lcmrc+2 )**2 )
      IF( lwork.EQ.-1 ) THEN
         work( 1 ) = jj
         RETURN
      END IF
      IF( lwork.LT.jj ) THEN
         info = -14
      END IF
      IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN
         info = -( 1300+ctxt_ )
      END IF
      IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
         info = -( 700+nb_ )
      END IF
      IF( descz( mb_ ).NE.descz( nb_ ) ) THEN
         info = -( 1300+nb_ )
      END IF
      IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
         info = -( 1300+mb_ )
      END IF
      IF( ( desca( rsrc_ ).NE.0 ) .OR. ( desca( csrc_ ).NE.0 ) ) THEN
         info = -( 700+rsrc_ )
      END IF
      IF( ( descz( rsrc_ ).NE.0 ) .OR. ( descz( csrc_ ).NE.0 ) ) THEN
         info = -( 1300+rsrc_ )
      END IF
      IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN
         info = -4
      END IF
      IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN
         info = -5
      END IF
      IF( hbl.LT.5 ) THEN
         info = -( 700+mb_ )
      END IF
      CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,
     $              -1, -1 )
      IF( info.LT.0 ) THEN
         CALL pxerbla( contxt, 'PCLAHQR', -info )
         RETURN
      END IF
*
*     Set work array indices
*
      vecsidx = 0
      idia = 3*n
      isub = 3*n
      isup = 3*n
      irbuf = 3*n
      icbuf = 3*n
      izbuf = 5*n
*
*     Find a value for ROTN
*
      rotn = hbl / 3
      rotn = min( rotn, hbl-2 )
      rotn = max( rotn, 1 )
*
      IF( ilo.EQ.ihi ) THEN
         CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,
     $                 irow, icol, ii, jj )
         IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
            w( ilo ) = a( ( icol-1 )*lda+irow )
         ELSE
            w( ilo ) = zero
         END IF
         RETURN
      END IF
*
      nh = ihi - ilo + 1
      nz = ihiz - iloz + 1
*
      CALL infog1l( iloz, hbl, nprow, myrow, iafirst, liloz, lihiz )
      lihiz = numroc( ihiz, hbl, myrow, iafirst, nprow )
*
*     Set machine-dependent constants for the stopping criterion.
*     If NORM(H) <= SQRT(OVFL), overflow should not occur.
*
      unfl = pslamch( contxt, 'SAFE MINIMUM' )
      ovfl = rone / unfl
      CALL pslabad( contxt, unfl, ovfl )
      ulp = pslamch( contxt, 'PRECISION' )
      smlnum = unfl*( nh / ulp )
*
*     I1 and I2 are the indices of the first row and last column of H
*     to which transformations must be applied. If eigenvalues only are
*     being computed, I1 and I2 are set inside the main loop.
*
      IF( wantt ) THEN
         i1 = 1
         i2 = n
      END IF
*
*     ITN is the total number of QR iterations allowed.
*
      itn = itermax
*
*     The main loop begins here. I is the loop index and decreases from
*     IHI to ILO in steps of our Schur block size (<=2*IBLK). Each
*     iteration of the loop works with the active submatrix in rows
*     and columns L to I.   Eigenvalues I+1 to IHI have already
*     converged. Either L = ILO or the global A(L,L-1) is negligible
*     so that the matrix splits.
*
      i = ihi
   10 CONTINUE
      l = ilo
      IF( i.LT.ilo )
     $   GO TO 570
*
*     Perform QR iterations on rows and columns ILO to I until a
*     submatrix of order 1 or 2 splits off at the bottom because a
*     subdiagonal element has become negligible.
*
      DO 540 its = 0, itn
*
*        Look for a single small subdiagonal element.
*
         CALL pclasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),
     $                   lwork-irbuf )
         l = k
*
         IF( l.GT.ilo ) THEN
*
*           H(L,L-1) is negligible
*
            CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,
     $                    irow, icol, itmp1, itmp2 )
            IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
               a( ( icol-1 )*lda+irow ) = zero
            END IF
            work( isub+l-1 ) = zero
         END IF
*
*        Exit from loop if a submatrix of order 1 or 2 has split off.
*
         IF( wantt ) THEN
*           For Schur form, use 2x2 blocks
            IF( l.GE.i-1 ) THEN
               GO TO 550
            END IF
         ELSE
*           If we don't want the Schur form, use bigger blocks.
            IF( l.GE.i-( 2*iblk-1 ) ) THEN
               GO TO 550
            END IF
         END IF
*
*        Now the active submatrix is in rows and columns L to I. If
*        eigenvalues only are being computed, only the active submatrix
*        need be transformed.
*
         IF( .NOT.wantt ) THEN
            i1 = l
            i2 = i
         END IF
*
*        Copy submatrix of size 2*JBLK and prepare to do generalized
*           Wilkinson shift or an exceptional shift
*
         jblk = min( iblk, ( ( i-l+1 ) / 2 )-1 )
         IF( jblk.GT.lcmrc ) THEN
*
*           Make sure it's divisible by LCM (we want even workloads!)
*
            jblk = jblk - mod( jblk, lcmrc )
         END IF
         jblk = min( jblk, 2*lcmrc )
         jblk = max( jblk, 1 )
*
         CALL pclacp3( 2*jblk, i-2*jblk+1, a, desca, s1, 2*iblk, -1, -1,
     $                 0 )
         IF( ( its.EQ.20 .OR. its.EQ.40 ) .AND. ( jblk.GT.1 ) ) THEN
*
*           Exceptional shift.
*
            DO 20 ii = 2*jblk, 2, -1
               s1( ii, ii ) = const*( cabs1( s1( ii, ii ) )+
     $                        cabs1( s1( ii, ii-1 ) ) )
               s1( ii, ii-1 ) = zero
               s1( ii-1, ii ) = zero
   20       CONTINUE
            s1( 1, 1 ) = const*cabs1( s1( 1, 1 ) )
         ELSE
            CALL clahqr2( .false., .false., 2*jblk, 1, 2*jblk, s1,
     $                   2*iblk, work( irbuf+1 ), 1, 2*jblk, z, ldz,
     $                   ierr )
*
*           Prepare to use Wilkinson's double shift
*
            h44 = s1( 2*jblk, 2*jblk )
            h33 = s1( 2*jblk-1, 2*jblk-1 )
            h43h34 = s1( 2*jblk-1, 2*jblk )*s1( 2*jblk, 2*jblk-1 )
*
         END IF
*
*        Look for two consecutive small subdiagonal elements:
*           PCLACONSB is the routine that does this.
*
         CALL pclaconsb( a, desca, i, l, m, h44, h33, h43h34,
     $                   work( irbuf+1 ), lwork-irbuf )
*
*        Double-shift QR step
*
*        NBULGE is the number of bulges that will be attempted
*
         istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ), i-2 )
         istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
         istop = min( istop, i2-2 )
         istop = max( istop, m )
         nbulge = ( i-1-istop ) / hbl
*
*        Do not exceed maximum determined.
*
         nbulge = min( nbulge, jblk )
         IF( nbulge.GT.lcmrc ) THEN
*
*           Make sure it's divisible by LCM (we want even workloads!)
*
            nbulge = nbulge - mod( nbulge, lcmrc )
         END IF
         nbulge = max( nbulge, 1 )
*
*        If we are starting in the middle because of consecutive small
*           subdiagonal elements, we need to see how many bulges we
*           can send through without breaking the consecutive small
*           subdiagonal property.
*
         IF( ( nbulge.GT.1 ) .AND. ( m.GT.l ) ) THEN
*
*           Copy a chunk of elements from global A(M-1:,M-1:)
*
            CALL infog2l( m+2, m+2, desca, nprow, npcol, myrow, mycol,
     $                    irow1, icol1, itmp1, itmp2 )
            ii = min( 4*nbulge+2, n-m+2 )
            CALL pclacp3( ii, m-1, a, desca, work( irbuf+1 ), ii, itmp1,
     $                    itmp2, 0 )
            IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
*
*              Find a new NBULGE based on the bulges we have.
*
               CALL clamsh( s1, 2*iblk, nbulge, jblk, work( irbuf+1 ),
     $                      ii, ii, ulp )
               IF( num.GT.1 ) THEN
                  CALL igebs2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1 )
               END IF
            ELSE
*
*              Everyone needs to receive the new NBULGE
*
               CALL igebr2d( contxt, 'ALL', ' ', 1, 1, nbulge, 1, itmp1,
     $                       itmp2 )
            END IF
         END IF
*
*        IBULGE is the number of bulges going so far
*
         ibulge = 1
*
*        "A" row defs : main row transforms from LOCALK to LOCALI2
*
         CALL infog1l( m, hbl, npcol, mycol, jafirst, itmp1, localk )
         localk = nq
         CALL infog1l( 1, hbl, npcol, mycol, jafirst, icol1, locali2 )
         locali2 = numroc( i2, hbl, mycol, jafirst, npcol )
*
*        "A" col defs : main col transforms from LOCALI1 to LOCALM
*
         CALL infog1l( i1, hbl, nprow, myrow, iafirst, locali1, icol1 )
         CALL infog1l( 1, hbl, nprow, myrow, iafirst, localm, icol1 )
         icol1 = numroc( min( m+3, i ), hbl, myrow, iafirst, nprow )
*
*        Which row & column will start the bulges
*
         istartrow = mod( ( m+1 ) / hbl, nprow ) + iafirst
         istartcol = mod( ( m+1 ) / hbl, npcol ) + jafirst
*
         CALL infog1l( m, hbl, nprow, myrow, iafirst, ii, itmp2 )
         CALL infog1l( m, hbl, npcol, mycol, jafirst, jj, itmp2 )
         CALL infog1l( 1, hbl, nprow, myrow, iafirst, istop,
     $                 kp2row( 1 ) )
         kp2row( 1 ) = numroc( m+2, hbl, myrow, iafirst, nprow )
         CALL infog1l( 1, hbl, npcol, mycol, jafirst, istop,
     $                 kp2col( 1 ) )
         kp2col( 1 ) = numroc( m+2, hbl, mycol, jafirst, npcol )
*
*        Set all values for bulges.  All bulges are stored in
*          intermediate steps as loops over KI.  Their current "task"
*          over the global M to I-1 values is always K1(KI) to K2(KI).
*          However, because there are many bulges, K1(KI) & K2(KI) might
*          go past that range while later bulges (KI+1,KI+2,etc..) are
*          finishing up.  Even if ROTN=1, in order to minimize border
*          communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both
*          border messages can be handled at once.
*
*        Rules:
*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2
*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1
*              K2(KI)-K1(KI) <= ROTN
*
*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit
*        it again when MOD(K1(KI)-1,HBL)=HBL-1.
*
         DO 30 ki = 1, nbulge
            k1( ki ) = m
            istop = min( m+rotn-1-mod( m-( m / hbl )*hbl-1, rotn ),
     $              i-2 )
            istop = min( istop, m+hbl-3-mod( m-1, hbl ) )
            istop = min( istop, i2-2 )
            istop = max( istop, m )
            IF( ( mod( m-1, hbl ).EQ.hbl-2 ) .AND.
     $          ( istop.LT.min( i-2, i2-2 ) ) ) THEN
               istop = istop + 1
            END IF
            k2( ki ) = istop
            icurrow( ki ) = istartrow
            icurcol( ki ) = istartcol
            krow( ki ) = ii
            kcol( ki ) = jj
            IF( ki.GT.1 )
     $         kp2row( ki ) = kp2row( 1 )
            IF( ki.GT.1 )
     $         kp2col( ki ) = kp2col( 1 )
   30    CONTINUE
*
*        Get first transform on node who owns M+2,M+2
*
         DO 31 itmp1 = 1, 3
            vcopy(itmp1) = zero
   31    CONTINUE
         itmp1 = istartrow
         itmp2 = istartcol
         CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,
     $                 vcopy )
         v1save = vcopy( 1 )
         v2save = vcopy( 2 )
         v3save = vcopy( 3 )
*
*        The main implicit shift Francis loop over the bulges starts
*           here!
*
         IF( k2( ibulge ).LE.i-1 ) THEN
   40       CONTINUE
            IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )
     $           THEN
               IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+
     $             2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN
                  h44 = s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge )
                  h33 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge-1 )
                  h43h34 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge )*
     $                     s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge-1 )
                  itmp1 = istartrow
                  itmp2 = istartcol
                  CALL pclawil( itmp1, itmp2, m, a, desca, h44, h33,
     $                          h43h34, vcopy )
                  v1save = vcopy( 1 )
                  v2save = vcopy( 2 )
                  v3save = vcopy( 3 )
                  ibulge = ibulge + 1
               END IF
            END IF
*
*        When we hit a border, there are row and column transforms that
*          overlap over several processors and the code gets very
*          "congested."  As a remedy, when we first hit a border, a 6x6
*          *local* matrix is generated on one node (called SMALLA) and
*          work is done on that.  At the end of the border, the data is
*          passed back and everything stays a lot simpler.
*
            DO 120 ki = 1, ibulge
*
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               k = istart
               modkm1 = mod( k-1, hbl )
               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
                  DO 81 itmp1 = 1, 6
                     DO 82 itmp2 = 1, 6
                        smalla(itmp1, itmp2, ki) = zero
   82                CONTINUE
   81             CONTINUE
                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
*
*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4)
*
                     itmp1 = icurrow( ki )
                     itmp2 = icurcol( ki )
                     CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,
     $                             0 )
                  END IF
                  IF( modkm1.EQ.hbl-1 ) THEN
*
*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3)
*
                     CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,
     $                             mycol, irow1, icol1, itmp1, itmp2 )
                     CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,
     $                             0 )
                  END IF
               END IF
*
*
*           CLAHQR used to have a single row application and a single
*              column application to H.  Here we do something a little
*              more clever.  We break each transformation down into 3
*              parts:
*                  1.) The minimum amount of work it takes to determine
*                        a group of ROTN transformations (this is on
*                        the critical path.) (Loops 50-120)
*                  (the data is broadcast now: loops 180-240)
*                  2.) The small work it takes so that each of the rows
*                        and columns is at the same place.  For example,
*                        all ROTN row transforms are all complete
*                        through some column TMP.  (Loops 250-260)
*                  3.) The majority of the row and column transforms
*                        are then applied in a block fashion.
*                        (row transforms are in loops 280-380)
*                        (col transforms are in loops 400-540)
*
*           Each of these three parts is further subdivided into 3
*           parts:
*               A.) Work at the start of a border when
*                       MOD(ISTART-1,HBL) = HBL-2
*               B.) Work at the end of a border when
*                       MOD(ISTART-1,HBL) = HBL-1
*               C.) Work in the middle of the block when
*                       MOD(ISTART-1,HBL) < HBL-2
*
*           Further optimization is met with the boolean SKIP.  A border
*              communication can be broken into several parts for
*              efficient parallelism:
*                 Loop over all the bulges, just sending the data out
*                 Loop over all the bulges, just doing the work
*                 Loop over all the bulges, just sending the data back.
*
*
               IF( ( myrow.EQ.icurrow( ki ) ) .AND.
     $             ( mycol.EQ.icurcol( ki ) ) .AND.
     $             ( modkm1.EQ.hbl-2 ) .AND.
     $             ( istart.LT.min( i-1, istop+1 ) ) ) THEN
                  k = istart
                  nr = min( 3, i-k+1 )
                  IF( k.GT.m ) THEN
                     CALL ccopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )
                  ELSE
                     vcopy( 1 ) = v1save
                     vcopy( 2 ) = v2save
                     vcopy( 3 ) = v3save
                  END IF
                  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
                  IF( k.GT.m ) THEN
                     smalla( 2, 1, ki ) = vcopy( 1 )
                     smalla( 3, 1, ki ) = zero
                     IF( k.LT.i-1 )
     $                  smalla( 4, 1, ki ) = zero
                  ELSE IF( m.GT.l ) THEN
*
*                 Following differs in comparison to pslahqr.
*
                     smalla( 2, 1, ki ) = smalla( 2, 1, ki ) -
     $                                    conjg( t1copy )*
     $                                    smalla( 2, 1, ki )
                  END IF
                  v2 = vcopy( 2 )
                  t2 = t1copy*v2
                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
                  work( vecsidx+( k-1 )*3+3 ) = t1copy
                  IF( nr.EQ.3 ) THEN
*
*                    Do some work so next step is ready...
*
                     t1 = t1copy
                     v3 = vcopy( 3 )
                     t3 = t1*v3
                     itmp1 = min( 6, i2+2-k )
                     itmp2 = max( i1-k+2, 1 )
                     DO 50 j = 2, itmp1
                        sum = conjg( t1 )*smalla( 2, j, ki ) +
     $                        conjg( t2 )*smalla( 3, j, ki ) +
     $                        conjg( t3 )*smalla( 4, j, ki )
                        smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum
                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*v2
                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v3
   50                CONTINUE
                     DO 60 j = itmp2, 5
                        sum = t1*smalla( j, 2, ki ) +
     $                        t2*smalla( j, 3, ki ) +
     $                        t3*smalla( j, 4, ki )
                        smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum
                        smalla( j, 3, ki ) = smalla( j, 3, ki ) -
     $                                       sum*conjg( v2 )
                        smalla( j, 4, ki ) = smalla( j, 4, ki ) -
     $                                       sum*conjg( v3 )
   60                CONTINUE
                  END IF
               END IF
*
               IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.
     $             ( myrow.EQ.icurrow( ki ) ) .AND.
     $             ( mycol.EQ.icurcol( ki ) ) .AND.
     $             ( istart.LE.min( i, istop ) ) ) THEN
                  k = istop
                  nr = min( 3, i-k+1 )
                  IF( k.GT.m ) THEN
                     CALL ccopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )
                  ELSE
                     vcopy( 1 ) = v1save
                     vcopy( 2 ) = v2save
                     vcopy( 3 ) = v3save
                  END IF
                  CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )
                  IF( k.GT.m ) THEN
                     smalla( 3, 2, ki ) = vcopy( 1 )
                     smalla( 4, 2, ki ) = zero
                     IF( k.LT.i-1 )
     $                  smalla( 5, 2, ki ) = zero
*
*                    Set a subdiagonal to zero now if it's possible
*
                     IF( ( k-2.GT.m ) .AND. ( mod( k-1, hbl ).GT.1 ) )
     $                    THEN
                        h11 = smalla( 1, 1, ki )
                        h10 = smalla( 2, 1, ki )
                        h22 = smalla( 2, 2, ki )
                        s = cabs1( h11 ) + cabs1( h22 )
                        IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) ) THEN
                           smalla( 2, 1, ki ) = zero
                        END IF
                     END IF
                  ELSE IF( m.GT.l ) THEN
*
*                 Following differs in comparison to pslahqr.
*
                     smalla( 3, 2, ki ) = smalla( 3, 2, ki ) -
     $                                    conjg( t1copy )*
     $                                    smalla( 3, 2, ki )
                  END IF
                  v2 = vcopy( 2 )
                  t2 = t1copy*v2
                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
                  work( vecsidx+( k-1 )*3+3 ) = t1copy
                  IF( nr.EQ.3 ) THEN
*
*                    Do some work so next step is ready...
*
                     t1 = t1copy
                     v3 = vcopy( 3 )
                     t3 = t1*v3
                     itmp1 = min( 6, i2-k+3 )
                     itmp2 = max( i1-k+3, 1 )
                     DO 70 j = 3, itmp1
                        sum = conjg( t1 )*smalla( 3, j, ki ) +
     $                        conjg( t2 )*smalla( 4, j, ki ) +
     $                        conjg( t3 )*smalla( 5, j, ki )
                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum
                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*v2
                        smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*v3
   70                CONTINUE
                     DO 80 j = itmp2, 6
                        sum = t1*smalla( j, 3, ki ) +
     $                        t2*smalla( j, 4, ki ) +
     $                        t3*smalla( j, 5, ki )
                        smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum
                        smalla( j, 4, ki ) = smalla( j, 4, ki ) -
     $                                       sum*conjg( v2 )
                        smalla( j, 5, ki ) = smalla( j, 5, ki ) -
     $                                       sum*conjg( v3 )
   80                CONTINUE
                  END IF
               END IF
*
               IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.
     $             ( myrow.EQ.icurrow( ki ) ) .AND.
     $             ( right.EQ.icurcol( ki ) ) ) THEN
*
*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  irow1 = krow( ki )
                  icol1 = kcol( ki )
*
*                 The ELSE part of this IF needs an updated VCOPY; this
*                 was not necessary in PSLAHQR.
*
                  IF( istart.GT.m ) THEN
                     vcopy( 1 ) = smalla( 4, 3, ki )
                     vcopy( 2 ) = smalla( 5, 3, ki )
                     vcopy( 3 ) = smalla( 6, 3, ki )
                     nr = min( 3, i-istart+1 )
                     CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
     $                            t1copy )
                     a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
                     a( ( icol1-2 )*lda+irow1+1 ) = zero
                     IF( istart.LT.i-1 ) THEN
                        a( ( icol1-2 )*lda+irow1+2 ) = zero
                     END IF
                  ELSE
*
*                    If NPCOL.NE.1 THEN we need updated VCOPY.
*
                     nr = min( 3, i-istart+1 )
                     IF( npcol.EQ.1 ) THEN
                        vcopy( 1 ) = v1save
                        vcopy( 2 ) = v2save
                        vcopy( 3 ) = v3save
                     ELSE
*
*                    Get updated VCOPY from RIGHT
*
                        CALL cgerv2d( contxt, 3, 1, vcopy, 3, myrow,
     $                                right )
                     END IF
                     CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
     $                            t1copy )
                     IF( m.GT.l ) THEN
*
*                    Following differs in comparison to pslahqr.
*
                        a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*lda+
     $                     irow1 )*conjg( one-t1copy )
                     END IF
                  END IF
               END IF
*
               IF( ( myrow.EQ.icurrow( ki ) ) .AND.
     $             ( mycol.EQ.icurcol( ki ) ) .AND.
     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
     $             1 ) ) ) ) THEN
*
*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  irow1 = krow( ki )
                  icol1 = kcol( ki )
                  DO 110 k = istart, istop
*
*                    Create and do these transforms
*
                     nr = min( 3, i-k+1 )
                     IF( k.GT.m ) THEN
                        IF( mod( k-1, hbl ).EQ.0 ) THEN
                           vcopy( 1 ) = smalla( 4, 3, ki )
                           vcopy( 2 ) = smalla( 5, 3, ki )
                           vcopy( 3 ) = smalla( 6, 3, ki )
                        ELSE
                           vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )
                           vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )
                           IF( nr.EQ.3 ) THEN
                              vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )
                           END IF
                        END IF
                     ELSE
                        vcopy( 1 ) = v1save
                        vcopy( 2 ) = v2save
                        vcopy( 3 ) = v3save
                     END IF
*
*                    Must send up-to-date copy of VCOPY to left.
*
                     IF( npcol.GT.1 .AND. istart.LE.m .AND.
     $                   mod( k-1, hbl ).EQ.0 ) THEN
                        CALL cgesd2d( contxt, 3, 1, vcopy, 3, myrow,
     $                                left )
                     END IF
                     CALL clarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,
     $                            t1copy )
                     IF( k.GT.m ) THEN
                        IF( mod( k-1, hbl ).GT.0 ) THEN
                           a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )
                           a( ( icol1-2 )*lda+irow1+1 ) = zero
                           IF( k.LT.i-1 ) THEN
                              a( ( icol1-2 )*lda+irow1+2 ) = zero
                           END IF
*
*                       Set a subdiagonal to zero now if it's possible
*
                           IF( ( irow1.GT.2 ) .AND. ( icol1.GT.2 ) .AND.
     $                         ( k-2.GT.m ) .AND. ( mod( k-1,
     $                         hbl ).GT.1 ) ) THEN
                              h11 = a( ( icol1-3 )*lda+irow1-2 )
                              h10 = a( ( icol1-3 )*lda+irow1-1 )
                              h22 = a( ( icol1-2 )*lda+irow1-1 )
                              s = cabs1( h11 ) + cabs1( h22 )
                              IF( cabs1( h10 ).LE.max( ulp*s, smlnum ) )
     $                             THEN
                                 a( ( icol1-3 )*lda+irow1-1 ) = zero
                              END IF
                           END IF
                        END IF
                     ELSE IF( m.GT.l ) THEN
                        IF( mod( k-1, hbl ).GT.0 ) THEN
*
*                       Following differs in comparison to pslahqr.
*
                           a( ( icol1-2 )*lda+irow1 ) = a( ( icol1-2 )*
     $                        lda+irow1 )*conjg( one-t1copy )
                        END IF
                     END IF
                     v2 = vcopy( 2 )
                     t2 = t1copy*v2
                     work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )
                     work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )
                     work( vecsidx+( k-1 )*3+3 ) = t1copy
                     t1 = t1copy
                     IF( k.LT.istop ) THEN
*
*                       Do some work so next step is ready...
*
                        v3 = vcopy( 3 )
                        t3 = t1*v3
                        DO 90 j = ( icol1-1 )*lda + irow1,
     $                          ( min( k2( ki )+1, i-1 )+icol1-k-1 )*
     $                          lda + irow1, lda
                           sum = conjg( t1 )*a( j ) +
     $                           conjg( t2 )*a( j+1 ) +
     $                           conjg( t3 )*a( j+2 )
                           a( j ) = a( j ) - sum
                           a( j+1 ) = a( j+1 ) - sum*v2
                           a( j+2 ) = a( j+2 ) - sum*v3
   90                   CONTINUE
                        DO 100 j = irow1 + 1, irow1 + 3
                           sum = t1*a( ( icol1-1 )*lda+j ) +
     $                           t2*a( icol1*lda+j ) +
     $                           t3*a( ( icol1+1 )*lda+j )
                           a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+
     $                        j ) - sum
                           a( icol1*lda+j ) = a( icol1*lda+j ) -
     $                                        sum*conjg( v2 )
                           a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+
     $                        j ) - sum*conjg( v3 )
  100                   CONTINUE
                     END IF
                     irow1 = irow1 + 1
                     icol1 = icol1 + 1
  110             CONTINUE
               END IF
  120       CONTINUE
*
*           First part of applying the transforms is complete.
*           Broadcast of the Householder data is done here.
*
            DO 130 ki = 1, ibulge
*
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
*
*              Broadcast Householder information from the block
*
               IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.
     $             ( istart.LE.istop ) ) THEN
                  IF( mycol.NE.icurcol( ki ) ) THEN
                     CALL cgebr2d( contxt, 'ROW', ' ',
     $                             3*( istop-istart+1 ), 1,
     $                             work( vecsidx+( istart-1 )*3+1 ),
     $                             3*( istop-istart+1 ), myrow,
     $                             icurcol( ki ) )
                  ELSE
                     CALL cgebs2d( contxt, 'ROW', ' ',
     $                             3*( istop-istart+1 ), 1,
     $                             work( vecsidx+( istart-1 )*3+1 ),
     $                             3*( istop-istart+1 ) )
                  END IF
               END IF
  130       CONTINUE
*
*           Now do column transforms and finish work
*
            DO 140 ki = 1, ibulge
*
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
*
               IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.
     $             ( istart.LE.istop ) ) THEN
                  IF( myrow.NE.icurrow( ki ) ) THEN
                     CALL cgebr2d( contxt, 'COL', ' ',
     $                             3*( istop-istart+1 ), 1,
     $                             work( vecsidx+( istart-1 )*3+1 ),
     $                             3*( istop-istart+1 ), icurrow( ki ),
     $                             mycol )
                  ELSE
                     CALL cgebs2d( contxt, 'COL', ' ',
     $                             3*( istop-istart+1 ), 1,
     $                             work( vecsidx+( istart-1 )*3+1 ),
     $                             3*( istop-istart+1 ) )
                  END IF
               END IF
  140       CONTINUE
*
*
*           Now do make up work to have things in block fashion
*
            DO 160 ki = 1, ibulge
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
*
               modkm1 = mod( istart-1, hbl )
               IF( ( myrow.EQ.icurrow( ki ) ) .AND.
     $             ( mycol.EQ.icurcol( ki ) ) .AND.
     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-
     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-
     $             1 ) ) ) ) THEN
*
*                 (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)
*
                  irow1 = krow( ki )
                  icol1 = kcol( ki )
                  DO 150 k = istart, istop
*
*              Catch up on column & border work
*
                     nr = min( 3, i-k+1 )
                     v2 = work( vecsidx+( k-1 )*3+1 )
                     v3 = work( vecsidx+( k-1 )*3+2 )
                     t1 = work( vecsidx+( k-1 )*3+3 )
                     t2 = t1*v2
                     IF( k.LT.istop ) THEN
*
*                 Do some work so next step is ready...
*
                        t3 = t1*v3
                        CALL claref( 'Col', a, lda, .false., z, ldz,
     $                               .false., icol1, icol1, istart,
     $                               istop, min( istart+1, i )-k+irow1,
     $                               irow1, liloz, lihiz,
     $                               work( vecsidx+1 ), v2, v3, t1, t2,
     $                               t3 )
                        irow1 = irow1 + 1
                        icol1 = icol1 + 1
                     ELSE
                        IF( ( nr.EQ.3 ) .AND. ( mod( k-1,
     $                      hbl ).LT.hbl-2 ) ) THEN
                           t3 = t1*v3
                           CALL claref( 'Row', a, lda, .false., z, ldz,
     $                                  .false., irow1, irow1, istart,
     $                                  istop, icol1, min( min( k2( ki )
     $                                  +1, i-1 ), i2 )-k+icol1, liloz,
     $                                  lihiz, work( vecsidx+1 ), v2,
     $                                  v3, t1, t2, t3 )
                        END IF
                     END IF
  150             CONTINUE
               END IF
*
*           Send SMALLA back again.
*
               k = istart
               modkm1 = mod( k-1, hbl )
               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN
                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN
*
*                    Copy SMALLA (order at most 6) back to global
*                    A(K-1:K+4,K-1:K+4)
*
                     itmp1 = icurrow( ki )
                     itmp2 = icurcol( ki )
                     CALL pclacp3( min( 6, n-k+2 ), k-1, a, desca,
     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,
     $                             1 )
*
                  END IF
                  IF( modkm1.EQ.hbl-1 ) THEN
*
*                    Copy SMALLA (order at most 6) back to global
*                    A(K-2:K+3,K-2:K+3)
*
                     itmp1 = icurrow( ki )
                     itmp2 = icurcol( ki )
                     CALL pclacp3( min( 6, n-k+3 ), k-2, a, desca,
     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,
     $                             1 )
                  END IF
               END IF
*
  160       CONTINUE
*
  170       CONTINUE
*
*           Now start major set of block ROW reflections
*
            DO 180 ki = 1, ibulge
               IF( ( myrow.NE.icurrow( ki ) ) .AND.
     $             ( down.NE.icurrow( ki ) ) )GO TO 180
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
*
               IF( ( istop.GT.istart ) .AND.
     $             ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.
     $             ( icurrow( ki ).EQ.myrow ) ) THEN
                  irow1 = min( k2( ki )+1, i-1 ) + 1
                  CALL infog1l( irow1, hbl, npcol, mycol, jafirst,
     $                          itmp1, itmp2 )
                  itmp2 = locali2
                  ii = krow( ki )
                  CALL claref( 'Row', a, lda, wantz, z, ldz, .true., ii,
     $                         ii, istart, istop, itmp1, itmp2, liloz,
     $                         lihiz, work( vecsidx+1 ), v2, v3, t1, t2,
     $                         t3 )
               END IF
  180       CONTINUE
*
            DO 220 ki = 1, ibulge
               IF( krow( ki ).GT.kp2row( ki ) )
     $            GO TO 220
               IF( ( myrow.NE.icurrow( ki ) ) .AND.
     $             ( down.NE.icurrow( ki ) ) )GO TO 220
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( ( istart.EQ.istop ) .OR.
     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
     $             ( icurrow( ki ).NE.myrow ) ) THEN
                  DO 210 k = istart, istop
                     v2 = work( vecsidx+( k-1 )*3+1 )
                     v3 = work( vecsidx+( k-1 )*3+2 )
                     t1 = work( vecsidx+( k-1 )*3+3 )
                     nr = min( 3, i-k+1 )
                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
     $                   kp2row( ki ) ) ) THEN
                        IF( ( k.LT.istop ) .AND.
     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                           itmp1 = min( k2( ki )+1, i-1 ) + 1
                        ELSE
                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                              itmp1 = min( k2( ki )+1, i-1 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                              itmp1 = min( k+4, i2 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                              itmp1 = min( k+3, i2 ) + 1
                           END IF
                        END IF
*
*                    Find local coordinates of rows K through K+2
*
                        irow1 = krow( ki )
                        irow2 = kp2row( ki )
                        IF( ( k.GT.istart ) .AND.
     $                      ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
                           IF( down.EQ.icurrow( ki ) ) THEN
                              irow1 = irow1 + 1
                           END IF
                           IF( myrow.EQ.icurrow( ki ) ) THEN
                              irow2 = irow2 + 1
                           END IF
                        END IF
                        CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
     $                                icol1, icol2 )
                        icol2 = locali2
                        IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.
     $                      ( nprow.EQ.1 ) ) THEN
                           t2 = t1*v2
                           t3 = t1*v3
                           CALL claref( 'Row', a, lda, wantz, z, ldz,
     $                                  .false., irow1, irow1, istart,
     $                                  istop, icol1, icol2, liloz,
     $                                  lihiz, work( vecsidx+1 ), v2,
     $                                  v3, t1, t2, t3 )
                        END IF
                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.NE.irow2 ) THEN
                              CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                      a( ( icol1-1 )*lda+irow1 ),
     $                                      lda, down, mycol )
                              IF( skip .AND. ( istart.EQ.istop ) ) THEN
                                 CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                         a( ( icol1-1 )*lda+
     $                                         irow1 ), lda, down,
     $                                         mycol )
                              END IF
                           ELSE IF( skip ) THEN
                              CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                      work( irbuf+1 ), 2, up,
     $                                      mycol )
                              t2 = t1*v2
                              t3 = t1*v3
                              DO 190 j = icol1, icol2
                                 sum = conjg( t1 )*
     $                                 work( irbuf+2*( j-icol1 )+1 ) +
     $                                 conjg( t2 )*work( irbuf+2*
     $                                 ( j-icol1 )+2 ) +
     $                                 conjg( t3 )*a( ( j-1 )*lda+
     $                                 irow1 )
                                 work( irbuf+2*( j-icol1 )+1 )
     $                              = work( irbuf+2*( j-icol1 )+1 ) -
     $                              sum
                                 work( irbuf+2*( j-icol1 )+2 )
     $                              = work( irbuf+2*( j-icol1 )+2 ) -
     $                              sum*v2
                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
     $                              lda+irow1 ) - sum*v3
  190                         CONTINUE
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                         work( irbuf+1 ), 2, up,
     $                                         mycol )
                              END IF
                           END IF
                        END IF
                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.EQ.irow2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                         a( ( icol1-1 )*lda+irow1-
     $                                         1 ), lda, down, mycol )
                              END IF
                              IF( skip ) THEN
                                 CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                         a( ( icol1-1 )*lda+irow1-
     $                                         1 ), lda, down, mycol )
                              END IF
                           ELSE IF( skip ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                         work( irbuf+1 ), 2, up,
     $                                         mycol )
                              END IF
                              t2 = t1*v2
                              t3 = t1*v3
                              DO 200 j = icol1, icol2
                                 sum = conjg( t1 )*
     $                                 work( irbuf+2*( j-icol1 )+2 ) +
     $                                 conjg( t2 )*a( ( j-1 )*lda+
     $                                 irow1 ) + conjg( t3 )*
     $                                 a( ( j-1 )*lda+irow1+1 )
                                 work( irbuf+2*( j-icol1 )+2 )
     $                              = work( irbuf+2*( j-icol1 )+2 ) -
     $                              sum
                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
     $                              lda+irow1 ) - sum*v2
                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
     $                              lda+irow1+1 ) - sum*v3
  200                         CONTINUE
                              CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                      work( irbuf+1 ), 2, up,
     $                                      mycol )
*
                           END IF
                        END IF
                     END IF
  210             CONTINUE
               END IF
  220       CONTINUE
*
            IF( skip )
     $         GO TO 290
*
            DO 260 ki = 1, ibulge
               IF( krow( ki ).GT.kp2row( ki ) )
     $            GO TO 260
               IF( ( myrow.NE.icurrow( ki ) ) .AND.
     $             ( down.NE.icurrow( ki ) ) )GO TO 260
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( ( istart.EQ.istop ) .OR.
     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
     $             ( icurrow( ki ).NE.myrow ) ) THEN
                  DO 250 k = istart, istop
                     v2 = work( vecsidx+( k-1 )*3+1 )
                     v3 = work( vecsidx+( k-1 )*3+2 )
                     t1 = work( vecsidx+( k-1 )*3+3 )
                     nr = min( 3, i-k+1 )
                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
     $                   kp2row( ki ) ) ) THEN
                        IF( ( k.LT.istop ) .AND.
     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                           itmp1 = min( k2( ki )+1, i-1 ) + 1
                        ELSE
                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                              itmp1 = min( k2( ki )+1, i-1 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                              itmp1 = min( k+4, i2 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                              itmp1 = min( k+3, i2 ) + 1
                           END IF
                        END IF
*
*                    Find local coordinates of rows K through K+2
*
                        irow1 = krow( ki )
                        irow2 = kp2row( ki )
                        IF( ( k.GT.istart ) .AND.
     $                      ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
                           IF( down.EQ.icurrow( ki ) ) THEN
                              irow1 = irow1 + 1
                           END IF
                           IF( myrow.EQ.icurrow( ki ) ) THEN
                              irow2 = irow2 + 1
                           END IF
                        END IF
                        CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
     $                                icol1, icol2 )
                        icol2 = locali2
                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.EQ.irow2 ) THEN
                              CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                      work( irbuf+1 ), 2, up,
     $                                      mycol )
                              t2 = t1*v2
                              t3 = t1*v3
                              DO 230 j = icol1, icol2
                                 sum = conjg( t1 )*
     $                                 work( irbuf+2*( j-icol1 )+1 ) +
     $                                 conjg( t2 )*work( irbuf+2*
     $                                 ( j-icol1 )+2 ) +
     $                                 conjg( t3 )*a( ( j-1 )*lda+
     $                                 irow1 )
                                 work( irbuf+2*( j-icol1 )+1 )
     $                              = work( irbuf+2*( j-icol1 )+1 ) -
     $                              sum
                                 work( irbuf+2*( j-icol1 )+2 )
     $                              = work( irbuf+2*( j-icol1 )+2 ) -
     $                              sum*v2
                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
     $                              lda+irow1 ) - sum*v3
  230                         CONTINUE
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                         work( irbuf+1 ), 2, up,
     $                                         mycol )
                              END IF
                           END IF
                        END IF
                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.NE.irow2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                         work( irbuf+1 ), 2, up,
     $                                         mycol )
                              END IF
                              t2 = t1*v2
                              t3 = t1*v3
                              DO 240 j = icol1, icol2
                                 sum = conjg( t1 )*
     $                                 work( irbuf+2*( j-icol1 )+2 ) +
     $                                 conjg( t2 )*a( ( j-1 )*lda+
     $                                 irow1 ) + conjg( t3 )*
     $                                 a( ( j-1 )*lda+irow1+1 )
                                 work( irbuf+2*( j-icol1 )+2 )
     $                              = work( irbuf+2*( j-icol1 )+2 ) -
     $                              sum
                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*
     $                              lda+irow1 ) - sum*v2
                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*
     $                              lda+irow1+1 ) - sum*v3
  240                         CONTINUE
                              CALL cgesd2d( contxt, 2, icol2-icol1+1,
     $                                      work( irbuf+1 ), 2, up,
     $                                      mycol )
                           END IF
                        END IF
                     END IF
  250             CONTINUE
               END IF
  260       CONTINUE
*
            DO 280 ki = 1, ibulge
               IF( krow( ki ).GT.kp2row( ki ) )
     $            GO TO 280
               IF( ( myrow.NE.icurrow( ki ) ) .AND.
     $             ( down.NE.icurrow( ki ) ) )GO TO 280
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( ( istart.EQ.istop ) .OR.
     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.
     $             ( icurrow( ki ).NE.myrow ) ) THEN
                  DO 270 k = istart, istop
                     v2 = work( vecsidx+( k-1 )*3+1 )
                     v3 = work( vecsidx+( k-1 )*3+2 )
                     t1 = work( vecsidx+( k-1 )*3+3 )
                     nr = min( 3, i-k+1 )
                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.
     $                   kp2row( ki ) ) ) THEN
                        IF( ( k.LT.istop ) .AND.
     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                           itmp1 = min( k2( ki )+1, i-1 ) + 1
                        ELSE
                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                              itmp1 = min( k2( ki )+1, i-1 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                              itmp1 = min( k+4, i2 ) + 1
                           END IF
                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                              itmp1 = min( k+3, i2 ) + 1
                           END IF
                        END IF
*
*                    Find local coordinates of rows K through K+2
*
                        irow1 = krow( ki )
                        irow2 = kp2row( ki )
                        IF( ( k.GT.istart ) .AND.
     $                      ( mod( k-1, hbl ).GE.hbl-2 ) ) THEN
                           IF( down.EQ.icurrow( ki ) ) THEN
                              irow1 = irow1 + 1
                           END IF
                           IF( myrow.EQ.icurrow( ki ) ) THEN
                              irow2 = irow2 + 1
                           END IF
                        END IF
                        CALL infog1l( itmp1, hbl, npcol, mycol, jafirst,
     $                                icol1, icol2 )
                        icol2 = locali2
                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.NE.irow2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                         a( ( icol1-1 )*lda+
     $                                         irow1 ), lda, down,
     $                                         mycol )
                              END IF
                           END IF
                        END IF
                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                      ( nprow.GT.1 ) ) THEN
                           IF( irow1.EQ.irow2 ) THEN
                              CALL cgerv2d( contxt, 2, icol2-icol1+1,
     $                                      a( ( icol1-1 )*lda+irow1-
     $                                      1 ), lda, down, mycol )
                           END IF
                        END IF
                     END IF
  270             CONTINUE
               END IF
  280       CONTINUE
*
  290       CONTINUE
*
*           Now start major set of block COL reflections
*
            DO 300 ki = 1, ibulge
               IF( ( mycol.NE.icurcol( ki ) ) .AND.
     $             ( right.NE.icurcol( ki ) ) )GO TO 300
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
*
               IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.
     $             1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.
     $             ( i-istop+1.GE.3 ) ) THEN
                  k = istart
                  IF( ( k.LT.istop ) .AND. ( mod( k-1,
     $                hbl ).LT.hbl-2 ) ) THEN
                     itmp1 = min( istart+1, i ) - 1
                  ELSE
                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                        itmp1 = min( k+3, i )
                     END IF
                     IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                        itmp1 = max( i1, k-1 ) - 1
                     END IF
                     IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                        itmp1 = max( i1, k-2 ) - 1
                     END IF
                  END IF
*
                  icol1 = kcol( ki )
                  CALL infog1l( i1, hbl, nprow, myrow, iafirst, irow1,
     $                          irow2 )
                  irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
                  IF( irow1.LE.irow2 ) THEN
                     itmp2 = irow2
                  ELSE
                     itmp2 = -1
                  END IF
                  CALL claref( 'Col', a, lda, wantz, z, ldz, .true.,
     $                         icol1, icol1, istart, istop, irow1,
     $                         irow2, liloz, lihiz, work( vecsidx+1 ),
     $                         v2, v3, t1, t2, t3 )
                  k = istop
                  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
*
*                 Do from ITMP1+1 to MIN(K+3,I)
*
                     IF( mod( k-1, hbl ).LT.hbl-3 ) THEN
                        irow1 = itmp2 + 1
                        IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )
     $                       THEN
                           IF( itmp2.GT.0 ) THEN
                              irow2 = itmp2 + min( k+3, i ) - itmp1
                           ELSE
                              irow2 = irow1 - 1
                           END IF
                        ELSE
                           irow2 = irow1 - 1
                        END IF
                     ELSE
                        CALL infog1l( itmp1+1, hbl, nprow, myrow,
     $                                iafirst, irow1, irow2 )
                        irow2 = numroc( min( k+3, i ), hbl, myrow,
     $                          iafirst, nprow )
                     END IF
                     v2 = work( vecsidx+( k-1 )*3+1 )
                     v3 = work( vecsidx+( k-1 )*3+2 )
                     t1 = work( vecsidx+( k-1 )*3+3 )
                     t2 = t1*v2
                     t3 = t1*v3
                     icol1 = kcol( ki ) + istop - istart
                     CALL claref( 'Col', a, lda, .false., z, ldz,
     $                            .false., icol1, icol1, istart, istop,
     $                            irow1, irow2, liloz, lihiz,
     $                            work( vecsidx+1 ), v2, v3, t1, t2,
     $                            t3 )
                  END IF
               END IF
  300       CONTINUE
*
            DO 360 ki = 1, ibulge
               IF( kcol( ki ).GT.kp2col( ki ) )
     $            GO TO 360
               IF( ( mycol.NE.icurcol( ki ) ) .AND.
     $             ( right.NE.icurcol( ki ) ) )GO TO 360
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
*
*              Information is found in a buffer
*
                  ispec = 1
               ELSE
*
*              All information is local
*
                  ispec = 0
               END IF
               DO 350 k = istart, istop
*
                  v2 = work( vecsidx+( k-1 )*3+1 )
                  v3 = work( vecsidx+( k-1 )*3+2 )
                  t1 = work( vecsidx+( k-1 )*3+3 )
                  nr = min( 3, i-k+1 )
                  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
     $                 THEN
*
                     IF( ( k.LT.istop ) .AND.
     $                   ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                        itmp1 = min( istart+1, i ) - 1
                     ELSE
                        IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                           itmp1 = min( k+3, i )
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           itmp1 = max( i1, k-1 ) - 1
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           itmp1 = max( i1, k-2 ) - 1
                        END IF
                     END IF
                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                        icol1 = kcol( ki ) + k - istart
                        icol2 = kp2col( ki ) + k - istart
                     ELSE
                        icol1 = kcol( ki )
                        icol2 = kp2col( ki )
                        IF( k.GT.istart ) THEN
                           IF( right.EQ.icurcol( ki ) ) THEN
                              icol1 = icol1 + 1
                           END IF
                           IF( mycol.EQ.icurcol( ki ) ) THEN
                              icol2 = icol2 + 1
                           END IF
                        END IF
                     END IF
                     CALL infog1l( i1, hbl, nprow, myrow, iafirst,
     $                             irow1, irow2 )
                     irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
                     IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.NE.icol2 ) THEN
                           CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                   a( ( icol1-1 )*lda+irow1 ),
     $                                   lda, myrow, right )
                           IF( ( istart.EQ.istop ) .AND. skip ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      a( ( icol1-1 )*lda+irow1 ),
     $                                      lda, myrow, right )
                           END IF
                        ELSE IF( skip ) THEN
                           t2 = t1*v2
                           t3 = t1*v3
                           CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                   work( icbuf+1 ), irow2-irow1+1,
     $                                   myrow, left )
                           ii = icbuf - irow1 + 1
                           jj = icbuf + irow2 - 2*irow1 + 2
                           DO 310 j = irow1, irow2
                              sum = t1*work( ii+j ) + t2*work( jj+j ) +
     $                              t3*a( ( icol1-1 )*lda+j )
                              work( ii+j ) = work( ii+j ) - sum
                              work( jj+j ) = work( jj+j ) -
     $                                       sum*conjg( v2 )
                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
     $                           lda+j ) - sum*conjg( v3 )
  310                      CONTINUE
                           IF( istart.EQ.istop ) THEN
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      work( icbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                        END IF
                     END IF
                     IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.EQ.icol2 ) THEN
                           IF( istart.EQ.istop ) THEN
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      a( ( icol1-2 )*lda+irow1 ),
     $                                      lda, myrow, right )
                           END IF
                           IF( skip ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      a( ( icol1-2 )*lda+irow1 ),
     $                                      lda, myrow, right )
                           END IF
                        ELSE IF( skip ) THEN
                           IF( istart.EQ.istop ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      work( icbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                           t2 = t1*v2
                           t3 = t1*v3
                           ii = icbuf + irow2 - 2*irow1 + 2
                           DO 320 j = irow1, irow2
                              sum = t1*work( j+ii ) +
     $                              t2*a( ( icol1-1 )*lda+j ) +
     $                              t3*a( icol1*lda+j )
                              work( j+ii ) = work( j+ii ) - sum
                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
     $                           lda+j ) - sum*conjg( v2 )
                              a( icol1*lda+j ) = a( icol1*lda+j ) -
     $                                           sum*conjg( v3 )
  320                      CONTINUE
                           CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                   work( icbuf+1 ), irow2-irow1+1,
     $                                   myrow, left )
                        END IF
                     END IF
*
*                    If we want Z and we haven't already done any Z
*
                     IF( ( wantz ) .AND. ( mod( k-1,
     $                   hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        irow1 = liloz
                        irow2 = lihiz
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           IF( icol1.NE.icol2 ) THEN
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      z( ( icol1-1 )*ldz+irow1 ),
     $                                      ldz, myrow, right )
                              IF( ( istart.EQ.istop ) .AND. skip ) THEN
                                 CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                         z( ( icol1-1 )*ldz+
     $                                         irow1 ), ldz, myrow,
     $                                         right )
                              END IF
                           ELSE IF( skip ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      work( izbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                              t2 = t1*v2
                              t3 = t1*v3
                              icol1 = ( icol1-1 )*ldz
                              ii = izbuf - irow1 + 1
                              jj = izbuf + irow2 - 2*irow1 + 2
                              DO 330 j = irow1, irow2
                                 sum = t1*work( ii+j ) +
     $                                 t2*work( jj+j ) + t3*z( icol1+j )
                                 work( ii+j ) = work( ii+j ) - sum
                                 work( jj+j ) = work( jj+j ) -
     $                                          sum*conjg( v2 )
                                 z( icol1+j ) = z( icol1+j ) -
     $                                          sum*conjg( v3 )
  330                         CONTINUE
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                         work( izbuf+1 ),
     $                                         irow2-irow1+1, myrow,
     $                                         left )
                              END IF
                           END IF
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           IF( icol1.EQ.icol2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                         z( ( icol1-2 )*ldz+
     $                                         irow1 ), ldz, myrow,
     $                                         right )
                              END IF
                              IF( skip ) THEN
                                 CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                         z( ( icol1-2 )*ldz+
     $                                         irow1 ), ldz, myrow,
     $                                         right )
                              END IF
                           ELSE IF( skip ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                         work( izbuf+1 ),
     $                                         irow2-irow1+1, myrow,
     $                                         left )
                              END IF
                              t2 = t1*v2
                              t3 = t1*v3
                              icol1 = ( icol1-1 )*ldz
                              ii = izbuf + irow2 - 2*irow1 + 2
                              DO 340 j = irow1, irow2
                                 sum = t1*work( ii+j ) +
     $                                 t2*z( j+icol1 ) +
     $                                 t3*z( j+icol1+ldz )
                                 work( ii+j ) = work( ii+j ) - sum
                                 z( j+icol1 ) = z( j+icol1 ) -
     $                                          sum*conjg( v2 )
                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
     $                                              sum*conjg( v3 )
  340                         CONTINUE
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      work( izbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                        END IF
                     END IF
                  END IF
  350          CONTINUE
  360       CONTINUE
*
            IF( skip )
     $         GO TO 450
*
            DO 420 ki = 1, ibulge
               IF( kcol( ki ).GT.kp2col( ki ) )
     $            GO TO 420
               IF( ( mycol.NE.icurcol( ki ) ) .AND.
     $             ( right.NE.icurcol( ki ) ) )GO TO 420
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
*
*                 Information is found in a buffer
*
                  ispec = 1
               ELSE
*
*                 All information is local
*
                  ispec = 0
               END IF
               DO 410 k = istart, istop
*
                  v2 = work( vecsidx+( k-1 )*3+1 )
                  v3 = work( vecsidx+( k-1 )*3+2 )
                  t1 = work( vecsidx+( k-1 )*3+3 )
                  nr = min( 3, i-k+1 )
                  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
     $                 THEN
*
                     IF( ( k.LT.istop ) .AND.
     $                   ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                        itmp1 = min( istart+1, i ) - 1
                     ELSE
                        IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                           itmp1 = min( k+3, i )
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           itmp1 = max( i1, k-1 ) - 1
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           itmp1 = max( i1, k-2 ) - 1
                        END IF
                     END IF
                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                        icol1 = kcol( ki ) + k - istart
                        icol2 = kp2col( ki ) + k - istart
                     ELSE
                        icol1 = kcol( ki )
                        icol2 = kp2col( ki )
                        IF( k.GT.istart ) THEN
                           IF( right.EQ.icurcol( ki ) ) THEN
                              icol1 = icol1 + 1
                           END IF
                           IF( mycol.EQ.icurcol( ki ) ) THEN
                              icol2 = icol2 + 1
                           END IF
                        END IF
                     END IF
                     CALL infog1l( i1, hbl, nprow, myrow, iafirst,
     $                             irow1, irow2 )
                     irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
                     IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.EQ.icol2 ) THEN
                           CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                   work( icbuf+1 ), irow2-irow1+1,
     $                                   myrow, left )
                           t2 = t1*v2
                           t3 = t1*v3
                           ii = icbuf - irow1 + 1
                           jj = icbuf + irow2 - 2*irow1 + 2
                           DO 370 j = irow1, irow2
                              sum = t1*work( ii+j ) + t2*work( jj+j ) +
     $                              t3*a( ( icol1-1 )*lda+j )
                              work( ii+j ) = work( ii+j ) - sum
                              work( jj+j ) = work( jj+j ) -
     $                                       sum*conjg( v2 )
                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
     $                           lda+j ) - sum*conjg( v3 )
  370                      CONTINUE
                           IF( istart.EQ.istop ) THEN
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      work( icbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                        END IF
                     END IF
                     IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.NE.icol2 ) THEN
                           IF( istart.EQ.istop ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      work( icbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                           t2 = t1*v2
                           t3 = t1*v3
                           ii = icbuf + irow2 - 2*irow1 + 2
                           DO 380 j = irow1, irow2
                              sum = t1*work( j+ii ) +
     $                              t2*a( ( icol1-1 )*lda+j ) +
     $                              t3*a( icol1*lda+j )
                              work( j+ii ) = work( j+ii ) - sum
                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*
     $                           lda+j ) - sum*conjg( v2 )
                              a( icol1*lda+j ) = a( icol1*lda+j ) -
     $                                           sum*conjg( v3 )
  380                      CONTINUE
                           CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                   work( icbuf+1 ), irow2-irow1+1,
     $                                   myrow, left )
                        END IF
                     END IF
*
*                    If we want Z and we haven't already done any Z
*
                     IF( ( wantz ) .AND. ( mod( k-1,
     $                   hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
*
*                    Accumulate transformations in the matrix Z
*
                        irow1 = liloz
                        irow2 = lihiz
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           IF( icol1.EQ.icol2 ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      work( izbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                              t2 = t1*v2
                              t3 = t1*v3
                              icol1 = ( icol1-1 )*ldz
                              ii = izbuf - irow1 + 1
                              jj = izbuf + irow2 - 2*irow1 + 2
                              DO 390 j = irow1, irow2
                                 sum = t1*work( ii+j ) +
     $                                 t2*work( jj+j ) + t3*z( icol1+j )
                                 work( ii+j ) = work( ii+j ) - sum
                                 work( jj+j ) = work( jj+j ) -
     $                                          sum*conjg( v2 )
                                 z( icol1+j ) = z( icol1+j ) -
     $                                          sum*conjg( v3 )
  390                         CONTINUE
                              IF( istart.EQ.istop ) THEN
                                 CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                         work( izbuf+1 ),
     $                                         irow2-irow1+1, myrow,
     $                                         left )
                              END IF
                           END IF
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           IF( icol1.NE.icol2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                         work( izbuf+1 ),
     $                                         irow2-irow1+1, myrow,
     $                                         left )
                              END IF
                              t2 = t1*v2
                              t3 = t1*v3
                              icol1 = ( icol1-1 )*ldz
                              ii = izbuf + irow2 - 2*irow1 + 2
                              DO 400 j = irow1, irow2
                                 sum = t1*work( ii+j ) +
     $                                 t2*z( j+icol1 ) +
     $                                 t3*z( j+icol1+ldz )
                                 work( ii+j ) = work( ii+j ) - sum
                                 z( j+icol1 ) = z( j+icol1 ) -
     $                                          sum*conjg( v2 )
                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -
     $                                              sum*conjg( v3 )
  400                         CONTINUE
                              CALL cgesd2d( contxt, irow2-irow1+1, 2,
     $                                      work( izbuf+1 ),
     $                                      irow2-irow1+1, myrow, left )
                           END IF
                        END IF
                     END IF
                  END IF
  410          CONTINUE
  420       CONTINUE
*
            DO 440 ki = 1, ibulge
               IF( kcol( ki ).GT.kp2col( ki ) )
     $            GO TO 440
               IF( ( mycol.NE.icurcol( ki ) ) .AND.
     $             ( right.NE.icurcol( ki ) ) )GO TO 440
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
*
*              Information is found in a buffer
*
                  ispec = 1
               ELSE
*
*              All information is local
*
                  ispec = 0
               END IF
               DO 430 k = istart, istop
*
                  v2 = work( vecsidx+( k-1 )*3+1 )
                  v3 = work( vecsidx+( k-1 )*3+2 )
                  t1 = work( vecsidx+( k-1 )*3+3 )
                  nr = min( 3, i-k+1 )
                  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )
     $                 THEN
*
                     IF( ( k.LT.istop ) .AND.
     $                   ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN
                        itmp1 = min( istart+1, i ) - 1
                     ELSE
                        IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                           itmp1 = min( k+3, i )
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           itmp1 = max( i1, k-1 ) - 1
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           itmp1 = max( i1, k-2 ) - 1
                        END IF
                     END IF
                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN
                        icol1 = kcol( ki ) + k - istart
                        icol2 = kp2col( ki ) + k - istart
                     ELSE
                        icol1 = kcol( ki )
                        icol2 = kp2col( ki )
                        IF( k.GT.istart ) THEN
                           IF( right.EQ.icurcol( ki ) ) THEN
                              icol1 = icol1 + 1
                           END IF
                           IF( mycol.EQ.icurcol( ki ) ) THEN
                              icol2 = icol2 + 1
                           END IF
                        END IF
                     END IF
                     CALL infog1l( i1, hbl, nprow, myrow, iafirst,
     $                             irow1, irow2 )
                     irow2 = numroc( itmp1, hbl, myrow, iafirst, nprow )
                     IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.NE.icol2 ) THEN
                           IF( istart.EQ.istop ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      a( ( icol1-1 )*lda+irow1 ),
     $                                      lda, myrow, right )
                           END IF
                        END IF
                     END IF
                     IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                   ( npcol.GT.1 ) ) THEN
                        IF( icol1.EQ.icol2 ) THEN
                           CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                   a( ( icol1-2 )*lda+irow1 ),
     $                                   lda, myrow, right )
                        END IF
                     END IF
*
*                    If we want Z and we haven't already done any Z
*
                     IF( ( wantz ) .AND. ( mod( k-1,
     $                   hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        irow1 = liloz
                        irow2 = lihiz
                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN
                           IF( icol1.NE.icol2 ) THEN
                              IF( istart.EQ.istop ) THEN
                                 CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                         z( ( icol1-1 )*ldz+
     $                                         irow1 ), ldz, myrow,
     $                                         right )
                              END IF
                           END IF
                        END IF
                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                           IF( icol1.EQ.icol2 ) THEN
                              CALL cgerv2d( contxt, irow2-irow1+1, 2,
     $                                      z( ( icol1-2 )*ldz+irow1 ),
     $                                      ldz, myrow, right )
                           END IF
                        END IF
                     END IF
                  END IF
  430          CONTINUE
  440       CONTINUE
*
*           Column work done
*
  450       CONTINUE
*
*           Now do NR=2 work
*
            DO 530 ki = 1, ibulge
               istart = max( k1( ki ), m )
               istop = min( k2( ki ), i-1 )
               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN
*
*                 INFO is found in a buffer
*
                  ispec = 1
               ELSE
*
*                 All INFO is local
*
                  ispec = 0
               END IF
*
               DO 520 k = istart, istop
*
                  v2 = work( vecsidx+( k-1 )*3+1 )
                  v3 = work( vecsidx+( k-1 )*3+2 )
                  t1 = work( vecsidx+( k-1 )*3+3 )
                  nr = min( 3, i-k+1 )
                  IF( nr.EQ.2 ) THEN
                     IF ( icurrow( ki ).EQ.myrow ) THEN
                        t2 = t1*v2
                     END IF
                     IF ( icurcol( ki ).EQ.mycol ) THEN
                        t2 = t1*v2
                     END IF
*
*              Apply G from the left to transform the rows of the matrix
*              in columns K to I2.
*
                     CALL infog1l( k, hbl, npcol, mycol, jafirst, liloh,
     $                             lihih )
                     lihih = locali2
                     CALL infog1l( 1, hbl, nprow, myrow, iafirst, itmp2,
     $                             itmp1 )
                     itmp1 = numroc( k+1, hbl, myrow, iafirst, nprow )
                     IF( icurrow( ki ).EQ.myrow ) THEN
                        IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.
     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
                           itmp1 = itmp1 - 1
                           DO 460 j = ( liloh-1 )*lda,
     $                             ( lihih-1 )*lda, lda
                              sum = conjg( t1 )*a( itmp1+j ) +
     $                              conjg( t2 )*a( itmp1+1+j )
                              a( itmp1+j ) = a( itmp1+j ) - sum
                              a( itmp1+1+j ) = a( itmp1+1+j ) - sum*v2
  460                      CONTINUE
                        ELSE
                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                              CALL cgerv2d( contxt, 1, lihih-liloh+1,
     $                                      work( irbuf+1 ), 1, up,
     $                                      mycol )
                              DO 470 j = liloh, lihih
                                 sum = conjg( t1 )*
     $                                 work( irbuf+j-liloh+1 ) +
     $                                 conjg( t2 )*a( ( j-1 )*lda+
     $                                 itmp1 )
                                 work( irbuf+j-liloh+1 ) = work( irbuf+
     $                              j-liloh+1 ) - sum
                                 a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*
     $                              lda+itmp1 ) - sum*v2
  470                         CONTINUE
                              CALL cgesd2d( contxt, 1, lihih-liloh+1,
     $                                      work( irbuf+1 ), 1, up,
     $                                      mycol )
                           END IF
                        END IF
                     ELSE
                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                      ( icurrow( ki ).EQ.down ) ) THEN
                           CALL cgesd2d( contxt, 1, lihih-liloh+1,
     $                                   a( ( liloh-1 )*lda+itmp1 ),
     $                                   lda, down, mycol )
                           CALL cgerv2d( contxt, 1, lihih-liloh+1,
     $                                   a( ( liloh-1 )*lda+itmp1 ),
     $                                   lda, down, mycol )
                        END IF
                     END IF
*
*              Apply G from the right to transform the columns of the
*              matrix in rows I1 to MIN(K+3,I).
*
                     CALL infog1l( i1, hbl, nprow, myrow, iafirst,
     $                             liloh, lihih )
                     lihih = numroc( i, hbl, myrow, iafirst, nprow )
*
                     IF( icurcol( ki ).EQ.mycol ) THEN
*                       LOCAL A(LILOZ:LIHIZ,KCOL:KCOL+2)
                        IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
                           CALL infog1l( k, hbl, npcol, mycol, jafirst,
     $                                   itmp1, itmp2 )
                           itmp2 = numroc( k+1, hbl, mycol, jafirst,
     $                             npcol )
                           DO 480 j = liloh, lihih
                              sum = t1*a( ( itmp1-1 )*lda+j ) +
     $                              t2*a( itmp1*lda+j )
                              a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*
     $                           lda+j ) - sum
                              a( itmp1*lda+j ) = a( itmp1*lda+j ) -
     $                                           sum*conjg( v2 )
  480                      CONTINUE
                        ELSE
                           itmp1 = kcol( ki )
                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                              CALL cgerv2d( contxt, lihih-liloh+1, 1,
     $                                      work( icbuf+1 ),
     $                                      lihih-liloh+1, myrow, left )
                              DO 490 j = liloh, lihih
                                 sum = t1*work( icbuf+j ) +
     $                                 t2*a( ( itmp1-1 )*lda+j )
                                 work( icbuf+j ) = work( icbuf+j ) - sum
                                 a( ( itmp1-1 )*lda+j )
     $                              = a( ( itmp1-1 )*lda+j ) -
     $                              sum*conjg( v2 )
  490                         CONTINUE
                              CALL cgesd2d( contxt, lihih-liloh+1, 1,
     $                                      work( icbuf+1 ),
     $                                      lihih-liloh+1, myrow, left )
                           END IF
                        END IF
                     ELSE
                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                      ( icurcol( ki ).EQ.right ) ) THEN
                           itmp1 = kcol( ki )
                           CALL cgesd2d( contxt, lihih-liloh+1, 1,
     $                                   a( ( itmp1-1 )*lda+liloh ),
     $                                   lda, myrow, right )
                           CALL infog1l( k, hbl, npcol, mycol, jafirst,
     $                                   itmp1, itmp2 )
                           itmp2 = numroc( k+1, hbl, mycol, jafirst,
     $                             npcol )
                           CALL cgerv2d( contxt, lihih-liloh+1, 1,
     $                                   a( ( itmp1-1 )*lda+liloh ),
     $                                   lda, myrow, right )
                        END IF
                     END IF
*
                     IF( wantz ) THEN
*
*                       Accumulate transformations in the matrix Z
*
                        IF( icurcol( ki ).EQ.mycol ) THEN
*                          LOCAL Z(LILOZ:LIHIZ,KCOL:KCOL+2)
                           IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.
     $                         ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN
                              itmp1 = kcol( ki ) + k - istart
                              itmp1 = ( itmp1-1 )*ldz
                              DO 500 j = liloz, lihiz
                                 sum = t1*z( j+itmp1 ) +
     $                                 t2*z( j+itmp1+ldz )
                                 z( j+itmp1 ) = z( j+itmp1 ) - sum
                                 z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -
     $                                              sum*conjg( v2 )
  500                         CONTINUE
                           ELSE
                              itmp1 = kcol( ki )
*                             IF WE ACTUALLY OWN COLUMN K
                              IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN
                                 CALL cgerv2d( contxt, lihiz-liloz+1, 1,
     $                                         work( izbuf+1 ), ldz,
     $                                         myrow, left )
                                 itmp1 = ( itmp1-1 )*ldz
                                 DO 510 j = liloz, lihiz
                                    sum = t1*work( izbuf+j ) +
     $                                    t2*z( j+itmp1 )
                                    work( izbuf+j ) = work( izbuf+j ) -
     $                                 sum
                                    z( j+itmp1 ) = z( j+itmp1 ) -
     $                                             sum*conjg( v2 )
  510                            CONTINUE
                                 CALL cgesd2d( contxt, lihiz-liloz+1, 1,
     $                                         work( izbuf+1 ), ldz,
     $                                         myrow, left )
                              END IF
                           END IF
                        ELSE
*
*                          NO WORK BUT NEED TO UPDATE ANYWAY????
*
                           IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.
     $                         ( icurcol( ki ).EQ.right ) ) THEN
                              itmp1 = kcol( ki )
                              itmp1 = ( itmp1-1 )*ldz
                              CALL cgesd2d( contxt, lihiz-liloz+1, 1,
     $                                      z( liloz+itmp1 ), ldz,
     $                                      myrow, right )
                              CALL cgerv2d( contxt, lihiz-liloz+1, 1,
     $                                      z( liloz+itmp1 ), ldz,
     $                                      myrow, right )
                           END IF
                        END IF
                     END IF
                  END IF
  520          CONTINUE
*
*        Adjust local information for this bulge
*
               IF( nprow.EQ.1 ) THEN
                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
     $              THEN
                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )
     $              THEN
                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.
     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
                  CALL infog1l( k2( ki )+1, hbl, nprow, myrow, iafirst,
     $                          krow( ki ), itmp2 )
               END IF
               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.
     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN
                  kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow,
     $                           iafirst, nprow )
               END IF
               IF( npcol.EQ.1 ) THEN
                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.
     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
     $              THEN
                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.
     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )
     $              THEN
                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1
               END IF
               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.
     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.
     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
                  CALL infog1l( k2( ki )+1, hbl, npcol, mycol, jafirst,
     $                          kcol( ki ), itmp2 )
               END IF
               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.
     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.
     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN
                  kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol,
     $                           jafirst, npcol )
               END IF
               k1( ki ) = k2( ki ) + 1
               istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )
               istop = min( istop, k1( ki )+hbl-3-
     $                 mod( k1( ki )-1, hbl ) )
               istop = min( istop, i2-2 )
               istop = max( istop, k1( ki ) )
               IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
     $             ( istop.LT.min( i-2, i2-2 ) ) ) THEN
                  istop = istop + 1
               END IF
               k2( ki ) = istop
               IF( k1( ki ).LE.istop ) THEN
                  IF( ( mod( k1( ki )-1, hbl ).EQ.hbl-2 ) .AND.
     $                ( i-k1( ki ).GT.1 ) ) THEN
*
*                    Next step switches rows & cols
*
                     icurrow( ki ) = mod( icurrow( ki )+1, nprow )
                     icurcol( ki ) = mod( icurcol( ki )+1, npcol )
                  END IF
               END IF
  530       CONTINUE
*
            IF( k2( ibulge ).LE.i-1 )
     $         GO TO 40
         END IF
*
  540 CONTINUE
*
*     Failure to converge in remaining number of iterations
*
      info = i
      RETURN
*
  550 CONTINUE
*
      IF( l.EQ.i ) THEN
*
*        H(I,I-1) is negligible: one eigenvalue has converged.
*
         CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,
     $                 icol, itmp1, itmp2 )
         IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN
            w( i ) = a( ( icol-1 )*lda+irow )
         ELSE
            w( i ) = zero
         END IF
      ELSE IF( l.EQ.i-1 ) THEN
*
*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
*
         CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, -1, -1, 0 )
         CALL clanv2( s1( 1, 1 ), s1( 1, 2 ), s1( 2, 1 ), s1( 2, 2 ),
     $                w( i-1 ), w( i ), cs, sn )
         CALL pclacp3( 2, i-1, a, desca, s1, 2*iblk, 0, 0, 1 )
*
         IF( node.NE.0 ) THEN
*           Zero the eigenvalues on every node other than node 0
            w( i-1 ) = zero
            w( i ) = zero
         END IF
*
         IF( wantt ) THEN
*
*           Apply the transformation to A.
*
            IF( i2.GT.i ) THEN
               CALL pcrot( i2-i, a, i-1, i+1, desca, n, a, i, i+1,
     $                     desca, n, cs, sn )
            END IF
            CALL pcrot( i-i1-1, a, i1, i-1, desca, 1, a, i1, i, desca,
     $                  1, cs, conjg( sn ) )
         END IF
         IF( wantz ) THEN
*
*           Apply the transformation to Z.
*
            CALL pcrot( nz, z, iloz, i-1, descz, 1, z, iloz, i, descz,
     $                  1, cs, conjg( sn ) )
         END IF
*
      ELSE
*
*        Find the eigenvalues in H(L:I,L:I), L < I-1
*
         jblk = i - l + 1
         IF( jblk.LE.2*iblk ) THEN
            CALL pclacp3( i-l+1, l, a, desca, s1, 2*iblk, 0, 0, 0 )
            CALL clahqr2( .false., .false., jblk, 1, jblk, s1, 2*iblk,
     $                   w( l ), 1, jblk, z, ldz, ierr )
            IF( node.NE.0 ) THEN
*
*              Erase the eigenvalues
*
               DO 560 k = l, i
                  w( k ) = zero
  560          CONTINUE
            END IF
         END IF
      END IF
*
*     Decrement number of remaining iterations, and return to start of
*     the main loop with new value of I.
*
      itn = itn - its
      i = l - 1
      GO TO 10
*
  570 CONTINUE
      CALL cgsum2d( contxt, 'All', ' ', n, 1, w, n, -1, -1 )
      RETURN
*
*     END OF PCLAHQR
*
OpenRadioss 2025.1.11 OpenRadioss project
Functions/Subroutines

Function/Subroutine Documentation

◆ pclahqr()