159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER , LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 REAL A( LDA, * ), TB( * ), WORK( * )
179 parameter( zero = 0.0e+0, one = 1.0e+0 )
182 LOGICAL UPPER, TQUERY, WQUERY
183 INTEGER I, J, K, I1, I2,
184 INTEGER LDTB, NB, KB, JB, NT, IINFO
190EXTERNAL lsame, ilaenv
205 upper = lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
210 ELSE IF( n.LT.0 )
THEN
212 ELSE IF( lda.LT.
max( 1, n ) )
THEN
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
221 CALL xerbla(
'SSYTRF_AA_2STAGE', -info )
227 nb = ilaenv( 1,
'SSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
236 IF( tquery .OR. wquery )
THEN
249 IF( ldtb .LT. 3*nb+1 )
THEN
252 IF( lwork .LT. nb*n )
THEN
286 IF( i .EQ. (j-1) )
THEN
291 CALL sgemm(
'NoTranspose',
'NoTranspose',
293 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ zero, work( i*nb+1 ), n )
303 CALL sgemm(
'NoTranspose',
'NoTranspose',
305 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ zero, work( i*nb+1 ), n )
314 CALL slacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
318 CALL sgemm(
'Transpose',
'NoTranspose',
320 $ -one, a( 1, j*nb+1 ), lda,
322 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324 CALL sgemm(
'Transpose',
'NoTranspose',
326 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ zero, work( 1 ), n )
329 CALL sgemm(
'NoTranspose',
'NoTranspose',
331 $ -one, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
336 CALL ssygst( 1,
'Upper', kb,
338 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
345 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
346 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
356 CALL sgemm(
'NoTranspose',
'NoTranspose',
358 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
360 $ zero, work( j*nb+1 ), n )
362 CALL sgemm(
'NoTranspose',
'NoTranspose',
364 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
366 $ a( (j-2)*nb+1, j*nb+1 ), lda,
367 $ zero, work( j*nb+1 ), n )
372 CALL sgemm(
'Transpose',
'NoTranspose',
373 $ nb, n-(j+1)*nb, j*nb,
374 $ -one, work( nb+1 ), n,
376 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
382 CALL scopy( n-(j+1)*nb,
383 $ a( j*nb+k, (j+1)*nb+1 ), lda,
384 $ work( 1+(k-1)*n ), 1 )
389 CALL sgetrf( n-(j+1)*nb, nb,
391 $ ipiv( (j+1)*nb+1 ), iinfo )
399 CALL scopy( n-(j+1)*nb,
400 $ work( 1+(k-1)*n ), 1,
401 $ a( j*nb+k, (j+1)*nb+1 ), lda )
406 kb =
min(nb, n-(j+1)*nb)
407 CALL slaset(
'Full', kb, nb, zero, zero,
408 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
409 CALL slacpy(
'Upper', kb, nb,
411 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
413 CALL strsm(
'R',
'U',
'N',
'U', kb, nb, one,
414 $ a( (j-1)*nb+1, j*nb+1 ), lda,
415 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
423 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
424 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
427 CALL slaset( 'lower
', KB, NB, ZERO, ONE,
428 $ A( J*NB+1, (J+1)*NB+1), LDA )
434 IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB
437 I2 = IPIV( (J+1)*NB+K )
440 CALL SSWAP( K-1, A( (J+1)*NB+1, I1 ), 1,
441 $ A( (J+1)*NB+1, I2 ), 1 )
444 $ CALL SSWAP( I2-I1-1, A( I1, I1+1 ), LDA,
448 $ CALL SSWAP( N-I2, A( I1, I2+1 ), LDA,
449 $ A( I2, I2+1 ), LDA )
452 A( I1, I1 ) = A( I2, I2 )
456 CALL SSWAP( J*NB, A( 1, I1 ), 1,
477.EQ.
IF( I (J-1) ) THEN
482 CALL SGEMM( 'notranspose
', 'transpose
',
484 $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
485 $ A( J*NB+1, (I-1)*NB+1 ), LDA,
486 $ ZERO, WORK( I*NB+1 ), N )
494 CALL SGEMM( 'notranspose
', 'transpose
',
496 $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
498 $ A( J*NB+1, (I-2)*NB+1 ), LDA,
499 $ ZERO, WORK( I*NB+1 ), N )
505 CALL SLACPY( 'lower
', KB, KB, A( J*NB+1, J*NB+1 ), LDA,
506 $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
509 CALL SGEMM( 'notranspose',
'NoTranspose',
511 $ -one, a( j*nb+1, 1 ), lda,
513 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
515 CALL sgemm(
'NoTranspose',
'NoTranspose',
517 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
518 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
519 $ zero, work( 1 ), n )
520 CALL sgemm(
'NoTranspose',
'Transpose'
522 $ -one, work( 1 ), n,
524 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
527 CALL ssygst( 1,
'Lower', kb,
528 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
529 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
536 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
537 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
547 CALL sgemm(
'NoTranspose',
'Transpose',
549 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
550 $ a( j*nb+1, (j-1)*nb+1 ), lda,
551 $ zero, work( j*nb+1 ), n )
553 CALL sgemm(
'NoTranspose',
'Transpose',
555 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
557 $ a( j*nb+1, (j-2)*nb+1 ), lda,
558 $ zero, work( j*nb+1 ), n )
563 CALL sgemm(
'NoTranspose',
'NoTranspose',
564 $ n-(j+1)*nb, nb, j*nb,
565 $ -one, a( (j+1)*nb+1, 1 ), lda,
567 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
572 CALL sgetrf( n-(j+1)*nb, nb,
573 $ a( (j+1)*nb+1, j*nb+1 ), lda,
574 $ ipiv( (j+1)*nb+1 ), iinfo )
581 kb =
min(nb, n-(j+1)*nb)
582 CALL slaset(
'Full', kb, nb, zero, zero,
583 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
584 CALL slacpy(
'Upper', kb, nb,
585 $ a( (j+1)*nb+1, j*nb+1 ), lda,
586 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
588 CALL strsm(
'R',
'L',
'T',
'U', kb, nb, one,
589 $ a( j*nb+1, (j-1)*nb+1 ), lda,
590 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
598 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
599 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
602 CALL slaset(
'Upper', kb, nb, zero, one,
603 $ a( (j+1)*nb+1, j*nb+1), lda )
609 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
612 i2 = ipiv( (j+1)*nb+k )
615 CALL sswap( k-1, a( i1, (j+1)*nb+1 ), lda,
616 $ a( i2, (j+1)*nb+1 ), lda )
619 $
CALL sswap( i2-i1-1, a( i1+1, i1 ), 1,
620 $ a( i2, i1+1 ), lda )
623 $
CALL sswap( n-i2, a( i2+1, i1 ), 1,
627 a( i1, i1 ) = a( i2, i2 )
631 CALL sswap( j*nb, a( i1, 1 ), lda,
646 CALL sgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )