64 1 NDDL,X,R,DIAG_K,DIAG_M,
65 2 NNZ,LT_K,LT_K0,LT_M,LT_M0,
66 3 IADK,JDIK,IADK0,JDIK0,IADM,
67 4 JDIM,IADM0,JDIM0,NNZM,TOLS,
68 5 NLIM,itol,EPS_M,IPREC)
72#include "implicit_f.inc"
77 . x(*), r(*), tols, eps_m,
78 . diag_k(*), diag_m(*),
79 . lt_k(*), lt_k0(*), lt_m(*), lt_m0(*)
80 INTEGER , NNZ, NNZM, NLIM, ITOL, IPREC,
81 . iadk(*), jdik(*), iadk0(*), jdik0(*),
82 . iadm(*), jdim(*), iadm0(*), jdim0(*)
86 OPEN(66,file=
"input_mat",form=
"unformatted")
87 WRITE(66)nddl,nnz,nnzm,nlim,itol,iprec
90 WRITE(66)diag_k(1:nddl)
91 WRITE(66)diag_m(1:nddl)
95 WRITE(66)lt_m0(1:nnzm)
96 WRITE(66)iadk(1:nddl+1)
97 WRITE(66)jdik(1:iadk(nddl+1)-1)
98 WRITE(66)iadk0(1:nddl+1)
99 WRITE(66)jdik0(1:iadk0(nddl+1)-1)
100 WRITE(66)iadm(1:nddl+1)
101 WRITE(66)jdim(1:iadm(nddl+1)-1)
102 WRITE(66)iadm0(1:nddl+1)
103 WRITE(66)jdim0(1:iadm0(nddl+1)-1)
228 1 NDDL ,NNZ ,IADK ,JDIK ,DIAG_K ,
229 2 LT_K ,NDDLI ,ITOK ,IADI ,JDII ,
230 3 LT_I ,NNZM ,IADM ,JDIM ,DIAG_M ,
231 4 LT_M ,X ,R ,ITOL ,TOL ,
232 5 P ,Z ,Y ,ITASK ,IPRINT ,
233 6 N_MAX ,EPS_M ,F_X ,ISTOP ,W_DDL ,
234 8 A ,AR ,VE ,MS ,XE ,
235 9 D ,DR ,NDOF ,IPARI ,INTBUF_TAB,
236 A NUM_IMP,NS_IMP,NE_IMP,NSREM ,
237 B NSL ,NMONV ,IMONV ,MONVOL,IGRSURF ,
238 C VOLMON,FR_MV ,IBFV ,SKEW ,
239 D XFRAME ,GRAPHE,IAD_ELEM,FR_ELEM,ITAB ,
240 E INSOLV ,ITN ,FAC_K ,IPIV_K,NK ,
241 F MUMPS_PAR,CDDLP,ISOLV,IDSC ,IDDL ,
242 G IKC ,INLOC ,IND_IMP,XI_C ,R0 ,
243 H NDDLI_G,INTP_C,IRBE3 ,LRBE3 ,IRBE2 ,
258#include "implicit_f.inc"
262#include "comlock.inc"
263#include "com04_c.inc"
264#include "units_c.inc"
267#include "dmumps_struc.h"
273 INTEGER NDDL ,NNZ ,ITOL,
274 . nddli ,itok(*) ,iadi(*),jdii(*),n_max,
275 . nnzm ,iprec,itask,iprint,
276 . istop,w_ddl(*),ibfv(*),intp_c,irbe3(*) ,lrbe3(*),
278 INTEGER NDOF(*),NE_IMP(*),NSREM ,NSL,
279 . ipari(*) ,num_imp(*),ns_imp(*) ,ind_imp(*)
280 INTEGER NMONV,IMONV(*),MONVOL(*),FR_MV(*)
281 INTEGER IAD_ELEM(2,*), FR_ELEM(*), ITAB(*),
282 . INSOLV, ITN, IPIV_K(*), NK, CDDLP(*), ISOLV, IDSC,
283 . IDDL(*), IKC(*), INLOC(*),NDDLI_G, MICID
285 . LT_I(*), TOL, EPS_M, F_X, XI_C(*)
287 INTEGER ,
target,
intent(inout) ::
288 . IADK(NDDL+1) ,JDIK(NNZ), IADM(NDDL+1), JDIM(NNZM)
289 my_real ,
target,
intent(inout) ::
290 . DIAG_K(NDDL), LT_K(NNZ) ,DIAG_M(NDDL),LT_M(NNZM) ,X(NDDL),
291 . P(NDDL) ,Z(NDDL) ,R(NDDL) ,Y(NDDL)
294 . IADK(*) ,JDIK(*), IADM(*),JDIM(*)
296 . (*), LT_K(*) ,DIAG_M(*),LT_M(*) ,X(*),
297 . P(*) ,Z(*) ,R(*) ,Y(*)
300 . a(3,*),ar(3,*),ve(3,*),d(3,*),dr(3,*),xe(3,*),
301 . ms(*),volmon(*),skew(*) ,xframe(*),fac_k(*),
306 TYPE(dmumps_struc) MUMPS_PAR
312 TYPE(intbuf_struct_) INTBUF_TAB(*)
313 TYPE (SURF_) ,
DIMENSION(NSURF) :: IGRSURF
323 INTEGER I,J,IT,IP,NLIM,ND,IERR,IPRI,IERROR,NNZI,LOC_PROC,
329 . s , r2, r02,
alpha,beta,g0,g1,rr,tols,toln,tols2
330 INTEGER CRIT_STOP,IUPD,IUPD0,F_DDL,L_DDL,IFLG,GPUR4R8,
331 . itp,lcom,nddl1,lcomi,lcomx,k,
332 . ntag(nddl),igather(nddl)
336 . anorm2,xnorm2,l_a,l_b2,l_b,a_old,b_old,tmp,r2_old,rmax
338 . cs,dbar, delta, denom, kcond,snprod,qrnorm,
339 . gamma, gbar, gmax, gmin, epsln,lqnorm,diag,cgnorm,
340 . oldb, rhs1, rhs2,sn, zbar, zl ,oldb2,tnorm2,eps(4)
344 real*4,
DIMENSION(:),
ALLOCATABLE ::
345 . lt_k_sp, lt_k0_sp, lt_m_sp, lt_m0_sp, diag_k_sp, diag_m_sp,
346 . x_sp, r_sp,w_sp, lt_i0_sp
348 my_real,
DIMENSION(:),
ALLOCATABLE :: wr
349 INTEGER,
DIMENSION(:),
ALLOCATABLE :: IFRTMP, TABLE, INDTMP
351 my_real,
DIMENSION(:),
ALLOCATABLE :: w
352 INTEGER,
DIMENSION(:),
ALLOCATABLE :: IFRTMP, INDEX, TABLE, INDTMP
369 DATA warr /
'**WARRING**'/
386 lcom = lcom +
nd_fr(i)
441 f_ddl=1+itask*nddl/nthread
442 l_ddl=(itask+1)*nddl/nthread
450 IF (ispmd/=0) ipri = 0
454 1 nddl,x,r,diag_k,diag_m,
455 2 nnz,lt_k,lt_k0,lt_m,lt_m0,
458 5 nlim,itol,eps_m,iprec)
460 1 nddl,r,diag_k,nnz,lt_k,
465 WRITE(iout,*)' *** begin conjugate gradient iteration ***
'
469 WRITE(ISTDO,*)' *** begin conjugate gradient iteration **
'
472 END IF !(ITASK == 0) THEN
491 CALL PREC_SOLVH(IPREC, ITASK ,
492 1 GRAPHE,IAD_ELEM,FR_ELEM,DIAG_K,LT_K ,
493 2 IADK ,JDIK ,ITAB ,IPRI,INSOLV ,
494 3 ITN ,FAC_K , IPIV_K, NK ,MUMPS_PAR,
495 4 CDDLP ,ISOLV , IDSC , IDDL ,IKC ,
496 5 INLOC ,NDOF , NDDL ,NNZM ,IADM ,
497 6 JDIM ,DIAG_M , LT_M ,R ,Z ,
505 CALL PRODUT_H(F_DDL ,L_DDL ,R ,Z ,W_DDL , G0 ,ITASK )
508 1 NDDL ,NDDLI ,IADK ,JDIK ,DIAG_K,
509 2 LT_K ,IADI ,JDII ,ITOK ,LT_I ,
511 5 VE ,MS ,XE ,D ,DR ,
512 6 NDOF ,IPARI ,INTBUF_TAB ,NUM_IMP,
513 7 NS_IMP,NE_IMP,NSREM ,NSL ,IBFV ,
514 8 SKEW ,XFRAME,MONVOL,VOLMON,IGRSURF ,
515 9 FR_MV,NMONV ,IMONV ,IND_IMP,
516 A XI_C ,IUPD ,IRBE3 ,LRBE3 ,IRBE2 ,
517 B LRBE2 ,F_DDL ,L_DDL ,ITASK )
521 CALL PRODUT_H(F_DDL ,L_DDL ,P ,Y ,W_DDL , S ,ITASK )
535 CALL PRODUT_H( F_DDL ,L_DDL ,R ,R ,W_DDL , R02 ,ITASK )
538 ELSEIF (ITOL==3) THEN
550 ELSEIF (ITOL==4) THEN
551 R02=ALPHA*ALPHA*ABS(G0)
563.AND.
IF(IPRI/=0ITASK==0)THEN
565 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
566 WRITE(IOUT,1000)IT,RR
574 X(I) = X(I) + ALPHA*P(I)
575 R(I) = R(I) - ALPHA*Y(I)
580 CALL PREC_SOLVH(IPREC, ITASK ,
581 1 GRAPHE,IAD_ELEM,FR_ELEM,DIAG_K,LT_K ,
582 2 IADK ,JDIK ,ITAB ,IPRI,INSOLV ,
583 3 ITN ,FAC_K , IPIV_K, NK ,MUMPS_PAR,
584 4 CDDLP ,ISOLV , IDSC , IDDL ,IKC ,
585 5 INLOC ,NDOF , NDDL ,NNZM ,IADM ,
586 6 JDIM ,DIAG_M , LT_M ,R ,Z ,
591 CALL PRODUT_H( F_DDL ,L_DDL ,R ,Z ,W_DDL , G1 ,ITASK )
595 CALL PRODUT_H( F_DDL ,L_DDL ,R ,R ,W_DDL , R2 ,ITASK )
596 ELSEIF (ITOL==3) THEN
599 L_B2=ABS(BETA)*A_OLD*A_OLD
608 GMAX = ABS( L_A ) + EPS_M
612 ELSEIF (ITOL==4) THEN
618 IF (ITOL==3) TOLN=TOLN*TNORM2
623 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
624#include "lockoff.inc"
625.AND.
IF(NDDLI_G>0INTP_C==-1)IUPD0 = -INTP_C
628 P(I) = Z(I) + BETA*P(I)
630.AND.
IF(IUPD0>0IT>NDDLI_G/2) IUPD = IUPD0
635 1 NDDL ,NDDLI ,IADK ,JDIK ,DIAG_K,
636 2 LT_K ,IADI ,JDII ,ITOK ,LT_I ,
638 5 VE ,MS ,XE ,D ,DR ,
639 6 NDOF ,IPARI ,INTBUF_TAB ,NUM_IMP,
640 7 NS_IMP,NE_IMP,NSREM ,NSL ,IBFV ,
641 8 SKEW ,XFRAME,MONVOL,VOLMON,IGRSURF ,
642 9 FR_MV,NMONV ,IMONV ,IND_IMP,
643 A XI_C ,IUPD ,IRBE3 ,LRBE3 ,IRBE2 ,
644 B LRBE2 ,F_DDL ,L_DDL ,ITASK )
648 CALL PRODUT_H(F_DDL ,L_DDL ,P ,Y ,W_DDL , S ,ITASK )
652 X(I) = X(I) + ALPHA*P(I)
653 R(I) = R(I) - ALPHA*Y(I)
658 CALL PREC_SOLVH(IPREC, ITASK ,
659 1 GRAPHE,IAD_ELEM,FR_ELEM,DIAG_K,LT_K ,
660 2 IADK ,JDIK ,ITAB ,IPRI,INSOLV ,
661 3 ITN ,FAC_K , IPIV_K, NK ,MUMPS_PAR,
662 4 CDDLP ,ISOLV , IDSC , IDDL ,IKC ,
663 5 INLOC ,NDOF , NDDL ,NNZM ,IADM ,
664 6 JDIM ,DIAG_M , LT_M ,R ,Z ,
669 CALL PRODUT_H(F_DDL ,L_DDL ,R ,Z ,W_DDL , G1 ,ITASK )
674 CALL PRODUT_H( F_DDL ,L_DDL ,R ,R ,W_DDL , R2 ,ITASK )
675 ELSEIF (ITOL==3) THEN
685 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
686 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
689 DELTA = CS * DBAR + SN * L_A
690 GBAR = SN * DBAR - CS * L_A
694 XNORM2 = XNORM2+ZL*ZL
695 GMAX = MAX( GMAX, GAMMA )
696 GMIN = MIN( GMIN, GAMMA )
697 RHS1 = RHS2 - DELTA * ZL
699 TOLN=TOLS2*ANORM2*XNORM2
702 ELSEIF (ITOL==4) THEN
703 TMP=ALPHA*ALPHA*ABS(G1)
721.AND.
IF(IPRI/=0ITASK==0)THEN
722 IF(MOD(IT,IP)==0)THEN
724 WRITE(IOUT,1001)IT,RR
725 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
732 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
733#include "lockoff.inc"
735.AND..OR.
IF((IUPD>0IT==NLIM)
736.AND..AND.
. (IUPD==0ISTOP/=1IUPD0>0))THEN
742#include "lockoff.inc"
752 1 NDDL ,NDDLI ,IADK ,JDIK ,DIAG_K,
753 2 LT_K ,IADI ,JDII ,ITOK ,LT_I ,
755 5 VE ,MS ,XE ,D ,DR ,
756 6 NDOF ,IPARI ,INTBUF_TAB ,NUM_IMP,
757 7 NS_IMP,NE_IMP,NSREM ,NSL ,IBFV ,
758 8 SKEW ,XFRAME,MONVOL,VOLMON,IGRSURF ,
759 9 FR_MV,NMONV ,IMONV ,IND_IMP,
760 A XI_C ,IUPD ,IRBE3 ,LRBE3 ,IRBE2 ,
761 B LRBE2 ,F_DDL ,L_DDL ,ITASK )
771 CALL PREC_SOLVH(IPREC, ITASK ,
772 1 GRAPHE,IAD_ELEM,FR_ELEM,DIAG_K,LT_K ,
773 2 IADK ,JDIK ,ITAB ,IPRI,INSOLV ,
774 3 ITN ,FAC_K , IPIV_K, NK ,MUMPS_PAR,
775 4 CDDLP ,ISOLV , IDSC , IDDL ,IKC ,
776 5 INLOC ,NDOF , NDDL ,NNZM ,IADM ,
777 6 JDIM ,DIAG_M , LT_M ,R ,Z ,
782 CALL PRODUT_H( F_DDL ,L_DDL ,R ,Z ,W_DDL , G0 ,ITASK )
797 GMAX = ABS( L_A ) + EPS_M
801 ELSEIF (ITOL==4) THEN
816 ELSEIF(GPUR4R8==2)THEN
827 . NSPMD,LOC_PROC,L_SPMD(LOC_PROC),IDEVICE,MICID,IERR)
830 CALL ANCMSG(MSGID=223,ANMODE=ANINFO_BLIND)
832 CALL ANCMSG(MSGID=224,ANMODE=ANINFO_BLIND)
834 CALL ANCMSG(MSGID=225,ANMODE=ANINFO_BLIND)
839 print *,' init mic card number
',MICID
855 i= omp_get_num_threads()
856 print *,'number of threads(host):
',i
858 i= kmp_get_blocktime_target (TARGET_MIC, MICID)
859!dec$ attributes offload: mic :: ONE,ZERO
860!dec$ attributes offload: mic :: MIC_DSCAL,MIC_GATHER,MIC_SCATTER
861!dec$ attributes offload: mic :: MIC_DDOT,MIC_MV,MIC_SPMV,MIC_DAXPY
862!dec$ attributes offload: mic :: MIC_DCOPY
865!dir$ omp offload target(mic:MICID)
866 & in (LT_K:length(nnz), free_if(.false.), align(512))
870 print *,' variables1=
',nnz
871!dir$ omp offload target(mic:MICID)
872 & in (JDIK:length(nnz), free_if(.false.), align(512))
876 print *,' variables2=
',nnz
877!dir$ omp offload target(mic:MICID)
878 & in (LT_K0:length(nnz), free_if(.false.), align(512))
882 print *,' variables3=
',nnz
883!dir$ omp offload target(mic:MICID)
884 & in (JDIK0:length(nnz), free_if(.false.), align(512))
888 print *,' variables4=
',nnz
890!dir$ omp offload target(mic:MICID)
891& in (lt_m:length(nnzm), free_if(.false.), align(512))
895 print *,' variables5=
',nnzm
896!dir$ omp offload target(mic:MICID)
897 & in (JDIM:length(nnzm), free_if(.false.), align(512))
901 print *,' variables6=
',nnzm
902!dir$ omp offload target(mic:MICID)
903 & in (lt_m0:length(nnzm), free_if(.false.), align(512))
907 print *,' variables7=
',nnzm
908!dir$ omp offload target(mic:MICID)
909 & in (JDIM0:length(nnzm), free_if(.false.), align(512))
913 print *,' variables8=
',nnzm
915!dir$ omp offload target(mic:MICID)
916 & in (DIAG_K,DIAG_M,x,y,z,r,p:length(nddl),
921 print *,' variables9=
',nddl*7
922!dir$ omp offload target(mic:MICID)
923 & in (IADK,IADK0,IADM,IADM0:length(NDDL1),
924 & free_if(.false.), align(512))
928 print *,' variables10=
',nddl1*4
929 TF=OMP_GET_WTIME()-TF
930 print *,'transfer time cpu => mic(s) =
',TF
931 print *,'transfer rate cpu => mic(mb/s)=
',
932 . ((TB/(1024*1024))*8)/TF
934 ALLOCATE(W(NDDL),STAT=IERROR)
936 ALLOCATE(VGAT(LCOM),STAT=IERR)
938 ALLOCATE(VSCA(LCOM),STAT=IERR)
940 ALLOCATE(INDEX(LCOM),STAT=IERR)
942 ALLOCATE(TABLE(NDDL),STAT=IERR)
965!dir$ omp offload target(mic:MICID)
966 & in (W:length(NDDL), free_if(.false.), align(512))
969 print *,' variables11=
',i
970!dir$ omp offload target(mic:MICID)
971 & in (IFR2K,INDEX:length(LCOM), free_if(.false.))
975 print *,' variables12=
',i
979!dir$ omp offload target(mic:MICID)
980 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
981 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
982 & DIAG_K,DIAG_M,x,y,z,r,p,w)
984!$OMP PARALLEL default(shared)
989 i= omp_get_num_threads()
990 print *,'number of threads(mic):
',i
998 CALL MIC_MV(NDDL,X,DIAG_K,Z)
1001 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1003 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1009 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1019 CALL MIC_DCOPY(NDDL, R, Z)
1020 ELSEIF(IPREC == 2)THEN
1022 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1023 ELSEIF(IPREC == 5)THEN
1025 CALL MIC_DCOPY(NDDL, R, Y)
1027 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1029 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1031 CALL MIC_DCOPY(NDDL, Z, Y)
1033 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1039.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1040 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1041!dir$ omp offload target(mic:MICID)
1043 & out(vgat:length(lcom))
1044!$OMP PARALLEL default(shared)
1045 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1047 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1048 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1049 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1050 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1051 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1052!dir$ omp offload target(mic:MICID)
1053 & nocopy(z,ifr2k,index)
1054 & in(vsca:length(lcom))
1055!$OMP PARALLEL default(shared)
1056 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1058 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1061!dir$ omp offload target(mic:MICID)
1063!$OMP PARALLEL default(shared)
1065 CALL MIC_DCOPY(NDDL, Z, P)
1069!dir$ omp offload target(mic:MICID)
1072!$OMP PARALLEL default(shared) shared(g0)
1073 CALL MIC_DDOT(NDDL, R, Z, G0)
1078!dir$ omp offload target(mic:MICID)
1081!$OMP PARALLEL default(shared) shared(g0)
1082 CALL MIC_MV(NDDL,Z,W,Y)
1083 CALL MIC_DDOT(NDDL, R, Y, G0)
1085 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1087 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1090!dir$ omp offload target(mic:MICID)
1091 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1092 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1093!$OMP PARALLEL default(shared)
1100 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1102 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1104 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1112 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1113!dir$ omp offload target(mic:MICID)
1115 & out(vgat:length(lcom))
1116!$OMP PARALLEL default(shared)
1117 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1119 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1120 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1121 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1122 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1123 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1124!dir$ omp offload target(mic:MICID)
1125 & nocopy(y,ifr2k,index)
1126 & in(vsca:length(lcom))
1127!$OMP PARALLEL default(shared)
1128 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1130 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1134!dir$ omp offload target(mic:MICID)
1137!$OMP PARALLEL default(shared) shared(s)
1138 CALL MIC_DDOT(NDDL, P, Y, S)
1142!dir$ omp offload target(mic:MICID)
1145!$OMP PARALLEL default(shared) shared(s)
1146 CALL MIC_MV(NDDL,Y,W,Z)
1147 CALL MIC_DDOT(NDDL, P, Z, S)
1149 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1151 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1168!dir$ omp offload target(mic:MICID)
1171!$OMP PARALLEL default(shared) shared(r02)
1172 CALL MIC_DDOT(NDDL, R, R, R02)
1176!dir$ omp offload target(mic:MICID)
1179!$OMP PARALLEL default(shared) shared(r02)
1180 CALL MIC_MV(NDDL,R,W,Z)
1181 CALL MIC_DDOT(NDDL, Z, Z, R02)
1183 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1184 CALL SPMD_SUM_S(R02)
1185 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1190 ELSEIF (ITOL==3) THEN
1202 ELSEIF (ITOL==4) THEN
1203 R02=ALPHA*ALPHA*ABS(G0)
1215.AND.
IF(IPRI/=0ITASK==0)THEN
1217 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1218 WRITE(IOUT,1000)IT,RR
1225!dir$ omp offload target(mic:MICID)
1226 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1227 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1228!$OMP PARALLEL default(shared)
1229 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1230 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1240 CALL MIC_DCOPY(NDDL, R, Z)
1241 ELSEIF(IPREC == 2)THEN
1243 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1244 ELSEIF(IPREC == 5)THEN
1246 CALL MIC_DCOPY(NDDL, R, Y)
1248 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1250 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1251 CALL MIC_DCOPY(NDDL, Z, Y)
1253 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1258.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1259 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1260!dir$ omp offload target(mic:MICID)
1262 & out(vgat:length(lcom))
1263!$OMP PARALLEL default(shared)
1264 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1266 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1267 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1268 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1269 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1270 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1271!dir$ omp offload target(mic:MICID)
1272 & nocopy(z,ifr2k,index)
1273 & in(vsca:length(lcom))
1274!$OMP PARALLEL default(shared)
1275 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1277 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1281!dir$ omp offload target(mic:MICID)
1284!$OMP PARALLEL default(shared) shared(G1)
1285 CALL MIC_DDOT(NDDL, R, Z, G1)
1289!dir$ omp offload target(mic:MICID)
1292!$OMP PARALLEL default(shared) shared(G1)
1293 CALL MIC_MV(NDDL,Z,W,Y)
1294 CALL MIC_DDOT(NDDL, R, Y, G1)
1296 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1298 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1304!dir$ omp offload target(mic:MICID)
1307!$OMP PARALLEL default(shared) shared(R2)
1308 CALL MIC_DDOT(NDDL, R, R, R2)
1312!dir$ omp offload target(mic:MICID)
1315!$OMP PARALLEL default(shared) shared(R2)
1316 CALL MIC_MV(NDDL,R,W,Y)
1317 CALL MIC_DDOT(NDDL, Y, Y, R2)
1319 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1321 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1323 ELSEIF (ITOL==3) THEN
1326 L_B2=ABS(BETA)*A_OLD*A_OLD
1335 GMAX = ABS( L_A ) + EPS_M
1339 ELSEIF (ITOL==4) THEN
1345 IF (ITOL==3) TOLN=TOLN*TNORM2
1349 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1357!dir$ omp offload target(mic:MICID)
1358 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1359 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1360!$OMP PARALLEL default(shared)
1363 CALL MIC_DSCAL(NDDL, BETA, P)
1364 CALL MIC_DAXPY(NDDL, ONE, Z, P)
1372 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1374 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1376 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1383 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1384!dir$ omp offload target(mic:MICID)
1386 & out(vgat:length(lcom))
1387!$OMP PARALLEL default(shared)
1388 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1390 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1391 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1392 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1393 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1394 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1395!dir$ omp offload target(mic:MICID)
1396 & nocopy(y,ifr2k,index)
1397 & in(vsca:length(lcom))
1398!$OMP PARALLEL default(shared)
1399 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1401 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1405!dir$ omp offload target(mic:MICID)
1408!$OMP PARALLEL default(shared) shared(S)
1409 CALL MIC_DDOT(NDDL, P, Y, S)
1413!dir$ omp offload target(mic:MICID)
1416!$OMP PARALLEL default(shared) shared(S)
1417 CALL MIC_MV(NDDL,Y,W,Z)
1418 CALL MIC_DDOT(NDDL, P, Z, S)
1420 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1422 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1426!dir$ omp offload target(mic:MICID)
1427 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1428 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1429!$OMP PARALLEL default(shared)
1430 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1431 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1438 CALL MIC_DCOPY(NDDL, R, Z)
1439 ELSEIF(IPREC == 2)THEN
1441 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1442 ELSEIF(IPREC == 5)THEN
1444 CALL MIC_DCOPY(NDDL, R, Y)
1446 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1448 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1449 CALL MIC_DCOPY(NDDL, Z, Y)
1451 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1456.AND.
IF(NSPMD > 1 IPREC > 1)THEN
1457 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1458!dir$ omp offload target(mic:MICID)
1460 & out(vgat:length(lcom))
1461!$OMP PARALLEL default(shared)
1462 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1464 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1465 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1466 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1467 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1468 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1469!dir$ omp offload target(mic:MICID)
1470 & nocopy(z,ifr2k,index)
1471 & in(vsca:length(lcom))
1472!$OMP PARALLEL default(shared)
1473 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1475 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1479!dir$ omp offload target(mic:MICID)
1482!$OMP PARALLEL default(shared) shared(G1)
1483 CALL MIC_DDOT(NDDL, R, Z, G1)
1487!dir$ omp offload target(mic:MICID)
1490!$OMP PARALLEL default(shared) shared(G1)
1491 CALL MIC_MV(NDDL,Z,W,Y)
1492 CALL MIC_DDOT(NDDL, R, Y, G1)
1494 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1496 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1505!dir$ omp offload target(mic:MICID)
1508!$OMP PARALLEL default(shared) shared(R2)
1509 CALL MIC_DDOT(NDDL, R, R, R2)
1513!dir$ omp offload target(mic:MICID)
1516!$OMP PARALLEL default(shared) shared(R2)
1517 CALL MIC_MV(NDDL,R,W,Y)
1518 CALL MIC_DDOT(NDDL, Y, Y, R2)
1520 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1522 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1524 ELSEIF (ITOL==3) THEN
1534 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
1535 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
1538 DELTA = CS * DBAR + SN * L_A
1539 GBAR = SN * DBAR - CS * L_A
1543 XNORM2 = XNORM2+ZL*ZL
1544 GMAX = MAX( GMAX, GAMMA )
1545 GMIN = MIN( GMIN, GAMMA )
1546 RHS1 = RHS2 - DELTA * ZL
1548 TOLN=TOLS2*ANORM2*XNORM2
1551 ELSEIF (ITOL==4) THEN
1552 TMP=ALPHA*ALPHA*ABS(G1)
1569.AND.
IF(IPRI/=0ITASK==0)THEN
1570 IF(MOD(IT,IP)==0)THEN
1572 WRITE(IOUT,1001)IT,RR
1573 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
1579 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1587 WRITE(IOUT,1001)IT-1,RR
1592 tt=OMP_GET_WTIME()-tt
1593 i= omp_get_num_threads()
1596 print *,' execution time on mic with
',i,' threads
'
1597 print *,' ==>
',tt,' seconds
'
1606 ELSE ! CODE NSPMD = 1
1608!dec$ attributes offload: mic :: ONE,ZERO,ISTDO,IOUT
1610!dir$ omp offload target(mic:MICID)
1611 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
1612 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
1613 & DIAG_K,DIAG_M,x,y,z,r,p,w)
1616!$OMP PARALLEL default(shared)
1620 i= omp_get_num_threads()
1621 print *,'number of threads(mic):
',i
1629 CALL MIC_MV(NDDL,X,DIAG_K,Z)
1632 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1634 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1640 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1649 CALL MIC_DCOPY(NDDL, R, Z)
1650 ELSEIF(IPREC == 2)THEN
1652 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1653 ELSEIF(IPREC == 5)THEN
1655 CALL MIC_DCOPY(NDDL, R, Y)
1657 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1659 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1661 CALL MIC_DCOPY(NDDL, Z, Y)
1663 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1695 CALL MIC_DCOPY(NDDL, Z, P)
1703 CALL MIC_DDOT(NDDL, R, Z, G0)
1730 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1732 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1734 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1768 CALL MIC_DDOT(NDDL, P, Y, S)
1802 CALL MIC_DDOT(NDDL, R, R, R02)
1820 ELSEIF (ITOL==3) THEN
1836 ELSEIF (ITOL==4) THEN
1837 R02=ALPHA*ALPHA*ABS(G0)
1851.AND.
IF(IPRI/=0ITASK==0)THEN
1853 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1854 WRITE(IOUT,1000)IT,RR
1867 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1868 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1878 CALL MIC_DCOPY(NDDL, R, Z)
1879 ELSEIF(IPREC == 2)THEN
1881 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1882 ELSEIF(IPREC == 5)THEN
1884 CALL MIC_DCOPY(NDDL, R, Y)
1886 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1888 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1889 CALL MIC_DCOPY(NDDL, Z, Y)
1891 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1923 CALL MIC_DDOT(NDDL, R, Z, G1)
1946 CALL MIC_DDOT(NDDL, R, R, R2)
1961 ELSEIF (ITOL==3) THEN
1966 L_B2=ABS(BETA)*A_OLD*A_OLD
1975 GMAX = ABS( L_A ) + EPS_M
1981 ELSEIF (ITOL==4) THEN
1989 IF (ITOL==3) TOLN=TOLN*TNORM2
2019 CALL MIC_DSCAL(NDDL, BETA, P)
2020 CALL MIC_DAXPY(NDDL, ONE, Z, P)
2028 CALL MIC_MV(NDDL,P,DIAG_K,Y)
2030 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
2032 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
2065 CALL MIC_DDOT(NDDL, P, Y, S)
2086 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
2087 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
2094 CALL MIC_DCOPY(NDDL, R, Z)
2095 ELSEIF(IPREC == 2)THEN
2097 CALL MIC_MV(NDDL,R,DIAG_M,Z)
2098 ELSEIF(IPREC == 5)THEN
2100 CALL MIC_DCOPY(NDDL, R, Y)
2102 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
2104 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
2105 CALL MIC_DCOPY(NDDL, Z, Y)
2107 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
2139 CALL MIC_DDOT(NDDL, R, Z, G1)
2167 CALL MIC_DDOT(NDDL, R, R, R2)
2182 ELSEIF (ITOL==3) THEN
2194 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
2195 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
2198 DELTA = CS * DBAR + SN * L_A
2199 GBAR = SN * DBAR - CS * L_A
2203 XNORM2 = XNORM2+ZL*ZL
2204 GMAX = MAX( GMAX, GAMMA )
2205 GMIN = MIN( GMIN, GAMMA )
2206 RHS1 = RHS2 - DELTA * ZL
2208 TOLN=TOLS2*ANORM2*XNORM2
2213 ELSEIF (ITOL==4) THEN
2214 TMP=ALPHA*ALPHA*ABS(G1)
2233.AND.
IF(IPRI/=0ITASK==0)THEN
2234 IF(MOD(IT,IP)==0)THEN
2236 WRITE(IOUT,1001)IT,RR
2237 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
2262 tt=OMP_GET_WTIME()-tt
2263 i= omp_get_num_threads()
2266 print *,' execution time on mic with
',i,' threads
'
2267 print *,' ==>
',tt,' seconds
'
2272 WRITE(IOUT,1001)IT-1,RR
2273 END IF ! fin NSPMD ==1
2275!dec$ omp offload target(mic:MICID) out(X,R:alloc_if(.false.))
2287 TF=OMP_GET_WTIME()-TF
2288 print *,'transfer time mic=>cpu(s) =
',TF
2289 print *,'transfer rate mic=>cpu(mb/s)=
',
2290 . ((2*NDDL/(1024*1024))*8)/TF
2301 END IF ! fin GPUR4R8==2 (MIC double precision)
2308 IF(ISAVE==1)CALL IMP_RSAVE(NDDL,X,R)
2314#include "lockon.inc"
2316#include "lockoff.inc"
2317 CALL PUPD(N_MAX,NDDLI_G,IT)
2321#include "lockon.inc"
2323#include "lockoff.inc"
2325#include "lockon.inc"
2327#include "lockoff.inc"
2329.NOT.
IF (MIXEDSOL()) THEN
2331 WRITE(IOUT,1003)NLIM
2334 WRITE(ISTDO,1003)NLIM
2338 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2340 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2343 IF (R2>HUNDRED*TOLN) THEN
2353.NOT.
END IF !((MIXEDSOL)) THEN
2355 END IF !(ISOLV==7) THEN
2360 WRITE(IOUT,1002)IT,RR
2361 IF(ITOL==3)WRITE(IOUT,2000)
2362 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2366 WRITE(ISTDO,1002)IT,RR
2367 IF(ITOL==3)WRITE(ISTDO,2000)
2368 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2379 END IF !(ITASK == 0) THEN
2383#include "lockon.inc"
2385#include "lockoff.inc"
2389 1000 FORMAT(5X,'iteration=
',I8,5X,' initial residual
norm=
',E11.4)
2390 1001 FORMAT(5X,'iteration=
',I8,5X,' relative residual
norm=
',E11.4)
2391 1002 FORMAT(3X,'total c.g. iteration=
',I8,5X,
2392 . ' relative residual
norm=
',E11.4)
2394 . '---warning :
the iteration limit number was reached
',I8)
2395 1004 FORMAT(5X,'warning:c.g stop with relative residual
norm=
',E11.4)
2396 2000 FORMAT(/ 5X, A, 2X, 'anorm =
', E12.4, 2X, 'xnorm =
', E12.4,2X,
2398 2002 FORMAT(/ 5X, 'with
', 2X, 'alfa =
', E12.4, 2X, 'beta =
',
2399 . E12.4,2X,'oldb =
', E12.4)
subroutine imp_pcgh(iprec, nddl, nnz, iadk, jdik, diag_k, lt_k, nddli, itok, iadi, jdii, lt_i, nnzm, iadm, jdim, diag_m, lt_m, x, r, itol, tol, p, z, y, itask, iprint, n_max, eps_m, f_x, istop, w_ddl, a, ar, ve, ms, xe, d, dr, ndof, ipari, intbuf_tab, num_imp, ns_imp, ne_imp, nsrem, nsl, nmonv, imonv, monvol, igrsurf, volmon, fr_mv, ibfv, skew, xframe, graphe, iad_elem, fr_elem, itab, insolv, itn, fac_k, ipiv_k, nk, mumps_par, cddlp, isolv, idsc, iddl, ikc, inloc, ind_imp, xi_c, r0, nddli_g, intp_c, irbe3, lrbe3, irbe2, lrbe2)
subroutine mav_lth(nddl, nddli, iadl, jdil, diag_k, lt_k, iadi, jdii, itok, lt_i, v, w, a, ar, ve, ms, x, d, dr, ndof, ipari, intbuf_tab, num_imp, ns_imp, ne_imp, nsrem, nsl, ibfv, skew, xframe, monvol, volmon, igrsurf, fr_mv, nmonv, imonv, index2, xi_c, iupd, irbe3, lrbe3, irbe2, lrbe2, f_ddl, l_ddl, itask)