245
246
247
253 USE intbufdef_mod
255
256
257
258#include "implicit_f.inc"
259
260
261
262#include "comlock.inc"
263#include "com04_c.inc"
264#include "units_c.inc"
265#include "task_c.inc"
266#if defined(MUMPS5)
267#include "dmumps_struc.h"
268#endif
269
270
271
272
273 INTEGER NDDL ,NNZ ,ITOL,
274 . NDDLI ,ITOK(*) ,IADI(*),JDII(*),N_MAX,
275 . NNZM ,IPREC,ITASK,IPRINT,
276 . ISTOP,W_DDL(*),IBFV(*),INTP_C,IRBE3(*) ,LRBE3(*),
277 . IRBE2(*) ,LRBE2(*)
278 INTEGER NDOF(*),NE_IMP(*),NSREM ,NSL,
279 . IPARI(*) ,NUM_IMP(*),NS_IMP(*) ,IND_IMP(*)
280 INTEGER NMONV,IMONV(*),MONVOL(*),FR_MV(*)
281 INTEGER IAD_ELEM(2,*), FR_ELEM(*), ITAB(*),
282 . INSOLV, ITN, IPIV_K(*), NK, CDDLP(*), ISOLV, IDSC,
283 . IDDL(*), IKC(*), INLOC(*),NDDLI_G, MICID
285 . lt_i(*), tol, eps_m, f_x, xi_c(*)
286#if defined knf
287 INTEGER , target, intent(inout) ::
288 . IADK(NDDL+1) ,JDIK(NNZ), IADM(NDDL+1), JDIM(NNZM)
289 my_real ,
target,
intent(inout) ::
290 . diag_k(nddl), lt_k(nnz) ,diag_m(nddl),lt_m(nnzm) ,x(nddl),
291 . p(nddl) ,z(nddl) ,r(nddl) ,y(nddl)
292#elif 1
293 INTEGER
294 . IADK(*) ,JDIK(*), IADM(*),JDIM(*)
296 . diag_k(*), lt_k(*) ,diag_m(*),lt_m(*) ,x(*),
297 . p(*) ,z(*) ,r(*) ,y(*)
298#endif
300 . a(3,*),ar(3,*),ve(3,*),d(3,*),dr(3,*),xe(3,*),
301 . ms(*),volmon(*),skew(*) ,xframe(*),fac_k(*),
302 . r0(*)
303 TYPE(PRGRAPH) :: GRAPHE(*)
304
305#ifdef MUMPS5
306 TYPE(DMUMPS_STRUC) MUMPS_PAR
307#else
308
309 INTEGER MUMPS_PAR
310#endif
311
312 TYPE(INTBUF_STRUCT_) INTBUF_TAB(*)
313 TYPE (SURF_) , DIMENSION(NSURF) :: IGRSURF
314
315
316
317 LOGICAL MIXEDSOL
319#ifdef MUMPS5
320
321
322
323 INTEGER I,J,IT,IP,NLIM,ND,,IPRI,IERROR,NNZI,LOC_PROC,
324 . ICK,ISAVE,N,
325 parameter(nd=4)
326 CHARACTER*4 EXIT
327 CHARACTER*11 WARR
329 . s , r2, r02,
alpha,beta,g0,g1,rr,tols,toln,tols2
330 INTEGER CRIT_STOP,IUPD,IUPD0,F_DDL,L_DDL,IFLG,GPUR4R8,
331 . ITP,LCOM,NDDL1,,LCOMX,K,
332 . NTAG(NDDL),IGATHER(NDDL)
334
336 . anorm2,xnorm2,l_a,l_b2,l_b,a_old,b_old,tmp,r2_old,rmax
338 . cs,dbar, delta, denom, kcond,snprod,qrnorm,
339 . gamma, gbar, gmax, gmin, epsln,lqnorm,diag,cgnorm,
340 . oldb, rhs1, rhs2,sn, zbar, zl ,oldb2,tnorm2,eps(4)
341 double precision
342 . tt, tf
343
344 real*4, DIMENSION(:),ALLOCATABLE ::
345 . lt_k_sp, lt_k0_sp, lt_m_sp, lt_m0_sp, diag_k_sp, diag_m_sp,
346 . x_sp, r_sp,w_sp, lt_i0_sp
347#if defined knf
348 my_real,
DIMENSION(:),
ALLOCATABLE :: wr
349 INTEGER, DIMENSION(:),ALLOCATABLE :: IFRTMP, TABLE, INDTMP
350#elif 1
351 my_real,
DIMENSION(:),
ALLOCATABLE :: w, wr, vgat, vsca, xir, yir
352 INTEGER, DIMENSION(:),ALLOCATABLE :: IFRTMP, INDEX, TABLE, INDTMP
353#endif
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368 DATA EXIT /'WITH'/
369 DATA warr /'**WARRING**'/
370
371 isave=0
372
373
374#if defined knf
375 IF(nddli > 0)THEN
377 ELSE
378 nnzi = 0
379 END IF
380
381 lcom=0
382 lcomi=0
383 lcomx=0
384 IF(nspmd > 1)THEN
385 DO i = 1, nspmd
386 lcom = lcom +
nd_fr(i)
387 END DO
388 IF(nddli_g > 0)THEN
389 ntag(1:nddl)=0
390 DO i = 1, nspmd
393 IF(ntag(k)==0)THEN
394 lcomi=lcomi+1
395 ntag(k)=lcomi
396 igather(lcomi)=k-1
397 ENDIF
398 ENDDO
399 END DO
400 DO i = 1, nspmd
402 END DO
406 IF(ntag(k)==0)THEN
407 lcomi=lcomi+1
408 ntag(k)=lcomi
409 igather(lcomi)=k-1
410 ENDIF
411 ENDDO
412 ENDDO
413
416 IF(ntag(k)==0)THEN
417 lcomi=lcomi+1
418 ntag(k)=lcomi
419 igather(lcomi)=k-1
420 ENDIF
421
424 IF(ntag(k)==0)THEN
425 lcomi=lcomi+1
426 ntag(k)=lcomi
427 igather(lcomi)=k-1
428 ENDIF
429 ENDDO
430 ENDDO
431 END IF
432
433
434
435
436 END IF
437#endif
438 loc_proc=ispmd+1
439
440
441 f_ddl=1+itask*nddl/nthread
442 l_ddl=(itask+1)*nddl/nthread
443
444 istop=0
445 nlim=n_max
449 ipri = iprint
450 IF (ispmd/=0) ipri = 0
451
452
454 1 nddl,x,r,diag_k,diag_m,
455 2 nnz,lt_k,lt_k0,lt_m,lt_m0,
458 5 nlim,itol,eps_m,iprec)
460 1 nddl,r,diag_k,nnz,lt_k,
462
463 IF (itask == 0) THEN
464 IF(ipri/=0)THEN
465 WRITE(iout,*)' *** BEGIN CONJUGATE GRADIENT ITERATION ***'
466 WRITE(iout,*)
467 ENDIF
468 IF(ipri<0)THEN
469 WRITE(istdo,*)' *** BEGIN CONJUGATE GRADIENT ITERATION ***'
470 WRITE(istdo,*)
471 ENDIF
472 END IF
473 ip = iabs(ipri)
474 it=0
475
476 iupd = 0
477 iupd0 = 0
478
480
481
482
483
484
485
486
487 gpur4r8=2
488
489#if !defined knf
490
492 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
493 2 iadk ,jdik ,itab ,ipri,insolv ,
494 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
495 4 cddlp ,isolv , idsc , iddl ,ikc ,
496 5 inloc ,ndof , nddl ,nnzm ,iadm ,
497 6 jdim ,diag_m , lt_m ,r ,z ,
498 7 f_ddl ,l_ddl )
499
501
502 DO i = f_ddl,l_ddl
503 p(i) = z(i)
504 ENDDO
505 CALL produt_h(f_ddl ,l_ddl ,r ,z ,w_ddl , g0 ,itask )
506
508 1 nddl ,nddli ,iadk ,jdik ,diag_k,
509 2 lt_k ,iadi ,jdii ,itok ,lt_i ,
510 3 p ,y ,a ,ar ,
511 5 ve ,ms ,xe ,d ,dr ,
512 6 ndof ,ipari ,intbuf_tab ,num_imp,
513 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
514 8 skew ,xframe,monvol,volmon,igrsurf ,
515 9 fr_mv,nmonv ,imonv ,ind_imp,
516 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
517 b lrbe2 ,f_ddl ,l_ddl ,itask )
518
520
521 CALL produt_h(f_ddl ,l_ddl ,p ,y ,w_ddl , s ,itask )
522
523 IF (g0==zero) THEN
524 IF (itol>1) THEN
525 istop=-1
526 GOTO 200
527 ELSE
529 ENDIF
530 ELSE
532 ENDIF
533 tols2=tols*tols
534 IF (itol==1) THEN
535 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r02 ,itask )
536
537 r2 =r02
538 ELSEIF (itol==3) THEN
539
540 r02=abs(g0)
541 r2 =r02
543
544 tnorm2=l_a*l_a
545 anorm2=zero
546 xnorm2=zero
547 a_old=l_a
548 b_old=zero
549 oldb = sqrt(r02)
550 ELSEIF (itol==4) THEN
552 eps(1)=r02
553 r2=r02
554 ELSE
555 r02=abs(g0)
556 r2 =r02
557 ENDIF
558 IF (r02==zero) THEN
559 istop=-1
560 GOTO 200
561 ENDIF
562 toln=r02*tols2
563 IF(ipri/=0.AND.itask==0)THEN
564 rr = sqrt(r2)
565 IF(ipri<0) WRITE(istdo,1000)it,rr
566 WRITE(iout,1000)it,rr
567 ENDIF
568
569
571
572 it=1
573 DO i=f_ddl,l_ddl
574 x(i) = x(i) +
alpha*p(i)
575 r(i) = r(i) -
alpha*y(i)
576 ENDDO
577
579
581 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
582 2 iadk ,jdik ,itab ,ipri,insolv ,
583 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
584 4 cddlp ,isolv , idsc , iddl ,ikc ,
585 5 inloc ,ndof , nddl ,nnzm ,iadm ,
586 6 jdim ,diag_m , lt_m ,r ,z ,
587 7 f_ddl ,l_ddl )
588
590
591 CALL produt_h( f_ddl ,l_ddl ,r ,z ,w_ddl , g1 ,itask )
592
593 beta=g1/g0
594 IF (itol==1) THEN
595 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r2 ,itask )
596 ELSEIF (itol==3) THEN
597
598 r2=abs(g1)
599 l_b2=abs(beta)*a_old*a_old
600 l_b=sqrt(l_b2)
601 tnorm2=tnorm2+l_b2
602 b_old=beta
603
604 gbar = l_a
605 dbar = l_b
606 rhs1 = oldb
607 rhs2 = zero
608 gmax = abs( l_a ) + eps_m
609 gmin = gmax
610 oldb2 = l_b2
611 oldb = l_b
612 ELSEIF (itol==4) THEN
613 r2=r02
614 ELSE
615 r2=abs(g1)
616 ENDIF
617 g0 = g1
618 IF (itol==3) toln=toln*tnorm2
619
621
622#include "lockon.inc"
624#include "lockoff.inc"
625 IF(nddli_g>0.AND.intp_c==-1)iupd0 = -intp_c
626 DO WHILE (istop==1)
627 DO i=f_ddl,l_ddl
628 p(i) = z(i) + beta*p(i)
629 ENDDO
630 IF(iupd0>0.AND.it>nddli_g/2) iupd = iupd0
631
633
635 1 nddl ,nddli ,iadk ,jdik ,diag_k,
636 2 lt_k ,iadi ,jdii ,itok ,lt_i ,
637 3 p ,y ,a ,ar ,
638 5 ve ,ms ,xe ,d ,dr ,
639 6 ndof ,ipari ,intbuf_tab ,num_imp,
640 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
641 8 skew ,xframe,monvol,volmon,igrsurf ,
642 9 fr_mv,nmonv ,imonv ,ind_imp,
643 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
644 b lrbe2 ,f_ddl ,l_ddl ,itask )
645
647
648 CALL produt_h(f_ddl ,l_ddl ,p ,y ,w_ddl , s ,itask )
649
651 DO i=f_ddl,l_ddl
652 x(i) = x(i) +
alpha*p(i)
653 r(i) = r(i) -
alpha*y(i)
654 ENDDO
655
657
659 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
660 2 iadk ,jdik ,itab ,ipri,insolv ,
661 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
662 4 cddlp ,isolv , idsc , iddl ,ikc ,
663 5 inloc ,ndof , nddl ,nnzm ,iadm ,
664 6 jdim ,diag_m , lt_m ,r ,z ,
665 7 f_ddl ,l_ddl )
666
668
669 CALL produt_h(f_ddl ,l_ddl ,r ,z ,w_ddl , g1 ,itask )
670
671 beta=g1/g0
672 g0 = g1
673 IF (itol==1) THEN
674 CALL produt_h( f_ddl ,l_ddl ,r ,r ,w_ddl , r2 ,itask )
675 ELSEIF (itol==3) THEN
676 r2 =abs(g1)
678 l_a=s+b_old*a_old
679
680 l_b2=abs(beta)*s*s
681 l_b=sqrt(l_b2)
682 a_old=s
683 b_old=beta
684 anorm2=tnorm2
685 tnorm2=tnorm2+l_a*l_a+oldb2+l_b2
686 gamma = sqrt( gbar*gbar + oldb2 )
687 cs = gbar / gamma
688 sn = oldb / gamma
689 delta = cs * dbar + sn * l_a
690 gbar = sn * dbar - cs * l_a
691 epsln = sn * l_b
692 dbar = - cs * l_b
693 zl = rhs1 / gamma
694 xnorm2 = xnorm2+zl*zl
695 gmax =
max( gmax, gamma )
696 gmin =
min( gmin, gamma )
697 rhs1 = rhs2 - delta * zl
698 rhs2 = - epsln * zl
699 toln=tols2*anorm2*xnorm2
700 oldb2 = l_b2
701 oldb = l_b
702 ELSEIF (itol==4) THEN
704 IF (it>=nd) THEN
705 DO j=1,nd-1
706 eps(j)=eps(j+1)
707 ENDDO
708 eps(nd)=tmp
709 r2=zero
710 DO j=1,nd
711 r2=r2+eps(j)
712 ENDDO
713 ELSE
714 eps(it+1)=tmp
715 r2=r2+tmp
716 ENDIF
717 ELSE
718 r2 =abs(g1)
719 ENDIF
720
721 IF(ipri/=0.AND.itask==0)THEN
722 IF(mod(it,ip)==0)THEN
723 rr = sqrt(r2/r02)
724 WRITE(iout,1001)it,rr
725 IF(ipri<0) WRITE(istdo,1001)it,rr
726 ENDIF
727 ENDIF
728
730
731#include "lockon.inc"
733#include "lockoff.inc"
734
735 IF((iupd>0.AND.it==nlim).OR.
736 . (iupd==0.AND.istop/=1.AND.iupd0>0))THEN
737
738 iupd0 = 0
739 IF(iupd>0) iupd = 0
740#include "lockon.inc"
741 istop = 1
742#include "lockoff.inc"
743
744 nlim = nlim + nlim
745 DO i=f_ddl,l_ddl
746 x(i) = zero
747 ENDDO
748
750
752 1 nddl ,nddli ,iadk ,jdik ,diag_k,
753 2 lt_k ,iadi ,jdii ,itok
754 3 x ,z ,a ,ar ,
755 5 ve ,ms ,xe ,d ,dr ,
756 6 ndof ,ipari ,intbuf_tab ,num_imp,
757 7 ns_imp,ne_imp,nsrem ,nsl ,ibfv ,
758 8 skew ,xframe,monvol,volmon,igrsurf ,
759 9 fr_mv,nmonv ,imonv ,ind_imp,
760 a xi_c ,iupd ,irbe3 ,lrbe3 ,irbe2 ,
761 b lrbe2 ,f_ddl ,l_ddl ,itask )
762
764
765 DO i=f_ddl,l_ddl
766 r(i) = r0(i)-z(i)
767 ENDDO
768
770
772 1 graphe,iad_elem,fr_elem,diag_k,lt_k ,
773 2 iadk ,jdik ,itab ,ipri,insolv ,
774 3 itn ,fac_k , ipiv_k, nk ,mumps_par,
775 4 cddlp ,isolv , idsc , iddl ,ikc ,
776 5 inloc ,ndof , nddl ,nnzm ,iadm ,
777 6 jdim ,diag_m , lt_m ,r ,z ,
778 7 f_ddl ,l_ddl )
779
781
782 CALL produt_h( f_ddl ,l_ddl ,r ,z ,w_ddl , g0 ,itask )
783 beta = zero
784
785 IF (itol==3) THEN
786 tnorm2=l_a*l_a
787 anorm2=zero
788 xnorm2=zero
789 a_old=zero
790 b_old=zero
791 l_b=zero
792
793 gbar = l_a
794 dbar = zero
795 rhs1 = sqrt(r02)
796 rhs2 = zero
797 gmax = abs( l_a ) + eps_m
798 gmin = gmax
799 oldb2 = zero
800 oldb = l_b
801 ELSEIF (itol==4) THEN
802 DO j=1,nd
803 eps(j)=r02
804 ENDDO
805 ENDIF
806 ENDIF
807
809
810 it = it +1
811 ENDDO
812 200 CONTINUE
813
814#elif defined knf
815 IF(gpur4r8==1)THEN
816 ELSEIF(gpur4r8==2)THEN
817 IF(itask==0)THEN
818
819
820
821
822
823
824
825
826 CALL mic_init(
827 . nspmd,loc_proc,l_spmd(loc_proc),idevice,micid,ierr)
828 IF(ierr < 0) THEN
829 IF(ierr==-1)THEN
830 CALL ancmsg(msgid=223,anmode=aninfo_blind)
831 ELSEIF(ierr==-2THEN
832 CALL ancmsg(msgid=224,anmode=aninfo_blind)
833 ELSEIF(ierr==-3)THEN
834 CALL ancmsg(msgid=225,anmode=aninfo_blind)
835 END IF
837 END IF
838 nddl1=nddl+1
839 print *,' INIT MIC CARD NUMBER ',micid
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855 i= omp_get_num_threads()
856 print *,'Number of threads (host):',i
857
858 i= kmp_get_blocktime_target(target_mic, micid)
859
860
861
862
863 tf=omp_get_wtime()
864 tb=0
865
866 & in(lt_k:length(nnz), free_if(.false.), align(512))
867
868
869 tb = tb+nnz
870 print *,' Variables1= ',nnz
871
872 & in(jdik:length(nnz), free_if(.false.), align(512))
873
874
875 tb = tb+nnz
876 print *,' Variables2= ',nnz
877
878 & in(lt_k0:length(nnz), free_if(.false.), align(512))
879
880
881 tb = tb+nnz
882 print *,' Variables3= ',nnz
883
884 & in(
jdik0:length(nnz), free_if(.false.), align(512))
885!$omp parallel
886
887 tb = tb+nnz
888 print *,' Variables4= ',nnz
889 IF(iprec == 5) THEN
890
891& in(lt_m:length(nnzm), free_if(.false.), align(512))
892
893
894 tb = tb+nnzm
895 print *,' Variables5= ',nnzm
896
897 & in(jdim:length(nnzm), free_if(.false.), align(512))
898
899
900 tb = tb+nnzm
901 print *,' Variables6= ',nnzm
902
903 & in(lt_m0:length(nnzm), free_if(.false.), align(512))
904
905
906 tb = tb+nnzm
907 print *,' variables7= ',nnzm
908!dir$ omp offload target(mic:MICID)
909 & in (JDIM0:length(nnzm), free_if(.false.), align(512))
910!$OMP PARALLEL
911!$OMP END PARALLEL
912 TB = TB+NNZ
913 print *,' variables8= ',nnzm
914 END IF
915!dir$ omp offload target(mic:MICID)
916 & in (DIAG_K,DIAG_M,x,y,z,r,p:length(nddl),
917 & free_if(.false.))
918!$OMP PARALLEL
919!$OMP END PARALLEL
920 TB = TB+NDDL*7
921 print *,' variables9= ',nddl*7
922!dir$ omp offload target(mic:MICID)
923 & in (IADK,IADK0,IADM,IADM0:length(NDDL1),
924 & free_if(.false.), align(512))
925!$OMP PARALLEL
926!$OMP END PARALLEL
927 TB = TB+nddl1*4
928 print *,' variables10= ',nddl1*4
929 TF=OMP_GET_WTIME()-TF
930 print *,'transfer time cpu => mic(s) =',TF
931 print *,'transfer rate cpu => mic(mb',
932 . ((TB/(1024*1024))*8)/TF
933 IF(NSPMD > 1)THEN
934 ALLOCATE(W(NDDL),STAT=IERROR)
935 IERROR=IERROR+IERR
936 ALLOCATE(VGAT(LCOM),STAT=IERR)
937 IERROR=IERROR+IERR
938 ALLOCATE(VSCA(LCOM),STAT=IERR)
939 IERROR=IERROR+IERR
940 ALLOCATE(INDEX(LCOM),STAT=IERR)
941 IERROR=IERROR+IERR
942 ALLOCATE(TABLE(NDDL),STAT=IERR)
943 IERROR=IERROR+IERR
944 IF(IERROR /= 0)THEN
945
946 END IF
947
948 DO I = 1, NDDL
949
950 W(I) = W_DDL(I)
951 END DO
952
953 TABLE(1:NDDL)=0
954 DO I = 1, LCOM
955 N = IFR2K(I)
956 J = TABLE(N)
957 IF(J==0)THEN
958 INDEX(I)=0
959 TABLE(N)=I
960 ELSE
961 INDEX(I)=J
962 END IF
963 END DO
964 DEALLOCATE(TABLE)
965!dir$ omp offload target(mic:MICID)
966 & in (W:length(NDDL), free_if(.false.), align(512))
967!$OMP PARALLEL
968!$OMP END PARALLEL
969 print *,' variables11= ',i
970!dir$ omp offload target(mic:MICID)
971 & in (IFR2K,INDEX:length(LCOM), free_if(.false.))
972
973!$OMP PARALLEL
974!$OMP END PARALLEL
975 print *,' variables12= ',i
976 END IF
977
978 IF(NSPMD>1)THEN
979!dir$ omp offload target(mic:MICID)
980 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
981 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
982 & DIAG_K,DIAG_M,x,y,z,r,p,w)
983 & inout(tt)
984!$OMP PARALLEL default(shared)
985
986
987!$OMP single
988 tt=OMP_GET_WTIME()
989 i= omp_get_num_threads()
990 print *,'number of threads(mic):',i
991!$OMP END single
992
993
994
995
996
997
998 CALL MIC_MV(NDDL,X,DIAG_K,Z)
999
1000
1001 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1002
1003 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1004 IF(NDDLI > 0) THEN
1005
1006
1007 END IF
1008
1009 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1010
1011
1012
1013
1014
1015
1016
1017 IF(IPREC == 1)THEN
1018
1019 CALL MIC_DCOPY(NDDL, R, Z)
1020 ELSEIF(IPREC == 2)THEN
1021
1022 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1023 ELSEIF(IPREC == 5)THEN
1024
1025 CALL MIC_DCOPY(NDDL, R, Y)
1026
1027 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1028
1029 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1030
1031 CALL MIC_DCOPY(NDDL, Z, Y)
1032
1033 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1034
1035 ELSE
1036 END IF
1037!$OMP END PARALLEL
1038
1039.AND. IF(NSPMD > 1 IPREC > 1)THEN
1040 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1041!dir$ omp offload target(mic:MICID)
1042 & nocopy(z,ifr2k)
1043 & out(vgat:length(lcom))
1044!$OMP PARALLEL default(shared)
1045 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1046!$OMP END PARALLEL
1047 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1048 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1049 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1050 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1051 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1052!dir$ omp offload target(mic:MICID)
1053 & nocopy(z,ifr2k,index)
1054 & in(vsca:length(lcom))
1055!$OMP PARALLEL default(shared)
1056 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1057!$OMP END PARALLEL
1058 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1059 END IF
1060
1061!dir$ omp offload target(mic:MICID)
1062 & nocopy(z,p)
1063!$OMP PARALLEL default(shared)
1064
1065 CALL MIC_DCOPY(NDDL, Z, P)
1066!$OMP END PARALLEL
1067
1068 IF(NSPMD == 1) THEN
1069!dir$ omp offload target(mic:MICID)
1070 & nocopy(r,z)
1071 & inout(G0)
1072!$OMP PARALLEL default(shared) shared(g0)
1073 CALL MIC_DDOT(NDDL, R, Z, G0)
1074!$OMP END PARALLEL
1075 ELSE
1076
1077
1078!dir$ omp offload target(mic:MICID)
1079 & nocopy(z,y,w,r)
1080 & inout(G0)
1081!$OMP PARALLEL default(shared) shared(g0)
1082 CALL MIC_MV(NDDL,Z,W,Y)
1083 CALL MIC_DDOT(NDDL, R, Y, G0)
1084!$OMP END PARALLEL
1085 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1086 CALL SPMD_SUM_S(G0)
1087 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1088 ENDIF
1089
1090!dir$ omp offload target(mic:MICID)
1091 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1092 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1093!$OMP PARALLEL default(shared)
1094
1095
1096
1097
1098
1099
1100 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1101
1102 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1103
1104 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1105 IF(NDDLI > 0) THEN
1106
1107
1108 END IF
1109!$OMP END PARALLEL
1110
1111 IF(NSPMD > 1)THEN
1112 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1113!dir$ omp offload target(mic:MICID)
1114 & nocopy(y,ifr2k)
1115 & out(vgat:length(lcom))
1116!$OMP PARALLEL default(shared)
1117 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1118!$OMP END PARALLEL
1119 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1120 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1121 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1122 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1123 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1124!dir$ omp offload target(mic:MICID)
1125 & nocopy(y,ifr2k,index)
1126 & in(vsca:length(lcom))
1127!$OMP PARALLEL default(shared)
1128 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1129!$OMP END PARALLEL
1130 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1131 END IF
1132
1133 IF(NSPMD == 1) THEN
1134!dir$ omp offload target(mic:MICID)
1135 & nocopy(p,y)
1136 & inout(S)
1137!$OMP PARALLEL default(shared) shared(s)
1138 CALL MIC_DDOT(NDDL, P, Y, S)
1139!$OMP END PARALLEL
1140 ELSE
1141
1142!dir$ omp offload target(mic:MICID)
1143 & nocopy(y,w,p,z)
1144 & inout(S)
1145!$OMP PARALLEL default(shared) shared(s)
1146 CALL MIC_MV(NDDL,Y,W,Z)
1147 CALL MIC_DDOT(NDDL, P, Z, S)
1148!$OMP END PARALLEL
1149 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1150 CALL SPMD_SUM_S(S)
1151 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1152 END IF
1153
1154
1155 IF (G0==ZERO) THEN
1156 IF (ITOL>1) THEN
1157 ISTOP=-1
1158 GOTO 206
1159 ELSE
1160 ALPHA = ZERO
1161 ENDIF
1162 ELSE
1163 ALPHA = G0/S
1164 ENDIF
1165 TOLS2=TOLS*TOLS
1166 IF (ITOL==1) THEN
1167 IF(NSPMD == 1) THEN
1168!dir$ omp offload target(mic:MICID)
1169 & nocopy(r)
1170 & inout(r02)
1171!$OMP PARALLEL default(shared) shared(r02)
1172 CALL MIC_DDOT(NDDL, R, R, R02)
1173!$OMP END PARALLEL
1174 ELSE
1175
1176!dir$ omp offload target(mic:MICID)
1177 & nocopy(r,w,z)
1178 & inout(r02)
1179!$OMP PARALLEL default(shared) shared(r02)
1180 CALL MIC_MV(NDDL,R,W,Z)
1181 CALL MIC_DDOT(NDDL, Z, Z, R02)
1182!$OMP END PARALLEL
1183 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1184 CALL SPMD_SUM_S(R02)
1185 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1186 END IF
1187
1188
1189 R2 =R02
1190 ELSEIF (ITOL==3) THEN
1191
1192 R02=ABS(G0)
1193 R2 =R02
1194 L_A=ONE/ALPHA
1195
1196 TNORM2=L_A*L_A
1197 ANORM2=ZERO
1198 XNORM2=ZERO
1199 A_OLD=L_A
1200 B_OLD=ZERO
1201 OLDB = SQRT(R02)
1202 ELSEIF (ITOL==4) THEN
1203 R02=ALPHA*ALPHA*ABS(G0)
1204 EPS(1)=R02
1205 R2=R02
1206 ELSE
1207 R02=ABS(G0)
1208 R2 =R02
1209 ENDIF
1210 IF (R02==ZERO) THEN
1211 ISTOP=-1
1212 GOTO 206
1213 ENDIF
1214 TOLN=R02*TOLS2
1215.AND. IF(IPRI/=0ITASK==0)THEN
1216 RR = SQRT(R2)
1217 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1218 WRITE(IOUT,1000)IT,RR
1219 ENDIF
1220
1221
1222
1223
1224 IT=1
1225!dir$ omp offload target(mic:MICID)
1226 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1227 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1228!$OMP PARALLEL default(shared)
1229 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1230 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1231
1232
1233
1234
1235
1236
1237
1238 IF(IPREC == 1)THEN
1239
1240 CALL MIC_DCOPY(NDDL, R, Z)
1241 ELSEIF(IPREC == 2)THEN
1242
1243 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1244 ELSEIF(IPREC == 5)THEN
1245
1246 CALL MIC_DCOPY(NDDL, R, Y)
1247
1248 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1249
1250 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1251 CALL MIC_DCOPY(NDDL, Z, Y)
1252
1253 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1254 ELSE
1255 END IF
1256!$OMP END PARALLEL
1257
1258.AND. IF(NSPMD > 1 IPREC > 1)THEN
1259 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1260!dir$ omp offload target(mic:MICID)
1261 & nocopy(z,ifr2k)
1262 & out(vgat:length(lcom))
1263!$OMP PARALLEL default(shared)
1264 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1265!$OMP END PARALLEL
1266 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1267 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1268 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1269 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1270 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1271!dir$ omp offload target(mic:MICID)
1272 & nocopy(z,ifr2k,index)
1273 & in(vsca:length(lcom))
1274!$OMP PARALLEL default(shared)
1275 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1276!$OMP END PARALLEL
1277 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1278 END IF
1279
1280 IF(NSPMD == 1) THEN
1281!dir$ omp offload target(mic:MICID)
1282 & nocopy(r,z)
1283 & inout(G1)
1284!$OMP PARALLEL default(shared) shared(G1)
1285 CALL MIC_DDOT(NDDL, R, Z, G1)
1286!$OMP END PARALLEL
1287 ELSE
1288
1289!dir$ omp offload target(mic:MICID)
1290 & nocopy(z,w,r,y)
1291 & inout(G1)
1292!$OMP PARALLEL default(shared) shared(G1)
1293 CALL MIC_MV(NDDL,Z,W,Y)
1294 CALL MIC_DDOT(NDDL, R, Y, G1)
1295!$OMP END PARALLEL
1296 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1297 CALL SPMD_SUM_S(G1)
1298 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1299 END IF
1300
1301 BETA=G1/G0
1302 IF (ITOL==1) THEN
1303 IF(NSPMD == 1) THEN
1304!dir$ omp offload target(mic:MICID)
1305 & nocopy(r)
1306 & inout(R2)
1307!$OMP PARALLEL default(shared) shared(R2)
1308 CALL MIC_DDOT(NDDL, R, R, R2)
1309!$OMP END PARALLEL
1310 ELSE
1311
1312!dir$ omp offload target(mic:MICID)
1313 & nocopy(r,w,y)
1314 & inout(R2)
1315!$OMP PARALLEL default(shared) shared(R2)
1316 CALL MIC_MV(NDDL,R,W,Y)
1317 CALL MIC_DDOT(NDDL, Y, Y, R2)
1318!$OMP END PARALLEL
1319 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1320 CALL SPMD_SUM_S(R2)
1321 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1322 END IF
1323 ELSEIF (ITOL==3) THEN
1324
1325 R2=ABS(G1)
1326 L_B2=ABS(BETA)*A_OLD*A_OLD
1327 L_B=SQRT(L_B2)
1328 TNORM2=TNORM2+L_B2
1329 B_OLD=BETA
1330
1331 GBAR = L_A
1332 DBAR = L_B
1333 RHS1 = OLDB
1334 RHS2 = ZERO
1335 GMAX = ABS( L_A ) + EPS_M
1336 GMIN = GMAX
1337 OLDB2 = L_B2
1338 OLDB = L_B
1339 ELSEIF (ITOL==4) THEN
1340 R2=R02
1341 ELSE
1342 R2=ABS(G1)
1343 ENDIF
1344 G0 = G1
1345 IF (ITOL==3) TOLN=TOLN*TNORM2
1346
1347
1348
1349 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1350
1351
1352
1353
1354
1355 DO WHILE (ISTOP==1)
1356
1357!dir$ omp offload target(mic:MICID)
1358 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1359 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1360!$OMP PARALLEL default(shared)
1361
1362
1363 CALL MIC_DSCAL(NDDL, BETA, P)
1364 CALL MIC_DAXPY(NDDL, ONE, Z, P)
1365
1366
1367
1368
1369
1370
1371
1372 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1373
1374 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1375
1376 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1377 IF(NDDLI > 0) THEN
1378
1379
1380 END IF
1381!$OMP END PARALLEL
1382 IF(NSPMD > 1)THEN
1383 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1384!dir$ omp offload target(mic:MICID)
1385 & nocopy(y,ifr2k)
1386 & out(vgat:length(lcom))
1387!$OMP PARALLEL default(shared)
1388 CALL MIC_GATHER(NDDL, LCOM, Y, VGAT, IFR2K)
1389!$OMP END PARALLEL
1390 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1391 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1392 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1393 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1394 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1395!dir$ omp offload target(mic:MICID)
1396 & nocopy(y,ifr2k,index)
1397 & in(vsca:length(lcom))
1398!$OMP PARALLEL default(shared)
1399 CALL MIC_SCATTER(NDDL, LCOM, Y, VSCA, IFR2K, INDEX)
1400!$OMP END PARALLEL
1401 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1402 END IF
1403
1404 IF(NSPMD == 1) THEN
1405!dir$ omp offload target(mic:MICID)
1406 & nocopy(p,y)
1407 & inout(S)
1408!$OMP PARALLEL default(shared) shared(S)
1409 CALL MIC_DDOT(NDDL, P, Y, S)
1410!$OMP END PARALLEL
1411 ELSE
1412
1413!dir$ omp offload target(mic:MICID)
1414 & nocopy(y,w,p,z)
1415 & inout(S)
1416!$OMP PARALLEL default(shared) shared(S)
1417 CALL MIC_MV(NDDL,Y,W,Z)
1418 CALL MIC_DDOT(NDDL, P, Z, S)
1419!$OMP END PARALLEL
1420 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1421 CALL SPMD_SUM_S(S)
1422 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1423 END IF
1424
1425 ALPHA=G0/S
1426!dir$ omp offload target(mic:MICID)
1427 & nocopy(iadk,iadk0,iadm,iadm0,lt_k,lt_k0,lt_m)
1428 & nocopy(lt_m0,jdik,jdik0,jdim,jdim0,diag_k,diag_m,y,p,x,r,z)
1429!$OMP PARALLEL default(shared)
1430 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1431 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1432
1433
1434
1435
1436 IF(IPREC == 1)THEN
1437
1438 CALL MIC_DCOPY(NDDL, R, Z)
1439 ELSEIF(IPREC == 2)THEN
1440
1441 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1442 ELSEIF(IPREC == 5)THEN
1443
1444 CALL MIC_DCOPY(NDDL, R, Y)
1445
1446 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1447
1448 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1449 CALL MIC_DCOPY(NDDL, Z, Y)
1450
1451 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1452 ELSE
1453 END IF
1454!$OMP END PARALLEL
1455
1456.AND. IF(NSPMD > 1 IPREC > 1)THEN
1457 IF(IMONM > 0) CALL STARTIME(TIMERS,68)
1458!dir$ omp offload target(mic:MICID)
1459 & nocopy(z,ifr2k)
1460 & out(vgat:length(lcom))
1461!$OMP PARALLEL default(shared)
1462 CALL MIC_GATHER(NDDL, LCOM, Z, VGAT, IFR2K)
1463!$OMP END PARALLEL
1464 IF(IMONM > 0) CALL STOPTIME(TIMERS,68)
1465 IF(IMONM > 0) CALL STARTIME(TIMERS,66)
1466 CALL SPMD_SUMFC_V(VGAT,VSCA,INDEX,LCOM)
1467 IF(IMONM > 0) CALL STOPTIME(TIMERS,66)
1468 IF(IMONM > 0) CALL STARTIME(TIMERS,69)
1469!dir$ omp offload target(mic:MICID)
1470 & nocopy(z,ifr2k,index)
1471 & in(vsca:length(lcom))
1472!$OMP PARALLEL default(shared)
1473 CALL MIC_SCATTER(NDDL, LCOM, Z, VSCA, IFR2K, INDEX)
1474!$OMP END PARALLEL
1475 IF(IMONM > 0) CALL STOPTIME(TIMERS,69)
1476 END IF
1477
1478 IF(NSPMD == 1) THEN
1479!dir$ omp offload target(mic:MICID)
1480 & nocopy(r,z)
1481 & inout(G1)
1482!$OMP PARALLEL default(shared) shared(G1)
1483 CALL MIC_DDOT(NDDL, R, Z, G1)
1484!$OMP END PARALLEL
1485 ELSE
1486
1487!dir$ omp offload target(mic:MICID)
1488 & nocopy(z,w,r,y)
1489 & inout(G1)
1490!$OMP PARALLEL default(shared) shared(G1)
1491 CALL MIC_MV(NDDL,Z,W,Y)
1492 CALL MIC_DDOT(NDDL, R, Y, G1)
1493!$OMP END PARALLEL
1494 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1495 CALL SPMD_SUM_S(G1)
1496 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1497 END IF
1498
1499
1500 BETA=G1/G0
1501 G0 = G1
1502
1503 IF (ITOL==1) THEN
1504 IF(NSPMD == 1) THEN
1505!dir$ omp offload target(mic:MICID)
1506 & nocopy(r)
1507 & inout(R2)
1508!$OMP PARALLEL default(shared) shared(R2)
1509 CALL MIC_DDOT(NDDL, R, R, R2)
1510!$OMP END PARALLEL
1511 ELSE
1512
1513!dir$ omp offload target(mic:MICID)
1514 & nocopy(r,w,y)
1515 & inout(R2)
1516!$OMP PARALLEL default(shared) shared(R2)
1517 CALL MIC_MV(NDDL,R,W,Y)
1518 CALL MIC_DDOT(NDDL, Y, Y, R2)
1519!$OMP END PARALLEL
1520 IF(IMONM > 0) CALL STARTIME(TIMERS,67)
1521 CALL SPMD_SUM_S(R2)
1522 IF(IMONM > 0) CALL STOPTIME(TIMERS,67)
1523 END IF
1524 ELSEIF (ITOL==3) THEN
1525 R2 =ABS(G1)
1526 S=ONE/ALPHA
1527 L_A=S+B_OLD*A_OLD
1528
1529 L_B2=ABS(BETA)*S*S
1530 L_B=SQRT(L_B2)
1531 A_OLD=S
1532 B_OLD=BETA
1533 ANORM2=TNORM2
1534 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
1535 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
1536 CS = GBAR / GAMMA
1537 SN = OLDB / GAMMA
1538 DELTA = CS * DBAR + SN * L_A
1539 GBAR = SN * DBAR - CS * L_A
1540 EPSLN = SN * L_B
1541 DBAR = - CS * L_B
1542 ZL = RHS1 / GAMMA
1543 XNORM2 = XNORM2+ZL*ZL
1544 GMAX = MAX( GMAX, GAMMA )
1545 GMIN = MIN( GMIN, GAMMA )
1546 RHS1 = RHS2 - DELTA * ZL
1547 RHS2 = - EPSLN * ZL
1548 TOLN=TOLS2*ANORM2*XNORM2
1549 OLDB2 = L_B2
1550 OLDB = L_B
1551 ELSEIF (ITOL==4) THEN
1552 TMP=ALPHA*ALPHA*ABS(G1)
1553 IF (IT>=ND) THEN
1554 DO J=1,ND-1
1555 EPS(J)=EPS(J+1)
1556 ENDDO
1557 EPS(ND)=TMP
1558 R2=ZERO
1559 DO J=1,ND
1560 R2=R2+EPS(J)
1561 ENDDO
1562 ELSE
1563 EPS(IT+1)=TMP
1564 R2=R2+TMP
1565 ENDIF
1566 ELSE
1567 R2 =ABS(G1)
1568 ENDIF
1569.AND. IF(IPRI/=0ITASK==0)THEN
1570 IF(MOD(IT,IP)==0)THEN
1571 RR = SQRT(R2/R02)
1572 WRITE(IOUT,1001)IT,RR
1573 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
1574 ENDIF
1575 ENDIF
1576
1577
1578
1579 ISTOP=CRIT_STOP(IT,R2,NLIM,TOLN)
1580
1581
1582
1583 IT = IT +1
1584 ENDDO
1585 206 CONTINUE
1586
1587 WRITE(IOUT,1001)IT-1,RR
1588
1589
1590!$OMP PARALLEL
1591!$OMP single
1592 tt=OMP_GET_WTIME()-tt
1593 i= omp_get_num_threads()
1594 print *,' '
1595 print *,' '
1596 print *,' execution time on mic with',i,' threads'
1597 print *,' ==> ',tt,' seconds'
1598 print *,' '
1599!$OMP END single
1600!$OMP END PARALLEL
1601 DEALLOCATE(W)
1602 DEALLOCATE(VGAT)
1603 DEALLOCATE(VSCA)
1604 DEALLOCATE(INDEX)
1605
1606 ELSE ! CODE NSPMD = 1
1607
1608!dec$ attributes offload: mic :: ONE,ZERO,ISTDO,IOUT
1609
1610!dir$ omp offload target(mic:MICID)
1611 & nocopy(IADK,IADK0,IADM,IADM0,LT_K,LT_K0,
1612 & lt_m,lt_m0,JDIK,JDIK0,JDIM,JDIM0,
1613 & DIAG_K,DIAG_M,x,y,z,r,p,w)
1614 & inout(tt)
1615
1616!$OMP PARALLEL default(shared)
1617
1618!$OMP single
1619 tt=OMP_GET_WTIME()
1620 i= omp_get_num_threads()
1621 print *,'number of threads(mic):',i
1622!$OMP END single
1623
1624
1625
1626
1627
1628
1629 CALL MIC_MV(NDDL,X,DIAG_K,Z)
1630
1631
1632 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K ,IADK ,JDIK )
1633
1634 CALL MIC_SPMV(NDDL ,Z ,X ,LT_K0,IADK0,JDIK0)
1635 IF(NDDLI > 0) THEN
1636
1637
1638 END IF
1639
1640 CALL MIC_DAXPY(NDDL, -ONE, Z, R)
1641
1642
1643
1644
1645
1646
1647 IF(IPREC == 1)THEN
1648
1649 CALL MIC_DCOPY(NDDL, R, Z)
1650 ELSEIF(IPREC == 2)THEN
1651
1652 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1653 ELSEIF(IPREC == 5)THEN
1654
1655 CALL MIC_DCOPY(NDDL, R, Y)
1656
1657 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1658
1659 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1660
1661 CALL MIC_DCOPY(NDDL, Z, Y)
1662
1663 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1664
1665 ELSE
1666 END IF
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695 CALL MIC_DCOPY(NDDL, Z, P)
1696
1697
1698
1699
1700
1701
1702
1703 CALL MIC_DDOT(NDDL, R, Z, G0)
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730 CALL MIC_MV(NDDL,P,DIAG_K,Y)
1731
1732 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
1733
1734 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
1735 IF(NDDLI > 0) THEN
1736
1737
1738 END IF
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768 CALL MIC_DDOT(NDDL, P, Y, S)
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785 IF (G0==ZERO) THEN
1786 IF (ITOL>1) THEN
1787 ISTOP=-1
1788 GOTO 2060
1789 ELSE
1790 ALPHA = ZERO
1791 ENDIF
1792 ELSE
1793 ALPHA = G0/S
1794 ENDIF
1795 TOLS2=TOLS*TOLS
1796 IF (ITOL==1) THEN
1797
1798
1799
1800
1801
1802 CALL MIC_DDOT(NDDL, R, R, R02)
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819 R2 =R02
1820 ELSEIF (ITOL==3) THEN
1821
1822
1823!$OMP SINGLE
1824 R02=ABS(G0)
1825 R2 =R02
1826 L_A=ONE/ALPHA
1827
1828 TNORM2=L_A*L_A
1829 ANORM2=ZERO
1830 XNORM2=ZERO
1831 A_OLD=L_A
1832 B_OLD=ZERO
1833 OLDB = SQRT(R02)
1834
1835!$OMP END SINGLE
1836 ELSEIF (ITOL==4) THEN
1837 R02=ALPHA*ALPHA*ABS(G0)
1838 EPS(1)=R02
1839 R2=R02
1840 ELSE
1841 R02=ABS(G0)
1842 R2 =R02
1843 ENDIF
1844 IF (R02==ZERO) THEN
1845 ISTOP=-1
1846 GOTO 2060
1847 ENDIF
1848
1849!$OMP SINGLE
1850 TOLN=R02*TOLS2
1851.AND. IF(IPRI/=0ITASK==0)THEN
1852 RR = SQRT(R2)
1853 IF(IPRI<0) WRITE(ISTDO,1000)IT,RR
1854 WRITE(IOUT,1000)IT,RR
1855 ENDIF
1856
1857!$OMP END SINGLE
1858
1859
1860
1861
1862 IT=1
1863
1864
1865
1866
1867 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
1868 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
1869
1870
1871
1872
1873
1874
1875
1876 IF(IPREC == 1)THEN
1877
1878 CALL MIC_DCOPY(NDDL, R, Z)
1879 ELSEIF(IPREC == 2)THEN
1880
1881 CALL MIC_MV(NDDL,R,DIAG_M,Z)
1882 ELSEIF(IPREC == 5)THEN
1883
1884 CALL MIC_DCOPY(NDDL, R, Y)
1885
1886 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
1887
1888 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
1889 CALL MIC_DCOPY(NDDL, Z, Y)
1890
1891 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
1892 ELSE
1893 END IF
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923 CALL MIC_DDOT(NDDL, R, Z, G1)
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939 BETA=G1/G0
1940 IF (ITOL==1) THEN
1941
1942
1943
1944
1945
1946 CALL MIC_DDOT(NDDL, R, R, R2)
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961 ELSEIF (ITOL==3) THEN
1962
1963
1964!$OMP SINGLE
1965 R2=ABS(G1)
1966 L_B2=ABS(BETA)*A_OLD*A_OLD
1967 L_B=SQRT(L_B2)
1968 TNORM2=TNORM2+L_B2
1969 B_OLD=BETA
1970
1971 GBAR = L_A
1972 DBAR = L_B
1973 RHS1 = OLDB
1974 RHS2 = ZERO
1975 GMAX = ABS( L_A ) + EPS_M
1976 GMIN = GMAX
1977 OLDB2 = L_B2
1978 OLDB = L_B
1979
1980!$OMP END SINGLE
1981 ELSEIF (ITOL==4) THEN
1982 R2=R02
1983 ELSE
1984 R2=ABS(G1)
1985 ENDIF
1986 G0 = G1
1987
1988!$OMP SINGLE
1989 IF (ITOL==3) TOLN=TOLN*TNORM2
1990
1991
1992
1993
1994
1995
1996 IF (IT>=NLIM) THEN
1997 ISTOP = 0
1998 ELSE
1999 IF (R2<=TOLN) THEN
2000 ISTOP = 0
2001 ELSE
2002 ISTOP = 1
2003 ENDIF
2004 ENDIF
2005
2006!$OMP END SINGLE
2007
2008
2009
2010
2011 DO WHILE (ISTOP==1)
2012
2013
2014
2015
2016
2017
2018
2019 CALL MIC_DSCAL(NDDL, BETA, P)
2020 CALL MIC_DAXPY(NDDL, ONE, Z, P)
2021
2022
2023
2024
2025
2026
2027
2028 CALL MIC_MV(NDDL,P,DIAG_K,Y)
2029
2030 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K ,IADK ,JDIK )
2031
2032 CALL MIC_SPMV(NDDL ,Y ,P ,LT_K0,IADK0,JDIK0)
2033 IF(NDDLI > 0) THEN
2034
2035
2036 END IF
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065 CALL MIC_DDOT(NDDL, P, Y, S)
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081 ALPHA=G0/S
2082
2083
2084
2085
2086 CALL MIC_DAXPY(NDDL, ALPHA, P, X)
2087 CALL MIC_DAXPY(NDDL,-ALPHA, Y, R)
2088
2089
2090
2091
2092 IF(IPREC == 1)THEN
2093
2094 CALL MIC_DCOPY(NDDL, R, Z)
2095 ELSEIF(IPREC == 2)THEN
2096
2097 CALL MIC_MV(NDDL,R,DIAG_M,Z)
2098 ELSEIF(IPREC == 5)THEN
2099
2100 CALL MIC_DCOPY(NDDL, R, Y)
2101
2102 CALL MIC_SPMV(NDDL ,Y ,R ,LT_M ,IADM ,JDIM )
2103
2104 CALL MIC_MV(NDDL,Y,DIAG_M,Z)
2105 CALL MIC_DCOPY(NDDL, Z, Y)
2106
2107 CALL MIC_SPMV(NDDL ,Z ,Y ,LT_M0,IADM0,JDIM0)
2108 ELSE
2109 END IF
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139 CALL MIC_DDOT(NDDL, R, Z, G1)
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156!$OMP single
2157 BETA=G1/G0
2158 G0 = G1
2159
2160!$OMP end single
2161 IF (ITOL==1) THEN
2162
2163
2164
2165
2166
2167 CALL MIC_DDOT(NDDL, R, R, R2)
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182 ELSEIF (ITOL==3) THEN
2183
2184!$OMP single
2185 R2 =ABS(G1)
2186 S=ONE/ALPHA
2187 L_A=S+B_OLD*A_OLD
2188
2189 L_B2=ABS(BETA)*S*S
2190 L_B=SQRT(L_B2)
2191 A_OLD=S
2192 B_OLD=BETA
2193 ANORM2=TNORM2
2194 TNORM2=TNORM2+L_A*L_A+OLDB2+L_B2
2195 GAMMA = SQRT( GBAR*GBAR + OLDB2 )
2196 CS = GBAR / GAMMA
2197 SN = OLDB / GAMMA
2198 DELTA = CS * DBAR + SN * L_A
2199 GBAR = SN * DBAR - CS * L_A
2200 EPSLN = SN * L_B
2201 DBAR = - CS * L_B
2202 ZL = RHS1 / GAMMA
2203 XNORM2 = XNORM2+ZL*ZL
2204 GMAX = MAX( GMAX, GAMMA )
2205 GMIN = MIN( GMIN, GAMMA )
2206 RHS1 = RHS2 - DELTA * ZL
2207 RHS2 = - EPSLN * ZL
2208 TOLN=TOLS2*ANORM2*XNORM2
2209 OLDB2 = L_B2
2210 OLDB = L_B
2211
2212!$OMP end single
2213 ELSEIF (ITOL==4) THEN
2214 TMP=ALPHA*ALPHA*ABS(G1)
2215 IF (IT>=ND) THEN
2216 DO J=1,ND-1
2217 EPS(J)=EPS(J+1)
2218 ENDDO
2219 EPS(ND)=TMP
2220 R2=ZERO
2221 DO J=1,ND
2222 R2=R2+EPS(J)
2223 ENDDO
2224 ELSE
2225 EPS(IT+1)=TMP
2226 R2=R2+TMP
2227 ENDIF
2228 ELSE
2229 R2 =ABS(G1)
2230 ENDIF
2231
2232!$OMP single
2233.AND. IF(IPRI/=0ITASK==0)THEN
2234 IF(MOD(IT,IP)==0)THEN
2235 RR = SQRT(R2/R02)
2236 WRITE(IOUT,1001)IT,RR
2237 IF(IPRI<0) WRITE(ISTDO,1001)IT,RR
2238 ENDIF
2239 ENDIF
2240
2241
2242
2243
2244 IF (IT>=NLIM) THEN
2245 ISTOP = 0
2246 ELSE
2247 IF (R2<=TOLN) THEN
2248 ISTOP = 0
2249 ELSE
2250 ISTOP = 1
2251 ENDIF
2252 ENDIF
2253
2254
2255
2256 IT = IT +1
2257
2258!$OMP end single
2259 ENDDO
2260 2060 CONTINUE
2261!$OMP single
2262 tt=OMP_GET_WTIME()-tt
2263 i= omp_get_num_threads()
2264 print *,' '
2265 print *,' '
2266 print *,' execution time on mic with',i,' threads'
2267 print *,' ==> ',tt,' seconds'
2268 print *,' '
2269!$OMP END single
2270
2271!$OMP END PARALLEL
2272 WRITE(IOUT,1001)IT-1,RR
2273 END IF ! fin NSPMD ==1
2274 TF=OMP_GET_WTIME()
2275!dec$ omp offload target(mic:MICID) out(X,R:alloc_if(.false.))
2276!$OMP PARALLEL
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286!$OMP END PARALLEL
2287 TF=OMP_GET_WTIME()-TF
2288 print *,'transfer time mic=>cpu(s) =',TF
2289 print *,'transfer rate mic=>cpu(mb/s)=',
2290 . ((2*NDDL/(1024*1024))*8)/TF
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301 END IF ! fin GPUR4R8==2 (MIC double precision)
2302
2303 END IF !only task0
2304
2305#endif
2306
2307
2308 IF(ISAVE==1)CALL IMP_RSAVE(NDDL,X,R)
2309
2310
2311
2312
2313 IF(ITASK == 0) THEN
2314#include "lockon.inc"
2315 ISTOP_H = ISTOP
2316#include "lockoff.inc"
2317 CALL PUPD(N_MAX,NDDLI_G,IT)
2318
2319 IF(IT>=NLIM)THEN
2320 IF (ISOLV==7) THEN
2321#include "lockon.inc"
2322 ISTOP_H = 3
2323#include "lockoff.inc"
2324 ELSE
2325#include "lockon.inc"
2326 ISTOP_H = 1
2327#include "lockoff.inc"
2328
2329.NOT. IF (MIXEDSOL()) THEN
2330 WRITE(IOUT,*)
2331 WRITE(IOUT,1003)NLIM
2332 WRITE(IOUT,*)
2333 WRITE(ISTDO,*)
2334 WRITE(ISTDO,1003)NLIM
2335 WRITE(ISTDO,*)
2336 IF(ITOL==3)THEN
2337 WRITE(IOUT,2000)
2338 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2339 WRITE(ISTDO,2000)
2340 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2341 ENDIF
2342
2343 IF (R2>HUNDRED*TOLN) THEN
2344 ISTOP_H = 2
2345 RR = SQRT(R2/R02)
2346 WRITE(IOUT,*)
2347 WRITE(IOUT,1004)RR
2348 WRITE(IOUT,*)
2349 WRITE(ISTDO,*)
2350 WRITE(ISTDO,1004)RR
2351 WRITE(ISTDO,*)
2352 ENDIF
2353.NOT. END IF !((MIXEDSOL)) THEN
2354
2355 END IF !(ISOLV==7) THEN
2356 ENDIF
2357 IF(IPRI/=0)THEN
2358 RR = SQRT(R2/R02)
2359 WRITE(IOUT,*)
2360 WRITE(IOUT,1002)IT,RR
2361 IF(ITOL==3)WRITE(IOUT,2000)
2362 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2363 WRITE(IOUT,*)
2364 IF(IPRI<0) THEN
2365 WRITE(ISTDO,*)
2366 WRITE(ISTDO,1002)IT,RR
2367 IF(ITOL==3)WRITE(ISTDO,2000)
2368 . EXIT, SQRT(ANORM2), SQRT(XNORM2),GMAX/GMIN
2369 WRITE(ISTDO,*)
2370 ENDIF
2371 ENDIF
2372 IF(ITOL==3) THEN
2373 K_LAMDA0= GMIN
2374 K_LAMDA1= GMAX
2375 ELSE
2376 K_LAMDA0= ZERO
2377 K_LAMDA1= ZERO
2378 ENDIF
2379 END IF !(ITASK == 0) THEN
2380
2381 CALL MY_BARRIER
2382
2383#include "lockon.inc"
2384 ISTOP = ISTOP_H
2385#include "lockoff.inc"
2386
2387 CALL MY_BARRIER
2388
2389 1000 FORMAT(5X,'iteration=
',I8,5X,' initial residual
norm=
',E11.4)
2390 1001 FORMAT(5X,'iteration=
',I8,5X,' relative residual
norm=
',E11.4)
2391 1002 FORMAT(3X,'total c.g. iteration=',I8,5X,
2392 . ' relative residual
norm=
',E11.4)
2393 1003 FORMAT(5X,
2394 . '---warning :
the iteration limit number was reached
',I8)
2395 1004 FORMAT(5X,'warning:c.g stop with relative residual
norm=
',E11.4)
2396 2000 FORMAT(/ 5X, A, 2X, 'anorm =', E12.4, 2X, 'xnorm =', E12.4,2X,
2397 . 'kcond =', E12.4)
2398 2002 FORMAT(/ 5X, 'with', 2X, 'alfa =', E12.4, 2X, 'beta =',
2399 . E12.4,2X,'oldb =', E12.4)
2400 RETURN
2401#endif
end diagonal values have been computed in the(sparse) matrix id.SOL
norm(diag(diag(diag(inv(mat))) -id.SOL), 2) % destroy mumps instance id.JOB
logical function mixedsol()
subroutine imp_isave(nddl, x, r, diag_k, diag_m, nnz, lt_k, lt_k0, lt_m, lt_m0, iadk, jdik, iadk0, jdik0, iadm, jdim, iadm0, jdim0, nnzm, tols, nlim, itol, eps_m, iprec)
subroutine nlim_mix(nddl, nddli, nlim)
subroutine imp_isave2(nddl, x, diag_k, nnz, lt_k, iadk, jdik, lt_k0, iadk0, jdik0)
integer, dimension(:), allocatable nd_fr
integer, dimension(:), allocatable jdi_si
integer, dimension(:), allocatable iddl_sl
integer, dimension(:), allocatable iad_ss
integer, dimension(:), allocatable iad_si
integer, dimension(:), allocatable jdi_sl
integer, dimension(:), allocatable iad_sl
integer, dimension(:), allocatable iad_srem
integer, dimension(:), allocatable jdik0
integer, dimension(:), allocatable iadi0
integer, dimension(:), allocatable jdim0
integer, dimension(:), allocatable iadk0
integer, dimension(:), allocatable iadm0
subroutine prec_solvh(iprec, itask, graphe, iad_elem, fr_elem, diag_k, lt_k, iadk, jdik, itab, iprint, insolv, it, fac_k, ipiv_k, nk, mumps_par, cddlp, isolv, idsc, iddl, ikc, inloc, ndof, nddl, nnz, iadm, jdim, diag_m, lt_m, v, z, f_ddl, l_ddl)
subroutine mav_lth(nddl, nddli, iadl, jdil, diag_k, lt_k, iadi, jdii, itok, lt_i, v, w, a, ar, ve, ms, x, d, dr, ndof, ipari, intbuf_tab, num_imp, ns_imp, ne_imp, nsrem, nsl, ibfv, skew, xframe, monvol, volmon, igrsurf, fr_mv, nmonv, imonv, index2, xi_c, iupd, irbe3, lrbe3, irbe2, lrbe2, f_ddl, l_ddl, itask)
subroutine ancmsg(msgid, msgtype, anmode, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20, r1, r2, r3, r4, r5, r6, r7, r8, r9, c1, c2, c3, c4, c5, c6, c7, c8, c9, prmode)