16 & MYID_NODES, COMM_NODES,
17 & NRHS_COL, NRHS_loc, LRHS_loc,
19 & IRHS_loc, RHS_loc, RHS_loc_size,
20 & RHSCOMP, LD_RHSCOMP,
21 & POSINRHSCOMP_FWD, NB_FS_IN_RHSCOMP,
22 & LSCAL, scaling_data_dr,
23 & LP, LPOK, KEEP, NB_BYTES_LOC, INFO )
27 INTEGER,
INTENT(IN) :: , N, MYID_NODES
28 INTEGER,
INTENT(IN) :: NRHS_loc, LRHS_loc
29 INTEGER,
INTENT(IN) :: NRHS_COL
30 INTEGER,
INTENT(IN) :: COMM_NODES
31 INTEGER,
INTENT(IN) :: ((1,NRHS_loc))
32 INTEGER,
INTENT(IN) :: IRHS_loc(NRHS_loc)
33 INTEGER(8),
INTENT(IN) :: RHS_loc_size
34 DOUBLE PRECISION,
INTENT(IN) :: RHS_loc(RHS_loc_size)
35 INTEGER,
INTENT(IN) :: NB_FS_IN_RHSCOMP, LD_RHSCOMP
36 INTEGER,
INTENT(IN) :: POSINRHSCOMP_FWD(N)
37 DOUBLE PRECISION,
INTENT(OUT) :: RHSCOMP(LD_RHSCOMP, NRHS_COL)
39 LOGICAL,
INTENT(IN) :: LSCAL
42 DOUBLE PRECISION,
dimension(:),
pointer :: SCALING
43 DOUBLE PRECISION,
dimension(:),
pointer :: SCALING_LOC
44 end type scaling_data_t
45 type(scaling_data_t),
INTENT(IN) :: scaling_data_dr
46 LOGICAL,
INTENT(IN) ::
47 INTEGER,
INTENT(IN) :: LP
48 INTEGER,
INTENT(INOUT) :: INFO(2)
49 INTEGER(8),
INTENT(OUT):: NB_BYTES_LOC
57 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: NBROWSTOSEND
58 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: NEXTROWTOSEND
59 DOUBLE PRECISION,
ALLOCATABLE,
DIMENSION(:,:) :: BUFR
60 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: BUFRECI
61 DOUBLE PRECISION,
ALLOCATABLE,
DIMENSION(:) :: BUFRECR
62 LOGICAL,
ALLOCATABLE,
DIMENSION(:) :: IS_SEND_ACTIVE, TOUCHED
63 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: MPI_REQI, MPI_REQR
64 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: IRHS_loc_sorted
66 INTEGER :: Iloc_sorted
68 INTEGER :: IMAP, IPROC_MAX
70 INTEGER :: MAX_ACTIVE_SENDS
71 INTEGER :: NB_ACTIVE_SENDS
72 INTEGER :: NB_FS_TOUCHED
73 INTEGER :: NBROWSTORECV
74 DOUBLE PRECISION,
PARAMETER :: ZERO = 0.0d0
77 ALLOCATE( nbrowstosend(nslaves),
78 & nextrowtosend(nslaves),
79 & irhs_loc_sorted(nrhs_loc),
83 info(2) = nslaves+nslaves+nrhs_loc
85 nb_bytes_loc = int(2*nslaves+nrhs_loc,8)*keep(34)
87 & mpi_integer, mpi_sum,
88 & comm_nodes, ierr_mpi )
89 IF (allocok .GT. 0)
RETURN
90 nbrowstosend(1:nslaves) = 0
92 IF (irhs_loc(iloc) .GE. 1 .AND.
93 & irhs_loc(iloc) .LE. n)
THEN
94 imap = map_rhs_loc(iloc)
95 nbrowstosend(imap+1) = nbrowstosend(imap+1)+1
100 nextrowtosend(imap+1)=nextrowtosend(imap)+nbrowstosend(imap)
103 DO iloc = 1, nrhs_loc
104 IF (irhs_loc(iloc) .GE. 1 .AND.
105 & irhs_loc(iloc) .LE. n)
THEN
107 iloc_sorted = nextrowtosend(imap+1)+nbrowstosend(imap+1)
108 irhs_loc_sorted(iloc_sorted) = iloc
109 nbrowstosend(imap+1)=nbrowstosend(imap+1)+1
113 max_active_sends =
min(10, nslaves)
114 IF (keep(72) .EQ.1 )
THEN
117 maxrecords =
min(200000,2000000/nrhs_col)
118 maxrecords =
min(maxrecords,
119 & 50000000 / max_active_sends / nrhs_col)
120 maxrecords =
max(maxrecords, 50)
122 ALLOCATE(bufr(maxrecords*nrhs_col,
124 & mpi_reqi(max_active_sends),
125 & mpi_reqr(max_active_sends),
126 & is_send_active(max_active_sends),
127 & bufreci(maxrecords),
128 & bufrecr(maxrecords*nrhs_col),
129 & touched(nb_fs_in_rhscomp),
131 IF (allocok .GT. 0)
THEN
132 IF (lp .GT. 0)
WRITE(lp,
'(A)')
133 &
'Error: Allocation problem in DMUMPS_SCATTER_DIST_RHS'
135 info(2)=nrhs_col*maxrecords*max_active_sends+
136 & 3*max_active_sends+maxrecords
139 nb_bytes_loc=nb_bytes_loc +
140 & keep(34) * ( int(2*max_active_sends,8) + int(maxrecords,8) ) +
141 & keep(34) * (int(max_active_sends,8) + int(nb_fs_in_rhscomp,8)) +
143 & int( maxrecords,8)*int(nrhs_col,8)*int(max_active_sends,8)
144 & + int(maxrecords,8) * int(nrhs_col,8) )
146 & mpi_integer, mpi_sum,
147 & comm_nodes, ierr_mpi )
148 IF (allocok .NE. 0)
RETURN
150 DO ireq = 1, max_active_sends
151 is_send_active(ireq) = .false.
154 DO ifs = 1, nb_fs_in_rhscomp
155 touched(ifs) = .false.
157 iproc_max=maxloc(nbrowstosend,dim=1)-1
158 DO WHILE (nbrowstosend(iproc_max+1) .NE. 0)
159 IF (iproc_max .EQ. myid_nodes)
THEN
166 iproc_max=maxloc(nbrowstosend,dim=1)-1
168 DO WHILE ( nbrowstorecv .NE. 0)
172 DO WHILE (nb_active_sends .NE. 0)
180 DO iproc = 0, nslaves-1
181 CALL mpi_reduce( nbrowstosend(iproc+1), nbrowstorecv,
183 & mpi_sum, iproc, comm_nodes, ierr_mpi )
188 include
'mumps_tags.h'
189 INTEGER :: MPI_STATUS(MPI_STATUS_SIZE), MSGSOU
192 CALL MPI_IPROBE( MPI_ANY_SOURCE, DistRhsI, COMM_NODES,
193 & flag, mpi_status, ierr_mpi )
195 msgsou = mpi_status( mpi_source )
197 & nbrecords, ierr_mpi)
198 CALL mpi_recv(bufreci(1), nbrecords, mpi_integer,
200 & comm_nodes, mpi_status, ierr_mpi)
201 CALL mpi_recv(bufrecr(1), nbrecords*nrhs_col,
202 & mpi_double_precision,
204 & comm_nodes, mpi_status, ierr_mpi)
211 & (nbrecords, bufreci_arg, bufrecr_arg)
213 INTEGER,
INTENT(IN) :: NBRECORDS
214 INTEGER,
INTENT(INOUT) :: BUFRECI_ARG(NBRECORDS)
215 DOUBLE PRECISION,
INTENT(IN) :: BUFRECR_ARG(NBRECORDS,
217 INTEGER :: I, K, IRHSCOMP, IFIRSTNOTTOUCHED, ILASTNOTTOUCHED
218 ifirstnottouched = nbrecords+1
221 IF (bufreci(i) .LE. 0)
THEN
222 WRITE(*,*)
"Internal error 1 in DMUMPS_DR_TRY_RECV",
223 & i, bufreci(i), bufreci(1)
226 irhscomp=posinrhscomp_fwd(bufreci(i))
227 bufreci_arg(i)=irhscomp
228 IF ( .NOT. touched(irhscomp) )
THEN
229 ifirstnottouched=
min(ifirstnottouched,i)
230 ilastnottouched=
max(ilastnottouched,i)
239 DO i = ifirstnottouched, ilastnottouched
240 irhscomp=bufreci_arg(i)
241 IF (.NOT. touched(irhscomp))
THEN
242 rhscomp(irhscomp,k)=zero
246 irhscomp=bufreci_arg(i)
247 rhscomp(irhscomp,k) = rhscomp(irhscomp,k) +
254 DO i = ifirstnottouched, ilastnottouched
255 irhscomp=bufreci_arg(i)
256 IF (.NOT. touched(irhscomp))
THEN
257 rhscomp(irhscomp,k)=zero
261 irhscomp=bufreci_arg(i)
262 rhscomp(irhscomp,k) = rhscomp(irhscomp,k) +
268 irhscomp = bufreci_arg(i)
269 IF (.NOT. touched(irhscomp))
THEN
270 nb_fs_touched = nb_fs_touched + 1
271 touched(irhscomp) = .true.
274 nbrowstorecv = nbrowstorecv - nbrecords
278 INTEGER :: NBRECORDS, I, K, IFIRSTNOTTOUCHED
283 IF ( nbrowstosend(myid_nodes+1) .EQ. 0)
THEN
284 WRITE(*,*)
"Internal error in DMUMPS_DR_ASSEMBLE_LOCAL"
287 nbrecords=
min(maxrecords, nbrowstosend(myid_nodes+1))
288 ifirstnottouched=nbrecords+1
290 irhscomp = posinrhscomp_fwd(irhs_loc(
291 & irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)))
292 IF (.NOT. touched(irhscomp))
THEN
303 ishift = (k-1) * lrhs_loc
304 DO i = ifirstnottouched, nbrecords
305 irhscomp = posinrhscomp_fwd(irhs_loc(
306 & irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)))
307 IF (.NOT. touched(irhscomp))
THEN
308 rhscomp(irhscomp,k)=zero
312 iloc = irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)
313 iglob = irhs_loc(iloc)
314 irhscomp = posinrhscomp_fwd(iglob)
315 rhscomp(irhscomp,k) = rhscomp(irhscomp,k)+
316 & rhs_loc(iloc+ishift)*
317 & scaling_data_dr%SCALING_LOC(iloc)
325!$omp& firstprivate(nbrecords)
IF (omp_flag)
327 ishift = (k-1) * lrhs_loc
328 DO i = ifirstnottouched, nbrecords
329 irhscomp = posinrhscomp_fwd(irhs_loc(
330 & irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)))
331 IF (.NOT. touched(irhscomp))
THEN
332 rhscomp(irhscomp,k)=zero
336 iloc = irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)
337 iglob = irhs_loc(iloc)
338 irhscomp = posinrhscomp_fwd(iglob)
339 rhscomp(irhscomp,k) = rhscomp(irhscomp,k)+
340 & rhs_loc(iloc+ishift)
346 irhscomp = posinrhscomp_fwd(irhs_loc(
347 & irhs_loc_sorted(nextrowtosend(myid_nodes+1)+i-1)))
348 IF (.NOT. touched(irhscomp))
THEN
349 nb_fs_touched = nb_fs_touched + 1
350 touched(irhscomp) = .true.
353 nextrowtosend(myid_nodes+1)=nextrowtosend(myid_nodes+1)+
355 nbrowstosend(myid_nodes+1)=nbrowstosend(myid_nodes+1)-
357 nbrowstorecv = nbrowstorecv - nbrecords
361 INTEGER,
INTENT(OUT) :: IBUF
364 IF (nb_active_sends .NE. max_active_sends)
THEN
365 DO i=1, max_active_sends
366 IF (.NOT. is_send_active(i))
THEN
375 INTEGER :: MPI_STATUS(MPI_STATUS_SIZE)
378 IF (nb_active_sends .GT. 0)
THEN
379 DO i=1, max_active_sends
380 IF (is_send_active(i))
THEN
381 CALL mpi_test( mpi_reqr(i), flag, mpi_status, ierr_mpi )
383 CALL mpi_wait(mpi_reqi(i), mpi_status, ierr_mpi)
384 nb_active_sends = nb_active_sends - 1
385 is_send_active(i)=.false.
386 IF (nb_active_sends .EQ. 0)
THEN
397 INTEGER,
INTENT(IN) :: IPROC_ARG
398 include
'mumps_tags.h'
399 INTEGER :: NBRECORDS, IBUF, I, K
400 INTEGER(8) :: IPOSRHS
402 IF (iproc_arg .EQ. myid_nodes)
THEN
403 WRITE(*,*)
"Internal error 1 in DMUMPS_DR_TRY_SEND"
406 IF (nbrowstosend(iproc_arg+1) .EQ. 0)
THEN
410 CALL DMUMPS_DR_GET_NEW_BUF(IBUF)
412 NBRECORDS = min(MAXRECORDS,NBROWSTOSEND(IPROC_ARG+1))
414!$ CHUNK = NRHS_COL*NBRECORDS
415.GE.
!$ IF (CHUNK KEEP(363)) THEN
417!$ CHUNK = max((CHUNK+NOMP-1)/NOMP,KEEP(363)/2)
420!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC,CHUNK)
421!$OMP& PRIVATE(K, I, IPOSBUF, IPOSRHS, Iloc) IF (OMP_FLAG)
424 IPOSBUF = (K-1)*NBRECORDS
425 IPOSRHS = int(K-1,8)*int(LRHS_loc,8)
426 Iloc = IRHS_loc_sorted(NEXTROWTOSEND(IPROC_ARG+1)+I-1)
427 BUFR( IPOSBUF + I, IBUF )
428 & = RHS_loc( IPOSRHS + Iloc ) *
429 & scaling_data_dr%SCALING_LOC(Iloc)
434!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC,CHUNK)
435!$OMP& PRIVATE(K, I, IPOSBUF, IPOSRHS, Iloc) IF (OMP_FLAG)
438 IPOSBUF = (K-1)*NBRECORDS
439 IPOSRHS = int(K-1,8)*int(LRHS_loc,8)
440 Iloc = IRHS_loc_sorted(NEXTROWTOSEND(IPROC_ARG+1)+I-1)
441 BUFR( IPOSBUF + I, IBUF )
442 & = RHS_loc( IPOSRHS + Iloc )
448 Iloc = IRHS_loc_sorted(NEXTROWTOSEND(IPROC_ARG+1)+I-1)
449 IRHS_loc_sorted(NEXTROWTOSEND(IPROC_ARG+1)+I-1)
452 CALL MPI_ISEND( IRHS_loc_sorted(NEXTROWTOSEND(IPROC_ARG+1)),
453 & NBRECORDS, MPI_INTEGER, IPROC_ARG, DistRhsI,
454 & COMM_NODES, MPI_REQI(IBUF), IERR_MPI )
455 CALL MPI_ISEND( BUFR(1,IBUF), NBRECORDS*NRHS_COL,
456 & MPI_DOUBLE_PRECISION,
457 & IPROC_ARG, DistRhsR,
458 & COMM_NODES, MPI_REQR(IBUF), IERR_MPI )
459 NEXTROWTOSEND(IPROC_ARG+1)=NEXTROWTOSEND(IPROC_ARG+1)+
461 NBROWSTOSEND(IPROC_ARG+1)=NBROWSTOSEND(IPROC_ARG+1)-NBRECORDS
462 NB_ACTIVE_SENDS = NB_ACTIVE_SENDS + 1
463 IS_SEND_ACTIVE(IBUF)=.TRUE.
466 END SUBROUTINE DMUMPS_DR_TRY_SEND
467 SUBROUTINE DMUMPS_DR_EMPTY_ROWS()
469.NE.
IF ( NB_FS_TOUCHED NB_FS_IN_RHSCOMP ) THEN
470.GE..AND.
!$ OMP_FLAG = (NRHS_COL KEEP(362))
471!$ & (NRHS_COL*NB_FS_IN_RHSCOMP > KEEP(363)/2)
472!$OMP PARALLEL DO FIRSTPRIVATE(NB_FS_IN_RHSCOMP) IF (OMP_FLAG)
474 DO IFS = 1, NB_FS_IN_RHSCOMP
475.NOT.
IF ( TOUCHED(IFS) ) THEN
476 RHSCOMP( IFS, K) = ZERO
479 DO IFS = NB_FS_IN_RHSCOMP +1, LD_RHSCOMP
480 RHSCOMP (IFS, K) = ZERO
486!$ CHUNK8 = int(NRHS_COL,8)*int(LD_RHSCOMP-NB_FS_IN_RHSCOMP,8)
487!$ CHUNK8 = max(CHUNK8,1_8)
488.GE.
!$ IF (CHUNK8 int(KEEP(363),8)) THEN
490!$ CHUNK8 = max((CHUNK8+NOMP-1)/NOMP,int(KEEP(363)/2,8))
492!$OMP PARALLEL DO COLLAPSE(2) SCHEDULE(STATIC,CHUNK8)
495 DO IFS = NB_FS_IN_RHSCOMP +1, LD_RHSCOMP
496 RHSCOMP (IFS, K) = ZERO
502 END SUBROUTINE DMUMPS_DR_EMPTY_ROWS
subroutine dmumps_scatter_dist_rhs(nslaves, n, myid_nodes, comm_nodes, nrhs_col, nrhs_loc, lrhs_loc, map_rhs_loc, irhs_loc, rhs_loc, rhs_loc_size, rhscomp, ld_rhscomp, posinrhscomp_fwd, nb_fs_in_rhscomp, lscal, scaling_data_dr, lp, lpok, keep, nb_bytes_loc, info)