OpenRadioss 2025.1.11
OpenRadioss project
Loading...
Searching...
No Matches
psgemr.c File Reference
#include "redist.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

Go to the source code of this file.

Data Structures

struct  MDESC
struct  IDESC

Macros

#define static2   static
#define fortran_mr2d   psgemr2do
#define fortran_mr2dnew   psgemr2d
#define scopy_   scopy
#define slacpy_   slacpy
#define Clacpy   Csgelacpy
#define BLOCK_CYCLIC_2D   1
#define SHIFT(row, sprow, nbrow)
#define max(A, B)
#define min(A, B)
#define DIVUP(a, b)
#define ROUNDUP(a, b)
#define scanD0   sgescanD0
#define dispmat   sgedispmat
#define setmemory   sgesetmemory
#define freememory   sgefreememory
#define scan_intervals   sgescan_intervals
#define SENDBUFF   0
#define RECVBUFF   1
#define SIZEBUFF   2
#define NDEBUG
#define DESCLEN   9
#define NBPARAM
#define MAGIC_MAX   100000000
#define Mlacpy(mo, no, ao, ldao, bo, ldbo)

Functions

void Cblacs_pcoord ()
Int Cblacs_pnum ()
void Csetpvmtids ()
void Cblacs_get ()
void Cblacs_pinfo ()
void Cblacs_gridinfo ()
void Cblacs_gridinit ()
void Cblacs_exit ()
void Cblacs_gridexit ()
void Cblacs_setup ()
void Cigebs2d ()
void Cigebr2d ()
void Cigesd2d ()
void Cigerv2d ()
void Cigsum2d ()
void Cigamn2d ()
void Cigamx2d ()
void Csgesd2d ()
void Csgerv2d ()
Int localindice ()
void * mr2d_malloc ()
Int ppcm ()
Int localsize ()
Int memoryblocksize ()
Int changeorigin ()
void paramcheck ()
void Cpsgemr2do ()
void Cpsgemr2d ()
void fortran_mr2d (Int *m, Int *n, float *A, Int *ia, Int *ja, Int desc_A[DESCLEN], float *B, Int *ib, Int *jb, Int desc_B[DESCLEN])
void fortran_mr2dnew (Int *m, Int *n, float *A, Int *ia, Int *ja, Int desc_A[DESCLEN], float *B, Int *ib, Int *jb, Int desc_B[DESCLEN], Int *gcontext)
static2 void init_chenille ()
static2 Int inter_len ()
static2 Int block2buff ()
static2 void buff2block ()
static2 void gridreshape ()
void Cpsgemr2do (Int m, Int n, float *ptrmyblock, Int ia, Int ja, MDESC *ma, float *ptrmynewblock, Int ib, Int jb, MDESC *mb)
void Cpsgemr2d (Int m, Int n, float *ptrmyblock, Int ia, Int ja, MDESC *ma, float *ptrmynewblock, Int ib, Int jb, MDESC *mb, Int globcontext)
static2 void init_chenille (Int mypnum, Int nprocs, Int n0, Int *proc0, Int n1, Int *proc1, Int **psend, Int **precv, Int *myrang)
static2 Int block2buff (IDESC *vi, Int vinb, IDESC *hi, Int hinb, float *ptra, MDESC *ma, float *buff)
static2 void buff2block (IDESC *vi, Int vinb, IDESC *hi, Int hinb, float *buff, float *ptrb, MDESC *mb)
static2 Int inter_len (Int hinb, IDESC *hi, Int vinb, IDESC *vi)
void Clacpy (Int m, Int n, float *a, Int lda, float *b, Int ldb)
static2 void gridreshape (Int *ctxtp)

Macro Definition Documentation

◆ BLOCK_CYCLIC_2D

#define BLOCK_CYCLIC_2D   1

Definition at line 171 of file psgemr.c.

◆ Clacpy

#define Clacpy   Csgelacpy

Definition at line 158 of file psgemr.c.

◆ DESCLEN

#define DESCLEN   9

Definition at line 242 of file psgemr.c.

◆ dispmat

#define dispmat   sgedispmat

Definition at line 218 of file psgemr.c.

◆ DIVUP

#define DIVUP ( a,
b )
Value:
( ((a)-1) /(b)+1)

Definition at line 179 of file psgemr.c.

◆ fortran_mr2d

#define fortran_mr2d   psgemr2do

Definition at line 153 of file psgemr.c.

◆ fortran_mr2dnew

#define fortran_mr2dnew   psgemr2d

Definition at line 154 of file psgemr.c.

◆ freememory

#define freememory   sgefreememory

Definition at line 220 of file psgemr.c.

◆ MAGIC_MAX

#define MAGIC_MAX   100000000

Definition at line 286 of file psgemr.c.

◆ max

#define max ( A,
B )
Value:
((A)>(B)?(A):(B))

Definition at line 177 of file psgemr.c.

◆ min

#define min ( A,
B )
Value:
((A)>(B)?(B):(A))

Definition at line 178 of file psgemr.c.

◆ Mlacpy

#define Mlacpy ( mo,
no,
ao,
ldao,
bo,
ldbo )
Value:
{ \
float *_a,*_b; \
Int _m,_n,_lda,_ldb; \
Int _i,_j; \
_m = (mo);_n = (no); \
_a = (ao);_b = (bo); \
_lda = (ldao) - _m; \
_ldb = (ldbo) - _m; \
assert(_lda >= 0 && _ldb >= 0); \
for (_j=0;_j<_n;_j++) { \
for (_i=0;_i<_m;_i++) \
*_b++ = *_a++; \
_b += _ldb; \
_a += _lda; \
} \
}
#define Int
Definition Bconfig.h:22

Definition at line 621 of file psgemr.c.

621}
622#define Mlacpy(mo,no,ao,ldao,bo,ldbo) \
623{ \
624float *_a,*_b; \
625Int _m,_n,_lda,_ldb; \
626 Int _i,_j; \
627 _m = (mo);_n = (no); \
628 _a = (ao);_b = (bo); \
629 _lda = (ldao) - _m; \
630 _ldb = (ldbo) - _m; \
631 assert(_lda >= 0 && _ldb >= 0); \
632 for (_j=0;_j<_n;_j++) { \
633 for (_i=0;_i<_m;_i++) \
634 *_b++ = *_a++; \
635 _b += _ldb; \
636 _a += _lda; \
637 } \

◆ NBPARAM

#define NBPARAM
Value:
20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
* idem B puis ia,ja puis ib,jb */

Definition at line 285 of file psgemr.c.

◆ NDEBUG

#define NDEBUG

Definition at line 237 of file psgemr.c.

◆ RECVBUFF

#define RECVBUFF   1

Definition at line 231 of file psgemr.c.

◆ ROUNDUP

#define ROUNDUP ( a,
b )
Value:
(DIVUP(a,b)*(b))
#define DIVUP(a, b)
Definition pcgemr.c:182

Definition at line 180 of file psgemr.c.

◆ scan_intervals

#define scan_intervals   sgescan_intervals

Definition at line 221 of file psgemr.c.

◆ scanD0

#define scanD0   sgescanD0

Definition at line 217 of file psgemr.c.

◆ scopy_

#define scopy_   scopy

Definition at line 155 of file psgemr.c.

◆ SENDBUFF

#define SENDBUFF   0

Definition at line 230 of file psgemr.c.

◆ setmemory

#define setmemory   sgesetmemory

Definition at line 219 of file psgemr.c.

◆ SHIFT

#define SHIFT ( row,
sprow,
nbrow )
Value:
((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))

Definition at line 176 of file psgemr.c.

◆ SIZEBUFF

#define SIZEBUFF   2

Definition at line 232 of file psgemr.c.

◆ slacpy_

void slacpy_   slacpy

Definition at line 156 of file psgemr.c.

◆ static2

#define static2   static

Id
psgemr.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp

– ScaLAPACK routine (version 1.7) – Oak Ridge National Laboratory, Univ. of Tennessee, and Univ. of California, Berkeley. October 31, 1994.

SUBROUTINE PSGEMR2D( M, N, $ A, IA, JA, ADESC, $ B, IB, JB, BDESC,

$ CTXT)

Purpose

PSGEMR2D copies a submatrix of A on a submatrix of B. A and B can have different distributions: they can be on different processor grids, they can have different blocksizes, the beginning of the area to be copied can be at a different places on A and B.

The parameters can be confusing when the grids of A and B are partially or completly disjoint, in the case a processor calls this routines but is either not in the A context or B context, the ADESC[CTXT] or BDESC[CTXT] must be equal to -1, to ensure the routine recognise this situation. To summarize the rule:

  • If a processor is in A context, all parameters related to A must be valid.
  • If a processor is in B context, all parameters related to B must be valid.
  • ADESC[CTXT] and BDESC[CTXT] must be either valid contexts or equal to -1.
  • M and N must be valid for everyone.
  • other parameters are not examined.

Notes

A description vector is associated with each 2D block-cyclicly dis- tributed matrix. This vector stores the information required to establish the mapping between a matrix entry and its corresponding process and memory location.

In the following comments, the character _ should be read as "of the distributed matrix". Let A be a generic term for any 2D block cyclicly distributed matrix. Its description vector is DESC_A:

NOTATION STORED IN EXPLANATION


DT_A (global) DESCA( DT_ ) The descriptor type. CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating the BLACS process grid A is distribu- ted over. The context itself is glo- bal, but the handle (the integer value) may vary. M_A (global) DESCA( M_ ) The number of rows in the distributed matrix A. N_A (global) DESCA( N_ ) The number of columns in the distri- buted matrix A. MB_A (global) DESCA( MB_ ) The blocking factor used to distribute the rows of A. NB_A (global) DESCA( NB_ ) The blocking factor used to distribute the columns of A. RSRC_A (global) DESCA( RSRC_ ) The process row over which the first row of the matrix A is distributed. CSRC_A (global) DESCA( CSRC_ ) The process column over which the first column of A is distributed. LLD_A (local) DESCA( LLD_ ) The leading dimension of the local array storing the local blocks of the distributed matrix A. LLD_A >= MAX(1,LOCp(M_A)).

Important notice

The parameters of the routine have changed in April 1996 There is a new last argument. It must be a context englobing all processors involved in the initial and final distribution.

Be aware that all processors included in this context must call the redistribution routine.

Parameters

M (input) INTEGER. On entry, M specifies the number of rows of the submatrix to be copied. M must be at least zero. Unchanged on exit.

N (input) INTEGER. On entry, N specifies the number of cols of the submatrix to be redistributed.rows of B. M must be at least zero. Unchanged on exit.

A (input) REAL On entry, the source matrix. Unchanged on exit.

IA,JA (input) INTEGER On entry,the coordinates of the beginning of the submatrix of A to copy. 1 <= IA <= M_A - M + 1,1 <= JA <= N_A - N + 1, Unchanged on exit.

ADESC (input) A description vector (see Notes above) If the current processor is not part of the context of A the ADESC[CTXT] must be equal to -1.

B (output) REAL On entry, the destination matrix. The portion corresponding to the defined submatrix are updated.

IB,JB (input) INTEGER On entry,the coordinates of the beginning of the submatrix of B that will be updated. 1 <= IB <= M_B - M + 1,1 <= JB <= N_B - N + 1, Unchanged on exit.

BDESC (input) B description vector (see Notes above) For processors not part of the context of B BDESC[CTXT] must be equal to -1.

CTXT (input) a context englobing at least all processors included in either A context or B context

Memory requirement :

for the processors belonging to grid 0, one buffer of size block 0 and for the processors belonging to grid 1, also one buffer of size block 1.


Created March 1993 by B. Tourancheau (See sccs for modifications).

Modifications by Loic PRYLLI 1995

Definition at line 143 of file psgemr.c.

Function Documentation

◆ block2buff() [1/2]

static2 Int block2buff ( )

◆ block2buff() [2/2]

static2 Int block2buff ( IDESC * vi,
Int vinb,
IDESC * hi,
Int hinb,
float * ptra,
MDESC * ma,
float * buff )

Definition at line 639 of file psgemr.c.

641{
642 Int h, v, sizebuff;
643 float *ptr2;
644 sizebuff = 0;
645 for (h = 0; h < hinb; h++) {
646 ptr2 = ptra + hi[h].lstart * ma->lda;
647 for (v = 0; v < vinb; v++) {
648 Mlacpy(vi[v].len, hi[h].len,
649 ptr2 + vi[v].lstart,
650 ma->lda,
651 buff + sizebuff, vi[v].len);
652 sizebuff += hi[h].len * vi[v].len;
653 }
654 }
655 return sizebuff;
#define Mlacpy(mo, no, ao, ldao, bo, ldbo)
Definition psgemr.c:621
Int lstart
Definition pcgemr.c:176
Int len
Definition pcgemr.c:177
Int lda
Definition pcgemr.c:172

◆ buff2block() [1/2]

static2 void buff2block ( )

◆ buff2block() [2/2]

static2 void buff2block ( IDESC * vi,
Int vinb,
IDESC * hi,
Int hinb,
float * buff,
float * ptrb,
MDESC * mb )

Definition at line 657 of file psgemr.c.

659{
660 Int h, v, sizebuff;
661 float *ptr2;
662 sizebuff = 0;
663 for (h = 0; h < hinb; h++) {
664 ptr2 = ptrb + hi[h].lstart * mb->lda;
665 for (v = 0; v < vinb; v++) {
666 Mlacpy(vi[v].len, hi[h].len,
667 buff + sizebuff, vi[v].len,
668 ptr2 + vi[v].lstart,
669 mb->lda);
670 sizebuff += hi[h].len * vi[v].len;
671 }
672 }

◆ Cblacs_exit()

void Cblacs_exit ( )
extern

◆ Cblacs_get()

void Cblacs_get ( )
extern

◆ Cblacs_gridexit()

void Cblacs_gridexit ( )
extern

◆ Cblacs_gridinfo()

void Cblacs_gridinfo ( )
extern

◆ Cblacs_gridinit()

void Cblacs_gridinit ( )
extern

◆ Cblacs_pcoord()

void Cblacs_pcoord ( )
extern

◆ Cblacs_pinfo()

void Cblacs_pinfo ( )
extern

◆ Cblacs_pnum()

Int Cblacs_pnum ( )
extern

◆ Cblacs_setup()

void Cblacs_setup ( )
extern

◆ changeorigin()

Int changeorigin ( )
extern

◆ Cigamn2d()

void Cigamn2d ( )
extern

◆ Cigamx2d()

void Cigamx2d ( )
extern

◆ Cigebr2d()

void Cigebr2d ( )
extern

◆ Cigebs2d()

void Cigebs2d ( )
extern

◆ Cigerv2d()

void Cigerv2d ( )
extern

◆ Cigesd2d()

void Cigesd2d ( )
extern

◆ Cigsum2d()

void Cigsum2d ( )
extern

◆ Clacpy()

void Clacpy ( Int m,
Int n,
float * a,
Int lda,
float * b,
Int ldb )

Definition at line 686 of file psgemr.c.

688{
689 Int i, j;
690 lda -= m;
691 ldb -= m;
692 assert(lda >= 0 && ldb >= 0);
693 for (j = 0; j < n; j++) {
694 for (i = 0; i < m; i++)
695 *b++ = *a++;
696 b += ldb;
697 a += lda;
698 }
n

◆ Cpsgemr2d() [1/2]

void Cpsgemr2d ( )
extern

◆ Cpsgemr2d() [2/2]

void Cpsgemr2d ( Int m,
Int n,
float * ptrmyblock,
Int ia,
Int ja,
MDESC * ma,
float * ptrmynewblock,
Int ib,
Int jb,
MDESC * mb,
Int globcontext )

Definition at line 288 of file psgemr.c.

297{
298 float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
299 float *recvptr;
300 MDESC newa, newb;
301 Int *proc0, *proc1, *param;
302 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
303 Int i, j;
304 Int nprow, npcol, gcontext;
305 Int recvsize, sendsize;
306 IDESC *h_inter; /* to store the horizontal intersections */
307 IDESC *v_inter; /* to store the vertical intersections */
308 Int hinter_nb, vinter_nb; /* number of intrsections in both directions */
309 Int dummy;
310 Int p0, q0, p1, q1;
311 Int *ra, *ca;
312 /* end of variables */
313 /* To simplify further calcul we change the matrix indexation from
314 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
315 if (m == 0 || n == 0)
316 return;
317 ia -= 1;
318 ja -= 1;
319 ib -= 1;
320 jb -= 1;
321 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
322 gcontext = globcontext;
323 nprocs = nprow * npcol;
324 /* if the global context that is given to us has not the shape of a line
325 * (nprow != 1), create a new context. TODO: to be optimal, we should
326 * avoid this because it is an uncessary synchronisation */
327 if (nprow != 1) {
328 gridreshape(&gcontext);
329 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
330 }
331 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
332 /* compatibility T3D, must check myprow and mypcol are within bounds */
333 if (myprow0 >= p0 || mypcol0 >= q0)
334 myprow0 = mypcol0 = -1;
335 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
336 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
337 if (myprow1 >= p1 || mypcol1 >= q1)
338 myprow1 = mypcol1 = -1;
339 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
340 /* exchange the missing parameters among the processors: shape of grids and
341 * location of the processors */
342 param = (Int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(Int));
343 ra = param + nprocs * 2 + NBPARAM;
344 ca = param + (nprocs * 2 + NBPARAM) * 2;
345 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
346 param[i] = MAGIC_MAX;
347 proc0 = param + NBPARAM;
348 proc1 = param + NBPARAM + nprocs;
349 /* we calulate proc0 and proc1 that will give the number of a proc in
350 * respectively a or b in the global context */
351 if (myprow0 >= 0) {
352 proc0[myprow0 * q0 + mypcol0] = mypnum;
353 param[0] = p0;
354 param[1] = q0;
355 param[4] = ma->m;
356 param[5] = ma->n;
357 param[6] = ma->nbrow;
358 param[7] = ma->nbcol;
359 param[8] = ma->sprow;
360 param[9] = ma->spcol;
361 param[10] = ia;
362 param[11] = ja;
363 }
364 if (myprow1 >= 0) {
365 proc1[myprow1 * q1 + mypcol1] = mypnum;
366 param[2] = p1;
367 param[3] = q1;
368 param[12] = mb->m;
369 param[13] = mb->n;
370 param[14] = mb->nbrow;
371 param[15] = mb->nbcol;
372 param[16] = mb->sprow;
373 param[17] = mb->spcol;
374 param[18] = ib;
375 param[19] = jb;
376 }
377 printf("Aproc0 = {%d,%d}\n", proc0[0], proc0[1]);
378 printf("Aproc1 = {%d,%d}\n", proc1[0], proc1[1]);
379 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, (Int)1, param, 2 * nprocs + NBPARAM,
380 ra, ca, 2 * nprocs + NBPARAM, (Int)-1, (Int)-1);
381 printf("Bproc0 = {%d,%d}\n", proc0[0], proc0[1]);
382 printf("Bproc1 = {%d,%d}\n", proc1[0], proc1[1]);
383 newa = *ma;
384 newb = *mb;
385 ma = &newa;
386 mb = &newb;
387 if (myprow0 == -1) {
388 p0 = param[0];
389 q0 = param[1];
390 ma->m = param[4];
391 ma->n = param[5];
392 ma->nbrow = param[6];
393 ma->nbcol = param[7];
394 ma->sprow = param[8];
395 ma->spcol = param[9];
396 ia = param[10];
397 ja = param[11];
398 }
399 if (myprow1 == -1) {
400 p1 = param[2];
401 q1 = param[3];
402 mb->m = param[12];
403 mb->n = param[13];
404 mb->nbrow = param[14];
405 mb->nbcol = param[15];
406 mb->sprow = param[16];
407 mb->spcol = param[17];
408 ib = param[18];
409 jb = param[19];
410 }
411 for (i = 0; i < NBPARAM; i++) {
412 if (param[i] == MAGIC_MAX) {
413 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
414 exit(1);
415 }
416 }
417#ifndef NDEBUG
418 for (i = 0; i < p0 * q0; i++)
419 assert(proc0[i] >= 0 && proc0[i] < nprocs);
420 for (i = 0; i < p1 * q1; i++)
421 assert(proc1[i] >= 0 && proc1[i] < nprocs);
422#endif
423 /* check the validity of the parameters */
424 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
425 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
426 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
427 {
428 Int decal;
429 ia = changeorigin(myprow0, ma->sprow, p0,
430 ma->nbrow, ia, &decal, &ma->sprow);
431 ptrmyblock += decal;
432 ja = changeorigin(mypcol0, ma->spcol, q0,
433 ma->nbcol, ja, &decal, &ma->spcol);
434 ptrmyblock += decal * ma->lda;
435 ma->m = ia + m;
436 ma->n = ja + n;
437 ib = changeorigin(myprow1, mb->sprow, p1,
438 mb->nbrow, ib, &decal, &mb->sprow);
439 ptrmynewblock += decal;
440 jb = changeorigin(mypcol1, mb->spcol, q1,
441 mb->nbcol, jb, &decal, &mb->spcol);
442 ptrmynewblock += decal * mb->lda;
443 mb->m = ib + m;
444 mb->n = jb + n;
445 if (p0 == 1)
446 ma->nbrow = ma->m;
447 if (q0 == 1)
448 ma->nbcol = ma->n;
449 if (p1 == 1)
450 mb->nbrow = mb->m;
451 if (q1 == 1)
452 mb->nbcol = mb->n;
453#ifndef NDEBUG
454 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
455 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
456#endif
457 }
458 /* We compute the size of the memory buffer ( we choose the worst case,
459 * when the buffer sizes == the memory block sizes). */
460 if (myprow0 >= 0 && mypcol0 >= 0) {
461 /* Initialize pointer variables */
462 setmemory(&ptrsendbuff, memoryblocksize(ma));
463 }; /* if (mypnum < p0 * q0) */
464 if (myprow1 >= 0 && mypcol1 >= 0) {
465 /* Initialize pointer variables */
466 setmemory(&ptrrecvbuff, memoryblocksize(mb));
467 }; /* if (mypnum < p1 * q1) */
468 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
469 * intervals, in fact the worst case should be less, perhaps half that,I
470 * should think of that one day. */
471 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
472 ma->nbcol * sizeof(IDESC));
473 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
474 * ma->nbrow * sizeof(IDESC));
475 /* We go for the scanning of indices. For each processor including mypnum,
476 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
477 * it. Then for each processor, we compute the size of message to be
478 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
479 * of recvbuff the right place (scanD)(RECVBUFF)) */
480 recvptr = ptrrecvbuff;
481 {
482 Int tot, myrang, step, sens;
483 Int *sender, *recver;
484 Int mesending, merecving;
485 tot = max(p0 * q0, p1 * q1);
486 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
487 &sender, &recver, &myrang);
488 if (myrang == -1)
489 goto after_comm;
490 mesending = myprow0 >= 0;
491 assert(sender[myrang] >= 0 || !mesending);
492 assert(!mesending || proc0[sender[myrang]] == mypnum);
493 merecving = myprow1 >= 0;
494 assert(recver[myrang] >= 0 || !merecving);
495 assert(!merecving || proc1[recver[myrang]] == mypnum);
496 step = tot - 1 - myrang;
497 do {
498 for (sens = 0; sens < 2; sens++) {
499 /* be careful here, when we communicating with ourselves, we must
500 * send first (myrang > step == 0) */
501 if (mesending && recver[step] >= 0 &&
502 (sens == 0)) {
503 i = recver[step] / q1;
504 j = recver[step] % q1;
505 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
506 v_inter);
507 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
508 h_inter);
509 sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb,
510 ptrmyblock, ma, ptrsendbuff);
511 } /* if (mesending...) { */
512 if (mesending && recver[step] >= 0 &&
513 (sens == myrang > step)) {
514 i = recver[step] / q1;
515 j = recver[step] % q1;
516 if (sendsize > 0
517 && (step != myrang || !merecving)
518 ) {
519 Csgesd2d(gcontext, sendsize, (Int)1, ptrsendbuff, sendsize,
520 (Int)0, proc1[i * q1 + j]);
521 } /* sendsize > 0 */
522 } /* if (mesending ... */
523 if (merecving && sender[step] >= 0 &&
524 (sens == myrang <= step)) {
525 i = sender[step] / q0;
526 j = sender[step] % q0;
527 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
528 v_inter);
529 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
530 h_inter);
531 recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter);
532 if (recvsize > 0) {
533 if (step == myrang && mesending) {
534 Clacpy(recvsize, 1,
535 ptrsendbuff, recvsize,
536 ptrrecvbuff, recvsize);
537 } else {
538 Csgerv2d(gcontext, recvsize, (Int)1, ptrrecvbuff, recvsize,
539 0, proc0[i * q0 + j]);
540 }
541 } /* recvsize > 0 */
542 } /* if (merecving ...) */
543 if (merecving && sender[step] >= 0 && sens == 1) {
544 buff2block(v_inter, vinter_nb, h_inter, hinter_nb,
545 recvptr, ptrmynewblock, mb);
546 } /* if (merecving...) */
547 } /* for (sens = 0) */
548 step -= 1;
549 if (step < 0)
550 step = tot - 1;
551 } while (step != tot - 1 - myrang);
552after_comm:
553 free(sender);
554 } /* { int tot,nr,ns ...} */
555 /* don't forget to clean up things! */
556 if (myprow1 >= 0 && mypcol1 >= 0) {
557 freememory((char *) ptrrecvbuff);
558 };
559 if (myprow0 >= 0 && mypcol0 >= 0) {
560 freememory((char *) ptrsendbuff);
561 };
562 if (nprow != 1)
563 Cblacs_gridexit(gcontext);
564 free(v_inter);
565 free(h_inter);
566 free(param);
integer, save, private nprocs
Definition cmumps_load.F:57
#define NBPARAM
Definition pcgemr.c:288
#define MAGIC_MAX
Definition pcgemr.c:289
static2 Int inter_len()
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition psgemr.c:220
#define scan_intervals
Definition psgemr.c:221
void Csgerv2d()
void Cblacs_gridexit()
#define max(A, B)
Definition psgemr.c:177
static2 void gridreshape()
#define DIVUP(a, b)
Definition psgemr.c:179
#define Clacpy
Definition psgemr.c:158
void Cigamn2d()
#define setmemory
Definition psgemr.c:219
static2 void buff2block()
void paramcheck()
static2 Int block2buff()
void Cblacs_gridinfo()
void * mr2d_malloc()
void Csgesd2d()
static2 void init_chenille()
Int m
Definition pcgemr.c:166
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168
Int ctxt
Definition pcgemr.c:165
Int n
Definition pcgemr.c:167

◆ Cpsgemr2do() [1/2]

void Cpsgemr2do ( )
extern

◆ Cpsgemr2do() [2/2]

void Cpsgemr2do ( Int m,
Int n,
float * ptrmyblock,
Int ia,
Int ja,
MDESC * ma,
float * ptrmynewblock,
Int ib,
Int jb,
MDESC * mb )

Definition at line 265 of file psgemr.c.

273{
274 Int dummy, nprocs;
275 Int gcontext;
276 /* first we initialize a global grid which serve as a reference to
277 * communicate from grid a to grid b */
278 Cblacs_pinfo(&dummy, &nprocs);
279 Cblacs_get((Int)0, (Int)0, &gcontext);
280 Cblacs_gridinit(&gcontext, "R", (Int)1, nprocs);
281 Cpsgemr2d(m, n, ptrmyblock, ia, ja, ma,
282 ptrmynewblock, ib, jb, mb, gcontext);
283 Cblacs_gridexit(gcontext);
284}
void Cpsgemr2d()
void Cblacs_pinfo()
void Cblacs_get()
void Cblacs_gridinit()

◆ Csetpvmtids()

void Csetpvmtids ( )
extern

◆ Csgerv2d()

void Csgerv2d ( )
extern

◆ Csgesd2d()

void Csgesd2d ( )
extern

◆ fortran_mr2d()

void fortran_mr2d ( Int * m,
Int * n,
float * A,
Int * ia,
Int * ja,
Int desc_A[DESCLEN],
float * B,
Int * ib,
Int * jb,
Int desc_B[DESCLEN] )

Definition at line 244 of file psgemr.c.

246{
247 Cpsgemr2do(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
248 B, *ib, *jb, (MDESC *) desc_B);
249 return;
250}
void Cpsgemr2do()

◆ fortran_mr2dnew()

void fortran_mr2dnew ( Int * m,
Int * n,
float * A,
Int * ia,
Int * ja,
Int desc_A[DESCLEN],
float * B,
Int * ib,
Int * jb,
Int desc_B[DESCLEN],
Int * gcontext )

Definition at line 252 of file psgemr.c.

254{
255 Cpsgemr2d(*m, *n, A, *ia, *ja, (MDESC *) desc_A,
256 B, *ib, *jb, (MDESC *) desc_B, *gcontext);
257 return;
258}

◆ gridreshape() [1/2]

static2 void gridreshape ( )

◆ gridreshape() [2/2]

static2 void gridreshape ( Int * ctxtp)

Definition at line 700 of file psgemr.c.

702{
703 Int ori, final; /* original context, and new context created, with
704 * line form */
705 Int nprow, npcol, myrow, mycol;
706 Int *usermap;
707 Int i, j;
708 ori = *ctxtp;
709 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
710 usermap = mr2d_malloc(sizeof(Int) * nprow * npcol);
711 for (i = 0; i < nprow; i++)
712 for (j = 0; j < npcol; j++) {
713 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
714 }
715 /* Cblacs_get(0, 0, &final); */
716 Cblacs_get(ori, (Int)10, &final);
717 Cblacs_gridmap(&final, usermap, (Int)1, (Int)1, nprow * npcol);
718 *ctxtp = final;
719 free(usermap);
void Cblacs_gridmap()
Int Cblacs_pnum()

◆ init_chenille() [1/2]

static2 void init_chenille ( )

◆ init_chenille() [2/2]

static2 void init_chenille ( Int mypnum,
Int nprocs,
Int n0,
Int * proc0,
Int n1,
Int * proc1,
Int ** psend,
Int ** precv,
Int * myrang )

Definition at line 568 of file psgemr.c.

570{
571 Int ns, nr, i, tot;
572 Int *sender, *recver, *g0, *g1;
573 tot = max(n0, n1);
574 sender = (Int *) mr2d_malloc((nprocs + tot) * sizeof(Int) * 2);
575 recver = sender + tot;
576 *psend = sender;
577 *precv = recver;
578 g0 = recver + tot;
579 g1 = g0 + nprocs;
580 for (i = 0; i < nprocs; i++) {
581 g0[i] = -1;
582 g1[i] = -1;
583 }
584 for (i = 0; i < tot; i++) {
585 sender[i] = -1;
586 recver[i] = -1;
587 }
588 for (i = 0; i < n0; i++)
589 g0[proc0[i]] = i;
590 for (i = 0; i < n1; i++)
591 g1[proc1[i]] = i;
592 ns = 0;
593 nr = 0;
594 *myrang = -1;
595 for (i = 0; i < nprocs; i++)
596 if (g0[i] >= 0 && g1[i] >= 0) {
597 if (i == mypnum)
598 *myrang = nr;
599 sender[ns] = g0[i];
600 ns += 1;
601 recver[nr] = g1[i];
602 nr += 1;
603 assert(ns <= n0 && nr <= n1 && nr == ns);
604 }
605 for (i = 0; i < nprocs; i++)
606 if (g0[i] >= 0 && g1[i] < 0) {
607 if (i == mypnum)
608 *myrang = ns;
609 sender[ns] = g0[i];
610 ns += 1;
611 assert(ns <= n0);
612 }
613 for (i = 0; i < nprocs; i++)
614 if (g1[i] >= 0 && g0[i] < 0) {
615 if (i == mypnum)
616 *myrang = nr;
617 recver[nr] = g1[i];
618 nr += 1;
619 assert(nr <= n1);
620 }

◆ inter_len() [1/2]

static2 Int inter_len ( )

◆ inter_len() [2/2]

static2 Int inter_len ( Int hinb,
IDESC * hi,
Int vinb,
IDESC * vi )

Definition at line 674 of file psgemr.c.

676{
677 Int hlen, vlen, h, v;
678 hlen = 0;
679 for (h = 0; h < hinb; h++)
680 hlen += hi[h].len;
681 vlen = 0;
682 for (v = 0; v < vinb; v++)
683 vlen += vi[v].len;
684 return hlen * vlen;

◆ localindice()

Int localindice ( )
extern

◆ localsize()

Int localsize ( )
extern

◆ memoryblocksize()

Int memoryblocksize ( )
extern

◆ mr2d_malloc()

void * mr2d_malloc ( )
extern

◆ paramcheck()

void paramcheck ( )
extern

◆ ppcm()

Int ppcm ( )
extern