Actual source code: mpits.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: PetscLogEvent PETSC_BuildTwoSided;
5: PetscLogEvent PETSC_BuildTwoSidedF;
7: const char *const PetscBuildTwoSidedTypes[] = {
8: "ALLREDUCE",
9: "IBARRIER",
10: "REDSCATTER",
11: "PetscBuildTwoSidedType",
12: "PETSC_BUILDTWOSIDED_",
13: NULL
14: };
16: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
18: /*@
19: PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
21: Logically Collective
23: Input Parameters:
24: +  comm - communicator on which the algorithm will be used (the setting is currently global, so typically PETSC_COMM_WORLD)
25: - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
27: Level: developer
29: Note:
30: This option is currently global, but could be made per-communicator.
32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33: @*/
34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35: {
37:   if (PetscDefined(USE_DEBUG)) { /* verify the value is the same on all ranks */
38:     PetscMPIInt ierr;
39: PetscMPIInt b1[2],b2[2];
40: b1[0] = -(PetscMPIInt)twosided;
41: b1[1] = (PetscMPIInt)twosided;
42: MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);
43: if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
44: }
45: _twosided_type = twosided;
46: return(0);
47: }
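/* Usage sketch (illustrative, not part of mpits.c): force the non-blocking consensus
 * algorithm for all later rendezvous; the command-line option -build_twosided ibarrier
 * has the same effect.
 *
 *   ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
 */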
49: /*@
50:    PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication
52: Logically Collective
54:    Input Parameter:
55: .  comm - communicator on which to query the algorithm

   Output Parameter:
56: .  twosided - algorithm to use for PetscCommBuildTwoSided()
58: Level: developer
60: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
61: @*/
62: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
63: {
65: PetscMPIInt size;
68: *twosided = PETSC_BUILDTWOSIDED_NOTSET;
69: if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
70: MPI_Comm_size(comm,&size);
71: _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
72: #if defined(PETSC_HAVE_MPI_IBARRIER)
73: if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
74: #endif
75: PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
76: }
77: *twosided = _twosided_type;
78: return(0);
79: }
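/* Usage sketch (illustrative): query which algorithm subsequent calls will use; the result
 * reflects the compiled-in default, any earlier SetType call, and the -build_twosided option.
 *
 *   PetscBuildTwoSidedType t;
 *   ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);CHKERRQ(ierr);
 *   ierr = PetscPrintf(PETSC_COMM_WORLD,"two-sided algorithm: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);
 */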
81: #if defined(PETSC_HAVE_MPI_IBARRIER)
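/* Non-blocking consensus (NBX) rendezvous, following Hoefler, Siebert and Lumsdaine (2010):
 * 1) post a synchronous send (MPI_Issend) to every destination rank,
 * 2) repeatedly probe for and receive any incoming message, recording its source,
 * 3) once all local synchronous sends have completed (hence been matched), start MPI_Ibarrier,
 * 4) keep probing until the barrier completes, at which point no messages remain in flight. */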
82: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
83: {
85: PetscMPIInt nrecvs,tag,done,i;
86: MPI_Aint lb,unitbytes;
87: char *tdata;
88: MPI_Request *sendreqs,barrier;
89: PetscSegBuffer segrank,segdata;
90: PetscBool barrier_started;
93: PetscCommDuplicate(comm,&comm,&tag);
94: MPI_Type_get_extent(dtype,&lb,&unitbytes);
95: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
96: tdata = (char*)todata;
97: PetscMalloc1(nto,&sendreqs);
98: for (i=0; i<nto; i++) {
99: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
100: }
101: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
102: PetscSegBufferCreate(unitbytes,4*count,&segdata);
104: nrecvs = 0;
105: barrier = MPI_REQUEST_NULL;
106: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
107: * but we need to work around it. */
108: barrier_started = PETSC_FALSE;
109: for (done=0; !done;) {
110: PetscMPIInt flag;
111: MPI_Status status;
112: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
113: if (flag) { /* incoming message */
114: PetscMPIInt *recvrank;
115: void *buf;
116: PetscSegBufferGet(segrank,1,&recvrank);
117: PetscSegBufferGet(segdata,count,&buf);
118: *recvrank = status.MPI_SOURCE;
119: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
120: nrecvs++;
121: }
122: if (!barrier_started) {
123: PetscMPIInt sent,nsends;
124: PetscMPIIntCast(nto,&nsends);
125: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
126: if (sent) {
127: MPI_Ibarrier(comm,&barrier);
128: barrier_started = PETSC_TRUE;
129: PetscFree(sendreqs);
130: }
131: } else {
132: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
133: }
134: }
135: *nfrom = nrecvs;
136: PetscSegBufferExtractAlloc(segrank,fromranks);
137: PetscSegBufferDestroy(&segrank);
138: PetscSegBufferExtractAlloc(segdata,fromdata);
139: PetscSegBufferDestroy(&segdata);
140: PetscCommDestroy(&comm);
141: return(0);
142: }
143: #endif
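/* Allreduce-based rendezvous: each rank sets a 0/1 flag per destination, a summed
 * MPIU_Allreduce over the length-'size' flag array tells every rank how many messages it
 * will receive, and matching MPI_Irecv/MPI_Isend pairs then move the payloads.  Memory use
 * grows with the communicator size, unlike the MPI_Ibarrier variant above. */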
145: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
146: {
147: PetscErrorCode ierr;
148: PetscMPIInt size,rank,*iflags,nrecvs,tag,*franks,i,flg;
149: MPI_Aint lb,unitbytes;
150: char *tdata,*fdata;
151: MPI_Request *reqs,*sendreqs;
152: MPI_Status *statuses;
153: PetscCommCounter *counter;
156: MPI_Comm_size(comm,&size);
157: MPI_Comm_rank(comm,&rank);
158: PetscCommDuplicate(comm,&comm,&tag);
159: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
160: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
161: if (!counter->iflags) {
162: PetscCalloc1(size,&counter->iflags);
163: iflags = counter->iflags;
164: } else {
165: iflags = counter->iflags;
166: PetscArrayzero(iflags,size);
167: }
168: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
169: MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);
170: nrecvs = iflags[rank];
171: MPI_Type_get_extent(dtype,&lb,&unitbytes);
172: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
173: PetscMalloc(nrecvs*count*unitbytes,&fdata);
174: tdata = (char*)todata;
175: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
176: sendreqs = reqs + nrecvs;
177: for (i=0; i<nrecvs; i++) {
178: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
179: }
180: for (i=0; i<nto; i++) {
181: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
182: }
183: MPI_Waitall(nto+nrecvs,reqs,statuses);
184: PetscMalloc1(nrecvs,&franks);
185: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
186: PetscFree2(reqs,statuses);
187: PetscCommDestroy(&comm);
189: *nfrom = nrecvs;
190: *fromranks = franks;
191: *(void**)fromdata = fdata;
192: return(0);
193: }
195: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
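/* Same flag-counting idea as the Allreduce variant, but MPI_Reduce_scatter_block sums the
 * flag arrays and delivers to each rank only its own entry, i.e. the number of messages it
 * will receive, so no rank needs the full reduced array. */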
196: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
197: {
199: PetscMPIInt size,*iflags,nrecvs,tag,*franks,i,flg;
200: MPI_Aint lb,unitbytes;
201: char *tdata,*fdata;
202: MPI_Request *reqs,*sendreqs;
203: MPI_Status *statuses;
204: PetscCommCounter *counter;
207: MPI_Comm_size(comm,&size);
208: PetscCommDuplicate(comm,&comm,&tag);
209: MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);
210: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
211: if (!counter->iflags) {
212: PetscCalloc1(size,&counter->iflags);
213: iflags = counter->iflags;
214: } else {
215: iflags = counter->iflags;
216: PetscArrayzero(iflags,size);
217: }
218: for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
219: MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
220: MPI_Type_get_extent(dtype,&lb,&unitbytes);
221: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
222: PetscMalloc(nrecvs*count*unitbytes,&fdata);
223: tdata = (char*)todata;
224: PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
225: sendreqs = reqs + nrecvs;
226: for (i=0; i<nrecvs; i++) {
227: MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
228: }
229: for (i=0; i<nto; i++) {
230: MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
231: }
232: MPI_Waitall(nto+nrecvs,reqs,statuses);
233: PetscMalloc1(nrecvs,&franks);
234: for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
235: PetscFree2(reqs,statuses);
236: PetscCommDestroy(&comm);
238: *nfrom = nrecvs;
239: *fromranks = franks;
240: *(void**)fromdata = fdata;
241: return(0);
242: }
243: #endif
245: /*@C
246: PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
248: Collective
250: Input Parameters:
251: + comm - communicator
252: . count - number of entries to send/receive (must match on all ranks)
253: . dtype - datatype to send/receive from each rank (must match on all ranks)
254: . nto - number of ranks to send data to
255: . toranks - ranks to send to (array of length nto)
256: - todata - data to send to each rank (packed)
258: Output Parameters:
259: +  nfrom - number of ranks from which messages will be received
260: .  fromranks - ranks from which messages are received (array of length nfrom; caller should PetscFree())
261: -  fromdata - packed data received from each rank, count entries of type dtype per rank (length nfrom; caller responsible for PetscFree())
263: Level: developer
265: Options Database Keys:
266: . -build_twosided <allreduce|ibarrier|redscatter> - algorithm used to set up two-sided communication. The default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier (when MPI_Ibarrier is available).
268: Notes:
269: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
270: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
272: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
274: References:
275: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
276:    (The MPI_Ibarrier-based implementation uses the algorithm from this reference.)
278: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
279: @*/
280: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
281: {
282: PetscErrorCode ierr;
283: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
286: PetscSysInitializePackage();
287: PetscLogEventSync(PETSC_BuildTwoSided,comm);
288: PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
289: PetscCommBuildTwoSidedGetType(comm,&buildtype);
290: switch (buildtype) {
291: case PETSC_BUILDTWOSIDED_IBARRIER:
292: #if defined(PETSC_HAVE_MPI_IBARRIER)
293: PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
294: break;
295: #else
296: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
297: #endif
298: case PETSC_BUILDTWOSIDED_ALLREDUCE:
299: PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
300: break;
301: case PETSC_BUILDTWOSIDED_REDSCATTER:
302: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
303: PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
304: break;
305: #else
306: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
307: #endif
308: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
309: }
310: PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
311: return(0);
312: }
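/* Usage sketch (illustrative, not part of mpits.c): every rank sends one integer to rank+1
 * (mod size) and discovers, without knowing it in advance, who sent to it.  All variable
 * names below are arbitrary.
 *
 *   PetscErrorCode ierr;
 *   PetscMPIInt    rank,size,to,payload,nfrom,*fromranks,*fromdata;
 *
 *   ierr    = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
 *   ierr    = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
 *   to      = (rank+1)%size;
 *   payload = rank;
 *   ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,1,&to,&payload,&nfrom,&fromranks,&fromdata);CHKERRQ(ierr);
 *   // here nfrom==1, fromranks[0]==(rank-1+size)%size and fromdata[0] is that rank's payload
 *   ierr = PetscFree(fromranks);CHKERRQ(ierr);
 *   ierr = PetscFree(fromdata);CHKERRQ(ierr);
 */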
314: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
315: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
316: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
317: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
318: {
320: PetscMPIInt i,*tag;
321: MPI_Aint lb,unitbytes;
322: MPI_Request *sendreq,*recvreq;
325: PetscMalloc1(ntags,&tag);
326: if (ntags > 0) {
327: PetscCommDuplicate(comm,&comm,&tag[0]);
328: }
329: for (i=1; i<ntags; i++) {
330: PetscCommGetNewTag(comm,&tag[i]);
331: }
333: /* Perform complete initial rendezvous */
334: PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
336: PetscMalloc1(nto*ntags,&sendreq);
337: PetscMalloc1(*nfrom*ntags,&recvreq);
339: MPI_Type_get_extent(dtype,&lb,&unitbytes);
340: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
341: for (i=0; i<nto; i++) {
342: PetscMPIInt k;
343: for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
344: (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
345: }
346: for (i=0; i<*nfrom; i++) {
347: void *header = (*(char**)fromdata) + count*unitbytes*i;
348: PetscMPIInt k;
349: for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
350: (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
351: }
352: PetscFree(tag);
353: PetscCommDestroy(&comm);
354: *toreqs = sendreq;
355: *fromreqs = recvreq;
356: return(0);
357: }
359: #if defined(PETSC_HAVE_MPI_IBARRIER)
361: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
362: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
363: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
364: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
365: {
367: PetscMPIInt nrecvs,tag,*tags,done,i;
368: MPI_Aint lb,unitbytes;
369: char *tdata;
370: MPI_Request *sendreqs,*usendreqs,*req,barrier;
371: PetscSegBuffer segrank,segdata,segreq;
372: PetscBool barrier_started;
375: PetscCommDuplicate(comm,&comm,&tag);
376: PetscMalloc1(ntags,&tags);
377: for (i=0; i<ntags; i++) {
378: PetscCommGetNewTag(comm,&tags[i]);
379: }
380: MPI_Type_get_extent(dtype,&lb,&unitbytes);
381: if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
382: tdata = (char*)todata;
383: PetscMalloc1(nto,&sendreqs);
384: PetscMalloc1(nto*ntags,&usendreqs);
385: /* Post synchronous sends */
386: for (i=0; i<nto; i++) {
387: MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
388: }
389: /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
390: * synchronous messages above. */
391: for (i=0; i<nto; i++) {
392: PetscMPIInt k;
393: for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
394: (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
395: }
397: PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
398: PetscSegBufferCreate(unitbytes,4*count,&segdata);
399: PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);
401: nrecvs = 0;
402: barrier = MPI_REQUEST_NULL;
403: /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
404: * but we need to work around it. */
405: barrier_started = PETSC_FALSE;
406: for (done=0; !done;) {
407: PetscMPIInt flag;
408: MPI_Status status;
409: MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
410: if (flag) { /* incoming message */
411: PetscMPIInt *recvrank,k;
412: void *buf;
413: PetscSegBufferGet(segrank,1,&recvrank);
414: PetscSegBufferGet(segdata,count,&buf);
415: *recvrank = status.MPI_SOURCE;
416: MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
417: PetscSegBufferGet(segreq,ntags,&req);
418: for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
419: (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
420: nrecvs++;
421: }
422: if (!barrier_started) {
423: PetscMPIInt sent,nsends;
424: PetscMPIIntCast(nto,&nsends);
425: MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
426: if (sent) {
427: MPI_Ibarrier(comm,&barrier);
428: barrier_started = PETSC_TRUE;
429: }
430: } else {
431: MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
432: }
433: }
434: *nfrom = nrecvs;
435: PetscSegBufferExtractAlloc(segrank,fromranks);
436: PetscSegBufferDestroy(&segrank);
437: PetscSegBufferExtractAlloc(segdata,fromdata);
438: PetscSegBufferDestroy(&segdata);
439: *toreqs = usendreqs;
440: PetscSegBufferExtractAlloc(segreq,fromreqs);
441: PetscSegBufferDestroy(&segreq);
442: PetscFree(sendreqs);
443: PetscFree(tags);
444: PetscCommDestroy(&comm);
445: return(0);
446: }
447: #endif
449: /*@C
450: PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous
452: Collective
454: Input Parameters:
455: + comm - communicator
456: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
457: . dtype - datatype to send/receive from each rank (must match on all ranks)
458: . nto - number of ranks to send data to
459: . toranks - ranks to send to (array of length nto)
460: . todata - data to send to each rank (packed)
461: . ntags - number of tags needed by send/recv callbacks
462: . send - callback invoked on sending process when ready to send primary payload
463: . recv - callback invoked on receiving process after delivery of rendezvous message
464: - ctx - context for callbacks
466: Output Parameters:
467: +  nfrom - number of ranks from which messages will be received
468: .  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
469: -  fromdata - packed data received from each rank, count entries of type dtype per rank (length nfrom; caller responsible for PetscFree())
471: Level: developer
473: Notes:
474: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
475: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
477: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
479: References:
480: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
481:    (The MPI_Ibarrier-based implementation uses the algorithm from this reference.)
483: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
484: @*/
485: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
486: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
487: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
488: {
490: MPI_Request *toreqs,*fromreqs;
493: PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
494: MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
495: MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
496: PetscFree(toreqs);
497: PetscFree(fromreqs);
498: return(0);
499: }
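/* Callback sketch (illustrative; ExampleCtx, ExampleSend, ExampleRecv and the variables in
 * the final call are hypothetical names): with count==1, dtype==MPI_INT and ntags==1, the
 * rendezvous message carries the payload length and each callback posts one non-blocking
 * transfer of the variable-length payload on tag[0].  The request slots in req[] are
 * pre-set to MPI_REQUEST_NULL by the caller, so a callback only fills the ones it uses.
 *
 *   typedef struct {
 *     PetscInt    **payloads;  // payloads[i] = entries destined for toranks[i]
 *     PetscInt    **recvbufs;  // filled by ExampleRecv; sized pessimistically by the caller
 *     PetscMPIInt   nrecv;     // number of receive buffers allocated so far
 *   } ExampleCtx;
 *
 *   static PetscErrorCode ExampleSend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     ExampleCtx *c   = (ExampleCtx*)ctx;
 *     PetscMPIInt len = *(PetscMPIInt*)header;  // length this rank packed into todata[i]
 *     MPI_Isend(c->payloads[i],len,MPIU_INT,rank,tag[0],comm,&req[0]);
 *     return 0;
 *   }
 *
 *   static PetscErrorCode ExampleRecv(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     ExampleCtx    *c   = (ExampleCtx*)ctx;
 *     PetscMPIInt    len = *(PetscMPIInt*)header;  // length announced by the sending rank
 *     PetscErrorCode ierr;
 *     ierr = PetscMalloc1(len,&c->recvbufs[c->nrecv]);CHKERRQ(ierr);
 *     MPI_Irecv(c->recvbufs[c->nrecv++],len,MPIU_INT,rank,tag[0],comm,&req[0]);
 *     return 0;
 *   }
 *
 *   // caller: todata is the array of lengths, one MPI_INT per destination in toranks[]
 *   ierr = PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,lengths,&nfrom,&fromranks,&fromlengths,1,ExampleSend,ExampleRecv,&ctx);CHKERRQ(ierr);
 */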
501: /*@C
502: PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests
504: Collective
506: Input Parameters:
507: + comm - communicator
508: . count - number of entries to send/receive in initial rendezvous (must match on all ranks)
509: . dtype - datatype to send/receive from each rank (must match on all ranks)
510: . nto - number of ranks to send data to
511: . toranks - ranks to send to (array of length nto)
512: . todata - data to send to each rank (packed)
513: . ntags - number of tags needed by send/recv callbacks
514: . send - callback invoked on sending process when ready to send primary payload
515: . recv - callback invoked on receiving process after delivery of rendezvous message
516: - ctx - context for callbacks
518: Output Parameters:
519: +  nfrom - number of ranks from which messages will be received
520: .  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
521: .  fromdata - packed data received from each rank, count entries of type dtype per rank (length nfrom; caller responsible for PetscFree())
522: . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
523: - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())
525: Level: developer
527: Notes:
528: This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
529: PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.
531: Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
533: References:
534: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
535:    (The MPI_Ibarrier-based implementation uses the algorithm from this reference.)
537: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
538: @*/
539: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
540: PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
541: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
542: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
543: {
544: PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
545: PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
546: PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
547: PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
548: PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
549: PetscMPIInt i,size;
552: PetscSysInitializePackage();
553: MPI_Comm_size(comm,&size);
554: for (i=0; i<nto; i++) {
555: if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
556: }
557: PetscLogEventSync(PETSC_BuildTwoSidedF,comm);
558: PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
559: PetscCommBuildTwoSidedGetType(comm,&buildtype);
560: switch (buildtype) {
561: case PETSC_BUILDTWOSIDED_IBARRIER:
562: #if defined(PETSC_HAVE_MPI_IBARRIER)
563: f = PetscCommBuildTwoSidedFReq_Ibarrier;
564: break;
565: #else
566: SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
567: #endif
568: case PETSC_BUILDTWOSIDED_ALLREDUCE:
569: case PETSC_BUILDTWOSIDED_REDSCATTER:
570: f = PetscCommBuildTwoSidedFReq_Reference;
571: break;
572: default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
573: }
574: (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
575: PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
576: return(0);
577: }
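/* Usage sketch (illustrative): PetscCommBuildTwoSidedFReq behaves like PetscCommBuildTwoSidedF
 * except that it hands back the callback-created requests instead of waiting on them, so the
 * caller can overlap other work with the payload transfers (reusing the hypothetical
 * ExampleSend/ExampleRecv/ctx from the sketch above, with ntags==1).
 *
 *   MPI_Request *toreqs,*fromreqs;
 *   ierr = PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,lengths,&nfrom,&fromranks,&fromlengths,1,&toreqs,&fromreqs,ExampleSend,ExampleRecv,&ctx);CHKERRQ(ierr);
 *   // ... unrelated computation may overlap the transfers here ...
 *   ierr = MPI_Waitall(nto,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 *   ierr = MPI_Waitall(nfrom,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 *   ierr = PetscFree(toreqs);CHKERRQ(ierr);
 *   ierr = PetscFree(fromreqs);CHKERRQ(ierr);
 */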