Actual source code: mpiviennacl.cxx


/*
   This file contains routines for Parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
. -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQVIENNACL, VECMPIVIENNACL, VECSTANDARD, VecType, VecCreateMPI()
M*/
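/* Usage sketch (not part of the original file): the usual way to obtain a VECVIENNACL
   vector is through the options database. All calls below are standard PETSc API; only
   the helper name ExampleCreateViennaCLVec is illustrative. Run with -vec_type viennacl. */
static PetscErrorCode ExampleCreateViennaCLVec(MPI_Comm comm,PetscInt N,Vec *x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreate(comm,x);CHKERRQ(ierr);
  ierr = VecSetSizes(*x,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = VecSetFromOptions(*x);CHKERRQ(ierr); /* picks up -vec_type viennacl */
  ierr = VecSet(*x,1.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}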

PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL*)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL*) v->spptr;
    }
  } catch(std::exception const & ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what());
  }
  ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal *z)
{
  PetscReal      sum,work = 0.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    ierr  = VecNorm_SeqViennaCL(xin,NORM_2,&work);CHKERRQ(ierr);
    work *= work;
    ierr  = MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
    *z    = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_SeqViennaCL(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_SeqViennaCL(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    ierr = VecNorm_SeqViennaCL(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr = VecNorm_SeqViennaCL(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr = MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
    z[1] = PetscSqrtReal(z[1]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
  *z   = sum;
  PetscFunctionReturn(0);
}

PetscErrorCode VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecTDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
  *z   = sum;
  PetscFunctionReturn(0);
}

PetscErrorCode VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc1(nv,&work);CHKERRQ(ierr);
  }
  ierr = VecMDot_SeqViennaCL(xin,nv,y,work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
M*/
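/* Usage sketch (not part of the original file): selecting VECMPIVIENNACL explicitly with
   VecSetType() instead of the options database. Only the helper name is illustrative. */
static PetscErrorCode ExampleCreateMPIViennaCLVec(MPI_Comm comm,PetscInt nlocal,Vec *x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreate(comm,x);CHKERRQ(ierr);
  ierr = VecSetSizes(*x,nlocal,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = VecSetType(*x,VECMPIVIENNACL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}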

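/* Duplicate a parallel ViennaCL vector: create a new VECMPIVIENNACL with the same layout,
   copy the ops table, recreate the ghosted local representation on the new array, and
   share (reference) the original's ghost-update scatter. */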
PetscErrorCode VecDuplicate_MPIViennaCL(Vec win,Vec *v)
{
  PetscErrorCode ierr;
  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
  PetscScalar    *array;

  PetscFunctionBegin;
  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);

  ierr = VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);CHKERRQ(ierr);
  vw   = (Vec_MPI*)(*v)->data;
  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);CHKERRQ(ierr);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  ierr = PetscObjectChangeTypeName((PetscObject)(*v),VECMPIVIENNACL);CHKERRQ(ierr);

  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(0);
}

PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscErrorCode ierr;
  PetscScalar    work[2],sum[2];

  PetscFunctionBegin;
  ierr = VecDotNorm2_SeqViennaCL(s,t,work,work+1);CHKERRQ(ierr);
  ierr = MPIU_Allreduce((void*)&work,(void*)&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));CHKERRQ(ierr);
  *dp  = sum[0];
  *nm  = sum[1];
  PetscFunctionReturn(0);
}

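/* Toggle between host and device execution: when the vector is pinned to the CPU the ops
   table points at the host (Seq/MPI) kernels, otherwise at the ViennaCL implementations. */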
PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool pin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  vv->boundtocpu = pin;

  if (pin) {
    ierr = VecViennaCLCopyFromGPU(vv);CHKERRQ(ierr);
    vv->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2               = NULL;
    vv->ops->waxpy                  = VecWAXPY_Seq;
    vv->ops->dot                    = VecDot_MPI;
    vv->ops->mdot                   = VecMDot_MPI;
    vv->ops->tdot                   = VecTDot_MPI;
    vv->ops->norm                   = VecNorm_MPI;
    vv->ops->scale                  = VecScale_Seq;
    vv->ops->copy                   = VecCopy_Seq;
    vv->ops->set                    = VecSet_Seq;
    vv->ops->swap                   = VecSwap_Seq;
    vv->ops->axpy                   = VecAXPY_Seq;
    vv->ops->axpby                  = VecAXPBY_Seq;
    vv->ops->maxpy                  = VecMAXPY_Seq;
    vv->ops->aypx                   = VecAYPX_Seq;
    vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult          = VecPointwiseMult_Seq;
    vv->ops->setrandom              = VecSetRandom_Seq;
    vv->ops->placearray             = VecPlaceArray_Seq;
    vv->ops->replacearray           = VecReplaceArray_Seq;
    vv->ops->resetarray             = VecResetArray_Seq;
    vv->ops->dot_local              = VecDot_Seq;
    vv->ops->tdot_local             = VecTDot_Seq;
    vv->ops->norm_local             = VecNorm_Seq;
    vv->ops->mdot_local             = VecMDot_Seq;
    vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector         = NULL;
    vv->ops->restorelocalvector     = NULL;
    vv->ops->getlocalvectorread     = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite          = NULL;
  } else {
    vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
    vv->ops->dot             = VecDot_MPIViennaCL;
    vv->ops->mdot            = VecMDot_MPIViennaCL;
    vv->ops->tdot            = VecTDot_MPIViennaCL;
    vv->ops->norm            = VecNorm_MPIViennaCL;
    vv->ops->scale           = VecScale_SeqViennaCL;
    vv->ops->copy            = VecCopy_SeqViennaCL;
    vv->ops->set             = VecSet_SeqViennaCL;
    vv->ops->swap            = VecSwap_SeqViennaCL;
    vv->ops->axpy            = VecAXPY_SeqViennaCL;
    vv->ops->axpby           = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
    vv->ops->aypx            = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local       = VecDot_SeqViennaCL;
    vv->ops->tdot_local      = VecTDot_SeqViennaCL;
    vv->ops->norm_local      = VecNorm_SeqViennaCL;
    vv->ops->mdot_local      = VecMDot_SeqViennaCL;
    vv->ops->destroy         = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray      = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray        = VecGetArray_SeqViennaCL;
    vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
  }
  PetscFunctionReturn(0);
}

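/* Standard constructor for VECMPIVIENNACL: set up the layout, allocate the device and
   host storage, and zero both copies. */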
PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(vv->map);CHKERRQ(ierr);
  ierr = VecViennaCLAllocateCheck(vv);CHKERRQ(ierr);
  ierr = VecCreate_MPIViennaCL_Private(vv,PETSC_FALSE,0,((Vec_ViennaCL*)(vv->spptr))->GPUarray);CHKERRQ(ierr);
  ierr = VecViennaCLAllocateCheckHost(vv);CHKERRQ(ierr);
  ierr = VecSet(vv,0.0);CHKERRQ(ierr);
  ierr = VecSet_Seq(vv,0.0);CHKERRQ(ierr);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  PetscFunctionReturn(0);
}

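/* Constructor for the generic VECVIENNACL type: dispatch to the sequential or MPI
   implementation depending on the size of the communicator. */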
PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = VecSetType(v,VECSEQVIENNACL);CHKERRQ(ierr);
  } else {
    ierr = VecSetType(v,VECMPIVIENNACL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   VecCreateMPIViennaCLWithArray - Creates a parallel vector,
   where the user provides the ViennaCL vector to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
   at a later stage to set the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray()

@*/
PetscErrorCode  VecCreateMPIViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const ViennaCLVector *array,Vec *vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size of vector");
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  ierr = VecCreate(comm,vv);CHKERRQ(ierr);
  ierr = VecSetSizes(*vv,n,N);CHKERRQ(ierr);
  ierr = VecSetBlockSize(*vv,bs);CHKERRQ(ierr);
  ierr = VecCreate_MPIViennaCL_Private(*vv,PETSC_FALSE,0,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
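/* Usage sketch (not part of the original file): wrapping an existing ViennaCL device
   vector in a parallel PETSc Vec. The helper and variable names are illustrative; the
   call itself matches the signature documented above (bs = 1, local size given,
   global size computed). */
static PetscErrorCode ExampleWrapDeviceVector(MPI_Comm comm,PetscInt nlocal,const ViennaCLVector *gpuvec,Vec *x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreateMPIViennaCLWithArray(comm,1,nlocal,PETSC_DECIDE,gpuvec,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}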

/*@C
   VecCreateMPIViennaCLWithArrays - Creates a parallel vector,
   where the user provides both the CPU array and the ViennaCL vector that
   store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray - the user provided CPU array to store the vector values
-  viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and viennaclvec are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: VecCreateSeqViennaCLWithArrays(), VecCreateMPIWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecViennaCLPlaceArray(),
          VecPlaceArray(), VecCreateMPICUDAWithArrays(),
          VecViennaCLAllocateCheckHost()
@*/
PetscErrorCode  VecCreateMPIViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const ViennaCLVector *viennaclvec,Vec *vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreateMPIViennaCLWithArray(comm,bs,n,N,viennaclvec,vv);CHKERRQ(ierr);

  if (cpuarray && viennaclvec) {
    Vec_MPI *s         = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s         = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  PetscFunctionReturn(0);
}
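/* Usage sketch (not part of the original file): providing both a host array and a ViennaCL
   device vector that already hold identical values, as required by the Notes above. Helper
   and variable names are illustrative. */
static PetscErrorCode ExampleWrapHostAndDevice(MPI_Comm comm,PetscInt nlocal,const PetscScalar hostvals[],const ViennaCLVector *gpuvec,Vec *x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreateMPIViennaCLWithArrays(comm,1,nlocal,PETSC_DECIDE,hostvals,gpuvec,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}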

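/* Shared constructor used by the public creation routines: build the MPI layer, install
   the ViennaCL ops table, and either allocate device/host storage or adopt a
   caller-provided ViennaCL vector without taking ownership of it. */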
PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv,PetscBool alloc,PetscInt nghost,const ViennaCLVector *array)
{
  PetscErrorCode ierr;
  Vec_ViennaCL   *vecviennacl;

  PetscFunctionBegin;
  ierr = VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)vv,VECMPIVIENNACL);CHKERRQ(ierr);

  ierr = VecBindToCPU_MPIViennaCL(vv,PETSC_FALSE);CHKERRQ(ierr);
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    ierr = VecViennaCLAllocateCheck(vv);CHKERRQ(ierr);
    ierr = VecViennaCLAllocateCheckHost(vv);CHKERRQ(ierr);
    ierr = VecSet(vv,0.0);CHKERRQ(ierr);
    ierr = VecSet_Seq(vv,0.0);CHKERRQ(ierr);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr)
      vv->spptr = new Vec_ViennaCL;
    vecviennacl = (Vec_ViennaCL*)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray = (ViennaCLVector*)array;
    vv->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  PetscFunctionReturn(0);
}