Actual source code: util.c
1: /*
2: GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3: */
4: #include <petsc/private/matimpl.h>
5: #include <../src/ksp/pc/impls/gamg/gamg.h>
7: /*
8: Produces a set of block column indices of the matrix row, one for each block represented in the original row
10: n - the number of block indices in cc[]
11: cc - the block indices (must be large enough to contain the indices)
12: */
13: PETSC_STATIC_INLINE PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
14: {
15: PetscInt cnt = -1,nidx,j;
16: const PetscInt *idx;
20: MatGetRow(Amat,row,&nidx,&idx,NULL);
21: if (nidx) {
22: cnt = 0;
23: cc[cnt] = idx[0]/bs;
24: for (j=1; j<nidx; j++) {
25: if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
26: }
27: }
28: MatRestoreRow(Amat,row,&nidx,&idx,NULL);
29: *n = cnt+1;
30: return(0);
31: }
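/*
   Illustrative example (editor's addition, not part of the original source): with bs = 2, a
   scalar row whose column indices are {0, 1, 4, 5, 7} collapses to the block column indices
   {0, 2, 3}, so MatCollapseRow() returns *n = 3 and cc = {0, 2, 3}.  The loop above relies on
   MatGetRow() returning the column indices in increasing order, which holds for AIJ matrices.
*/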
33: /*
34: Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
36: ncollapsed - the number of block indices
37: collapsed - the block indices (returned as a pointer into one of the workspace arrays)
38: */
39: PETSC_STATIC_INLINE PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
40: {
41: PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
45: MatCollapseRow(Amat,start,bs,&nprev,cprev);
46: for (i=start+1; i<start+bs; i++) {
47: MatCollapseRow(Amat,i,bs,&ncur,ccur);
48: PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged);
49: cprevtmp = cprev; cprev = merged; merged = cprevtmp;
50: }
51: *ncollapsed = nprev;
52: if (collapsed) *collapsed = cprev;
53: return(0);
54: }
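/*
   Illustrative sketch (editor's addition, not part of the original source): driving
   MatCollapseRows() directly.  The workspace bound max_nnz is an assumption here; the callers
   below compute it from MatSeqAIJGetMaxRowNonzeros().
*/
#if 0
{
  PetscInt nblk,*bcols,*w0,*w1,*w2,max_nnz = 128;    /* assumed bound on nonzeros in one block row */
  PetscMalloc3(max_nnz,&w0,max_nnz,&w1,max_nnz,&w2);
  MatCollapseRows(Amat,0,bs,w0,w1,w2,&nblk,&bcols);  /* merge scalar rows 0..bs-1 into block columns */
  /* bcols points into one of the workspace arrays, so use it before PetscFree3() */
  PetscFree3(w0,w1,w2);
}
#endif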
56: /* -------------------------------------------------------------------------- */
57: /*
58: PCGAMGCreateGraph - create simple scaled scalar graph from matrix
60: Input Parameter:
61: . Amat - matrix
62: Output Parameter:
63: . a_Gmat - output scalar graph (symmetric?)
64: */
65: PetscErrorCode PCGAMGCreateGraph(Mat Amat, Mat *a_Gmat)
66: {
68: PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
69: MPI_Comm comm;
70: Mat Gmat;
73: PetscObjectGetComm((PetscObject)Amat,&comm);
74: MatGetOwnershipRange(Amat, &Istart, &Iend);
75: MatGetSize(Amat, &MM, &NN);
76: MatGetBlockSize(Amat, &bs);
77: nloc = (Iend-Istart)/bs;
79: PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);
81: /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
82: /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
83: implementation */
84: if (bs > 1) {
85: const PetscScalar *vals;
86: const PetscInt *idx;
87: PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2;
88: PetscBool ismpiaij,isseqaij;
90: /*
91: Determine the preallocation needed for the scalar matrix derived from the vector matrix.
92: */
94: PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij);
95: PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij);
96: PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz);
98: if (isseqaij) {
99: PetscInt max_d_nnz;
101: /*
102: Determine exact preallocation count for (sequential) scalar matrix
103: */
104: MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz);
105: max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
106: PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);
107: for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
108: MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);
109: }
110: PetscFree3(w0,w1,w2);
112: } else if (ismpiaij) {
113: Mat Daij,Oaij;
114: const PetscInt *garray;
115: PetscInt max_d_nnz;
117: MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray);
119: /*
120: Determine exact preallocation count for diagonal block portion of scalar matrix
121: */
122: MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz);
123: max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
124: PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2);
125: for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
126: MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL);
127: }
128: PetscFree3(w0,w1,w2);
130: /*
131: Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
132: */
133: for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
134: o_nnz[jj] = 0;
135: for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
136: MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL);
137: o_nnz[jj] += ncols;
138: MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL);
139: }
140: if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
141: }
143: } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require AIJ matrix type");
145: /* get scalar copy of the matrix: each bs x bs block (I,J) contributes the sum of |A(i,j)| over the block */
146: MatCreate(comm, &Gmat);
147: MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE);
148: MatSetBlockSizes(Gmat, 1, 1);
149: MatSetType(Gmat, MATAIJ);
150: MatSeqAIJSetPreallocation(Gmat,0,d_nnz);
151: MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz);
152: PetscFree2(d_nnz,o_nnz);
154: for (Ii = Istart; Ii < Iend; Ii++) {
155: PetscInt dest_row = Ii/bs;
156: MatGetRow(Amat,Ii,&ncols,&idx,&vals);
157: for (jj=0; jj<ncols; jj++) {
158: PetscInt dest_col = idx[jj]/bs;
159: PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
160: MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES);
161: }
162: MatRestoreRow(Amat,Ii,&ncols,&idx,&vals);
163: }
164: MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY);
165: MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY);
166: } else {
167: /* bs == 1: just copy the scalar matrix; abs() is not taken here, the values are scaled later */
168: MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat);
169: }
170: MatPropagateSymmetryOptions(Amat, Gmat);
172: PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);
174: *a_Gmat = Gmat;
175: return(0);
176: }
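/*
   Illustrative sketch (editor's addition, not part of the original source): typical use of
   PCGAMGCreateGraph() together with PCGAMGFilterGraph() below, roughly as the GAMG aggregation
   setup uses them.  The threshold 0.01 is only an example value.
*/
#if 0
{
  Mat Gmat;
  PCGAMGCreateGraph(Amat,&Gmat);              /* one scalar vertex per bs x bs block of Amat */
  PCGAMGFilterGraph(&Gmat,0.01,PETSC_TRUE);   /* drop weak entries and symmetrize; Gmat is replaced */
  /* ... hand Gmat to a coarsener ... */
  MatDestroy(&Gmat);
}
#endif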
178: /* -------------------------------------------------------------------------- */
179: /*@C
180: PCGAMGFilterGraph - filter the graph (remove zero and, optionally, small values) and make it symmetric if requested
182: Collective on Mat
184: Input Parameters:
185: + a_Gmat - the graph
186: . vfilter - threshold parameter [0,1)
187: - symm - make the result symmetric
189: Level: developer
191: Notes:
192: This is called before the graph coarseners are called.
194: .seealso: PCGAMGSetThreshold()
195: @*/
196: PetscErrorCode PCGAMGFilterGraph(Mat *a_Gmat,PetscReal vfilter,PetscBool symm)
197: {
198: PetscErrorCode ierr;
199: PetscInt Istart,Iend,Ii,jj,ncols,nnz0,nnz1, NN, MM, nloc;
200: PetscMPIInt rank;
201: Mat Gmat = *a_Gmat, tGmat;
202: MPI_Comm comm;
203: const PetscScalar *vals;
204: const PetscInt *idx;
205: PetscInt *d_nnz, *o_nnz;
206: Vec diag;
209: PetscLogEventBegin(petsc_gamg_setup_events[GRAPH],0,0,0,0);
211: /* TODO GPU: optimization proposal, each class provides fast implementation of this
212: procedure via MatAbs API */
213: if (vfilter < 0.0 && !symm) {
214: /* Just use the provided matrix as the graph but make all values positive */
215: MatInfo info;
216: PetscScalar *avals;
217: PetscBool isaij,ismpiaij;
218: PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isaij);
219: PetscObjectBaseTypeCompare((PetscObject)Gmat,MATMPIAIJ,&ismpiaij);
220: if (!isaij && !ismpiaij) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
221: if (isaij) {
222: MatGetInfo(Gmat,MAT_LOCAL,&info);
223: MatSeqAIJGetArray(Gmat,&avals);
224: for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
225: MatSeqAIJRestoreArray(Gmat,&avals);
226: } else {
227: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Gmat->data;
228: MatGetInfo(aij->A,MAT_LOCAL,&info);
229: MatSeqAIJGetArray(aij->A,&avals);
230: for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
231: MatSeqAIJRestoreArray(aij->A,&avals);
232: MatGetInfo(aij->B,MAT_LOCAL,&info);
233: MatSeqAIJGetArray(aij->B,&avals);
234: for (jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
235: MatSeqAIJRestoreArray(aij->B,&avals);
236: }
237: PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);
238: return(0);
239: }
241: /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
242: Also, if the matrix is symmetric, can we skip this
243: operation? It can be very expensive on large matrices. */
244: PetscObjectGetComm((PetscObject)Gmat,&comm);
245: MPI_Comm_rank(comm,&rank);
246: MatGetOwnershipRange(Gmat, &Istart, &Iend);
247: nloc = Iend - Istart;
248: MatGetSize(Gmat, &MM, &NN);
250: if (symm) {
251: Mat matTrans;
252: MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans);
253: MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN);
254: MatDestroy(&matTrans);
255: }
257: /* symmetrically scale Gmat by the inverse square root of its diagonal so all values lie between -1 and 1 */
258: MatCreateVecs(Gmat, &diag, NULL);
259: MatGetDiagonal(Gmat, diag);
260: VecReciprocal(diag);
261: VecSqrtAbs(diag);
262: MatDiagonalScale(Gmat, diag, diag);
263: VecDestroy(&diag);
265: /* Determine upper bound on nonzeros needed in new filtered matrix */
266: PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz);
267: for (Ii = Istart, jj = 0; Ii < Iend; Ii++, jj++) {
268: MatGetRow(Gmat,Ii,&ncols,NULL,NULL);
269: d_nnz[jj] = ncols;
270: o_nnz[jj] = ncols;
271: MatRestoreRow(Gmat,Ii,&ncols,NULL,NULL);
272: if (d_nnz[jj] > nloc) d_nnz[jj] = nloc;
273: if (o_nnz[jj] > (MM-nloc)) o_nnz[jj] = MM - nloc;
274: }
275: MatCreate(comm, &tGmat);
276: MatSetSizes(tGmat,nloc,nloc,MM,MM);
277: MatSetBlockSizes(tGmat, 1, 1);
278: MatSetType(tGmat, MATAIJ);
279: MatSeqAIJSetPreallocation(tGmat,0,d_nnz);
280: MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz);
281: MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
282: PetscFree2(d_nnz,o_nnz);
284: for (Ii = Istart, nnz0 = nnz1 = 0; Ii < Iend; Ii++) {
285: MatGetRow(Gmat,Ii,&ncols,&idx,&vals);
286: for (jj=0; jj<ncols; jj++,nnz0++) {
287: PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
288: if (PetscRealPart(sv) > vfilter) {
289: nnz1++;
290: MatSetValues(tGmat,1,&Ii,1,&idx[jj],&sv,INSERT_VALUES);
291: }
292: }
293: MatRestoreRow(Gmat,Ii,&ncols,&idx,&vals);
294: }
295: MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY);
296: MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY);
297: if (symm) {
298: MatSetOption(tGmat,MAT_SYMMETRIC,PETSC_TRUE);
299: } else {
300: MatPropagateSymmetryOptions(Gmat,tGmat);
301: }
302: PetscLogEventEnd(petsc_gamg_setup_events[GRAPH],0,0,0,0);
304: #if defined(PETSC_USE_INFO)
305: {
306: double t1 = (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, t2 = (!nloc) ? 1. : (double)nnz0/(double)nloc;
307: PetscInfo4(*a_Gmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%D)\n",t1,vfilter,t2,MM);
308: }
309: #endif
310: MatDestroy(&Gmat);
311: *a_Gmat = tGmat;
312: return(0);
313: }
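/*
   Editor's note (illustrative, not part of the original source): because of the symmetric
   diagonal scaling above, the filter keeps the entry in row i, column j of the (possibly
   symmetrized) graph exactly when

       |g_ij| / sqrt(|g_ii| * |g_jj|) > vfilter

   so vfilter acts as a relative strength-of-connection threshold rather than an absolute
   magnitude.  A call with vfilter < 0 and symm == PETSC_FALSE returns early after only replacing
   the stored values by their absolute values, leaving the nonzero pattern untouched.
*/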
315: /* -------------------------------------------------------------------------- */
316: /*
317: PCGAMGGetDataWithGhosts - uses the internals of MATMPIAIJ, so the communicator must have size > 1
319: Input Parameters:
320: . Gmat - MPIAIJ matrix used for the scatters
321: . data_sz - number of data terms per node (# cols in output)
322: . data_in[nloc*data_sz] - column-oriented data
323: Output Parameters:
324: . a_stride - number of rows of output
325: . a_data_out[stride*data_sz] - output data with ghosts
326: */
327: PetscErrorCode PCGAMGGetDataWithGhosts(Mat Gmat,PetscInt data_sz,PetscReal data_in[],PetscInt *a_stride,PetscReal **a_data_out)
328: {
330: Vec tmp_crds;
331: Mat_MPIAIJ *mpimat = (Mat_MPIAIJ*)Gmat->data;
332: PetscInt nnodes,num_ghosts,dir,kk,jj,my0,Iend,nloc;
333: PetscScalar *data_arr;
334: PetscReal *datas;
335: PetscBool isMPIAIJ;
338: PetscObjectBaseTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPIAIJ);
339: MatGetOwnershipRange(Gmat, &my0, &Iend);
340: nloc = Iend - my0;
341: VecGetLocalSize(mpimat->lvec, &num_ghosts);
342: nnodes = num_ghosts + nloc;
343: *a_stride = nnodes;
344: MatCreateVecs(Gmat, &tmp_crds, NULL);
346: PetscMalloc1(data_sz*nnodes, &datas);
347: for (dir=0; dir<data_sz; dir++) {
348: /* set local, and global */
349: for (kk=0; kk<nloc; kk++) {
350: PetscInt gid = my0 + kk;
351: PetscScalar crd = (PetscScalar)data_in[dir*nloc + kk]; /* col oriented */
352: datas[dir*nnodes + kk] = PetscRealPart(crd);
354: VecSetValues(tmp_crds, 1, &gid, &crd, INSERT_VALUES);
355: }
356: VecAssemblyBegin(tmp_crds);
357: VecAssemblyEnd(tmp_crds);
358: /* get ghost data */
359: VecScatterBegin(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
360: VecScatterEnd(mpimat->Mvctx,tmp_crds,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
361: VecGetArray(mpimat->lvec, &data_arr);
362: for (kk=nloc,jj=0;jj<num_ghosts;kk++,jj++) datas[dir*nnodes + kk] = PetscRealPart(data_arr[jj]);
363: VecRestoreArray(mpimat->lvec, &data_arr);
364: }
365: VecDestroy(&tmp_crds);
366: *a_data_out = datas;
367: return(0);
368: }
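/*
   Illustrative sketch (editor's addition, not part of the original source): gathering ghosted
   coordinates for a 2D problem.  coords is assumed to hold the nloc x-values followed by the
   nloc y-values, matching the column-oriented data_in layout documented above.
*/
#if 0
{
  PetscInt  nrows;
  PetscReal *ghosted;                                     /* nrows*2 entries, column oriented */
  PCGAMGGetDataWithGhosts(Gmat,2,coords,&nrows,&ghosted);
  /* rows 0..nloc-1 hold the local values, rows nloc..nrows-1 the ghost values */
  PetscFree(ghosted);
}
#endif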
370: PetscErrorCode PCGAMGHashTableCreate(PetscInt a_size, PCGAMGHashTable *a_tab)
371: {
373: PetscInt kk;
376: a_tab->size = a_size;
377: PetscMalloc2(a_size, &a_tab->table,a_size, &a_tab->data);
378: for (kk=0; kk<a_size; kk++) a_tab->table[kk] = -1;
379: return(0);
380: }
382: PetscErrorCode PCGAMGHashTableDestroy(PCGAMGHashTable *a_tab)
383: {
387: PetscFree2(a_tab->table,a_tab->data);
388: return(0);
389: }
391: PetscErrorCode PCGAMGHashTableAdd(PCGAMGHashTable *a_tab, PetscInt a_key, PetscInt a_data)
392: {
393: PetscInt kk,idx;
396: if (a_key<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Negative key %D.",a_key);
397: for (kk = 0, idx = GAMG_HASH(a_key); kk < a_tab->size; kk++, idx = (idx==(a_tab->size-1)) ? 0 : idx + 1) {
398: if (a_tab->table[idx] == a_key) {
399: /* exists */
400: a_tab->data[idx] = a_data;
401: break;
402: } else if (a_tab->table[idx] == -1) {
403: /* add */
404: a_tab->table[idx] = a_key;
405: a_tab->data[idx] = a_data;
406: break;
407: }
408: }
409: if (kk==a_tab->size) {
410: /* this is not very efficient; we wait until the table is completely full before resizing */
411: PetscInt oldsize = a_tab->size, new_size = 2*a_tab->size + 5, *oldtable = a_tab->table, *olddata = a_tab->data;
414: a_tab->size = new_size;
415: PetscMalloc2(a_tab->size, &a_tab->table,a_tab->size, &a_tab->data);
416: for (kk=0;kk<a_tab->size;kk++) a_tab->table[kk] = -1;
417: for (kk=0;kk<oldsize;kk++) {
418: if (oldtable[kk] != -1) {
419: PCGAMGHashTableAdd(a_tab, oldtable[kk], olddata[kk]);
420: }
421: }
422: PetscFree2(oldtable,olddata);
423: PCGAMGHashTableAdd(a_tab, a_key, a_data);
424: }
425: return(0);
426: }
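/*
   Illustrative sketch (editor's addition, not part of the original source): the table maps
   non-negative integer keys to integer data with linear probing, and grows to 2*size + 5 only
   once it is completely full.  The lookup call below assumes the PCGAMGHashTableFind() helper
   declared alongside these routines in gamg.h.
*/
#if 0
{
  PCGAMGHashTable tab;
  PetscInt        val;
  PCGAMGHashTableCreate(32,&tab);       /* initial size; the table grows automatically */
  PCGAMGHashTableAdd(&tab,1000,7);      /* key 1000 -> data 7 */
  PCGAMGHashTableFind(&tab,1000,&val);  /* val == 7 */
  PCGAMGHashTableDestroy(&tab);
}
#endif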