static char help[] = "Solves a nonlinear system in parallel with a user-defined Newton method.\n\
Uses KSP to solve the linearized Newton systems.  This solver\n\
is a very simplistic inexact Newton method.  The intent of this code is to\n\
demonstrate the repeated solution of linear systems with the same nonzero pattern.\n\
\n\
This is NOT the recommended approach for solving nonlinear problems with PETSc!\n\
We urge users to employ the SNES component for solving nonlinear problems whenever\n\
possible, as it offers many advantages over coding nonlinear solvers independently.\n\
\n\
We solve the Bratu (SFI - solid fuel ignition) problem in a 2D rectangular\n\
domain, using distributed arrays (DMDAs) to partition the parallel grid.\n\
The command line options include:\n\
  -par <parameter>, where <parameter> indicates the problem's nonlinearity\n\
     problem SFI:  <parameter> = Bratu parameter (0 <= par <= 6.81)\n\
  -mx <xg>, where <xg> = number of grid points in the x-direction\n\
  -my <yg>, where <yg> = number of grid points in the y-direction\n\
  -Nx <npx>, where <npx> = number of processors in the x-direction\n\
  -Ny <npy>, where <npy> = number of processors in the y-direction\n\n";
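
/*
   Example invocation (the executable name "ex14" is assumed here; adjust to
   your build):

       mpiexec -n 4 ./ex14 -mx 20 -my 20 -par 6.0 -ksp_monitor

   runs on 4 processes with a 20x20 grid and Bratu parameter lambda = 6.0.
*/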

/*T
   Concepts: KSP^writing a user-defined nonlinear solver (parallel Bratu example);
   Concepts: DMDA^using distributed arrays;
   Processors: n
T*/

/* ------------------------------------------------------------------------

    Solid Fuel Ignition (SFI) problem.  This problem is modeled by
    the partial differential equation

            -Laplacian u - lambda*exp(u) = 0,  0 < x,y < 1,

    with boundary conditions

             u = 0  for  x = 0, x = 1, y = 0, y = 1.

    A finite difference approximation with the usual 5-point stencil
    is used to discretize the boundary value problem to obtain a nonlinear
    system of equations.
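
    For reference, at an interior grid point (i,j) with mesh spacings
    hx = 1/(mx-1) and hy = 1/(my-1), the discrete equation assembled below
    (see ComputeFunction()) is

        (2*u(i,j) - u(i-1,j) - u(i+1,j))*(hy/hx)
      + (2*u(i,j) - u(i,j-1) - u(i,j+1))*(hx/hy)
      - lambda*hx*hy*exp(u(i,j)) = 0,

    with u = 0 imposed directly at boundary points.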

    The SNES version of this problem is:  snes/examples/tutorials/ex5.c
    We urge users to employ the SNES component for solving nonlinear
    problems whenever possible, as it offers many advantages over coding
    nonlinear solvers independently.

  ------------------------------------------------------------------------- */

/*
   Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
   Include "petscksp.h" so that we can use KSP solvers.  Note that this
   file automatically includes:
     petscsys.h    - base PETSc routines   petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
*/
#include <petscdmda.h>
#include <petscksp.h>

/*
   User-defined application context - contains data needed by the
   application-provided call-back routines, ComputeJacobian() and
   ComputeFunction().
*/
typedef struct {
  PetscReal   param;          /* test problem parameter */
  PetscInt    mx,my;          /* discretization in x,y directions */
  Vec         localX,localF;  /* ghosted local vectors */
  DM          da;             /* distributed array data structure */
  PetscMPIInt rank;           /* processor rank */
} AppCtx;

/*
   User-defined routines
*/
extern PetscErrorCode ComputeFunction(AppCtx*,Vec,Vec),FormInitialGuess(AppCtx*,Vec);
extern PetscErrorCode ComputeJacobian(AppCtx*,Vec,Mat,MatStructure*);

int main(int argc,char **argv)
{
  /* -------------- Data to define application problem ---------------- */
  MPI_Comm     comm;                /* communicator */
  KSP          ksp;                 /* linear solver */
  Vec          X,Y,F;               /* solution, update, residual vectors */
  Mat          J;                   /* Jacobian matrix */
  AppCtx       user;                /* user-defined work context */
  PetscInt     Nx,Ny;               /* number of processors in x- and y- directions */
  PetscMPIInt  size;                /* number of processors */
  PetscReal    bratu_lambda_max = 6.81,bratu_lambda_min = 0.;
  PetscInt     m,N;

  /* --------------- Data to define nonlinear solver -------------- */
  PetscReal    rtol = 1.e-8;            /* relative convergence tolerance */
  PetscReal    xtol = 1.e-8;            /* step convergence tolerance */
  PetscReal    ttol;                    /* convergence tolerance */
  PetscReal    fnorm,ynorm,xnorm;       /* various vector norms */
  PetscInt     max_nonlin_its = 10;     /* maximum number of iterations for nonlinear solver */
  PetscInt     max_functions  = 50;     /* maximum number of function evaluations */
  PetscInt     lin_its;                 /* number of linear solver iterations for each step */
  PetscInt     i;                       /* nonlinear solve iteration number */
  MatStructure mat_flag;                /* flag indicating structure of preconditioner matrix */
  PetscBool    no_output = PETSC_FALSE; /* flag indicating whether to suppress output */

  PetscInitialize(&argc,&argv,(char*)0,help);
  comm = PETSC_COMM_WORLD;
  MPI_Comm_rank(comm,&user.rank);
  PetscOptionsGetBool(NULL,"-no_output",&no_output,NULL);

  /*
     Initialize problem parameters
  */
  user.mx = 4; user.my = 4; user.param = 6.0;

  PetscOptionsGetInt(NULL,"-mx",&user.mx,NULL);
  PetscOptionsGetInt(NULL,"-my",&user.my,NULL);
  PetscOptionsGetReal(NULL,"-par",&user.param,NULL);
  if (user.param >= bratu_lambda_max || user.param <= bratu_lambda_min) SETERRQ(PETSC_COMM_WORLD,1,"Lambda is out of range");
  N = user.mx*user.my;

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create linear solver context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  KSPCreate(comm,&ksp);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create vector data structures
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create distributed array (DMDA) to manage parallel grid and vectors
  */
  MPI_Comm_size(comm,&size);
  Nx = PETSC_DECIDE; Ny = PETSC_DECIDE;
  PetscOptionsGetInt(NULL,"-Nx",&Nx,NULL);
  PetscOptionsGetInt(NULL,"-Ny",&Ny,NULL);
  if (Nx*Ny != size && (Nx != PETSC_DECIDE || Ny != PETSC_DECIDE)) SETERRQ(PETSC_COMM_WORLD,1,"Incompatible number of processors:  Nx * Ny != size");
  DMDACreate2d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,DMDA_BOUNDARY_NONE,DMDA_STENCIL_STAR,user.mx,user.my,Nx,Ny,1,1,NULL,NULL,&user.da);

  /*
     Extract global and local vectors from DMDA; then duplicate for remaining
     vectors that are the same types
  */
  DMCreateGlobalVector(user.da,&X);
  DMCreateLocalVector(user.da,&user.localX);
  VecDuplicate(X,&F);
  VecDuplicate(X,&Y);
  VecDuplicate(user.localX,&user.localF);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create matrix data structure for Jacobian
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Note:  For the parallel case, vectors and matrices MUST be partitioned
     accordingly.  When using distributed arrays (DMDAs) to create vectors,
     the DMDAs determine the problem partitioning.  We must explicitly
     specify the local matrix dimensions upon its creation for compatibility
     with the vector distribution.  Thus, the generic MatCreate() routine
     is NOT sufficient when working with distributed arrays.

     Note: Here we only approximately preallocate storage space for the
     Jacobian.  See the users manual for a discussion of better techniques
     for preallocating matrix memory.
  */
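  /*
     The 5-point stencil yields at most 5 nonzeros per matrix row.  In the
     parallel case below, 5 nonzeros per row are preallocated for the
     diagonal (on-process) block and 3 for the off-diagonal block, a rough
     bound on the couplings to grid points owned by other processes.
  */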
  if (size == 1) {
    MatCreateSeqAIJ(comm,N,N,5,NULL,&J);
  } else {
    VecGetLocalSize(X,&m);
    MatCreateAIJ(comm,m,m,N,N,5,NULL,3,NULL,&J);
  }

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Customize linear solver; set runtime options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Set runtime options (e.g., -ksp_monitor -ksp_rtol <rtol> -ksp_type <type>)
  */
  KSPSetFromOptions(ksp);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Evaluate initial guess
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  FormInitialGuess(&user,X);
  ComputeFunction(&user,X,F);   /* Compute F(X)    */
  VecNorm(F,NORM_2,&fnorm);     /* fnorm = || F || */
  ttol = fnorm*rtol;
  if (!no_output) PetscPrintf(comm,"Initial function norm = %G\n",fnorm);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve nonlinear system with a user-defined method
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     This solver is a very simplistic inexact Newton method, with no
     damping strategies or bells and whistles.  The intent of this code
     is merely to demonstrate the repeated solution with KSP of linear
     systems with the same nonzero structure.

     This is NOT the recommended approach for solving nonlinear problems
     with PETSc!  We urge users to employ the SNES component for solving
     nonlinear problems whenever possible with application codes, as it
     offers many advantages over coding nonlinear solvers independently.
  */
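
  /*
     Each pass through the loop below performs one Newton step: given the
     current iterate X, solve the linear system

         J(X) Y = F(X)

     for the update Y, then set X <- X - Y.  The iteration stops when
     ||F(X)|| drops below ttol, the update becomes very small relative to
     X, or the iteration/function-evaluation limits are reached.
  */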
  for (i=0; i<max_nonlin_its; i++) {

    /*
       Compute the Jacobian matrix.  See the comments in this routine for
       important information about setting the flag mat_flag.
    */
    ComputeJacobian(&user,X,J,&mat_flag);

    /*
       Solve J Y = F, where J is the Jacobian matrix.
         - First, set the KSP linear operators.  Here the matrix that
           defines the linear system also serves as the preconditioning
           matrix.
         - Then solve the Newton system.
    */
    KSPSetOperators(ksp,J,J,mat_flag);
    KSPSolve(ksp,F,Y);
    KSPGetIterationNumber(ksp,&lin_its);

    /*
       Compute updated iterate
    */
    VecNorm(Y,NORM_2,&ynorm);       /* ynorm = || Y || */
    VecAYPX(Y,-1.0,X);              /* Y <- X - Y      */
    VecCopy(Y,X);                   /* X <- Y          */
    VecNorm(X,NORM_2,&xnorm);       /* xnorm = || X || */
    if (!no_output) {
      PetscPrintf(comm,"   linear solve iterations = %D, xnorm=%G, ynorm=%G\n",lin_its,xnorm,ynorm);
    }

    /*
       Evaluate new nonlinear function
    */
    ComputeFunction(&user,X,F);     /* Compute F(X)    */
    VecNorm(F,NORM_2,&fnorm);       /* fnorm = || F || */
    if (!no_output) {
      PetscPrintf(comm,"Iteration %D, function norm = %G\n",i+1,fnorm);
    }

    /*
       Test for convergence
    */
    if (fnorm <= ttol) {
      if (!no_output) {
        PetscPrintf(comm,"Converged due to function norm %G < %G (relative tolerance)\n",fnorm,ttol);
      }
      break;
    }
    if (ynorm < xtol*(xnorm)) {
      if (!no_output) {
        PetscPrintf(comm,"Converged due to small update length: %G < %G * %G\n",ynorm,xtol,xnorm);
      }
      break;
    }
    if (i > max_functions) {
      if (!no_output) {
        PetscPrintf(comm,"Exceeded maximum number of function evaluations: %D > %D\n",i,max_functions);
      }
      break;
    }
  }
  PetscPrintf(comm,"Number of nonlinear iterations = %D\n",i+1);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  MatDestroy(&J);           VecDestroy(&Y);
  VecDestroy(&user.localX); VecDestroy(&X);
  VecDestroy(&user.localF); VecDestroy(&F);
  KSPDestroy(&ksp);         DMDestroy(&user.da);
  PetscFinalize();

  return 0;
}
/* ------------------------------------------------------------------- */

/*
   FormInitialGuess - Forms initial approximation.

   Input Parameters:
   user - user-defined application context
   X - vector

   Output Parameter:
   X - vector
*/
PetscErrorCode FormInitialGuess(AppCtx *user,Vec X)
{
  PetscInt    i,j,row,mx,my,xs,ys,xm,ym,gxm,gym,gxs,gys;
  PetscReal   one = 1.0,lambda,temp1,temp,hx,hy;
  PetscScalar *x;
  Vec         localX = user->localX;

  mx = user->mx;               my = user->my;            lambda = user->param;
  hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
  temp1 = lambda/(lambda + one);

  /*
     Get a pointer to vector data.
       - For default PETSc vectors, VecGetArray() returns a pointer to
         the data array.  Otherwise, the routine is implementation dependent.
       - You MUST call VecRestoreArray() when you no longer need access to
         the array.
  */
  VecGetArray(localX,&x);

  /*
     Get local grid boundaries (for 2-dimensional DMDA):
       xs, ys   - starting grid indices (no ghost points)
       xm, ym   - widths of local grid (no ghost points)
       gxs, gys - starting grid indices (including ghost points)
       gxm, gym - widths of local grid (including ghost points)
  */
  DMDAGetCorners(user->da,&xs,&ys,NULL,&xm,&ym,NULL);
  DMDAGetGhostCorners(user->da,&gxs,&gys,NULL,&gxm,&gym,NULL);

  /*
     Compute initial guess over the locally owned part of the grid
  */
  for (j=ys; j<ys+ym; j++) {
    temp = (PetscReal)(PetscMin(j,my-j-1))*hy;
    for (i=xs; i<xs+xm; i++) {
      row = i - gxs + (j - gys)*gxm;
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        x[row] = 0.0;
        continue;
      }
      x[row] = temp1*PetscSqrtScalar(PetscMin((PetscReal)(PetscMin(i,mx-i-1))*hx,temp));
    }
  }

  /*
     Restore vector
  */
  VecRestoreArray(localX,&x);

  /*
     Insert values into global vector
  */
  DMLocalToGlobalBegin(user->da,localX,INSERT_VALUES,X);
  DMLocalToGlobalEnd(user->da,localX,INSERT_VALUES,X);
  return 0;
}
/* ------------------------------------------------------------------- */

/*
   ComputeFunction - Evaluates nonlinear function, F(x).

   Input Parameters:
.  X - input vector
.  user - user-defined application context

   Output Parameter:
.  F - function vector
*/
PetscErrorCode ComputeFunction(AppCtx *user,Vec X,Vec F)
{
  PetscInt    i,j,row,mx,my,xs,ys,xm,ym,gxs,gys,gxm,gym;
  PetscReal   two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
  PetscScalar u,uxx,uyy,*x,*f;
  Vec         localX = user->localX,localF = user->localF;

  mx = user->mx;               my = user->my;            lambda = user->param;
  hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
  sc = hx*hy*lambda;           hxdhy = hx/hy;            hydhx = hy/hx;

  /*
     Scatter ghost points to local vector, using the 2-step process
        DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  DMGlobalToLocalBegin(user->da,X,INSERT_VALUES,localX);
  DMGlobalToLocalEnd(user->da,X,INSERT_VALUES,localX);

  /*
     Get pointers to vector data
  */
  VecGetArray(localX,&x);
  VecGetArray(localF,&f);

  /*
     Get local grid boundaries
  */
  DMDAGetCorners(user->da,&xs,&ys,NULL,&xm,&ym,NULL);
  DMDAGetGhostCorners(user->da,&gxs,&gys,NULL,&gxm,&gym,NULL);

  /*
     Compute function over the locally owned part of the grid
  */
  for (j=ys; j<ys+ym; j++) {
    row = (j - gys)*gxm + xs - gxs - 1;
    for (i=xs; i<xs+xm; i++) {
      row++;
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        f[row] = x[row];
        continue;
      }
      u      = x[row];
      uxx    = (two*u - x[row-1] - x[row+1])*hydhx;
      uyy    = (two*u - x[row-gxm] - x[row+gxm])*hxdhy;
      f[row] = uxx + uyy - sc*PetscExpScalar(u);
    }
  }

  /*
     Restore vectors
  */
  VecRestoreArray(localX,&x);
  VecRestoreArray(localF,&f);

  /*
     Insert values into global vector
  */
  DMLocalToGlobalBegin(user->da,localF,INSERT_VALUES,F);
  DMLocalToGlobalEnd(user->da,localF,INSERT_VALUES,F);
  PetscLogFlops(11.0*ym*xm);
  return 0;
}
/* ------------------------------------------------------------------- */

/*
   ComputeJacobian - Evaluates Jacobian matrix.

   Input Parameters:
.  x - input vector
.  user - user-defined application context

   Output Parameters:
.  jac - Jacobian matrix
.  flag - flag indicating matrix structure

   Notes:
   Due to grid point reordering with DMDAs, we must always work
   with the local grid points, and then transform them to the new
   global numbering with the "ltog" mapping (via DMDAGetGlobalIndices()).
   We cannot work directly with the global numbers for the original
   uniprocessor grid!
*/
PetscErrorCode ComputeJacobian(AppCtx *user,Vec X,Mat jac,MatStructure *flag)
{
  Vec         localX = user->localX;   /* local vector */
  PetscInt    *ltog;                   /* local-to-global mapping */
  PetscInt    i,j,row,mx,my,col[5];
  PetscInt    nloc,xs,ys,xm,ym,gxs,gys,gxm,gym,grow;
  PetscScalar two = 2.0,one = 1.0,lambda,v[5],hx,hy,hxdhy,hydhx,sc,*x;

  mx = user->mx;               my = user->my;            lambda = user->param;
  hx = one/(PetscReal)(mx-1);  hy = one/(PetscReal)(my-1);
  sc = hx*hy;                  hxdhy = hx/hy;            hydhx = hy/hx;

  /*
     Scatter ghost points to local vector, using the 2-step process
        DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  DMGlobalToLocalBegin(user->da,X,INSERT_VALUES,localX);
  DMGlobalToLocalEnd(user->da,X,INSERT_VALUES,localX);

  /*
     Get pointer to vector data
  */
  VecGetArray(localX,&x);

  /*
     Get local grid boundaries
  */
  DMDAGetCorners(user->da,&xs,&ys,NULL,&xm,&ym,NULL);
  DMDAGetGhostCorners(user->da,&gxs,&gys,NULL,&gxm,&gym,NULL);

  /*
     Get the global node numbers for all local nodes, including ghost points
  */
  DMDAGetGlobalIndices(user->da,&nloc,&ltog);

  /*
     Compute entries for the locally owned part of the Jacobian.
      - Currently, all PETSc parallel matrix formats are partitioned by
        contiguous chunks of rows across the processors. The "grow"
        parameter computed below specifies the global row number
        corresponding to each local grid point.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
      - Here, we set all entries for a particular row at once.
  */
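  /*
     For an interior grid point, the nonzero entries in its Jacobian row are
     the partial derivatives of the discrete equation with respect to the
     point itself and its four stencil neighbors:

        dF/du(i,j)     =  2*(hy/hx + hx/hy) - lambda*hx*hy*exp(u(i,j))
        dF/du(i+/-1,j) = -hy/hx
        dF/du(i,j+/-1) = -hx/hy

     These are exactly the values v[0..4] inserted below.
  */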
  for (j=ys; j<ys+ym; j++) {
    row = (j - gys)*gxm + xs - gxs - 1;
    for (i=xs; i<xs+xm; i++) {
      row++;
      grow = ltog[row];
      /* boundary points */
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        MatSetValues(jac,1,&grow,1,&grow,&one,INSERT_VALUES);
        continue;
      }
      /* interior grid points */
      v[0] = -hxdhy; col[0] = ltog[row - gxm];
      v[1] = -hydhx; col[1] = ltog[row - 1];
      v[2] = two*(hydhx + hxdhy) - sc*lambda*PetscExpScalar(x[row]); col[2] = grow;
      v[3] = -hydhx; col[3] = ltog[row + 1];
      v[4] = -hxdhy; col[4] = ltog[row + gxm];
      MatSetValues(jac,1,&grow,5,col,v,INSERT_VALUES);
    }
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);
  VecRestoreArray(localX,&x);
  MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);

  /*
     Set flag to indicate that the Jacobian matrix retains an identical
     nonzero structure throughout all nonlinear iterations (although the
     values of the entries change). Thus, we can save some work in setting
     up the preconditioner (e.g., no need to redo symbolic factorization for
     ILU/ICC preconditioners).
      - If the nonzero structure of the matrix is different during
        successive linear solves, then the flag DIFFERENT_NONZERO_PATTERN
        must be used instead.  If you are unsure whether the matrix
        structure has changed or not, use the flag DIFFERENT_NONZERO_PATTERN.
      - Caution:  If you specify SAME_NONZERO_PATTERN, PETSc
        believes your assertion and does not check the structure
        of the matrix.  If you erroneously claim that the structure
        is the same when it actually is not, the new preconditioner
        will not function correctly.  Thus, use this optimization
        feature with caution!
  */
  *flag = SAME_NONZERO_PATTERN;
  return 0;
}