Rheolef  7.2
an efficient C++ finite element environment
solver_pastix_mpi.cc
Go to the documentation of this file.
1 // direct solver based on pastix
22 //
23 #include "rheolef/config.h"
24 #if defined(_RHEOLEF_HAVE_PASTIX) && defined(_RHEOLEF_HAVE_MPI)
25 #include "solver_pastix.h"
26 #include "rheolef/cg.h"
27 #include "rheolef/eye.h"
28 
29 namespace rheolef {
30 using namespace std;
31 
32 template<class T>
33 solver_pastix_rep<T,distributed>::solver_pastix_rep (const csr<T>& a, const solver_option& opt)
34  : base(),
35  _comm(),
36  _i2dis_i_base(),
37  _new_n(0),
38  _new_ptr(0),
39  _new_idx(0),
40  _new_val(0)
41 {
42 warning_macro ("call load...");
43  load (a, opt);
44 warning_macro ("return load...");
45 }
46 template<class T>
47 void
48 solver_pastix_rep<T,distributed>::load (const csr<T>& a, const solver_option& opt)
49 {
50 warning_macro ("load...");
51  base::_is_sym = a.is_symmetric();
52  base::_pattern_dimension = a.pattern_dimension();
53  _comm = a.row_ownership().comm();
54  base::_csr_row_ownership = a.row_ownership();
55  base::_csr_col_ownership = a.col_ownership();
56  base::_opt = opt;
57 
58  check_macro (a.nrow() == a.ncol(), "factorization: only square matrix are supported");
59 
60  // there is a bug in pastix related to "np < a.dis_nrow" : use an alternative solver...
61  if (a.nrow() == 0) {
62  base::_have_pastix_bug_small_matrix = true;
63  }
64  // ask if other procs have a bug ?
65  base::_have_pastix_bug_small_matrix = mpi::all_reduce (comm(), base::_have_pastix_bug_small_matrix, mpi::maximum<bool>());
66  if (base::_have_pastix_bug_small_matrix) {
67  // one of the procs at least have a too small matrix for pastix !
68  base::_a_when_bug = a;
69  return;
70  }
71  if (base::_is_sym) {
72  load_symmetric(a);
73  } else {
74  load_unsymmetric(a);
75  }
76  if (base::_opt.do_check) {
77  check ();
78  }
79  symbolic_factorization ();
80  numeric_factorization();
81 warning_macro ("load done");
82 }
83 template<class T>
84 void
85 solver_pastix_rep<T,distributed>::load_unsymmetric (const csr<T>& a)
86 {
87  size_t n = a.nrow();
88  size_t nnz = a.nnz() + a.ext_nnz();
89  resize (n, nnz);
90  load_both_continued (a);
91 }
92 // compute how many a.dia (dis_i,dis_j) have dis_i > dis_j
93 template<class T>
94 static
95 size_t
96 compute_csr_upper_dia_nnz (const csr<T>& a)
97 {
98  size_t csr_upper_dia_nnz = 0;
99  size_t first_dis_i = a.row_ownership().first_index();
100  size_t first_dis_j = a.col_ownership().first_index();
101  for (size_t i = 0; i < a.nrow(); i++) {
102  size_t dis_i = first_dis_i + i;
103  typename csr<T>::const_iterator ia = a.begin();
104  for (typename csr<T>::const_data_iterator ap = ia[i]; ap < ia[i+1]; ap++) {
105  size_t j = (*ap).first;
106  size_t dis_j = first_dis_j + j;
107  if (dis_i <= dis_j) csr_upper_dia_nnz++;
108  }
109  }
110  return csr_upper_dia_nnz;
111 }
112 // compute how many a.ext (dis_i,pa_j) have dis_i > dis_j
113 template<class T>
114 static
115 size_t
116 compute_csr_upper_ext_nnz (const csr<T>& a)
117 {
118  size_t csr_upper_ext_nnz = 0;
119  size_t first_dis_i = a.row_ownership().first_index();
120  size_t first_dis_j = a.col_ownership().first_index();
121  for (size_t i = 0; i < a.nrow(); i++) {
122  size_t dis_i = first_dis_i + i;
123  typename csr<T>::const_iterator ext_ia = a.ext_begin();
124  for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
125  const size_t& jext = (*ap).first;
126  size_t dis_j = a.jext2dis_j (jext);
127  if (dis_i <= dis_j) csr_upper_ext_nnz++;
128  }
129  }
130  return csr_upper_ext_nnz;
131 }
132 template<class T>
133 void
134 solver_pastix_rep<T,distributed>::load_symmetric (const csr<T>& a)
135 {
136  // conserve only the lower part of the csc(pastix) = the upper past of the csr(rheolef)
137  size_t n = a.nrow();
138  size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
139  size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
140  size_t nnz = csr_upper_dia_nnz + csr_upper_ext_nnz;
141  resize (n, nnz);
142  load_both_continued (a);
143 }
144 template<class T>
145 void
146 solver_pastix_rep<T,distributed>::load_both_continued (const csr<T>& a)
147 {
148  size_t first_dis_i = a.row_ownership().first_index();
149  size_t first_dis_j = a.col_ownership().first_index();
150  typename csr<T>::const_iterator aptr = a.begin();
151  pastix_int_t bp = 0;
152  base::_ptr [0] = bp + base::_base;
153  for (size_t i = 0; i < a.nrow(); i++) {
154  size_t dis_i = first_dis_i + i;
155  _i2dis_i_base [i] = dis_i + base::_base;
156  for (typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
157  size_t j = (*ap).first;
158  const T& val = (*ap).second;
159  size_t dis_j = first_dis_j + j;
160  if (base::_is_sym && dis_i > dis_j) continue;
161  base::_val [bp] = val;
162  base::_idx [bp] = dis_j + base::_base;
163  bp++;
164  }
165  // note: pas tries par j croissant : dans a.ext, il i a des dis_j < first_dis_j
166  typename csr<T>::const_iterator ext_ia = a.ext_begin();
167  for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
168  size_t jext = (*ap).first;
169  const T& val = (*ap).second;
170  size_t dis_j = a.jext2dis_j (jext);
171  if (base::_is_sym && dis_i > dis_j) continue;
172  base::_val [bp] = val;
173  base::_idx [bp] = dis_j + base::_base;
174  bp++;
175  }
176  base::_ptr [i+1] = bp + base::_base;
177  }
178  check_macro (bp == base::_nnz, "factorization: invalid count: nnz="<<base::_nnz<<", count="<<bp);
179 }
180 template<class T>
181 void
182 solver_pastix_rep<T,distributed>::update_values (const csr<T>& a)
183 {
184  if (base::_have_pastix_bug_small_matrix) {
185  base::_a_when_bug = a;
186  return;
187  }
188  check_macro (size_t(base::_n) == a.nrow() && size_t(base::_n) == a.ncol(),
189  "update: local input matrix size distribution mismatch: ("<<a.nrow()<<","<<a.ncol()<<"), expect ("
190  << base::_n << "," << base::_n << ")");
191  size_t nnz_a;
192  if (!base::_is_sym) {
193  nnz_a = a.nnz() + a.ext_nnz();
194  } else {
195  // conserve only the lower part of the csc(pastix) = the upper past of the csr(rheolef)
196  size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
197  size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
198  nnz_a = csr_upper_dia_nnz + csr_upper_ext_nnz;
199  }
200  check_macro (size_t(base::_nnz) == nnz_a,
201  "update: local input matrix nnz distribution mismatch: nnz="<<nnz_a<<", expect nnz="<<base::_nnz);
202 
203  size_t first_dis_i = a.row_ownership().first_index();
204  size_t first_dis_j = a.col_ownership().first_index();
205  pastix_int_t bp = 0;
206  typename csr<T>::const_iterator aptr = a.begin();
207  for (size_t i = 0; i < a.nrow(); i++) {
208  size_t dis_i = first_dis_i + i;
209  for (typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
210  size_t j = (*ap).first;
211  size_t dis_j = first_dis_j + j;
212  if (base::_is_sym && dis_i > dis_j) continue;
213  base::_val [bp] = (*ap).second;
214  bp++;
215  }
216  typename csr<T>::const_iterator ext_ia = a.ext_begin();
217  for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
218  size_t jext = (*ap).first;
219  size_t dis_j = a.jext2dis_j (jext);
220  if (base::_is_sym && dis_i > dis_j) continue;
221  base::_val [bp] = (*ap).second;
222  bp++;
223  }
224  }
225  numeric_factorization();
226 }
227 template<class T>
228 void
229 solver_pastix_rep<T,distributed>::resize (pastix_int_t n, pastix_int_t nnz)
230 {
231  base::_n = n;
232  base::_nnz = nnz;
233  base::_ptr.resize(n+1);
234  base::_idx.resize(nnz);
235  base::_val.resize(nnz);
236  _i2dis_i_base.resize(n);
237 }
238 template<class T>
239 void
241 {
248  pastix_int_t symmetry = (is_symmetric() ? API_SYM_YES : API_SYM_NO);
249  pastix_int_t *ptr_begin = (pastix_int_t*) base::_ptr.begin().operator->();
250  pastix_int_t *idx_begin = (pastix_int_t*) base::_idx.begin().operator->();
251  T *val_begin = (T*) base::_val.begin().operator->();
252  pastix_int_t *i2dis_i_base_begin = (pastix_int_t*) _i2dis_i_base.begin().operator->();
253  pastix_int_t status
254  = d_pastix_checkMatrix(_comm, base::_opt.verbose_level, symmetry, API_YES,
255  base::_n, &ptr_begin, &idx_begin, &val_begin, &i2dis_i_base_begin,
256  1);
257  check_macro (status == 0, "pastix check returns error status = " << status);
258 }
259 template<class T>
260 void
261 solver_pastix_rep<T,distributed>::symbolic_factorization ()
262 {
263  if (base::_have_pastix_bug_small_matrix) {
264  return;
265  }
266  const pastix_int_t nbthread = 1; // threads are not yet very well supported with openmpi & scotch
267  const pastix_int_t ordering = 0; // scotch
268 
269  // tasks :
270  // 0. set params to default values
271  base::_iparm[IPARM_START_TASK] = API_TASK_INIT;
272  base::_iparm[IPARM_END_TASK] = API_TASK_INIT;
273  base::_iparm[IPARM_MODIFY_PARAMETER] = API_NO;
274  d_dpastix (base::pp_data(),
275  _comm,
276  base::_n,
277  base::_ptr.begin().operator->(),
278  base::_idx.begin().operator->(),
279  NULL, // _val.begin().operator->(),
280  _i2dis_i_base.begin().operator->(),
281  NULL,
282  NULL,
283  NULL,
284  1,
285  base::_iparm,
286  base::_dparm);
287 
288  // Customize some parameters
289  base::_iparm[IPARM_THREAD_NBR] = nbthread;
290  if (is_symmetric()) {
291  base::_iparm[IPARM_SYM] = API_SYM_YES;
292  base::_iparm[IPARM_FACTORIZATION] = API_FACT_LDLT;
293  } else {
294  base::_iparm[IPARM_SYM] = API_SYM_NO;
295  base::_iparm[IPARM_FACTORIZATION] = API_FACT_LU;
296  }
297  base::_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
298  base::_iparm[IPARM_VERBOSE] = base::_opt.verbose_level;
299  base::_iparm[IPARM_ORDERING] = ordering;
300  bool do_incomplete;
301  if (base::_opt.iterative != solver_option::decide) {
302  do_incomplete = (base::_opt.iterative != 0);
303  } else {
304  do_incomplete = (base::_pattern_dimension > 2); // 3d-pattern => iterative & IC(k) precond
305  }
306  base::_iparm[IPARM_INCOMPLETE] = (do_incomplete ? 1 : 0);
307  base::_iparm[IPARM_OOC_LIMIT] = base::_opt.ooc;
308  if (base::_opt.iterative == 1) {
309  base::_dparm[DPARM_EPSILON_REFINEMENT] = base::_opt.tol;
310  }
311  base::_iparm[IPARM_LEVEL_OF_FILL] = base::_opt.level_of_fill;
312  base::_iparm[IPARM_AMALGAMATION_LEVEL] = base::_opt.amalgamation;
313  base::_iparm[IPARM_RHS_MAKING] = API_RHS_B;
314 
315  // 1) get the i2new_dis_i array: its indexes are in the [0:dis_n[ range
316  // i2new_dis_i[i] = i2new_dis_i [i]
317  base::_i2new_dis_i.resize (base::_n);
318 
319  pastix_int_t itmp = 0; // pastix does not manage NULL pointers: when n=0, vector::begin() retuns a NULL...
320  T vtmp = 0; // pastix does not manage NULL pointers: when n=0, vector::begin() retuns a NULL...
321 
322  // tasks :
323  // 1. ordering
324  // 2. symbolic factorization
325  // 3. tasks mapping and scheduling
326  base::_iparm[IPARM_START_TASK] = API_TASK_ORDERING;
327  base::_iparm[IPARM_END_TASK] = API_TASK_ANALYSE;
328 
329  d_dpastix (base::pp_data(),
330  _comm,
331  base::_n,
332  base::_ptr.begin().operator->(),
333  (base::_idx.begin().operator->() != NULL) ? base::_idx.begin().operator->() : &itmp,
334  NULL,
335  (_i2dis_i_base.begin().operator->() != NULL) ? _i2dis_i_base.begin().operator->() : &itmp,
336  (base::_i2new_dis_i.begin().operator->() != NULL) ? base::_i2new_dis_i.begin().operator->() : &itmp,
337  NULL,
338  NULL,
339  1,
340  base::_iparm,
341  base::_dparm);
342 
343  _new_n = d_pastix_getLocalNodeNbr (base::pp_data());
344 
345  // 2) buil new local to global column correspondance
346  // new_i2dis_i_base[new_i] - base = new_i2dis_i [new_i] with a base=1
347  base::_new_i2dis_i_base.resize (_new_n);
348  d_pastix_getLocalNodeLst (base::pp_data(), base::_new_i2dis_i_base.begin().operator->());
349 }
350 template<class T>
351 void
352 solver_pastix_rep<T,distributed>::numeric_factorization ()
353 {
354  if (base::_have_pastix_bug_small_matrix) {
355  trace_macro ("num_fact: bug, circumvent !");
356  return;
357  }
358  // 3) redistributing the matrix in the new numbering
359  pastix_int_t status2 = d_cscd_redispatch (
360  base::_n,
361  base::_ptr.begin().operator->(),
362  base::_idx.begin().operator->(),
363  base::_val.begin().operator->(),
364  NULL,
365  _i2dis_i_base.begin().operator->(),
366  _new_n,
367  &(_new_ptr),
368  &(_new_idx),
369  &(_new_val),
370  NULL,
371  base::_new_i2dis_i_base.begin().operator->(),
372  _comm);
373  check_macro (status2 == EXIT_SUCCESS, "d_csd_redispatch failed");
374 
375  // pastix tasks:
376  // 4. numerical factorization
377  base::_iparm[IPARM_START_TASK] = API_TASK_NUMFACT;
378  base::_iparm[IPARM_END_TASK] = API_TASK_NUMFACT;
379  d_dpastix (base::pp_data(),
380  _comm,
381  _new_n,
382  _new_ptr,
383  _new_idx,
384  _new_val,
385  base::_new_i2dis_i_base.begin().operator->(),
386  base::_i2new_dis_i.begin().operator->(),
387  NULL,
388  NULL,
389  0,
390  base::_iparm,
391  base::_dparm);
392 }
393 template<class T>
394 vec<T>
395 solver_pastix_rep<T,distributed>::trans_solve (const vec<T>& rhs) const
396 {
397  if (base::_have_pastix_bug_small_matrix) {
398  error_macro ("trans_solve: bug, circumvent not yet !");
399  }
400  // ================================================================
401  // solve
402  // ================================================================
403  check_macro (rhs.size() == size_t(base::_n), "invalid rhs size="<<rhs.size()<<": expect size="<<base::_n);
404 
405  // redispatch rhs separately: formally: new_rhs [new_i] := rhs [i2new_i[i]]
406  base::_new_rhs.resize (_new_n);
407  std::set<size_t> dis_i_set;
408  std::map<size_t,size_t> dis_i2new_i;
409  size_t first_dis_i = base::_csr_row_ownership.first_index();
410  size_t last_dis_i = base::_csr_row_ownership.last_index();
411  for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
412  size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
413  if (dis_i >= first_dis_i && dis_i < last_dis_i) {
414  // value is locally available
415  size_t i = dis_i - first_dis_i;
416  base::_new_rhs [new_i] = rhs [i];
417  } else {
418  // value is owned by another processor
419  dis_i_set.insert (dis_i);
420  dis_i2new_i.insert (std::make_pair(dis_i,new_i));
421  }
422  }
423  // external terms: TODO: scatter begin&end in the symbolic step
424  std::map<size_t,T> dis_i_map;
425  rhs.get_dis_entry (dis_i_set, dis_i_map);
426  for (typename map<size_t,T>::const_iterator
427  iter = dis_i_map.begin(),
428  last = dis_i_map.end(); iter != last; iter++) {
429  size_t dis_i = (*iter).first;
430  const T& val = (*iter).second;
431  size_t new_i = dis_i2new_i [dis_i]; // find
432  base::_new_rhs [new_i] = val;
433  }
434  // tasks:
435  // 5. numerical solve
436  // 6. numerical refinement
437  // 7. clean
438  T vtmp = 0; // pastix does not manage NULL rhs pointer: when new_n=0, but vector::begin() retuns a NULL...
439  base::_iparm[IPARM_START_TASK] = API_TASK_SOLVE;
440  base::_iparm[IPARM_END_TASK] = API_TASK_REFINE;
441  d_dpastix (base::pp_data(),
442  _comm,
443  _new_n,
444  _new_ptr,
445  _new_idx,
446  _new_val,
447  base::_new_i2dis_i_base.begin().operator->(),
448  base::_i2new_dis_i.begin().operator->(),
449  NULL,
450  (_new_n != 0) ? base::_new_rhs.begin().operator->() : &vtmp,
451  1,
452  base::_iparm,
453  base::_dparm);
454 
455  // new_i2dis_i_base [new_i] - base = new_i2dis_i [new_i]
456  vec<T> x (base::_csr_row_ownership);
457  for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
458  size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
459  x.dis_entry (dis_i) = base::_new_rhs[new_i];
460  }
461  x.dis_entry_assembly();
462  return x;
463 }
464 template<class T>
465 vec<T>
466 solver_pastix_rep<T,distributed>::solve (const vec<T>& rhs) const
467 {
468  if (base::_have_pastix_bug_small_matrix) {
470  size_t max_iter = 1000; // TODO: use _opt to get tol & max_iter
471  vec<T> x (base::_a_when_bug.col_ownership(), 0);
472  int status = cg (base::_a_when_bug, x, rhs, eye<T>(), max_iter, tol, &derr);
473  check_macro (tol < std::numeric_limits<T>::epsilon(), "solver(cg): machine precision not reached: "<<tol);
474  return x;
475  }
476  // TODO: make a csc<T> wrapper around csr<T> and use csc<T> in form & form_assembly
477  // => avoid the transposition
478  if (! base::_is_sym) {
479  warning_macro ("solve: not yet supported for unsymmetric matrix");
480  }
481  return trans_solve (rhs);
482 }
483 template<class T>
484 solver_pastix_rep<T,distributed>::~solver_pastix_rep()
485 {
486  // tasks:
487  // 7. clean
488  base::_iparm[IPARM_START_TASK] = API_TASK_CLEAN;
489  base::_iparm[IPARM_END_TASK] = API_TASK_CLEAN;
490 
491  d_dpastix (base::pp_data(),
492  _comm,
493  _new_n,
494  _new_ptr,
495  _new_idx,
496  _new_val,
497  NULL,
498  NULL,
499  NULL,
500  base::_new_rhs.begin().operator->(),
501  1,
502  base::_iparm,
503  base::_dparm);
504 
505  // was allocated by d_cscd_redispatch()
506  if (_new_ptr != NULL) free (_new_ptr);
507  if (_new_idx != NULL) free (_new_idx);
508  if (_new_val != NULL) free (_new_val);
509  base::_pastix_data = 0;
510 }
511 // ----------------------------------------------------------------------------
512 // instanciation in library
513 // ----------------------------------------------------------------------------
514 // TODO: code is only valid here for T=double (d_pastix etc)
515 
516 template class solver_pastix_rep<double,distributed>;
517 
518 } // namespace rheolef
519 #endif // _RHEOLEF_HAVE_PASTIX
#define trace_macro(message)
Definition: dis_macros.h:111
#define error_macro(message)
Definition: dis_macros.h:49
#define warning_macro(message)
Definition: dis_macros.h:53
Expr1::float_type T
Definition: field_expr.h:230
check_macro(expr1.have_homogeneous_space(Xh1), "dual(expr1,expr2); expr1 should have homogeneous space. HINT: use dual(interpolate(Xh, expr1),expr2)")
This file is part of Rheolef.
void solve(tiny_matrix< T > &a, tiny_vector< size_t > &piv, const tiny_vector< T > &b, tiny_vector< T > &x)
Definition: tiny_lu.h:92
int cg(const Matrix &A, Vector &x, const Vector2 &Mb, const Preconditioner &M, const solver_option &sopt=solver_option())
Definition: cg.h:94
void check(Float p, const field &uh)
void load(idiststream &in, Float &p, field &uh)
Float epsilon