escript  Revision_
BlockOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16 
17 #ifndef __PASO_BLOCKOPS_H__
18 #define __PASO_BLOCKOPS_H__
19 
20 #include "Paso.h"
21 #include "PasoException.h"
22 
23 #include <cstring> // memcpy
24 
25 #ifdef ESYS_HAVE_LAPACK
26  #ifdef ESYS_MKL_LAPACK
27  #include <mkl_lapack.h>
28  #include <mkl_cblas.h>
29  #else
30  extern "C" {
31  #include <clapack.h>
32  #include <cblas.h>
33  }
34  #endif
35 #endif
36 
37 namespace paso {
38 
39 inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
40 {
41  memcpy((void*)R, (void*)V, N*sizeof(double));
42 }
43 
45 inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
46 {
47  const double S1 = V[0];
48  const double S2 = V[1];
49  const double A11 = mat[0];
50  const double A12 = mat[2];
51  const double A21 = mat[1];
52  const double A22 = mat[3];
53  R[0] -= A11 * S1 + A12 * S2;
54  R[1] -= A21 * S1 + A22 * S2;
55 }
56 
58 inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
59 {
60  const double S1 = V[0];
61  const double S2 = V[1];
62  const double S3 = V[2];
63  const double A11 = mat[0];
64  const double A21 = mat[1];
65  const double A31 = mat[2];
66  const double A12 = mat[3];
67  const double A22 = mat[4];
68  const double A32 = mat[5];
69  const double A13 = mat[6];
70  const double A23 = mat[7];
71  const double A33 = mat[8];
72  R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
73  R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
74  R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
75 }
76 
77 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
78 
80 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
81 {
82 #ifdef ESYS_HAVE_LAPACK
83  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
84 #else
86 #endif
87 }
88 
89 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
90 {
91 #ifdef ESYS_HAVE_LAPACK
92  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
93 #else
95 #endif
96 }
97 
98 inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
99 {
100  const double A11 = A[0];
101  const double A12 = A[2];
102  const double A21 = A[1];
103  const double A22 = A[3];
104  double D = A11*A22-A12*A21;
105  if (std::abs(D) > 0) {
106  D = 1./D;
107  invA[0] = A22*D;
108  invA[1] = -A21*D;
109  invA[2] = -A12*D;
110  invA[3] = A11*D;
111  } else {
112  *failed = 1;
113  }
114 }
115 
116 inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
117 {
118  const double A11 = A[0];
119  const double A21 = A[1];
120  const double A31 = A[2];
121  const double A12 = A[3];
122  const double A22 = A[4];
123  const double A32 = A[5];
124  const double A13 = A[6];
125  const double A23 = A[7];
126  const double A33 = A[8];
127  double D = A11*(A22*A33-A23*A32) +
128  A12*(A31*A23-A21*A33) +
129  A13*(A21*A32-A31*A22);
130  if (std::abs(D) > 0) {
131  D = 1./D;
132  invA[0] = (A22*A33-A23*A32)*D;
133  invA[1] = (A31*A23-A21*A33)*D;
134  invA[2] = (A21*A32-A31*A22)*D;
135  invA[3] = (A13*A32-A12*A33)*D;
136  invA[4] = (A11*A33-A31*A13)*D;
137  invA[5] = (A12*A31-A11*A32)*D;
138  invA[6] = (A12*A23-A13*A22)*D;
139  invA[7] = (A13*A21-A11*A23)*D;
140  invA[8] = (A11*A22-A12*A21)*D;
141  } else {
142  *failed = 1;
143  }
144 }
145 
147 inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
148 {
149 #ifdef ESYS_HAVE_LAPACK
150 #ifdef ESYS_MKL_LAPACK
151  int res = 0;
152  dgetrf(&N, &N, mat, &N, pivot, &res);
153  if (res != 0)
154  *failed = 1;
155 #else
156  int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
157  if (res != 0)
158  *failed = 1;
159 #endif // ESYS_MKL_LAPACK
160 #else
162 #endif
163 }
164 
166 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
167 {
168 #ifdef ESYS_HAVE_LAPACK
169 #ifdef ESYS_MKL_LAPACK
170  int res = 0;
171  int ONE = 1;
172  dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
173  if (res != 0)
174  *failed = 1;
175 #else
176  int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
177  if (res != 0)
178  *failed = 1;
179 #endif // ESYS_MKL_LAPACK
180 #else
182 #endif
183 }
184 
186 inline void BlockOps_MViP_2(const double* mat, double* V)
187 {
188  const double S1 = V[0];
189  const double S2 = V[1];
190  const double A11 = mat[0];
191  const double A12 = mat[2];
192  const double A21 = mat[1];
193  const double A22 = mat[3];
194  V[0] = A11 * S1 + A12 * S2;
195  V[1] = A21 * S1 + A22 * S2;
196 }
197 
199 inline void BlockOps_MViP_3(const double* mat, double* V)
200 {
201  const double S1 = V[0];
202  const double S2 = V[1];
203  const double S3 = V[2];
204  const double A11 = mat[0];
205  const double A21 = mat[1];
206  const double A31 = mat[2];
207  const double A12 = mat[3];
208  const double A22 = mat[4];
209  const double A32 = mat[5];
210  const double A13 = mat[6];
211  const double A23 = mat[7];
212  const double A33 = mat[8];
213  V[0] = A11 * S1 + A12 * S2 + A13 * S3;
214  V[1] = A21 * S1 + A22 * S2 + A23 * S3;
215  V[2] = A31 * S1 + A32 * S2 + A33 * S3;
216 }
217 
218 inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
219  index_t* pivot, double* x)
220 {
221  if (n_block == 1) {
222 #pragma omp parallel for
223  for (dim_t i=0; i<n; ++i)
224  x[i] *= D[i];
225  } else if (n_block == 2) {
226 #pragma omp parallel for
227  for (dim_t i=0; i<n; ++i)
228  BlockOps_MViP_2(&D[4*i], &x[2*i]);
229  } else if (n_block == 3) {
230 #pragma omp parallel for
231  for (dim_t i=0; i<n; ++i)
232  BlockOps_MViP_3(&D[9*i], &x[3*i]);
233  } else {
234  int failed = 0;
235 #pragma omp parallel for
236  for (dim_t i=0; i<n; ++i) {
237  const dim_t block_size = n_block*n_block;
238  BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
239  }
240  if (failed > 0) {
241  throw PasoException("BlockOps_solveAll: solution failed.");
242  }
243  }
244 }
245 
246 } // namespace paso
247 
248 #endif // __PASO_BLOCKOPS_H__
249 
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:39
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:147
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:166
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:186
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:98
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:58
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:116
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:89
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:45
Definition: AMG.cpp:45
int index_t
type for array/matrix indices used both globally and on each rank
Definition: DataTypes.h:59
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:77
static dim_t N
Definition: SparseMatrix_saveHB.cpp:37
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:218
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:80
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:120
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:199
PasoException exception class.
Definition: PasoException.h:32
index_t dim_t
Definition: DataTypes.h:64