escript  Revision_
BlockOps.h
Go to the documentation of this file.
1 
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2020 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014-2017 by Centre for Geoscience Computing (GeoComp)
14 * Development from 2019 by School of Earth and Environmental Sciences
15 **
16 *****************************************************************************/
17 
18 #ifndef __PASO_BLOCKOPS_H__
19 #define __PASO_BLOCKOPS_H__
20 
21 #include "Paso.h"
22 #include "PasoException.h"
23 
24 #include <cstring> // memcpy
25 
26 #ifdef ESYS_HAVE_LAPACK
27  #ifdef ESYS_MKL_LAPACK
28  #include <mkl_lapack.h>
29  #include <mkl_cblas.h>
30  #else
31  extern "C" {
32  #include <clapack.h>
33  #include <cblas.h>
34  }
35  #endif
36 #endif
37 
38 namespace paso {
39 
40 inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
41 {
42  memcpy((void*)R, (void*)V, N*sizeof(double));
43 }
44 
46 inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
47 {
48  const double S1 = V[0];
49  const double S2 = V[1];
50  const double A11 = mat[0];
51  const double A12 = mat[2];
52  const double A21 = mat[1];
53  const double A22 = mat[3];
54  R[0] -= A11 * S1 + A12 * S2;
55  R[1] -= A21 * S1 + A22 * S2;
56 }
57 
59 inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
60 {
61  const double S1 = V[0];
62  const double S2 = V[1];
63  const double S3 = V[2];
64  const double A11 = mat[0];
65  const double A21 = mat[1];
66  const double A31 = mat[2];
67  const double A12 = mat[3];
68  const double A22 = mat[4];
69  const double A32 = mat[5];
70  const double A13 = mat[6];
71  const double A23 = mat[7];
72  const double A33 = mat[8];
73  R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
74  R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
75  R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
76 }
77 
78 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
79 
81 inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
82 {
83 #ifdef ESYS_HAVE_LAPACK
84  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
85 #else
87 #endif
88 }
89 
90 inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
91 {
92 #ifdef ESYS_HAVE_LAPACK
93  cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
94 #else
96 #endif
97 }
98 
99 inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
100 {
101  const double A11 = A[0];
102  const double A12 = A[2];
103  const double A21 = A[1];
104  const double A22 = A[3];
105  double D = A11*A22-A12*A21;
106  if (std::abs(D) > 0) {
107  D = 1./D;
108  invA[0] = A22*D;
109  invA[1] = -A21*D;
110  invA[2] = -A12*D;
111  invA[3] = A11*D;
112  } else {
113  *failed = 1;
114  }
115 }
116 
117 inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
118 {
119  const double A11 = A[0];
120  const double A21 = A[1];
121  const double A31 = A[2];
122  const double A12 = A[3];
123  const double A22 = A[4];
124  const double A32 = A[5];
125  const double A13 = A[6];
126  const double A23 = A[7];
127  const double A33 = A[8];
128  double D = A11*(A22*A33-A23*A32) +
129  A12*(A31*A23-A21*A33) +
130  A13*(A21*A32-A31*A22);
131  if (std::abs(D) > 0) {
132  D = 1./D;
133  invA[0] = (A22*A33-A23*A32)*D;
134  invA[1] = (A31*A23-A21*A33)*D;
135  invA[2] = (A21*A32-A31*A22)*D;
136  invA[3] = (A13*A32-A12*A33)*D;
137  invA[4] = (A11*A33-A31*A13)*D;
138  invA[5] = (A12*A31-A11*A32)*D;
139  invA[6] = (A12*A23-A13*A22)*D;
140  invA[7] = (A13*A21-A11*A23)*D;
141  invA[8] = (A11*A22-A12*A21)*D;
142  } else {
143  *failed = 1;
144  }
145 }
146 
148 inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
149 {
150 #ifdef ESYS_HAVE_LAPACK
151 #ifdef ESYS_MKL_LAPACK
152  int res = 0;
153  dgetrf(&N, &N, mat, &N, pivot, &res);
154  if (res != 0)
155  *failed = 1;
156 #else
157  int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
158  if (res != 0)
159  *failed = 1;
160 #endif // ESYS_MKL_LAPACK
161 #else
163 #endif
164 }
165 
167 inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
168 {
169 #ifdef ESYS_HAVE_LAPACK
170 #ifdef ESYS_MKL_LAPACK
171  int res = 0;
172  int ONE = 1;
173  dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
174  if (res != 0)
175  *failed = 1;
176 #else
177  int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
178  if (res != 0)
179  *failed = 1;
180 #endif // ESYS_MKL_LAPACK
181 #else
183 #endif
184 }
185 
187 inline void BlockOps_MViP_2(const double* mat, double* V)
188 {
189  const double S1 = V[0];
190  const double S2 = V[1];
191  const double A11 = mat[0];
192  const double A12 = mat[2];
193  const double A21 = mat[1];
194  const double A22 = mat[3];
195  V[0] = A11 * S1 + A12 * S2;
196  V[1] = A21 * S1 + A22 * S2;
197 }
198 
200 inline void BlockOps_MViP_3(const double* mat, double* V)
201 {
202  const double S1 = V[0];
203  const double S2 = V[1];
204  const double S3 = V[2];
205  const double A11 = mat[0];
206  const double A21 = mat[1];
207  const double A31 = mat[2];
208  const double A12 = mat[3];
209  const double A22 = mat[4];
210  const double A32 = mat[5];
211  const double A13 = mat[6];
212  const double A23 = mat[7];
213  const double A33 = mat[8];
214  V[0] = A11 * S1 + A12 * S2 + A13 * S3;
215  V[1] = A21 * S1 + A22 * S2 + A23 * S3;
216  V[2] = A31 * S1 + A32 * S2 + A33 * S3;
217 }
218 
219 inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
220  index_t* pivot, double* x)
221 {
222  if (n_block == 1) {
223 #pragma omp parallel for
224  for (dim_t i=0; i<n; ++i)
225  x[i] *= D[i];
226  } else if (n_block == 2) {
227 #pragma omp parallel for
228  for (dim_t i=0; i<n; ++i)
229  BlockOps_MViP_2(&D[4*i], &x[2*i]);
230  } else if (n_block == 3) {
231 #pragma omp parallel for
232  for (dim_t i=0; i<n; ++i)
233  BlockOps_MViP_3(&D[9*i], &x[3*i]);
234  } else {
235  int failed = 0;
236 #pragma omp parallel for
237  for (dim_t i=0; i<n; ++i) {
238  const dim_t block_size = n_block*n_block;
239  BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
240  }
241  if (failed > 0) {
242  throw PasoException("BlockOps_solveAll: solution failed.");
243  }
244  }
245 }
246 
247 } // namespace paso
248 
249 #endif // __PASO_BLOCKOPS_H__
250 
paso::N
static dim_t N
Definition: SparseMatrix_saveHB.cpp:37
paso::BlockOps_MViP_3
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:200
paso::BlockOps_invM_2
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:99
paso::BlockOps_invM_N
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:148
paso::BlockOps_solveAll
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:219
Paso.h
escript::DataTypes::dim_t
index_t dim_t
Definition: DataTypes.h:66
paso::BlockOps_SMV_2
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:46
paso::BlockOps_invM_3
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:117
paso::BlockOps_solve_N
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:167
PASO_MISSING_CLAPACK
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:78
V
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:121
paso::BlockOps_Cpy_N
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:40
paso::PasoException
PasoException exception class.
Definition: PasoException.h:34
paso::BlockOps_MV_N
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:90
escript::DataTypes::index_t
int index_t
type for array/matrix indices used both globally and on each rank
Definition: DataTypes.h:61
paso::BlockOps_SMV_3
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:59
PasoException.h
paso::BlockOps_MViP_2
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:187
paso
Definition: BiCGStab.cpp:25
paso::BlockOps_SMV_N
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:81