Go to the documentation of this file.
17 #ifndef __PASO_BLOCKOPS_H__
18 #define __PASO_BLOCKOPS_H__
25 #ifdef ESYS_HAVE_LAPACK
26 #ifdef ESYS_MKL_LAPACK
27 #include <mkl_lapack.h>
28 #include <mkl_cblas.h>
41 memcpy((
void*)R, (
void*)
V,
N*
sizeof(
double));
45 inline void BlockOps_SMV_2(
double* R,
const double* mat,
const double*
V)
47 const double S1 =
V[0];
48 const double S2 =
V[1];
49 const double A11 = mat[0];
50 const double A12 = mat[2];
51 const double A21 = mat[1];
52 const double A22 = mat[3];
53 R[0] -= A11 * S1 + A12 * S2;
54 R[1] -= A21 * S1 + A22 * S2;
58 inline void BlockOps_SMV_3(
double* R,
const double* mat,
const double*
V)
60 const double S1 =
V[0];
61 const double S2 =
V[1];
62 const double S3 =
V[2];
63 const double A11 = mat[0];
64 const double A21 = mat[1];
65 const double A31 = mat[2];
66 const double A12 = mat[3];
67 const double A22 = mat[4];
68 const double A32 = mat[5];
69 const double A13 = mat[6];
70 const double A23 = mat[7];
71 const double A33 = mat[8];
72 R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
73 R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
74 R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
77 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
82 #ifdef ESYS_HAVE_LAPACK
83 cblas_dgemv(CblasColMajor,CblasNoTrans,
N,
N, -1., mat,
N,
V, 1, 1., R, 1);
91 #ifdef ESYS_HAVE_LAPACK
92 cblas_dgemv(CblasColMajor,CblasNoTrans,
N,
N, 1., mat,
N,
V, 1, 0., R, 1);
100 const double A11 = A[0];
101 const double A12 = A[2];
102 const double A21 = A[1];
103 const double A22 = A[3];
104 double D = A11*A22-A12*A21;
105 if (std::abs(D) > 0) {
116 inline void BlockOps_invM_3(
double* invA,
const double* A,
int* failed)
118 const double A11 = A[0];
119 const double A21 = A[1];
120 const double A31 = A[2];
121 const double A12 = A[3];
122 const double A22 = A[4];
123 const double A32 = A[5];
124 const double A13 = A[6];
125 const double A23 = A[7];
126 const double A33 = A[8];
127 double D = A11*(A22*A33-A23*A32) +
128 A12*(A31*A23-A21*A33) +
129 A13*(A21*A32-A31*A22);
130 if (std::abs(D) > 0) {
132 invA[0] = (A22*A33-A23*A32)*D;
133 invA[1] = (A31*A23-A21*A33)*D;
134 invA[2] = (A21*A32-A31*A22)*D;
135 invA[3] = (A13*A32-A12*A33)*D;
136 invA[4] = (A11*A33-A31*A13)*D;
137 invA[5] = (A12*A31-A11*A32)*D;
138 invA[6] = (A12*A23-A13*A22)*D;
139 invA[7] = (A13*A21-A11*A23)*D;
140 invA[8] = (A11*A22-A12*A21)*D;
149 #ifdef ESYS_HAVE_LAPACK
150 #ifdef ESYS_MKL_LAPACK
152 dgetrf(&
N, &
N, mat, &
N, pivot, &res);
156 int res = clapack_dgetrf(CblasColMajor,
N,
N, mat,
N, pivot);
159 #endif // ESYS_MKL_LAPACK
168 #ifdef ESYS_HAVE_LAPACK
169 #ifdef ESYS_MKL_LAPACK
172 dgetrs(
"N", &
N, &ONE, mat, &
N, pivot, X, &
N, &res);
176 int res = clapack_dgetrs(CblasColMajor, CblasNoTrans,
N, 1, mat,
N, pivot, X,
N);
179 #endif // ESYS_MKL_LAPACK
188 const double S1 =
V[0];
189 const double S2 =
V[1];
190 const double A11 = mat[0];
191 const double A12 = mat[2];
192 const double A21 = mat[1];
193 const double A22 = mat[3];
194 V[0] = A11 * S1 + A12 * S2;
195 V[1] = A21 * S1 + A22 * S2;
201 const double S1 =
V[0];
202 const double S2 =
V[1];
203 const double S3 =
V[2];
204 const double A11 = mat[0];
205 const double A21 = mat[1];
206 const double A31 = mat[2];
207 const double A12 = mat[3];
208 const double A22 = mat[4];
209 const double A32 = mat[5];
210 const double A13 = mat[6];
211 const double A23 = mat[7];
212 const double A33 = mat[8];
213 V[0] = A11 * S1 + A12 * S2 + A13 * S3;
214 V[1] = A21 * S1 + A22 * S2 + A23 * S3;
215 V[2] = A31 * S1 + A32 * S2 + A33 * S3;
222 #pragma omp parallel for
223 for (
dim_t i=0; i<n; ++i)
225 }
else if (n_block == 2) {
226 #pragma omp parallel for
227 for (
dim_t i=0; i<n; ++i)
229 }
else if (n_block == 3) {
230 #pragma omp parallel for
231 for (
dim_t i=0; i<n; ++i)
235 #pragma omp parallel for
236 for (
dim_t i=0; i<n; ++i) {
237 const dim_t block_size = n_block*n_block;
238 BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
241 throw PasoException(
"BlockOps_solveAll: solution failed.");
248 #endif // __PASO_BLOCKOPS_H__
static dim_t N
Definition: SparseMatrix_saveHB.cpp:48
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:210
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:109
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:158
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:229
index_t dim_t
Definition: DataTypes.h:87
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:56
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:127
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:177
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:88
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:133
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:50
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:100
int index_t
type for array/matrix indices used both globally and on each rank
Definition: DataTypes.h:82
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:69
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:197
Definition: BiCGStab.cpp:25
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:91