17 #ifndef __PASO_BLOCKOPS_H__ 18 #define __PASO_BLOCKOPS_H__ 25 #ifdef ESYS_HAVE_LAPACK 26 #ifdef ESYS_MKL_LAPACK 27 #include <mkl_lapack.h> 28 #include <mkl_cblas.h> 41 memcpy((
void*)R, (
void*)V, N*
sizeof(
double));
47 const double S1 = V[0];
48 const double S2 = V[1];
49 const double A11 = mat[0];
50 const double A12 = mat[2];
51 const double A21 = mat[1];
52 const double A22 = mat[3];
53 R[0] -= A11 * S1 + A12 * S2;
54 R[1] -= A21 * S1 + A22 * S2;
60 const double S1 = V[0];
61 const double S2 = V[1];
62 const double S3 = V[2];
63 const double A11 = mat[0];
64 const double A21 = mat[1];
65 const double A31 = mat[2];
66 const double A12 = mat[3];
67 const double A22 = mat[4];
68 const double A32 = mat[5];
69 const double A13 = mat[6];
70 const double A23 = mat[7];
71 const double A33 = mat[8];
72 R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
73 R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
74 R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
77 #define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.") 82 #ifdef ESYS_HAVE_LAPACK 83 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
91 #ifdef ESYS_HAVE_LAPACK 92 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
100 const double A11 = A[0];
101 const double A12 = A[2];
102 const double A21 = A[1];
103 const double A22 = A[3];
104 double D = A11*A22-A12*A21;
105 if (std::abs(D) > 0) {
118 const double A11 = A[0];
119 const double A21 = A[1];
120 const double A31 = A[2];
121 const double A12 = A[3];
122 const double A22 = A[4];
123 const double A32 = A[5];
124 const double A13 = A[6];
125 const double A23 = A[7];
126 const double A33 = A[8];
127 double D = A11*(A22*A33-A23*A32) +
128 A12*(A31*A23-A21*A33) +
129 A13*(A21*A32-A31*A22);
130 if (std::abs(D) > 0) {
132 invA[0] = (A22*A33-A23*A32)*D;
133 invA[1] = (A31*A23-A21*A33)*D;
134 invA[2] = (A21*A32-A31*A22)*D;
135 invA[3] = (A13*A32-A12*A33)*D;
136 invA[4] = (A11*A33-A31*A13)*D;
137 invA[5] = (A12*A31-A11*A32)*D;
138 invA[6] = (A12*A23-A13*A22)*D;
139 invA[7] = (A13*A21-A11*A23)*D;
140 invA[8] = (A11*A22-A12*A21)*D;
149 #ifdef ESYS_HAVE_LAPACK 150 #ifdef ESYS_MKL_LAPACK 152 dgetrf(&N, &N, mat, &N, pivot, &res);
156 int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
159 #endif // ESYS_MKL_LAPACK 168 #ifdef ESYS_HAVE_LAPACK 169 #ifdef ESYS_MKL_LAPACK 172 dgetrs(
"N", &N, &ONE, mat, &N, pivot, X, &N, &res);
176 int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
179 #endif // ESYS_MKL_LAPACK 188 const double S1 = V[0];
189 const double S2 = V[1];
190 const double A11 = mat[0];
191 const double A12 = mat[2];
192 const double A21 = mat[1];
193 const double A22 = mat[3];
194 V[0] = A11 * S1 + A12 * S2;
195 V[1] = A21 * S1 + A22 * S2;
201 const double S1 = V[0];
202 const double S2 = V[1];
203 const double S3 = V[2];
204 const double A11 = mat[0];
205 const double A21 = mat[1];
206 const double A31 = mat[2];
207 const double A12 = mat[3];
208 const double A22 = mat[4];
209 const double A32 = mat[5];
210 const double A13 = mat[6];
211 const double A23 = mat[7];
212 const double A33 = mat[8];
213 V[0] = A11 * S1 + A12 * S2 + A13 * S3;
214 V[1] = A21 * S1 + A22 * S2 + A23 * S3;
215 V[2] = A31 * S1 + A32 * S2 + A33 * S3;
222 #pragma omp parallel for 223 for (
dim_t i=0; i<n; ++i)
225 }
else if (n_block == 2) {
226 #pragma omp parallel for 227 for (
dim_t i=0; i<n; ++i)
229 }
else if (n_block == 3) {
230 #pragma omp parallel for 231 for (
dim_t i=0; i<n; ++i)
235 #pragma omp parallel for 236 for (
dim_t i=0; i<n; ++i) {
237 const dim_t block_size = n_block*n_block;
238 BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
248 #endif // __PASO_BLOCKOPS_H__
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition: BlockOps.h:39
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition: BlockOps.h:147
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition: BlockOps.h:166
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition: BlockOps.h:186
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition: BlockOps.h:98
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition: BlockOps.h:58
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition: BlockOps.h:116
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition: BlockOps.h:89
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition: BlockOps.h:45
int index_t
type for array/matrix indices used both globally and on each rank
Definition: DataTypes.h:59
#define PASO_MISSING_CLAPACK
Definition: BlockOps.h:77
static dim_t N
Definition: SparseMatrix_saveHB.cpp:37
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition: BlockOps.h:218
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition: BlockOps.h:80
#define V(_K_, _I_)
Definition: ShapeFunctions.cpp:120
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition: BlockOps.h:199
PasoException exception class.
Definition: PasoException.h:32
index_t dim_t
Definition: DataTypes.h:64