17 #ifndef __deal2__chunk_sparse_matrix_templates_h 18 #define __deal2__chunk_sparse_matrix_templates_h 21 #include <deal.II/base/template_constraints.h> 22 #include <deal.II/base/parallel.h> 23 #include <deal.II/lac/chunk_sparse_matrix.h> 24 #include <deal.II/lac/vector.h> 25 #include <deal.II/lac/full_matrix.h> 37 #include <deal.II/base/thread_management.h> 38 #include <deal.II/base/multithread_info.h> 68 chunk_vmult_add (
const size_type chunk_size,
70 const SrcIterator src,
75 for (size_type i=0; i<chunk_size;
76 ++i, matrix_row += chunk_size)
78 typename std::iterator_traits<DstIterator>::value_type
81 for (size_type j=0; j<chunk_size; ++j)
82 sum += matrix_row[j] * src[j];
99 chunk_vmult_subtract (
const size_type chunk_size,
101 const SrcIterator src,
106 for (size_type i=0; i<chunk_size;
107 ++i, matrix_row += chunk_size)
109 typename std::iterator_traits<DstIterator>::value_type
112 for (size_type j=0; j<chunk_size; ++j)
113 sum += matrix_row[j] * src[j];
126 typename SrcIterator,
127 typename DstIterator>
130 chunk_Tvmult_add (
const size_type chunk_size,
132 const SrcIterator src,
135 for (size_type i=0; i<chunk_size; ++i)
137 typename std::iterator_traits<DstIterator>::value_type
140 for (size_type j=0; j<chunk_size; ++j)
141 sum += matrix[j*chunk_size+i] * src[j];
152 template <
typename result_type,
154 typename SrcIterator1,
155 typename SrcIterator2>
158 chunk_matrix_scalar_product (
const size_type chunk_size,
159 const MatrixIterator matrix,
160 const SrcIterator1 u,
161 const SrcIterator2 v)
163 result_type result = 0;
165 MatrixIterator matrix_row = matrix;
167 for (size_type i=0; i<chunk_size;
168 ++i, matrix_row += chunk_size)
170 typename std::iterator_traits<SrcIterator2>::value_type
173 for (size_type j=0; j<chunk_size; ++j)
174 sum += matrix_row[j] * v[j];
176 result += u[i] * sum;
192 template <
typename number,
196 const unsigned int begin_row,
197 const unsigned int end_row,
198 const number *values,
199 const std::size_t *rowstart,
200 const size_type *colnums,
204 const size_type m = cols.
n_rows();
205 const size_type n = cols.
n_cols();
210 const size_type n_filled_last_rows = m % chunk_size;
211 const size_type n_filled_last_cols = n % chunk_size;
213 const size_type last_regular_row = n_filled_last_rows > 0 ?
214 std::min(m/chunk_size,
215 static_cast<size_type>(end_row)) :
217 const size_type irregular_col = n/chunk_size;
219 typename OutVector::iterator dst_ptr = dst.begin()+chunk_size*begin_row;
220 const number *val_ptr= &values[rowstart[begin_row]*chunk_size*chunk_size];
221 const size_type *colnum_ptr = &colnums[rowstart[begin_row]];
222 for (
unsigned int chunk_row=begin_row; chunk_row<last_regular_row;
225 const number *
const val_end_of_row = &values[rowstart[chunk_row+1] *
226 chunk_size * chunk_size];
227 while (val_ptr != val_end_of_row)
229 if (*colnum_ptr != irregular_col)
230 chunk_vmult_add (chunk_size,
232 src.begin() + *colnum_ptr * chunk_size,
236 for (size_type r=0; r<chunk_size; ++r)
237 for (size_type c=0; c<n_filled_last_cols; ++c)
238 dst_ptr[r] += (val_ptr[r*chunk_size + c] *
239 src(*colnum_ptr * chunk_size + c));
242 val_ptr += chunk_size * chunk_size;
245 dst_ptr += chunk_size;
249 if (n_filled_last_rows > 0 && end_row == (m/chunk_size+1))
251 const size_type chunk_row = last_regular_row;
253 const number *
const val_end_of_row = &values[rowstart[chunk_row+1] *
254 chunk_size * chunk_size];
255 while (val_ptr != val_end_of_row)
257 if (*colnum_ptr != irregular_col)
260 for (size_type r=0; r<n_filled_last_rows; ++r)
261 for (size_type c=0; c<chunk_size; ++c)
263 += (val_ptr[r*chunk_size + c] *
264 src(*colnum_ptr * chunk_size + c));
268 for (size_type r=0; r<n_filled_last_rows; ++r)
269 for (size_type c=0; c<n_filled_last_cols; ++c)
271 += (val_ptr[r*chunk_size + c] *
272 src(*colnum_ptr * chunk_size + c));
275 val_ptr += chunk_size * chunk_size;
278 Assert(std::size_t(colnum_ptr-&colnums[0]) == rowstart[end_row],
280 Assert(std::size_t(val_ptr-&values[0]) ==
281 rowstart[end_row] * chunk_size * chunk_size,
289 template <
typename number>
292 cols(0,
"ChunkSparseMatrix"),
299 template <
typename number>
303 cols(0,
"ChunkSparseMatrix"),
314 template <
typename number>
327 template <
typename number>
330 cols(0,
"ChunkSparseMatrix"),
339 template <
typename number>
343 cols(0,
"ChunkSparseMatrix"),
351 for (size_type i=0; i<
n(); ++i)
357 template <
typename number>
373 void zero_subrange (
const unsigned int begin,
374 const unsigned int end,
377 std::memset (dst+begin,0,(end-begin)*
sizeof(T));
384 template <
typename number>
392 ChunkSparsityPattern::ExcNotCompressed());
403 const unsigned int grain_size =
404 internal::SparseMatrix::minimum_parallel_grain_size *
405 (matrix_size+
m()) /
m();
406 if (matrix_size>grain_size)
408 std_cxx1x::bind(&internal::ChunkSparseMatrix::template
409 zero_subrange<number>,
410 std_cxx1x::_1, std_cxx1x::_2,
413 else if (matrix_size > 0)
414 std::memset (&
val[0], 0, matrix_size*
sizeof(number));
421 template <
typename number>
431 for (size_type i=0; i<
n(); ++i)
439 template <
typename number>
458 chunk_size * chunk_size;
475 template <
typename number>
487 template <
typename number>
494 return cols->
empty();
499 template <
typename number>
509 template <
typename number>
519 return std::count_if(&
val[0],
521 chunk_size * chunk_size],
522 std::bind2nd(std::not_equal_to<double>(), 0));
527 template <
typename number>
539 template <
typename number>
540 template <
typename somenumber>
546 Assert (cols == matrix.
cols, ExcDifferentChunkSparsityPatterns());
550 std::copy (&matrix.
val[0],
552 * chunk_size * chunk_size],
560 template <
typename number>
561 template <
typename somenumber>
569 for (size_type row=0; row<matrix.
m(); ++row)
570 for (size_type col=0; col<matrix.
n(); ++col)
571 if (matrix(row,col) != 0)
572 set (row, col, matrix(row,col));
577 template <
typename number>
578 template <
typename somenumber>
585 Assert (cols == matrix.
cols, ExcDifferentChunkSparsityPatterns());
589 number *val_ptr = &
val[0];
590 const somenumber *matrix_ptr = &matrix.
val[0];
592 * chunk_size * chunk_size];
594 while (val_ptr != end_ptr)
595 *val_ptr++ += factor **matrix_ptr++;
599 template <
typename number>
600 template <
class OutVector,
class InVector>
603 const InVector &src)
const 620 template <
typename number>
621 template <
class OutVector,
class InVector>
624 const InVector &src)
const 648 template <
typename number>
649 template <
class OutVector,
class InVector>
652 const InVector &src)
const 661 std_cxx1x::bind (&internal::ChunkSparseMatrix::vmult_add_on_subrange
662 <number,InVector,OutVector>,
663 std_cxx1x::cref(*cols),
664 std_cxx1x::_1, std_cxx1x::_2,
668 std_cxx1x::cref(src),
669 std_cxx1x::ref(dst)),
670 internal::SparseMatrix::minimum_parallel_grain_size/cols->
chunk_size+1);
675 template <
typename number>
676 template <
class OutVector,
class InVector>
679 const InVector &src)
const 692 const bool rows_have_padding = (
m() % cols->
chunk_size != 0),
695 const size_type n_regular_chunk_rows
696 = (rows_have_padding ?
702 const number *val_ptr =
val;
705 for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
710 while (val_ptr != val_end_of_row)
712 if ((cols_have_padding ==
false)
715 internal::ChunkSparseMatrix::chunk_Tvmult_add
719 dst.begin() + *colnum_ptr * cols->
chunk_size);
734 if (rows_have_padding)
736 const size_type chunk_row = n_chunk_rows - 1;
741 while (val_ptr != val_end_of_row)
743 if ((cols_have_padding ==
false)
769 template <
typename number>
770 template <
typename somenumber>
779 somenumber result = 0;
788 const bool rows_have_padding = (
m() % cols->
chunk_size != 0),
791 const size_type n_regular_chunk_rows
792 = (rows_have_padding ?
796 const number *val_ptr =
val;
798 typename Vector<somenumber>::const_iterator v_ptr = v.
begin();
800 for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
805 while (val_ptr != val_end_of_row)
807 if ((cols_have_padding ==
false)
811 internal::ChunkSparseMatrix::
812 chunk_matrix_scalar_product<somenumber>
836 if (rows_have_padding)
838 const size_type chunk_row = n_chunk_rows - 1;
843 while (val_ptr != val_end_of_row)
845 if ((cols_have_padding ==
false)
878 template <
typename number>
879 template <
typename somenumber>
890 somenumber result = 0;
896 const bool rows_have_padding = (
m() % cols->
chunk_size != 0),
899 const size_type n_regular_chunk_rows
900 = (rows_have_padding ?
904 const number *val_ptr =
val;
906 typename Vector<somenumber>::const_iterator u_ptr = u.
begin();
908 for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
913 while (val_ptr != val_end_of_row)
915 if ((cols_have_padding ==
false)
919 internal::ChunkSparseMatrix::
920 chunk_matrix_scalar_product<somenumber>
944 if (rows_have_padding)
946 const size_type chunk_row = n_chunk_rows - 1;
951 while (val_ptr != val_end_of_row)
953 if ((cols_have_padding ==
false)
986 template <
typename number>
1001 for (size_type chunk_row=0; chunk_row<n_chunk_rows; ++chunk_row)
1003 j<cols->sparsity_pattern.rowstart[chunk_row+1] ; ++j)
1013 return column_sums.linfty_norm();
1018 template <
typename number>
1037 for (size_type chunk_row=0; chunk_row<n_chunk_rows; ++chunk_row)
1039 j<cols->sparsity_pattern.rowstart[chunk_row+1] ; ++j)
1042 row_sums(chunk_row * cols->
chunk_size + r) +=
1048 return row_sums.linfty_norm();
1053 template <
typename number>
1062 for (
const number *ptr = &
val[0]; ptr != &
val[
max_len]; ++ptr)
1065 return std::sqrt (norm_sqr);
1070 template <
typename number>
1071 template <
typename somenumber>
1083 Assert (&u != &dst, ExcSourceEqualsDestination());
1098 const bool rows_have_padding = (
m() % cols->
chunk_size != 0),
1099 cols_have_padding = (
n() % cols->
chunk_size != 0);
1101 const size_type n_regular_chunk_rows
1102 = (rows_have_padding ?
1106 const number *val_ptr =
val;
1108 typename Vector<somenumber>::iterator dst_ptr = dst.
begin();
1110 for (size_type chunk_row=0; chunk_row<n_regular_chunk_rows; ++chunk_row)
1115 while (val_ptr != val_end_of_row)
1117 if ((cols_have_padding ==
false)
1120 internal::ChunkSparseMatrix::chunk_vmult_subtract
1128 for (size_type c=0; c<
n() % cols->
chunk_size; ++c)
1142 if (rows_have_padding)
1144 const size_type chunk_row = n_chunk_rows - 1;
1149 while (val_ptr != val_end_of_row)
1151 if ((cols_have_padding ==
false)
1156 for (size_type r=0; r<
m() % cols->
chunk_size; ++r)
1164 for (size_type r=0; r<
m() % cols->
chunk_size; ++r)
1165 for (size_type c=0; c<
n() % cols->
chunk_size; ++c)
1184 template <
typename number>
1185 template <
typename somenumber>
1189 const number )
const 1203 template <
typename number>
1204 template <
typename somenumber>
1208 const number )
const 1223 template <
typename number>
1224 template <
typename somenumber>
1228 const number om)
const 1240 template <
typename number>
1241 template <
typename somenumber>
1245 const number om)
const 1257 template <
typename number>
1258 template <
typename somenumber>
1261 const number )
const 1272 template <
typename number>
1273 template <
typename somenumber>
1276 const number )
const 1287 template <
typename number>
1288 template <
typename somenumber>
1291 const std::vector<size_type> &permutation,
1292 const std::vector<size_type> &inverse_permutation,
1293 const number )
const 1300 Assert (
m() == permutation.size(),
1302 Assert (
m() == inverse_permutation.size(),
1309 template <
typename number>
1310 template <
typename somenumber>
1313 const std::vector<size_type> &permutation,
1314 const std::vector<size_type> &inverse_permutation,
1315 const number )
const 1322 Assert (
m() == permutation.size(),
1324 Assert (
m() == inverse_permutation.size(),
1332 template <
typename number>
1333 template <
typename somenumber>
1337 const number )
const 1351 template <
typename number>
1352 template <
typename somenumber>
1356 const number )
const 1370 template <
typename number>
1371 template <
typename somenumber>
1375 const number om)
const 1383 template <
typename number>
1384 template <
typename somenumber>
1387 const number )
const 1400 template <
typename number>
1414 template <
typename number>
1416 const unsigned int precision,
1417 const bool scientific,
1418 const unsigned int width_,
1419 const char *zero_string,
1420 const double denominator)
const 1427 unsigned int width = width_;
1431 std::ios::fmtflags old_flags = out.flags();
1432 unsigned int old_precision = out.precision (precision);
1436 out.setf (std::ios::scientific, std::ios::floatfield);
1438 width = precision+7;
1442 out.setf (std::ios::fixed, std::ios::floatfield);
1444 width = precision+2;
1447 for (size_type i=0; i<
m(); ++i)
1449 for (size_type j=0; j<
n(); ++j)
1451 out << std::setw(width)
1454 out << std::setw(width) << zero_string <<
' ';
1460 out.precision(old_precision);
1461 out.flags (old_flags);
1466 template <
typename number>
1468 const double threshold)
const 1481 for (size_type d=0; d<chunk_size; ++d)
1485 for (size_type e=0; e<chunk_size; ++e)
1490 for (size_type e=0; e<chunk_size; ++e)
1495 for (size_type e=0; e<chunk_size; ++e)
1506 template <
typename number>
1513 out <<
'[' <<
max_len <<
"][";
1515 out.write (reinterpret_cast<const char *>(&
val[0]),
1516 reinterpret_cast<const char *>(&
val[max_len])
1517 - reinterpret_cast<const char *>(&
val[0]));
1525 template <
typename number>
1548 in.read (reinterpret_cast<char *>(&
val[0]),
1549 reinterpret_cast<char *>(&
val[max_len])
1550 - reinterpret_cast<char *>(&
val[0]));
1557 template <
typename number>
1561 return sizeof(*this) +
max_len*
sizeof(number);
1565 DEAL_II_NAMESPACE_CLOSE
void TSOR_step(Vector< somenumber > &v, const Vector< somenumber > &b, const number om=1.) const
void TSOR(Vector< somenumber > &v, const number om=1.) const
void SSOR(Vector< somenumber > &v, const number omega=1.) const
void print(std::ostream &out) const
::ExceptionBase & ExcMessage(std::string arg1)
void print_pattern(std::ostream &out, const double threshold=0.) const
const_iterator end() const
types::global_dof_index size_type
size_type n_nonzero_elements() const
#define AssertThrow(cond, exc)
static real_type abs(const number &x)
void SOR(Vector< somenumber > &v, const number om=1.) const
size_type n_actually_nonzero_elements() const
size_type n_nonzero_elements() const
void apply_to_subranges(const RangeType &begin, const typename identity< RangeType >::type &end, const Function &f, const unsigned int grainsize)
void vmult_add(OutVector &dst, const InVector &src) const
SparsityPattern sparsity_pattern
size_type n_nonzero_elements() const
void SSOR_step(Vector< somenumber > &v, const Vector< somenumber > &b, const number om=1.) const
void vmult(OutVector &dst, const InVector &src) const
::ExceptionBase & ExcIO()
size_type get_chunk_size() const
void SOR_step(Vector< somenumber > &v, const Vector< somenumber > &b, const number om=1.) const
numbers::NumberTraits< number >::real_type real_type
T sum(const T &t, const MPI_Comm &mpi_communicator)
virtual ~ChunkSparseMatrix()
unsigned int global_dof_index
#define Assert(cond, exc)
real_type frobenius_norm() const
void print_formatted(std::ostream &out, const unsigned int precision=3, const bool scientific=true, const unsigned int width=0, const char *zero_string=" ", const double denominator=1.) const
void block_write(std::ostream &out) const
somenumber matrix_scalar_product(const Vector< somenumber > &u, const Vector< somenumber > &v) const
static bool equal(const T *p1, const T *p2)
void PSOR(Vector< somenumber > &v, const std::vector< size_type > &permutation, const std::vector< size_type > &inverse_permutation, const number om=1.) const
void precondition_Jacobi(Vector< somenumber > &dst, const Vector< somenumber > &src, const number omega=1.) const
ChunkSparseMatrix< number > & operator=(const ChunkSparseMatrix< number > &)
::ExceptionBase & ExcInvalidConstructorCall()
ChunkSparseMatrix< number > & copy_from(const ChunkSparseMatrix< somenumber > &source)
void precondition_TSOR(Vector< somenumber > &dst, const Vector< somenumber > &src, const number om=1.) const
real_type l2_norm() const
somenumber residual(Vector< somenumber > &dst, const Vector< somenumber > &x, const Vector< somenumber > &b) const
void precondition_SOR(Vector< somenumber > &dst, const Vector< somenumber > &src, const number om=1.) const
void precondition_SSOR(Vector< somenumber > &dst, const Vector< somenumber > &src, const number om=1.) const
void block_read(std::istream &in)
::ExceptionBase & ExcNotImplemented()
::ExceptionBase & ExcNotInitialized()
::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)
std::size_t memory_consumption() const
virtual void reinit(const ChunkSparsityPattern &sparsity)
::ExceptionBase & ExcScalarAssignmentOnlyForZeroValue()
void add(const size_type i, const size_type j, const number value)
real_type l1_norm() const
SmartPointer< const ChunkSparsityPattern, ChunkSparseMatrix< number > > cols
somenumber matrix_norm_square(const Vector< somenumber > &v) const
const_iterator begin() const
::ExceptionBase & ExcInternalError()
void Tvmult(OutVector &dst, const InVector &src) const
real_type linfty_norm() const
void TPSOR(Vector< somenumber > &v, const std::vector< size_type > &permutation, const std::vector< size_type > &inverse_permutation, const number om=1.) const
void Tvmult_add(OutVector &dst, const InVector &src) const
static const size_type invalid_entry