DOLFIN-X
DOLFIN-X C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINX (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <cstdint>
12 #include <dolfinx/common/log.h>
13 #include <hdf5.h>
14 #include <mpi.h>
15 #include <string>
16 #include <vector>
17 
18 namespace dolfinx
19 {
20 
21 namespace io
22 {
23 
25 
27 {
28 #define HDF5_FAIL -1
29 public:
35  static hid_t open_file(MPI_Comm mpi_comm, const std::string& filename,
36  const std::string& mode, const bool use_mpi_io);
37 
40  static void close_file(const hid_t handle);
41 
44  static void flush_file(const hid_t handle);
45 
49  static std::string get_filename(hid_t handle);
50 
61  template <typename T>
62  static void write_dataset(const hid_t handle, const std::string& dataset_path,
63  const T* data,
64  const std::array<std::int64_t, 2>& range,
65  const std::vector<std::int64_t>& global_size,
66  bool use_mpi_io, bool use_chunking);
67 
76  template <typename T>
77  static std::vector<T> read_dataset(const hid_t handle,
78  const std::string& dataset_path,
79  const std::array<std::int64_t, 2>& range);
80 
85  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
86 
91  static std::vector<std::int64_t>
92  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
93 
100  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
101 
106  static bool get_mpi_atomicity(const hid_t handle);
107 
108 private:
112  static void add_group(const hid_t handle, const std::string& dataset_path);
113 
114  // Return HDF5 data type
115  template <typename T>
116  static hid_t hdf5_type()
117  {
118  throw std::runtime_error("Cannot get HDF5 primitive data type. "
119  "No specialised function for this data type");
120  }
121 };
123 //---------------------------------------------------------------------------
124 template <>
125 inline hid_t HDF5Interface::hdf5_type<float>()
126 {
127  return H5T_NATIVE_FLOAT;
128 }
129 //---------------------------------------------------------------------------
130 template <>
131 inline hid_t HDF5Interface::hdf5_type<double>()
132 {
133  return H5T_NATIVE_DOUBLE;
134 }
135 //---------------------------------------------------------------------------
136 template <>
137 inline hid_t HDF5Interface::hdf5_type<int>()
138 {
139  return H5T_NATIVE_INT;
140 }
141 //---------------------------------------------------------------------------
142 template <>
143 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
144 {
145  return H5T_NATIVE_INT64;
146 }
147 //---------------------------------------------------------------------------
148 template <>
149 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
150 {
151  if (sizeof(std::size_t) == sizeof(unsigned long))
152  return H5T_NATIVE_ULONG;
153  else if (sizeof(std::size_t) == sizeof(unsigned int))
154  return H5T_NATIVE_UINT;
155 
156  throw std::runtime_error("Cannot determine size of std::size_t. "
157  "std::size_t is not the same size as long or int");
158 }
159 //---------------------------------------------------------------------------
160 template <typename T>
161 inline void HDF5Interface::write_dataset(
162  const hid_t file_handle, const std::string& dataset_path, const T* data,
163  const std::array<std::int64_t, 2>& range,
164  const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
165 {
166  // Data rank
167  const std::size_t rank = global_size.size();
168  assert(rank != 0);
169 
170  if (rank > 2)
171  {
172  throw std::runtime_error("Cannot write dataset to HDF5 file"
173  "Only rank 1 and rank 2 dataset are supported");
174  }
175 
176  // Get HDF5 data type
177  const hid_t h5type = hdf5_type<T>();
178 
179  // Hyperslab selection parameters
180  std::vector<hsize_t> count(global_size.begin(), global_size.end());
181  count[0] = range[1] - range[0];
182 
183  // Data offsets
184  std::vector<hsize_t> offset(rank, 0);
185  offset[0] = range[0];
186 
187  // Dataset dimensions
188  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
189 
190  // Generic status report
191  herr_t status;
192 
193  // Create a global data space
194  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
195  assert(filespace0 != HDF5_FAIL);
196 
197  // Set chunking parameters
198  hid_t chunking_properties;
199  if (use_chunking)
200  {
201  // Set chunk size and limit to 1kB min/1MB max
202  hsize_t chunk_size = dimsf[0] / 2;
203  if (chunk_size > 1048576)
204  chunk_size = 1048576;
205  if (chunk_size < 1024)
206  chunk_size = 1024;
207 
208  hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
209  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
210  H5Pset_chunk(chunking_properties, rank, chunk_dims);
211  }
212  else
213  chunking_properties = H5P_DEFAULT;
214 
215  // Check that group exists and recursively create if required
216  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
217  add_group(file_handle, group_name);
218 
219  // Create global dataset (using dataset_path)
220  const hid_t dset_id
221  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
222  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
223  assert(dset_id != HDF5_FAIL);
224 
225  // Close global data space
226  status = H5Sclose(filespace0);
227  assert(status != HDF5_FAIL);
228 
229  // Create a local data space
230  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
231  assert(memspace != HDF5_FAIL);
232 
233  // Create a file dataspace within the global space - a hyperslab
234  const hid_t filespace1 = H5Dget_space(dset_id);
235  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
236  nullptr, count.data(), nullptr);
237  assert(status != HDF5_FAIL);
238 
239  // Set parallel access
240  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
241  if (use_mpi_io)
242  {
243 #ifdef H5_HAVE_PARALLEL
244  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
245  assert(status != HDF5_FAIL);
246 #else
247  throw std::runtime_error("HDF5 library has not been configured with MPI");
248 #endif
249  }
250 
251  // Write local dataset into selected hyperslab
252  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
253  assert(status != HDF5_FAIL);
254 
255  if (use_chunking)
256  {
257  // Close chunking properties
258  status = H5Pclose(chunking_properties);
259  assert(status != HDF5_FAIL);
260  }
261 
262  // Close dataset collectively
263  status = H5Dclose(dset_id);
264  assert(status != HDF5_FAIL);
265 
266  // Close hyperslab
267  status = H5Sclose(filespace1);
268  assert(status != HDF5_FAIL);
269 
270  // Close local dataset
271  status = H5Sclose(memspace);
272  assert(status != HDF5_FAIL);
273 
274  // Release file-access template
275  status = H5Pclose(plist_id);
276  assert(status != HDF5_FAIL);
277 }
278 //---------------------------------------------------------------------------
279 template <typename T>
280 inline std::vector<T>
281 HDF5Interface::read_dataset(const hid_t file_handle,
282  const std::string& dataset_path,
283  const std::array<std::int64_t, 2>& range)
284 {
285  // Open the dataset
286  const hid_t dset_id
287  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
288  assert(dset_id != HDF5_FAIL);
289 
290  // Open dataspace
291  const hid_t dataspace = H5Dget_space(dset_id);
292  assert(dataspace != HDF5_FAIL);
293 
294  // Get rank of data set
295  const int rank = H5Sget_simple_extent_ndims(dataspace);
296  assert(rank >= 0);
297 
298  if (rank > 2)
299  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
300 
301  // Allocate data for shape
302  std::vector<hsize_t> shape(rank);
303 
304  // Get size in each dimension
305  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
306  assert(ndims == rank);
307 
308  // Hyperslab selection
309  std::vector<hsize_t> offset(rank, 0);
310  std::vector<hsize_t> count = shape;
311  if (range[0] != -1 and range[1] != -1)
312  {
313  offset[0] = range[0];
314  count[0] = range[1] - range[0];
315  }
316  else
317  offset[0] = 0;
318 
319  // Select a block in the dataset beginning at offset[], with
320  // size=count[]
321  herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset.data(),
322  nullptr, count.data(), nullptr);
323  assert(status != HDF5_FAIL);
324 
325  // Create a memory dataspace
326  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
327  assert(memspace != HDF5_FAIL);
328 
329  // Create local data to read into
330  std::size_t data_size = 1;
331  for (std::size_t i = 0; i < count.size(); ++i)
332  data_size *= count[i];
333  std::vector<T> data(data_size);
334 
335  // Read data on each process
336  const hid_t h5type = hdf5_type<T>();
337  status
338  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
339  assert(status != HDF5_FAIL);
340 
341  // Close dataspace
342  status = H5Sclose(dataspace);
343  assert(status != HDF5_FAIL);
344 
345  // Close memspace
346  status = H5Sclose(memspace);
347  assert(status != HDF5_FAIL);
348 
349  // Close dataset
350  status = H5Dclose(dset_id);
351  assert(status != HDF5_FAIL);
352 
353  return data;
354 }
355 //---------------------------------------------------------------------------
357 } // namespace io
358 } // namespace dolfinx
dolfinx::io::HDF5Interface::close_file
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:117
dolfinx::io::HDF5Interface
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:27
dolfinx::io::HDF5Interface::open_file
static hid_t open_file(MPI_Comm mpi_comm, const std::string &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:59
dolfinx::io::HDF5Interface::set_mpi_atomicity
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:229
dolfinx::io::HDF5Interface::has_dataset
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:146
dolfinx::io::HDF5Interface::read_dataset
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
dolfinx::io::HDF5Interface::get_dataset_shape
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:196
dolfinx::io::HDF5Interface::flush_file
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:123
dolfinx::io::HDF5Interface::write_dataset
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
dolfinx::io::HDF5Interface::get_mpi_atomicity
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:237
dolfinx::io::HDF5Interface::get_filename
static std::string get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:129