HalideBuffer.h

/** \file
 * Defines a Buffer type that wraps a halide_buffer_t and adds
 * functionality, along with methods for more conveniently iterating
 * over the samples in a halide_buffer_t outside of Halide code. */

#ifndef HALIDE_RUNTIME_BUFFER_H
#define HALIDE_RUNTIME_BUFFER_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <vector>

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#endif

#include "HalideRuntime.h"

#ifdef _MSC_VER
#include <malloc.h>
#define HALIDE_ALLOCA _alloca
#else
#define HALIDE_ALLOCA __builtin_alloca
#endif

// gcc 5.1 has a false positive warning on this code
#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif

#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
#endif

namespace Halide {
namespace Runtime {

// Forward-declare our Buffer class
template<typename T, int Dims, int InClassDimStorage>
class Buffer;

// A helper to check if a parameter pack is entirely implicitly
// int-convertible, for use with std::enable_if
template<typename... Args>
struct AllInts : std::false_type {};

template<>
struct AllInts<> : std::true_type {};

template<typename T, typename... Args>
struct AllInts<T, Args...> {
    static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
};

// Floats and doubles are technically implicitly int-convertible, but
// doing so produces a warning we treat as an error, so just disallow
// it here.
template<typename... Args>
struct AllInts<float, Args...> : std::false_type {};

template<typename... Args>
struct AllInts<double, Args...> : std::false_type {};
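
// For example, AllInts is what lets the variadic Buffer constructors
// below accept Buffer<int>(640, 480) but reject Buffer<int>(640, 0.5f)
// at compile time:
//
//     static_assert(AllInts<int, long, char>::value);
//     static_assert(!AllInts<int, float>::value);
//     static_assert(AllInts<>::value);  // the empty pack is trivially all ints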

// A helper to detect if there are any zeros in a container
namespace Internal {
template<typename Container>
bool any_zero(const Container &c) {
    for (int i : c) {
        if (i == 0) {
            return true;
        }
    }
    return false;
}
}  // namespace Internal

/** A struct acting as a header for allocations owned by the Buffer
 * class itself. */
struct AllocationHeader {
    void (*deallocate_fn)(void *);
    std::atomic<int> ref_count;

    // Note that ref_count always starts at 1
    explicit AllocationHeader(void (*deallocate_fn)(void *))
        : deallocate_fn(deallocate_fn), ref_count(1) {
    }
};

/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
enum struct BufferDeviceOwnership : int {
    Allocated,               ///< halide_device_free will be called when device ref count goes to zero
    WrappedNative,           ///< halide_device_detach_native will be called when device ref count goes to zero
    Unmanaged,               ///< No free routine will be called when device ref count goes to zero
    AllocatedDeviceAndHost,  ///< Call device_and_host_free when DevRefCount goes to zero.
    Cropped,                 ///< Call halide_device_release_crop when DevRefCount goes to zero.
};

/** A similar struct for managing device allocations. */
struct DeviceRefCount {
    // This is only ever constructed when there's something to manage,
    // so start at one.
    std::atomic<int> count{1};
    BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
};

constexpr int AnyDims = -1;

/** A templated Buffer class that wraps halide_buffer_t and adds
 * functionality. When using Halide from C++, this is the preferred
 * way to create input and output buffers. The overhead of using this
 * class relative to a naked halide_buffer_t is minimal - it uses another
 * ~16 bytes on the stack, and does no dynamic allocations when using
 * it to represent existing memory of a known maximum dimensionality.
 *
 * The template parameter T is the element type. For buffers where the
 * element type is unknown, or may vary, use void or const void.
 *
 * The template parameter Dims is the number of dimensions. For buffers where
 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
 *
 * InClassDimStorage is the maximum number of dimensions that can be represented
 * using space inside the class itself. Set it to the maximum dimensionality
 * you expect this buffer to have. If the actual dimensionality exceeds
 * this, heap storage is allocated to track the shape of the buffer.
 * InClassDimStorage defaults to 4, which should cover nearly all usage.
 *
 * The class optionally allocates and owns memory for the image using
 * a shared pointer allocated with the provided allocator. If the
 * allocator and deallocator are null, malloc and free are used. Any
 * device-side allocation is considered as owned if and only if the
 * host-side allocation is owned. */
template<typename T = void,
         int Dims = AnyDims,
         int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
class Buffer {
    /** The underlying halide_buffer_t */
    halide_buffer_t buf = {};

    /** Some in-class storage for shape of the dimensions. */
    halide_dimension_t shape[InClassDimStorage];

    /** The allocation owned by this Buffer. NULL if the Buffer does not
     * own the memory. */
    AllocationHeader *alloc = nullptr;

    /** A reference count for the device allocation owned by this
     * buffer. */
    mutable DeviceRefCount *dev_ref_count = nullptr;

    /** True if T is of type void or const void */
    static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;

    /** A type function that adds a const qualifier if T is a const type. */
    template<typename T2>
    using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;

    /** T unless T is (const) void, in which case (const)
     * uint8_t. Useful for providing return types for operator() */
    using not_void_T = typename std::conditional<T_is_void,
                                                 add_const_if_T_is_const<uint8_t>,
                                                 T>::type;

    /** T with constness removed. Useful for return type of copy(). */
    using not_const_T = typename std::remove_const<T>::type;

    /** The type the elements are stored as. Equal to not_void_T
     * unless T is a pointer, in which case uint64_t. Halide stores
     * all pointer types as uint64s internally, even on 32-bit
     * systems. */
    using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;

public:
    /** True if the Halide type is not void (or const void). */
    static constexpr bool has_static_halide_type = !T_is_void;

    /** Get the Halide type of T. Callers should not use the result if
     * has_static_halide_type is false. */
    static constexpr halide_type_t static_halide_type() {
        return halide_type_of<typename std::remove_cv<not_void_T>::type>();
    }

    /** Does this Buffer own the host memory it refers to? */
    bool owns_host_memory() const {
        return alloc != nullptr;
    }

    static constexpr bool has_static_dimensions = (Dims != AnyDims);

    /** Callers should not use the result if
     * has_static_dimensions is false. */
    static constexpr int static_dimensions() {
        return Dims;
    }

    static_assert(!has_static_dimensions || static_dimensions() >= 0);

private:
    /** Increment the reference count of any owned allocation */
    void incref() const {
        if (owns_host_memory()) {
            alloc->ref_count++;
        }
        if (buf.device) {
            if (!dev_ref_count) {
                // I seem to have a non-zero dev field but no
                // reference count for it. I must have been given a
                // device allocation by a Halide pipeline, and have
                // never been copied from since. Take sole ownership
                // of it.
                dev_ref_count = new DeviceRefCount;
            }
            dev_ref_count->count++;
        }
    }

    // Note that this is called "cropped", but it can also encompass
    // a slice/embed operation.
    struct DevRefCountCropped : DeviceRefCount {
        Buffer<T, Dims, InClassDimStorage> cropped_from;
        DevRefCountCropped(const Buffer<T, Dims, InClassDimStorage> &cropped_from)
            : cropped_from(cropped_from) {
            ownership = BufferDeviceOwnership::Cropped;
        }
    };

    /** Set up the device ref count for a buffer to indicate it is a crop (or slice, embed, etc.) of cropped_from */
    void crop_from(const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
        assert(dev_ref_count == nullptr);
        dev_ref_count = new DevRefCountCropped(cropped_from);
    }

    /** Decrement the reference count of any owned allocation and free host
     * and device memory if it hits zero. Sets alloc to nullptr. */
    void decref(bool device_only = false) {
        if (owns_host_memory() && !device_only) {
            int new_count = --(alloc->ref_count);
            if (new_count == 0) {
                void (*fn)(void *) = alloc->deallocate_fn;
                alloc->~AllocationHeader();
                fn(alloc);
            }
            buf.host = nullptr;
            alloc = nullptr;
            set_host_dirty(false);
        }
        int new_count = 0;
        if (dev_ref_count) {
            new_count = --(dev_ref_count->count);
        }
        if (new_count == 0) {
            if (buf.device) {
                assert(!(alloc && device_dirty()) &&
                       "Implicitly freeing a dirty device allocation while a host allocation still lives. "
                       "Call device_free explicitly if you want to drop dirty device-side data. "
                       "Call copy_to_host explicitly if you want the data copied to the host allocation "
                       "before the device allocation is freed.");
                int result = 0;
                if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
                    result = buf.device_interface->detach_native(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
                    result = buf.device_interface->device_and_host_free(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    result = buf.device_interface->device_release_crop(nullptr, &buf);
                } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
                    result = buf.device_interface->device_free(nullptr, &buf);
                }
                // No reasonable way to return the error, but we can at least assert-fail in debug builds.
                assert((result == 0) && "device_interface call returned a nonzero result in Buffer::decref()");
                (void)result;
            }
            if (dev_ref_count) {
                if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    delete (DevRefCountCropped *)dev_ref_count;
                } else {
                    delete dev_ref_count;
                }
            }
        }
        dev_ref_count = nullptr;
        buf.device = 0;
        buf.device_interface = nullptr;
    }

    void free_shape_storage() {
        if (buf.dim != shape) {
            delete[] buf.dim;
            buf.dim = nullptr;
        }
    }

    template<int DimsSpecified>
    void make_static_shape_storage() {
        static_assert(Dims == AnyDims || Dims == DimsSpecified,
                      "Number of arguments to Buffer() does not match static dimensionality");
        buf.dimensions = DimsSpecified;
        if constexpr (Dims == AnyDims) {
            if constexpr (DimsSpecified <= InClassDimStorage) {
                buf.dim = shape;
            } else {
                static_assert(DimsSpecified >= 1);
                buf.dim = new halide_dimension_t[DimsSpecified];
            }
        } else {
            static_assert(InClassDimStorage >= Dims);
            buf.dim = shape;
        }
    }

    void make_shape_storage(const int dimensions) {
        if (Dims != AnyDims && Dims != dimensions) {
            assert(false && "Number of arguments to Buffer() does not match static dimensionality");
        }
        // This should usually be inlined, so if dimensions is statically known,
        // we can skip the call to new
        buf.dimensions = dimensions;
        buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
    }

    void copy_shape_from(const halide_buffer_t &other) {
        // All callers of this ensure that buf.dimensions == other.dimensions.
        make_shape_storage(other.dimensions);
        std::copy(other.dim, other.dim + other.dimensions, buf.dim);
    }

    template<typename T2, int D2, int S2>
    void move_shape_from(Buffer<T2, D2, S2> &&other) {
        if (other.shape == other.buf.dim) {
            copy_shape_from(other.buf);
        } else {
            buf.dim = other.buf.dim;
            other.buf.dim = nullptr;
        }
    }

    /** Initialize the shape from a halide_buffer_t. */
    void initialize_from_buffer(const halide_buffer_t &b,
                                BufferDeviceOwnership ownership) {
        memcpy(&buf, &b, sizeof(halide_buffer_t));
        copy_shape_from(b);
        if (b.device) {
            dev_ref_count = new DeviceRefCount;
            dev_ref_count->ownership = ownership;
        }
    }

    /** Initialize the shape from an array of ints */
    void initialize_shape(const int *sizes) {
        for (int i = 0; i < buf.dimensions; i++) {
            buf.dim[i].min = 0;
            buf.dim[i].extent = sizes[i];
            if (i == 0) {
                buf.dim[i].stride = 1;
            } else {
                buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
            }
        }
    }
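
    // For example, for sizes = {640, 480, 3} this produces the dense
    // planar layout min = {0, 0, 0}, extent = {640, 480, 3},
    // stride = {1, 640, 640 * 480}: incrementing the innermost
    // coordinate moves one element through memory.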

    /** Initialize the shape from a vector of extents */
    void initialize_shape(const std::vector<int> &sizes) {
        assert(buf.dimensions == (int)sizes.size());
        initialize_shape(sizes.data());
    }

    /** Initialize the shape from the static shape of an array */
    template<typename Array, size_t N>
    void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
        buf.dim[next].min = 0;
        buf.dim[next].extent = (int)N;
        if (next == 0) {
            buf.dim[next].stride = 1;
        } else {
            initialize_shape_from_array_shape(next - 1, vals[0]);
            buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
        }
    }

    /** Base case for the template recursion above. */
    template<typename T2>
    void initialize_shape_from_array_shape(int, const T2 &) {
    }

    /** Get the dimensionality of a multi-dimensional C array */
    template<typename Array, size_t N>
    static int dimensionality_of_array(Array (&vals)[N]) {
        return dimensionality_of_array(vals[0]) + 1;
    }

    template<typename T2>
    static int dimensionality_of_array(const T2 &) {
        return 0;
    }

    /** Get the underlying halide_type_t of an array's element type. */
    template<typename Array, size_t N>
    static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
        return scalar_type_of_array(vals[0]);
    }

    template<typename T2>
    static halide_type_t scalar_type_of_array(const T2 &) {
        return halide_type_of<typename std::remove_cv<T2>::type>();
    }

    /** Crop a single dimension without handling device allocation. */
    void crop_host(int d, int min, int extent) {
        assert(dim(d).min() <= min);
        assert(dim(d).max() >= min + extent - 1);
        ptrdiff_t shift = min - dim(d).min();
        if (buf.host != nullptr) {
            buf.host += (shift * dim(d).stride()) * type().bytes();
        }
        buf.dim[d].min = min;
        buf.dim[d].extent = extent;
    }

    /** Crop as many dimensions as are in rect, without handling device allocation. */
    void crop_host(const std::vector<std::pair<int, int>> &rect) {
        assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
        int limit = (int)rect.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            crop_host(i, rect[i].first, rect[i].second);
        }
    }

    void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
            const Buffer<T, Dims, InClassDimStorage> *cropped_from = this;
            // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
            // Is it possible to get to this point without incref having run at least once since
            // the device field was set? (I.e. in the internal logic of crop, incref might have been
            // called.)
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            result_host_cropped.crop_from(*cropped_from);
        }
    }

    /** Slice a single dimension without handling device allocation. */
    void slice_host(int d, int pos) {
        static_assert(Dims == AnyDims);
        assert(dimensions() > 0);
        assert(d >= 0 && d < dimensions());
        assert(pos >= dim(d).min() && pos <= dim(d).max());
        buf.dimensions--;
        ptrdiff_t shift = pos - buf.dim[d].min;
        if (buf.host != nullptr) {
            buf.host += (shift * buf.dim[d].stride) * type().bytes();
        }
        for (int i = d; i < buf.dimensions; i++) {
            buf.dim[i] = buf.dim[i + 1];
        }
        buf.dim[buf.dimensions] = {0, 0, 0};
    }

    void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
            const Buffer<T, Dims, InClassDimStorage> *sliced_from = this;
            // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
            // Is it possible to get to this point without incref having run at least once since
            // the device field was set? (I.e. in the internal logic of slice, incref might have been
            // called.)
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            // crop_from() is correct here, despite the fact that we are slicing.
            result_host_sliced.crop_from(*sliced_from);
        }
    }

public:
    typedef T ElemType;

    /** Read-only access to the shape */
    class Dimension {
        const halide_dimension_t &d;

    public:
        /** The lowest coordinate in this dimension */
        HALIDE_ALWAYS_INLINE int min() const {
            return d.min;
        }

        /** The number of elements in memory you have to step over to
         * increment this coordinate by one. */
        HALIDE_ALWAYS_INLINE int stride() const {
            return d.stride;
        }

        /** The extent of the image along this dimension */
        HALIDE_ALWAYS_INLINE int extent() const {
            return d.extent;
        }

        /** The highest coordinate in this dimension */
        HALIDE_ALWAYS_INLINE int max() const {
            return min() + extent() - 1;
        }

        /** An iterator class, so that you can iterate over
         * coordinates in a dimension using a range-based for loop. */
        struct iterator {
            int val;
            int operator*() const {
                return val;
            }
            bool operator!=(const iterator &other) const {
                return val != other.val;
            }
            iterator &operator++() {
                val++;
                return *this;
            }
        };

        /** An iterator that points to the min coordinate */
        HALIDE_ALWAYS_INLINE iterator begin() const {
            return {min()};
        }

        /** An iterator that points to one past the max coordinate */
        HALIDE_ALWAYS_INLINE iterator end() const {
            return {min() + extent()};
        }

        explicit Dimension(const halide_dimension_t &dim)
            : d(dim) {
        }
    };

    /** Access the shape of the buffer */
    HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
        assert(i >= 0 && i < this->dimensions());
        return Dimension(buf.dim[i]);
    }

    /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
    // @{
    int min(int i) const {
        return dim(i).min();
    }
    int extent(int i) const {
        return dim(i).extent();
    }
    int stride(int i) const {
        return dim(i).stride();
    }
    // @}
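
    // For example, a dense traversal of a 2D buffer can use the shape
    // accessors directly; each loop runs from dim(d).min() to
    // dim(d).max() inclusive:
    //
    //     for (int y : im.dim(1)) {
    //         for (int x : im.dim(0)) {
    //             sum += im(x, y);
    //         }
    //     }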

    /** The total number of elements this buffer represents. Equal to
     * the product of the extents */
    size_t number_of_elements() const {
        return buf.number_of_elements();
    }

    /** Get the dimensionality of the buffer. */
    int dimensions() const {
        if constexpr (has_static_dimensions) {
            return Dims;
        } else {
            return buf.dimensions;
        }
    }

    /** Get the type of the elements. */
    halide_type_t type() const {
        return buf.type;
    }

    /** A pointer to the element with the lowest address. If all
     * strides are positive, equal to the host pointer. */
    T *begin() const {
        assert(buf.host != nullptr);  // Cannot call begin() on an unallocated Buffer.
        return (T *)buf.begin();
    }

    /** A pointer to one beyond the element with the highest address. */
    T *end() const {
        assert(buf.host != nullptr);  // Cannot call end() on an unallocated Buffer.
        return (T *)buf.end();
    }

    /** The total number of bytes spanned by the data in memory. */
    size_t size_in_bytes() const {
        return buf.size_in_bytes();
    }

    /** Reset the Buffer to be equivalent to a default-constructed Buffer
     * of the same static type (if any); Buffer<void> will have its runtime
     * type reset to uint8. */
    void reset() {
        *this = Buffer();
    }

    Buffer()
        : shape() {
        buf.type = static_halide_type();
        // If Dims is statically known, create storage for that many
        // dimensions; otherwise, make a zero-dimensional buffer.
        constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
        make_static_shape_storage<buf_dimensions>();
    }

    /** Make a Buffer from a halide_buffer_t */
    explicit Buffer(const halide_buffer_t &buf,
                    BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
        assert(T_is_void || buf.type == static_halide_type());
        initialize_from_buffer(buf, ownership);
    }

    /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
    template<typename T2, int D2, int S2>
    friend class Buffer;

private:
    template<typename T2, int D2, int S2>
    static void static_assert_can_convert_from() {
        static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
                      "Can't convert from a Buffer<const T> to a Buffer<T>");
        static_assert(std::is_same<typename std::remove_const<T>::type,
                                   typename std::remove_const<T2>::type>::value ||
                          T_is_void || Buffer<T2, D2, S2>::T_is_void,
                      "type mismatch constructing Buffer");
        static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
                      "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
    }

public:
    /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
     * If this can be determined at compile time, fail with a static assert; otherwise
     * return a boolean based on runtime typing. */
    template<typename T2, int D2, int S2>
    static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
        static_assert_can_convert_from<T2, D2, S2>();
        if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
            if (other.type() != static_halide_type()) {
                return false;
            }
        }
        if (Dims != AnyDims) {
            if (other.dimensions() != Dims) {
                return false;
            }
        }
        return true;
    }

    /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
     * cannot be constructed from some other Buffer type. */
    template<typename T2, int D2, int S2>
    static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
        // Explicitly call static_assert_can_convert_from() here so
        // that we always get compile-time checking, even if compiling with
        // assertions disabled.
        static_assert_can_convert_from<T2, D2, S2>();
        assert(can_convert_from(other));
    }

    /** Copy constructor. Does not copy underlying data. */
    Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
        : buf(other.buf),
          alloc(other.alloc) {
        other.incref();
        dev_ref_count = other.dev_ref_count;
        copy_shape_from(other.buf);
    }

    /** Construct a Buffer from a Buffer of different dimensionality
     * and type. Asserts that the type and dimensionality matches (at runtime,
     * if one of the types is void). Note that this constructor is
     * implicit. This, for example, lets you pass things like
     * Buffer<T> or Buffer<const void> to functions expecting
     * Buffer<const T>. */
    template<typename T2, int D2, int S2>
    Buffer(const Buffer<T2, D2, S2> &other)
        : buf(other.buf),
          alloc(other.alloc) {
        assert_can_convert_from(other);
        other.incref();
        dev_ref_count = other.dev_ref_count;
        copy_shape_from(other.buf);
    }

    /** Move constructor */
    Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
        : buf(other.buf),
          alloc(other.alloc),
          dev_ref_count(other.dev_ref_count) {
        other.dev_ref_count = nullptr;
        other.alloc = nullptr;
        move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
        other.buf = halide_buffer_t();
    }

    /** Move-construct a Buffer from a Buffer of different
     * dimensionality and type. Asserts that the types match (at
     * runtime if one of the types is void). */
    template<typename T2, int D2, int S2>
    Buffer(Buffer<T2, D2, S2> &&other)
        : buf(other.buf),
          alloc(other.alloc),
          dev_ref_count(other.dev_ref_count) {
        assert_can_convert_from(other);
        other.dev_ref_count = nullptr;
        other.alloc = nullptr;
        move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
        other.buf = halide_buffer_t();
    }

    /** Assign from another Buffer of possibly-different
     * dimensionality and type. Asserts that the types match (at
     * runtime if one of the types is void). */
    template<typename T2, int D2, int S2>
    Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
        if ((const void *)this == (const void *)&other) {
            return *this;
        }
        assert_can_convert_from(other);
        other.incref();
        decref();
        dev_ref_count = other.dev_ref_count;
        alloc = other.alloc;
        free_shape_storage();
        buf = other.buf;
        copy_shape_from(other.buf);
        return *this;
    }

    /** Standard assignment operator */
    Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
        // The cast to void* here is just to satisfy clang-tidy
        if ((const void *)this == (const void *)&other) {
            return *this;
        }
        other.incref();
        decref();
        dev_ref_count = other.dev_ref_count;
        alloc = other.alloc;
        free_shape_storage();
        buf = other.buf;
        copy_shape_from(other.buf);
        return *this;
    }

    /** Move from another Buffer of possibly-different
     * dimensionality and type. Asserts that the types match (at
     * runtime if one of the types is void). */
    template<typename T2, int D2, int S2>
    Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
        assert_can_convert_from(other);
        decref();
        alloc = other.alloc;
        other.alloc = nullptr;
        dev_ref_count = other.dev_ref_count;
        other.dev_ref_count = nullptr;
        free_shape_storage();
        buf = other.buf;
        move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
        other.buf = halide_buffer_t();
        return *this;
    }

    /** Standard move-assignment operator */
    Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
        decref();
        alloc = other.alloc;
        other.alloc = nullptr;
        dev_ref_count = other.dev_ref_count;
        other.dev_ref_count = nullptr;
        free_shape_storage();
        buf = other.buf;
        move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
        other.buf = halide_buffer_t();
        return *this;
    }

    /** Check that the product of the extents fits in memory. */
    void check_overflow() {
        size_t size = type().bytes();
        for (int i = 0; i < dimensions(); i++) {
            size *= dim(i).extent();
        }
        // We allow 2^31 or 2^63 bytes, so drop the top bit.
        size = (size << 1) >> 1;
        for (int i = 0; i < dimensions(); i++) {
            size /= dim(i).extent();
        }
        assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
    }

    /** Allocate memory for this Buffer. Drops the reference to any
     * owned memory. */
    void allocate(void *(*allocate_fn)(size_t) = nullptr,
                  void (*deallocate_fn)(void *) = nullptr) {
        if (!allocate_fn) {
            allocate_fn = malloc;
        }
        if (!deallocate_fn) {
            deallocate_fn = free;
        }

        // Drop any existing allocation
        deallocate();

        // Conservatively align images to 128 bytes. This is enough
        // alignment for all the platforms we might use.
        size_t size = size_in_bytes();
        const size_t alignment = 128;
        size = (size + alignment - 1) & ~(alignment - 1);
        void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
        alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
        uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
        buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
    }
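
    // A common pattern is to describe a shape first and allocate later,
    // e.g. when calling an AOT-compiled pipeline in bounds-query mode:
    //
    //     Buffer<float> out(nullptr, 640, 480);  // shape only, no storage
    //     out.allocate();                        // now backed by owned memory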

    /** Drop reference to any owned host or device memory, possibly
     * freeing it, if this buffer held the last reference to
     * it. Retains the shape of the buffer. Does nothing if this
     * buffer did not allocate its own memory. */
    void deallocate() {
        decref();
    }

    /** Drop reference to any owned device memory, possibly freeing it
     * if this buffer held the last reference to it. Asserts that
     * device_dirty is false. */
    void device_deallocate() {
        decref(true);
    }

    /** Allocate a new image of the given size with a runtime
     * type. Only used when you do know what size you want but you
     * don't know statically what type the elements are. Pass zeroes
     * to make a buffer suitable for bounds query calls. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(halide_type_t t, int first, Args... rest) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (!Internal::any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }
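
    // For example, a 640x480 buffer of 32-bit floats whose element type
    // is chosen at runtime:
    //
    //     Buffer<void> im(halide_type_t(halide_type_float, 32), 640, 480);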

    /** Allocate a new image of the given size. Pass zeroes to make a
     * buffer suitable for bounds query calls. */
    // @{

    // The overload with one argument is 'explicit', so that
    // (say) int is not implicitly convertible to Buffer<int>
    explicit Buffer(int first) {
        static_assert(!T_is_void,
                      "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 1;
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (first != 0) {
            check_overflow();
            allocate();
        }
    }

    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(int first, int second, Args... rest) {
        static_assert(!T_is_void,
                      "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first, second, (int)rest...};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (!Internal::any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }
    // @}
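
    // For example:
    //
    //     Buffer<float> im(640, 480, 3);  // 3-channel planar image; owns its allocation
    //     Buffer<float> bq(0, 0, 3);      // a zero extent: shape-only, for bounds queries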

    /** Allocate a new image of unknown type using a vector of ints as the size. */
    Buffer(halide_type_t t, const std::vector<int> &sizes) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        // make_shape_storage() will do a runtime check that dimensionality matches.
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
        if (!Internal::any_zero(sizes)) {
            check_overflow();
            allocate();
        }
    }

    /** Allocate a new image of known type using a vector of ints as the size. */
    explicit Buffer(const std::vector<int> &sizes)
        : Buffer(static_halide_type(), sizes) {
    }

private:
    // Create a copy of the sizes vector, ordered as specified by order.
    static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
        assert(order.size() == sizes.size());
        std::vector<int> ordered_sizes(sizes.size());
        for (size_t i = 0; i < sizes.size(); ++i) {
            ordered_sizes[i] = sizes.at(order[i]);
        }
        return ordered_sizes;
    }

public:
    /** Allocate a new image of unknown type using a vector of ints as the size and
     * a vector of indices indicating the storage order for each dimension. The
     * length of the sizes vector and the storage-order vector must match. For instance,
     * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
    Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
        : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
        transpose(storage_order);
    }

    Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
        : Buffer(static_halide_type(), sizes, storage_order) {
    }
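
    // For example, an interleaved RGB image where the channel dimension
    // is innermost in memory:
    //
    //     Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
    //     // rgb.dim(2).stride() == 1, rgb.dim(0).stride() == 3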

    /** Make a Buffer that refers to a statically sized array. Does not
     * take ownership of the data, and does not set the host_dirty flag. */
    template<typename Array, size_t N>
    explicit Buffer(Array (&vals)[N]) {
        const int buf_dimensions = dimensionality_of_array(vals);
        buf.type = scalar_type_of_array(vals);
        buf.host = (uint8_t *)vals;
        make_shape_storage(buf_dimensions);
        initialize_shape_from_array_shape(buf.dimensions - 1, vals);
    }
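
    // Note that the innermost C array dimension becomes dimension 0:
    //
    //     int vals[10][20];
    //     Buffer<int> b(vals);
    //     // b.dim(0).extent() == 20, b.dim(1).extent() == 10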

    /** Initialize a Buffer of runtime type from a pointer and some
     * sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
    }

    /** Initialize a Buffer from a pointer and some sizes. Assumes
     * dense row-major packing and a min coordinate of zero. Does not
     * take ownership of the data and does not set the host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(T *data, int first, Args &&...rest) {
        int extents[] = {first, (int)rest...};
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
    }
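
    // For example, wrapping a dense row-major array owned by someone else:
    //
    //     std::vector<float> storage(640 * 480);
    //     Buffer<float> im(storage.data(), 640, 480);
    //     // im(x, y) addresses storage[y * 640 + x]; im will not free it.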

    /** Initialize a Buffer from a pointer and a vector of
     * sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    explicit Buffer(T *data, const std::vector<int> &sizes) {
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
    }

    /** Initialize a Buffer of runtime type from a pointer and a
     * vector of sizes. Assumes dense row-major packing and a min
     * coordinate of zero. Does not take ownership of the data and
     * does not set the host_dirty flag. */
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
    }

    /** Initialize a Buffer from a pointer to the min coordinate and
     * an array describing the shape. Does not take ownership of the
     * data, and does not set the host_dirty flag. */
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        make_shape_storage(d);
        for (int i = 0; i < d; i++) {
            buf.dim[i] = shape[i];
        }
    }

    /** Initialize a Buffer from a pointer to the min coordinate and
     * a vector describing the shape. Does not take ownership of the
     * data, and does not set the host_dirty flag. */
    explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                           const std::vector<halide_dimension_t> &shape)
        : Buffer(t, data, (int)shape.size(), shape.data()) {
    }

    /** Initialize a Buffer from a pointer to the min coordinate and
     * an array describing the shape. Does not take ownership of the
     * data and does not set the host_dirty flag. */
    explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage(d);
        for (int i = 0; i < d; i++) {
            buf.dim[i] = shape[i];
        }
    }

    /** Initialize a Buffer from a pointer to the min coordinate and
     * a vector describing the shape. Does not take ownership of the
     * data, and does not set the host_dirty flag. */
    explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
        : Buffer(data, (int)shape.size(), shape.data()) {
    }

    /** Destructor. Will release any underlying owned allocation if
     * this is the last reference to it. Will assert-fail if there are
     * weak references to this Buffer outstanding. */
    ~Buffer() {
        decref();
        free_shape_storage();
    }

    /** Get a pointer to the raw halide_buffer_t this wraps. */
    // @{
    halide_buffer_t *raw_buffer() {
        return &buf;
    }

    const halide_buffer_t *raw_buffer() const {
        return &buf;
    }
    // @}

    /** Provide a cast operator to halide_buffer_t *, so that
     * instances can be passed directly to Halide filters. */
    operator halide_buffer_t *() {
        return &buf;
    }

    /** Return a typed reference to this Buffer. Useful for converting
     * a reference to a Buffer<void> to a reference to, for example, a
     * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
     * You can also optionally specify a new value for Dims; this is useful
     * mainly for removing the dimensionality constraint on a Buffer with
     * explicit dimensionality. Does a runtime assert if the source buffer type
     * is void or the new dimensionality is incompatible. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** Return a const typed reference to this Buffer. Useful for converting
     * a reference to a Buffer<void> to a reference to, for example, a
     * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
     * You can also optionally specify a new value for Dims; this is useful
     * mainly for removing the dimensionality constraint on a Buffer with
     * explicit dimensionality. Does a runtime assert if the source buffer type
     * is void or the new dimensionality is incompatible. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((const Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** Return an rval reference to this Buffer. Useful for converting
     * a reference to a Buffer<void> to a reference to, for example, a
     * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
     * You can also optionally specify a new value for Dims; this is useful
     * mainly for removing the dimensionality constraint on a Buffer with
     * explicit dimensionality. Does a runtime assert if the source buffer type
     * is void or the new dimensionality is incompatible. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
     * to recapitulate the type argument. */
    // @{
    HALIDE_ALWAYS_INLINE
    Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
        // Note that we can skip the assert_can_convert_from(), since T -> const T
        // conversion is always legal.
        return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
    }

    HALIDE_ALWAYS_INLINE
    const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
        return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
    }

    HALIDE_ALWAYS_INLINE
    Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
        return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
    }
    // @}
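
    // For example, recovering a typed view of a type-erased buffer
    // (make_float_buffer() is a hypothetical helper returning a
    // Buffer<void> whose runtime type is float32):
    //
    //     Buffer<void> v = make_float_buffer();
    //     Buffer<float> &f = v.as<float>();  // asserts v.type() is float32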

    /** Conventional names for the first three dimensions. */
    // @{
    int width() const {
        return (dimensions() > 0) ? dim(0).extent() : 1;
    }
    int height() const {
        return (dimensions() > 1) ? dim(1).extent() : 1;
    }
    int channels() const {
        return (dimensions() > 2) ? dim(2).extent() : 1;
    }
    // @}

    /** Conventional names for the min and max value of each dimension */
    // @{
    int left() const {
        return dim(0).min();
    }

    int right() const {
        return dim(0).max();
    }

    int top() const {
        return dim(1).min();
    }

    int bottom() const {
        return dim(1).max();
    }
    // @}

    /** Make a new image which is a deep copy of this image. Use crop
     * or slice followed by copy to make a copy of only a portion of
     * the image. The new image uses the same memory layout as the
     * original, with holes compacted away. Note that the returned
     * Buffer is always of a non-const type T (i.e.:
     *
     * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
     *
     * which is always safe, since we are making a deep copy. (The caller
     * can easily cast it back to Buffer<const T> if desired, which is
     * always safe and free.)
     */
    Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
                                                      void (*deallocate_fn)(void *) = nullptr) const {
        Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }
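
    // For example, given a shared read-only view, copy() yields
    // independent storage that is safe to mutate:
    //
    //     Buffer<const float> shared = im.as_const();
    //     Buffer<float> scratch = shared.copy();  // deep copy, non-const element type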

    /** Like copy(), but the copy is created in interleaved memory layout
     * (vs. keeping the same memory layout as the original). Requires that 'this'
     * has exactly 3 dimensions.
     */
    Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
                                                                     void (*deallocate_fn)(void *) = nullptr) const {
        static_assert(Dims == AnyDims || Dims == 3);
        assert(dimensions() == 3);
        Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
        dst.set_min(min(0), min(1), min(2));
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }

    /** Like copy(), but the copy is created in planar memory layout
     * (vs. keeping the same memory layout as the original).
     */
    Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
                                                                void (*deallocate_fn)(void *) = nullptr) const {
        std::vector<int> mins, extents;
        const int dims = dimensions();
        mins.reserve(dims);
        extents.reserve(dims);
        for (int d = 0; d < dims; ++d) {
            mins.push_back(dim(d).min());
            extents.push_back(dim(d).extent());
        }
        Buffer<not_const_T, Dims, InClassDimStorage> dst(nullptr, extents);
        dst.set_min(mins);
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }

    /** Make a copy of the Buffer which shares the underlying host and/or device
     * allocations as the existing Buffer. This is purely syntactic sugar for
     * cases where you have a const reference to a Buffer but need a temporary
     * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
     * inline way to create a temporary. \code
     * void call_my_func(const Buffer<const uint8_t>& input) {
     *     my_func(input.alias(), output);
     * }\endcode
     */
    Buffer<T, Dims, InClassDimStorage> alias() const {
        return *this;
    }

    /** Fill a Buffer with the values at the same coordinates in
     * another Buffer. Restricts itself to coordinates contained
     * within the intersection of the two buffers. If the two Buffers
     * are not in the same coordinate system, you will need to
     * translate the argument Buffer first. E.g. if you're blitting a
     * sprite onto a framebuffer, you'll want to translate the sprite
     * to the correct location first like so: \code
     * framebuffer.copy_from(sprite.translated({x, y})); \endcode
     */
    template<typename T2, int D2, int S2>
    void copy_from(Buffer<T2, D2, S2> src) {
        static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
        assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
        assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");

        Buffer<T, Dims, InClassDimStorage> dst(*this);

        static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
        assert(src.dimensions() == dst.dimensions());

        // Trim the copy to the region in common
        const int d = dimensions();
        for (int i = 0; i < d; i++) {
            int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
            int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
            if (max_coord < min_coord) {
                // The buffers do not overlap.
                return;
            }
            dst.crop(i, min_coord, max_coord - min_coord + 1);
            src.crop(i, min_coord, max_coord - min_coord + 1);
        }

        // If T is void, we need to do runtime dispatch to an
        // appropriately-typed lambda. We're copying, so we only care
        // about the element size. (If not, this should optimize away
        // into a static dispatch to the right-sized copy.)
        if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
            using MemType = uint8_t;
            auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
            auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
            using MemType = uint16_t;
            auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
            auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
            using MemType = uint32_t;
            auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
            auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
            using MemType = uint64_t;
            auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
            auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else {
            assert(false && "type().bytes() must be 1, 2, 4, or 8");
        }
        set_host_dirty();
    }

    /** Make an image that refers to a sub-range of this image along
     * the given dimension. Asserts that the crop region is within
     * the existing bounds: you cannot "crop outwards", even if you know there
     * is valid Buffer storage (e.g. because you already cropped inwards). */
    Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
        // Make a fresh copy of the underlying buffer (but not a fresh
        // copy of the allocation, if there is one).
        Buffer<T, Dims, InClassDimStorage> im = *this;

        // This guarantees the preexisting device ref is dropped if the
        // device_crop call fails and maintains the buffer in a consistent
        // state.
        im.device_deallocate();

        im.crop_host(d, min, extent);
        if (buf.device_interface != nullptr) {
            complete_device_crop(im);
        }
        return im;
    }

    /** Crop an image in-place along the given dimension. This does
     * not move any data around in memory - it just changes the min
     * and extent of the given dimension. */
    void crop(int d, int min, int extent) {
        // An optimization for non-device buffers. For the device case,
        // a temp buffer is required, so reuse the not-in-place version.
        // TODO(zalman|abadams): Are nop crops common enough to special
        // case the device part of the if to do nothing?
        if (buf.device_interface != nullptr) {
            *this = cropped(d, min, extent);
        } else {
            crop_host(d, min, extent);
        }
    }
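
    // For example, a 100-column-wide view starting at x == 20 that shares
    // storage with the original:
    //
    //     Buffer<float> view = im.cropped(0, 20, 100);
    //     // view.dim(0).min() == 20, view.dim(0).extent() == 100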

    /** Make an image that refers to a sub-rectangle of this image along
     * the first N dimensions. Asserts that the crop region is within
     * the existing bounds. The cropped image may drop any device handle
     * if the device_interface cannot accomplish the crop in-place. */
    Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
        // Make a fresh copy of the underlying buffer (but not a fresh
        // copy of the allocation, if there is one).
        Buffer<T, Dims, InClassDimStorage> im = *this;

        // This guarantees the preexisting device ref is dropped if the
        // device_crop call fails and maintains the buffer in a consistent
        // state.
        im.device_deallocate();

        im.crop_host(rect);
        if (buf.device_interface != nullptr) {
            complete_device_crop(im);
        }
        return im;
    }

    /** Crop an image in-place along the first N dimensions. This does
     * not move any data around in memory, nor does it free memory. It
     * just rewrites the min/extent of each dimension to refer to a
     * subregion of the same allocation. */
    void crop(const std::vector<std::pair<int, int>> &rect) {
        // An optimization for non-device buffers. For the device case,
        // a temp buffer is required, so reuse the not-in-place version.
        // TODO(zalman|abadams): Are nop crops common enough to special
        // case the device part of the if to do nothing?
        if (buf.device_interface != nullptr) {
            *this = cropped(rect);
        } else {
            crop_host(rect);
        }
    }

    /** Make an image which refers to the same data using translated
     * coordinates in the given dimension. Positive values
     * move the image data to the right or down relative to the
     * coordinate system. Drops any device handle. */
    Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.translate(d, dx);
        return im;
    }

    /** Translate an image in-place along one dimension by changing
     * how it is indexed. Does not move any data around in memory. */
    void translate(int d, int delta) {
        assert(d >= 0 && d < this->dimensions());
        device_deallocate();
        buf.dim[d].min += delta;
    }

    /** Make an image which refers to the same data translated along
     * the first N dimensions. */
    Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.translate(delta);
        return im;
    }

    /** Translate an image along the first N dimensions by changing
     * how it is indexed. Does not move any data around in memory. */
    void translate(const std::vector<int> &delta) {
        device_deallocate();
        assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
        int limit = (int)delta.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            translate(i, delta[i]);
        }
    }

    /** Set the min coordinate of an image in the first N dimensions. */
    // @{
    void set_min(const std::vector<int> &mins) {
        assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
        device_deallocate();
        for (size_t i = 0; i < mins.size(); i++) {
            buf.dim[i].min = mins[i];
        }
    }

    template<typename... Args>
    void set_min(Args... args) {
        set_min(std::vector<int>{args...});
    }
    // @}

    /** Test if a given coordinate is within the bounds of an image. */
    // @{
    bool contains(const std::vector<int> &coords) const {
        assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
        for (size_t i = 0; i < coords.size(); i++) {
            if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
                return false;
            }
        }
        return true;
    }

    template<typename... Args>
    bool contains(Args... args) const {
        return contains(std::vector<int>{args...});
    }
    // @}

    /** Make a buffer which refers to the same data in the same layout
     * using a swapped indexing order for the dimensions given. So
     * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
     * strongly that A.address_of(i, j) == B.address_of(j, i). */
    Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.transpose(d1, d2);
        return im;
    }

    /** Transpose a buffer in-place by changing how it is indexed. For
     * example, transpose(0, 1) on a two-dimensional buffer means that
     * the value referred to by coordinates (i, j) is now reached at
     * the coordinates (j, i), and vice versa. This is done by
     * reordering the per-dimension metadata rather than by moving
     * data around in memory, so other views of the same memory will
     * not see the data as having been transposed. */
    void transpose(int d1, int d2) {
        assert(d1 >= 0 && d1 < this->dimensions());
        assert(d2 >= 0 && d2 < this->dimensions());
        std::swap(buf.dim[d1], buf.dim[d2]);
    }

    /** A generalized transpose: instead of swapping two dimensions,
     * pass a vector that lists each dimension index exactly once, in
     * the desired order. This does not move any data around in memory
     * - it just permutes how it is indexed. */
    void transpose(const std::vector<int> &order) {
        assert((int)order.size() == dimensions());
        if (dimensions() < 2) {
            // My, that was easy
            return;
        }

        std::vector<int> order_sorted = order;
        for (size_t i = 1; i < order_sorted.size(); i++) {
            for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
                std::swap(order_sorted[j], order_sorted[j - 1]);
                transpose(j, j - 1);
            }
        }
    }

    /** Make a buffer which refers to the same data in the same
     * layout using a different ordering of the dimensions. */
    Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.transpose(order);
        return im;
    }
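
    // For example, turning an x, y, c buffer into a c, x, y view of the
    // same memory (order[k] gives the new position of dimension k):
    //
    //     auto cxy = im.transposed({1, 2, 0});
    //     // cxy(c, x, y) addresses the same element as im(x, y, c)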

    /** Make a lower-dimensional buffer that refers to one slice of
     * this buffer. */
    Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
    sliced(int d, int pos) const {
        static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
        assert(dimensions() > 0);

        Buffer<T, AnyDims, InClassDimStorage> im = *this;

        // This guarantees the preexisting device ref is dropped if the
        // device_slice call fails and maintains the buffer in a consistent
        // state.
        im.device_deallocate();

        im.slice_host(d, pos);
        if (buf.device_interface != nullptr) {
            complete_device_slice(im, d, pos);
        }
        return im;
    }

    /** Make a lower-dimensional buffer that refers to one slice of this
     * buffer at the dimension's minimum. */
    Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
    sliced(int d) const {
        static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
        assert(dimensions() > 0);

        return sliced(d, dim(d).min());
    }

    /** Rewrite the buffer to refer to a single lower-dimensional
     * slice of itself along the given dimension at the given
     * coordinate. Does not move any data around or free the original
     * memory, so other views of the same data are unaffected. Can
     * only be called on a Buffer with dynamic dimensionality. */
    void slice(int d, int pos) {
        static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
        assert(dimensions() > 0);

        // An optimization for non-device buffers. For the device case,
        // a temp buffer is required, so reuse the not-in-place version.
        // TODO(zalman|abadams): Are nop slices common enough to special
        // case the device part of the if to do nothing?
        if (buf.device_interface != nullptr) {
            *this = sliced(d, pos);
        } else {
            slice_host(d, pos);
        }
    }

    /** Slice a buffer in-place at the dimension's minimum. */
    inline void slice(int d) {
        slice(d, dim(d).min());
    }
1553 
1554  /** Make a new buffer that views this buffer as a single slice in a
1555  * higher-dimensional space. The new dimension has extent one and
1556  * the given min. This operation is the opposite of slice. As an
1557  * example, the following condition is true:
1558  *
1559  \code
1560  im2 = im.embedded(1, 17);
1561  &im(x, y, c) == &im2(x, 17, y, c);
1562  \endcode
1563  */
1564  Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1565  embedded(int d, int pos = 0) const {
1566  Buffer<T, AnyDims> im(*this);
1567  im.embed(d, pos);
1568  return im;
1569  }
1570 
1571  /** Embed a buffer in-place, increasing the
1572  * dimensionality. */
1573  void embed(int d, int pos = 0) {
1574  static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1575  assert(d >= 0 && d <= dimensions());
1576  add_dimension();
1577  translate(dimensions() - 1, pos);
1578  for (int i = dimensions() - 1; i > d; i--) {
1579  transpose(i, i - 1);
1580  }
1581  }
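
For example (a sketch; the shapes are illustrative), embedding a 2-D buffer at dimension 1 produces a 3-D view whose middle dimension has extent one:

\code
Buffer<float, 2> plane(100, 100);
Buffer<float, 3> vol = plane.embedded(1, 17);
// For all x, y: &plane(x, y) == &vol(x, 17, y); vol.dim(1) has extent 1.
\endcode
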
1582 
1583  /** Add a new dimension with a min of zero and an extent of
1584  * one. The stride is the extent of the outermost dimension times
1585  * its stride. The new dimension is the last dimension. This is a
1586  * special case of embed. */
1587  void add_dimension() {
1588  static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1589  const int dims = buf.dimensions;
1590  buf.dimensions++;
1591  if (buf.dim != shape) {
1592  // We're already on the heap. Reallocate.
1593  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1594  for (int i = 0; i < dims; i++) {
1595  new_shape[i] = buf.dim[i];
1596  }
1597  delete[] buf.dim;
1598  buf.dim = new_shape;
1599  } else if (dims == InClassDimStorage) {
1600  // Transition from the in-class storage to the heap
1601  make_shape_storage(buf.dimensions);
1602  for (int i = 0; i < dims; i++) {
1603  buf.dim[i] = shape[i];
1604  }
1605  } else {
1606  // We still fit in the class
1607  }
1608  buf.dim[dims] = {0, 1, 0};
1609  if (dims == 0) {
1610  buf.dim[dims].stride = 1;
1611  } else {
1612  buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1613  }
1614  }
1615 
1616  /** Add a new dimension with a min of zero, an extent of one, and
1617  * the specified stride. The new dimension is the last
1618  * dimension. This is a special case of embed. */
1619  void add_dimension_with_stride(int s) {
1620  add_dimension();
1621  buf.dim[buf.dimensions - 1].stride = s;
1622  }
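
A short sketch: a stride of zero makes stepping along the new dimension revisit the same memory, which is commonly how a broadcast dimension is faked:

\code
Buffer<float, AnyDims> b(100);   // 1-D
b.add_dimension_with_stride(0);  // 2-D; dim 1 = {min 0, extent 1, stride 0}
\endcode
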
1623 
1624  /** Methods for managing any GPU allocation. */
1625  // @{
1626  // Set the host dirty flag. Called by every operator()
1627  // access. Must be inlined so it can be hoisted out of loops.
1628  HALIDE_ALWAYS_INLINE
1629  void set_host_dirty(bool v = true) {
1630  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1631  buf.set_host_dirty(v);
1632  }
1633 
1634  // Check if the device allocation is dirty. Called by
1635  // set_host_dirty, which is called by every accessor. Must be
1636  // inlined so it can be hoisted out of loops.
1637  HALIDE_ALWAYS_INLINE
1638  bool device_dirty() const {
1639  return buf.device_dirty();
1640  }
1641 
1642  bool host_dirty() const {
1643  return buf.host_dirty();
1644  }
1645 
1646  void set_device_dirty(bool v = true) {
1647  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1648  buf.set_device_dirty(v);
1649  }
1650 
1651  int copy_to_host(void *ctx = nullptr) {
1652  if (device_dirty()) {
1653  return buf.device_interface->copy_to_host(ctx, &buf);
1654  }
1655  return 0;
1656  }
1657 
1658  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1659  if (host_dirty()) {
1660  return device_interface->copy_to_device(ctx, &buf, device_interface);
1661  }
1662  return 0;
1663  }
1664 
1665  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1666  return device_interface->device_malloc(ctx, &buf, device_interface);
1667  }
1668 
1669  int device_free(void *ctx = nullptr) {
1670  if (dev_ref_count) {
1671  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1672  "Can't call device_free on an unmanaged or wrapped native device handle. "
1673  "Free the source allocation or call device_detach_native instead.");
1674  // Multiple people may be holding onto this dev field
1675  assert(dev_ref_count->count == 1 &&
1676  "Multiple Halide::Runtime::Buffer objects share this device "
1677  "allocation. Freeing it would create dangling references. "
1678  "Don't call device_free on Halide buffers that you have copied or "
1679  "passed by value.");
1680  }
1681  int ret = 0;
1682  if (buf.device_interface) {
1683  ret = buf.device_interface->device_free(ctx, &buf);
1684  }
1685  if (dev_ref_count) {
1686  delete dev_ref_count;
1687  dev_ref_count = nullptr;
1688  }
1689  return ret;
1690  }
1691 
1692  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1693  uint64_t handle, void *ctx = nullptr) {
1694  assert(device_interface);
1695  dev_ref_count = new DeviceRefCount;
1696  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1697  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1698  }
1699 
1700  int device_detach_native(void *ctx = nullptr) {
1701  assert(dev_ref_count &&
1702  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1703  "Only call device_detach_native on buffers wrapping a native "
1704  "device handle via device_wrap_native. This buffer was allocated "
1705  "using device_malloc, or is unmanaged. "
1706  "Call device_free or free the original allocation instead.");
1707  // Multiple people may be holding onto this dev field
1708  assert(dev_ref_count->count == 1 &&
1709  "Multiple Halide::Runtime::Buffer objects share this device "
1710  "allocation. Freeing it could create dangling references. "
1711  "Don't call device_detach_native on Halide buffers that you "
1712  "have copied or passed by value.");
1713  int ret = 0;
1714  if (buf.device_interface) {
1715  ret = buf.device_interface->detach_native(ctx, &buf);
1716  }
1717  delete dev_ref_count;
1718  dev_ref_count = nullptr;
1719  return ret;
1720  }
1721 
1722  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1723  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1724  }
1725 
1726  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1727  if (dev_ref_count) {
1728  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1729  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1730  "Free the source allocation or call device_detach_native instead.");
1731  // Multiple people may be holding onto this dev field
1732  assert(dev_ref_count->count == 1 &&
1733  "Multiple Halide::Runtime::Buffer objects share this device "
1734  "allocation. Freeing it would create dangling references. "
1735  "Don't call device_and_host_free on Halide buffers that you have copied or "
1736  "passed by value.");
1737  }
1738  int ret = 0;
1739  if (buf.device_interface) {
1740  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1741  }
1742  if (dev_ref_count) {
1743  delete dev_ref_count;
1744  dev_ref_count = nullptr;
1745  }
1746  return ret;
1747  }
1748 
1749  int device_sync(void *ctx = nullptr) {
1750  return buf.device_sync(ctx);
1751  }
1752 
1753  bool has_device_allocation() const {
1754  return buf.device != 0;
1755  }
1756 
1757  /** Return the method by which the device field is managed. */
1758  BufferDeviceOwnership device_ownership() const {
1759  if (dev_ref_count == nullptr) {
1760  return BufferDeviceOwnership::Allocated;
1761  }
1762  return dev_ref_count->ownership;
1763  }
1764  // @}
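
A minimal host/device round-trip sketch, assuming a halide_device_interface_t *gpu_interface obtained from whichever device runtime is linked in (the name is illustrative):

\code
Buffer<float, 2> im(640, 480);
im.fill(0.0f);                    // touches host data, so host_dirty is set
im.copy_to_device(gpu_interface); // copies because host_dirty was set
// ... run a pipeline that writes im on the device, setting device_dirty ...
im.copy_to_host();                // copies back only because device_dirty is set
\endcode
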
1765 
1766  /** If you use the (x, y, c) indexing convention, then Halide
1767  * Buffers are stored planar by default. This function constructs
1768  * an interleaved RGB or RGBA image that can still be indexed
1769  * using (x, y, c). Passing it to a generator requires that the
1770  * generator has been compiled with support for interleaved (also
1771  * known as packed or chunky) memory layouts. */
1772  static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1773  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1774  Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1775  // Note that this is equivalent to calling transpose({2, 0, 1}),
1776  // but slightly more efficient.
1777  im.transpose(0, 1);
1778  im.transpose(1, 2);
1779  return im;
1780  }
1781 
1782  /** If you use the (x, y, c) indexing convention, then Halide
1783  * Buffers are stored planar by default. This function constructs
1784  * an interleaved RGB or RGBA image that can still be indexed
1785  * using (x, y, c). Passing it to a generator requires that the
1786  * generator has been compiled with support for interleaved (also
1787  * known as packed or chunky) memory layouts. */
1788  static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1789  return make_interleaved(static_halide_type(), width, height, channels);
1790  }
1791 
1792  /** Wrap an existing interleaved image. */
1793  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1794  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1795  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1796  Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1797  im.transpose(0, 1);
1798  im.transpose(1, 2);
1799  return im;
1800  }
1801 
1802  /** Wrap an existing interleaved image. */
1803  static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1804  return make_interleaved(static_halide_type(), data, width, height, channels);
1805  }
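
A sketch of the layout this produces: channel becomes the dense dimension, while the buffer is still indexed (x, y, c):

\code
auto im = Buffer<uint8_t>::make_interleaved(640, 480, 3);
// im.dim(0) (x): extent 640, stride 3
// im.dim(1) (y): extent 480, stride 1920
// im.dim(2) (c): extent 3,   stride 1
\endcode
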
1806 
1807  /** Make a zero-dimensional Buffer */
1808  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1809  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1810  Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1811  buf.slice(0, 0);
1812  return buf;
1813  }
1814 
1815  /** Make a zero-dimensional Buffer */
1816  static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1817  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1818  Buffer<T, AnyDims, InClassDimStorage> buf(1);
1819  buf.slice(0, 0);
1820  return buf;
1821  }
1822 
1823  /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1824  static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1825  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1826  Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1827  buf.slice(0, 0);
1828  return buf;
1829  }
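
A quick usage sketch of the non-owning overload:

\code
float x = 0.0f;
auto s = Buffer<float>::make_scalar(&x);  // 0-D view of x
s() = 42.0f;                              // writes x itself
\endcode
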
1830 
1831  /** Make a buffer with the same shape and memory nesting order as
1832  * another buffer. It may have a different type. */
1833  template<typename T2, int D2, int S2>
1834  static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1835  void *(*allocate_fn)(size_t) = nullptr,
1836  void (*deallocate_fn)(void *) = nullptr) {
1837  static_assert(Dims == D2 || Dims == AnyDims);
1838  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1839  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1840  allocate_fn, deallocate_fn);
1841  }
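
For instance (a sketch), allocating a float buffer with the shape and nesting order of an interleaved byte image:

\code
auto src = Buffer<uint8_t>::make_interleaved(640, 480, 3);
auto dst = Buffer<float>::make_with_shape_of(src);
// dst is 640x480x3, dense, with c innermost like src, but holds floats.
\endcode
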
1842 
1843 private:
1844  static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1845  int dimensions,
1846  halide_dimension_t *shape,
1847  void *(*allocate_fn)(size_t),
1848  void (*deallocate_fn)(void *)) {
1849  // Reorder the dimensions of src to have strides in increasing order
1850  std::vector<int> swaps;
1851  for (int i = dimensions - 1; i > 0; i--) {
1852  for (int j = i; j > 0; j--) {
1853  if (shape[j - 1].stride > shape[j].stride) {
1854  std::swap(shape[j - 1], shape[j]);
1855  swaps.push_back(j);
1856  }
1857  }
1858  }
1859 
1860  // Rewrite the strides to be dense (this messes up src, which
1861  // is why we took it by value).
1862  for (int i = 0; i < dimensions; i++) {
1863  if (i == 0) {
1864  shape[i].stride = 1;
1865  } else {
1866  shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
1867  }
1868  }
1869 
1870  // Undo the dimension reordering
1871  while (!swaps.empty()) {
1872  int j = swaps.back();
1873  std::swap(shape[j - 1], shape[j]);
1874  swaps.pop_back();
1875  }
1876 
1877  // Use an explicit runtime type, and make dst a Buffer<void>, to allow
1878  // using this method with Buffer<void> for either src or dst.
1879  Buffer<> dst(dst_type, nullptr, dimensions, shape);
1880  dst.allocate(allocate_fn, deallocate_fn);
1881 
1882  return dst;
1883  }
1884 
1885  template<typename... Args>
1886  HALIDE_ALWAYS_INLINE
1887  ptrdiff_t
1888  offset_of(int d, int first, Args... rest) const {
1889 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
1890  assert(first >= this->buf.dim[d].min);
1891  assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
1892 #endif
1893  return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
1894  }
1895 
1896  HALIDE_ALWAYS_INLINE
1897  ptrdiff_t offset_of(int d) const {
1898  return 0;
1899  }
1900 
1901  template<typename... Args>
1902  HALIDE_ALWAYS_INLINE
1903  storage_T *
1904  address_of(Args... args) const {
1905  if (T_is_void) {
1906  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1907  } else {
1908  return (storage_T *)(this->buf.host) + offset_of(0, args...);
1909  }
1910  }
1911 
1912  HALIDE_ALWAYS_INLINE
1913  ptrdiff_t offset_of(const int *pos) const {
1914  ptrdiff_t offset = 0;
1915  for (int i = this->dimensions() - 1; i >= 0; i--) {
1916 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
1917  assert(pos[i] >= this->buf.dim[i].min);
1918  assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
1919 #endif
1920  offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1921  }
1922  return offset;
1923  }
1924 
1925  HALIDE_ALWAYS_INLINE
1926  storage_T *address_of(const int *pos) const {
1927  if (T_is_void) {
1928  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1929  } else {
1930  return (storage_T *)this->buf.host + offset_of(pos);
1931  }
1932  }
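
That is, both overloads compute the strided offset sum-over-i of stride[i] * (pos[i] - min[i]) in elements; only the type-erased Buffer<void> case scales by type().bytes(), since there storage_T is a byte.
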
1933 
1934 public:
1935  /** Get a pointer to the address of the min coordinate. */
1936  T *data() const {
1937  return (T *)(this->buf.host);
1938  }
1939 
1940  /** Access elements. Use im(...) to get a reference to an element,
1941  * and use &im(...) to get the address of an element. If you pass
1942  * fewer arguments than the buffer has dimensions, the rest are
1943  * treated as their min coordinate. The non-const versions set the
1944  * host_dirty flag to true.
1945  */
1946  //@{
1947  template<typename... Args,
1948  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1949  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
1950  static_assert(!T_is_void,
1951  "Cannot use operator() on Buffer<void> types");
1952  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
1953  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1954  assert(!device_dirty());
1955  return *((const not_void_T *)(address_of(first, rest...)));
1956  }
1957 
1958  HALIDE_ALWAYS_INLINE
1959  const not_void_T &
1960  operator()() const {
1961  static_assert(!T_is_void,
1962  "Cannot use operator() on Buffer<void> types");
1963  constexpr int expected_dims = 0;
1964  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1965  assert(!device_dirty());
1966  return *((const not_void_T *)(data()));
1967  }
1968 
1969  HALIDE_ALWAYS_INLINE
1970  const not_void_T &
1971  operator()(const int *pos) const {
1972  static_assert(!T_is_void,
1973  "Cannot use operator() on Buffer<void> types");
1974  assert(!device_dirty());
1975  return *((const not_void_T *)(address_of(pos)));
1976  }
1977 
1978  template<typename... Args,
1979  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1980  HALIDE_ALWAYS_INLINE
1981  not_void_T &
1982  operator()(int first, Args... rest) {
1983  static_assert(!T_is_void,
1984  "Cannot use operator() on Buffer<void> types");
1985  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
1986  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1987  set_host_dirty();
1988  return *((not_void_T *)(address_of(first, rest...)));
1989  }
1990 
1991  HALIDE_ALWAYS_INLINE
1992  not_void_T &
1993  operator()() {
1994  static_assert(!T_is_void,
1995  "Cannot use operator() on Buffer<void> types");
1996  constexpr int expected_dims = 0;
1997  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
1998  set_host_dirty();
1999  return *((not_void_T *)(data()));
2000  }
2001 
2002  HALIDE_ALWAYS_INLINE
2003  not_void_T &
2004  operator()(const int *pos) {
2005  static_assert(!T_is_void,
2006  "Cannot use operator() on Buffer<void> types");
2007  set_host_dirty();
2008  return *((not_void_T *)(address_of(pos)));
2009  }
2010  // @}
2011 
2012  /** Tests that all values in this buffer are equal to val. */
2013  bool all_equal(not_void_T val) const {
2014  bool all_equal = true;
2015  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2016  return all_equal;
2017  }
2018 
2019  Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2020  set_host_dirty();
2021  for_each_value([=](T &v) { v = val; });
2022  return *this;
2023  }
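
A trivial sketch combining the two:

\code
Buffer<int, 2> im(8, 8);
im.fill(7);
assert(im.all_equal(7));
\endcode
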
2024 
2025 private:
2026  /** Helper functions for for_each_value. */
2027  // @{
2028  template<int N>
2029  struct for_each_value_task_dim {
2030  std::ptrdiff_t extent;
2031  std::ptrdiff_t stride[N];
2032  };
2033 
2034  // Given an array of strides, and a bunch of pointers to pointers
2035  // (all of different types), advance the pointers using the
2036  // strides.
2037  template<typename Ptr, typename... Ptrs>
2038  HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2039  ptr += *stride;
2040  advance_ptrs(stride + 1, ptrs...);
2041  }
2042 
2043  HALIDE_ALWAYS_INLINE
2044  static void advance_ptrs(const std::ptrdiff_t *) {
2045  }
2046 
2047  template<typename Fn, typename Ptr, typename... Ptrs>
2048  HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2049  const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2050  if (d == 0) {
2051  if (innermost_strides_are_one) {
2052  Ptr end = ptr + t[0].extent;
2053  while (ptr != end) {
2054  f(*ptr++, (*ptrs++)...);
2055  }
2056  } else {
2057  for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2058  f(*ptr, (*ptrs)...);
2059  advance_ptrs(t[0].stride, ptr, ptrs...);
2060  }
2061  }
2062  } else {
2063  for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2064  for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2065  advance_ptrs(t[d].stride, ptr, ptrs...);
2066  }
2067  }
2068  }
2069 
2070  template<int N>
2071  HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
2072  const halide_buffer_t **buffers) {
2073  // Check the buffers all have clean host allocations
2074  for (int i = 0; i < N; i++) {
2075  if (buffers[i]->device) {
2076  assert(buffers[i]->host &&
2077  "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2078  assert(!buffers[i]->device_dirty() &&
2079  "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2080  } else {
2081  assert(buffers[i]->host &&
2082  "Buffer passed to for_each_value has no host or device allocation");
2083  }
2084  }
2085 
2086  const int dimensions = buffers[0]->dimensions;
2087 
2088  // Extract the strides in all the dimensions
2089  for (int i = 0; i < dimensions; i++) {
2090  for (int j = 0; j < N; j++) {
2091  assert(buffers[j]->dimensions == dimensions);
2092  assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2093  buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2094  const int s = buffers[j]->dim[i].stride;
2095  t[i].stride[j] = s;
2096  }
2097  t[i].extent = buffers[0]->dim[i].extent;
2098 
2099  // Order the dimensions by stride, so that the traversal is cache-coherent.
2100  // Use the last buffer's strides for this, because it is the source in copies.
2101  // It appears to be better to optimize read order than write order.
2102  for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2103  std::swap(t[j], t[j - 1]);
2104  }
2105  }
2106 
2107  // Flatten dimensions where possible to make a larger inner
2108  // loop for autovectorization.
2109  int d = dimensions;
2110  for (int i = 1; i < d; i++) {
2111  bool flat = true;
2112  for (int j = 0; j < N; j++) {
2113  flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2114  }
2115  if (flat) {
2116  t[i - 1].extent *= t[i].extent;
2117  for (int j = i; j < d; j++) {
2118  t[j] = t[j + 1];
2119  }
2120  i--;
2121  d--;
2122  t[d].extent = 1;
2123  }
2124  }
2125 
2126  bool innermost_strides_are_one = true;
2127  if (dimensions > 0) {
2128  for (int i = 0; i < N; i++) {
2129  innermost_strides_are_one &= (t[0].stride[i] == 1);
2130  }
2131  }
2132 
2133  return innermost_strides_are_one;
2134  }
2135 
2136  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2137  void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2138  if (dimensions() > 0) {
2139  Buffer<>::for_each_value_task_dim<N> *t =
2140  (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
2141  // Move the preparatory code into a non-templated helper to
2142  // save code size.
2143  const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2144  bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);
2145 
2146  Buffer<>::for_each_value_helper(f, dimensions() - 1,
2147  innermost_strides_are_one,
2148  t,
2149  data(), (other_buffers.data())...);
2150  } else {
2151  f(*data(), (*other_buffers.data())...);
2152  }
2153  }
2154  // @}
2155 
2156 public:
2157  /** Call a function on every value in the buffer, and the
2158  * corresponding values in some number of other buffers of the
2159  * same size. The function should take a reference, const
2160  * reference, or value of the correct type for each buffer. This
2161  * effectively lifts a function of scalars to an element-wise
2162  * function of buffers. This produces code that the compiler can
2163  * autovectorize. This is slightly cheaper than for_each_element,
2164  * because it does not need to track the coordinates.
2165  *
2166  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2167  * 'this' or the other-buffers arguments) will allow mutation of the
2168  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2169  * a mutable reference for the lambda argument of a Buffer<const T>
2170  * will result in a compilation error. */
2171  // @{
2172  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2173  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2174  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2175  return *this;
2176  }
2177 
2178  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2179  HALIDE_ALWAYS_INLINE
2180  Buffer<T, Dims, InClassDimStorage> &
2181  for_each_value(Fn &&f, Args &&...other_buffers) {
2182  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2183  return *this;
2184  }
2185  // @}
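
For example (a sketch), lifting scalar addition to an element-wise sum of two buffers:

\code
Buffer<float, 2> a(64, 64), b(64, 64), sum(64, 64);
a.fill(1.0f);
b.fill(2.0f);
sum.for_each_value([](float &s, float x, float y) { s = x + y; }, a, b);
\endcode
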
2186 
2187 private:
2188  // Helper functions for for_each_element
2189  struct for_each_element_task_dim {
2190  int min, max;
2191  };
2192 
2193  /** If f is callable with this many args, call it. The first
2194  * argument is just to make the overloads distinct. Actual
2195  * overload selection is done using the enable_if. */
2196  template<typename Fn,
2197  typename... Args,
2198  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2199  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2200  f(args...);
2201  }
2202 
2203  /** If the above overload is impossible, we add an outer loop over
2204  * an additional argument and try again. */
2205  template<typename Fn,
2206  typename... Args>
2207  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2208  for (int i = t[d].min; i <= t[d].max; i++) {
2209  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2210  }
2211  }
2212 
2213  /** Determine the minimum number of arguments a callable can take
2214  * using the same trick. */
2215  template<typename Fn,
2216  typename... Args,
2217  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2218  HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2219  return (int)(sizeof...(Args));
2220  }
2221 
2222  /** The recursive version is only enabled up to a recursion limit
2223  * of 256. This catches callables that aren't callable with any
2224  * number of ints. */
2225  template<typename Fn,
2226  typename... Args>
2227  HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2228  static_assert(sizeof...(args) <= 256,
2229  "Callable passed to for_each_element must accept either a const int *,"
2230  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2231  return num_args(0, std::forward<Fn>(f), 0, args...);
2232  }
2233 
2234  /** A version where the callable takes a position array instead,
2235  * with compile-time recursion on the dimensionality. This
2236  * overload is preferred to the one below using the same int vs
2237  * double trick as above, but is impossible once d hits -1 using
2238  * std::enable_if. */
2239  template<int d,
2240  typename Fn,
2241  typename = typename std::enable_if<(d >= 0)>::type>
2242  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2243  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2244  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2245  }
2246  }
2247 
2248  /** Base case for recursion above. */
2249  template<int d,
2250  typename Fn,
2251  typename = typename std::enable_if<(d < 0)>::type>
2252  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2253  f(pos);
2254  }
2255 
2256  /** A run-time-recursive version (instead of
2257  * compile-time-recursive) that requires the callable to take a
2258  * pointer to a position array instead. Dispatches to the
2259  * compile-time-recursive version once the dimensionality gets
2260  * small. */
2261  template<typename Fn>
2262  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2263  if (d == -1) {
2264  f(pos);
2265  } else if (d == 0) {
2266  // Once the dimensionality gets small enough, dispatch to
2267  // a compile-time-recursive version for better codegen of
2268  // the inner loops.
2269  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2270  } else if (d == 1) {
2271  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2272  } else if (d == 2) {
2273  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2274  } else if (d == 3) {
2275  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2276  } else {
2277  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2278  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2279  }
2280  }
2281  }
2282 
2283  /** We now have two overloads for for_each_element. This one
2284  * triggers if the callable takes a const int *.
2285  */
2286  template<typename Fn,
2287  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2288  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2289  int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
2290  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2291  }
2292 
2293  /** This one triggers otherwise. It treats the callable as
2294  * something that takes some number of ints. */
2295  template<typename Fn>
2296  HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2297  int args = num_args(0, std::forward<Fn>(f));
2298  assert(dims >= args);
2299  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2300  }
2301 
2302  template<typename Fn>
2303  void for_each_element_impl(Fn &&f) const {
2304  for_each_element_task_dim *t =
2305  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2306  for (int i = 0; i < dimensions(); i++) {
2307  t[i].min = dim(i).min();
2308  t[i].max = dim(i).max();
2309  }
2310  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2311  }
2312 
2313 public:
2314  /** Call a function at each site in a buffer. This is likely to be
2315  * much slower than using Halide code to populate a buffer, but is
2316  * convenient for tests. If the function has more arguments than the
2317  * buffer has dimensions, the remaining arguments will be zero. If it
2318  * has fewer arguments than the buffer has dimensions then the last
2319  * few dimensions of the buffer are not iterated over. For example,
2320  * the following code exploits this to set a floating point RGB image
2321  * to red:
2322 
2323  \code
2324  Buffer<float, 3> im(100, 100, 3);
2325  im.for_each_element([&](int x, int y) {
2326  im(x, y, 0) = 1.0f;
2327  im(x, y, 1) = 0.0f;
2328  im(x, y, 2) = 0.0f;
2329  });
2330  \endcode
2331 
2332  * The compiled code is equivalent to writing a nested for loop,
2333  * and compilers are capable of optimizing it in the same way.
2334  *
2335  * If the callable can be called with an int * as the sole argument,
2336  * that version is called instead. Each location in the buffer is
2337  * passed to it in a coordinate array. This version is higher-overhead
2338  * than the variadic version, but is useful for writing generic code
2339  * that accepts buffers of arbitrary dimensionality. For example, the
2340  * following sets the value at all sites in an arbitrary-dimensional
2341  * buffer to their first coordinate:
2342 
2343  \code
2344  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2345  \endcode
2346 
2347  * It is also possible to use for_each_element to iterate over entire
2348  * rows or columns by cropping the buffer to a single column or row
2349  * respectively and iterating over elements of the result. For example,
2350  * to set the diagonal of the image to 1 by iterating over the columns:
2351 
2352  \code
2353  Buffer<float, 3> im(100, 100, 3);
2354  im.sliced(1, 0).for_each_element([&](int x, int c) {
2355  im(x, x, c) = 1.0f;
2356  });
2357  \endcode
2358 
2359  * Or, assuming the memory layout is known to be dense per row, one can
2360  * memset each row of an image like so:
2361 
2362  \code
2363  Buffer<float, 3> im(100, 100, 3);
2364  im.sliced(0, 0).for_each_element([&](int y, int c) {
2365  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2366  });
2367  \endcode
2368 
2369  */
2370  // @{
2371  template<typename Fn>
2372  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2373  for_each_element_impl(f);
2374  return *this;
2375  }
2376 
2377  template<typename Fn>
2378  HALIDE_ALWAYS_INLINE
2379  Buffer<T, Dims, InClassDimStorage> &
2380  for_each_element(Fn &&f) {
2381  for_each_element_impl(f);
2382  return *this;
2383  }
2384  // @}
2385 
2386 private:
2387  template<typename Fn>
2388  struct FillHelper {
2389  Fn f;
2390  Buffer<T, Dims, InClassDimStorage> *buf;
2391 
2392  template<typename... Args,
2393  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2394  void operator()(Args... args) {
2395  (*buf)(args...) = f(args...);
2396  }
2397 
2398  FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2399  : f(std::forward<Fn>(f)), buf(buf) {
2400  }
2401  };
2402 
2403 public:
2404  /** Fill a buffer by evaluating a callable at every site. The
2405  * callable should look much like a callable passed to
2406  * for_each_element, but it should return the value that should be
2407  * stored to the coordinate corresponding to the arguments. */
2408  template<typename Fn,
2409  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2410  Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2411  // We'll go via for_each_element. We need a variadic wrapper lambda.
2412  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2413  return for_each_element(wrapper);
2414  }
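
For example (a sketch), filling a buffer with a gradient of its coordinates:

\code
Buffer<float, 2> ramp(100, 100);
ramp.fill([](int x, int y) { return float(x + y); });
\endcode
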
2415 
2416  /** Check if an input buffer passed to an extern stage is a bounds
2417  * query. Compared to doing the host pointer check directly,
2418  * this both adds clarity to the code and will facilitate moving to
2419  * another representation for bounds query arguments. */
2420  bool is_bounds_query() const {
2421  return buf.is_bounds_query();
2422  }
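
A typical extern-stage sketch (the function name and the one-to-one bounds mapping are illustrative, not prescribed by this header):

\code
extern "C" int my_stage(halide_buffer_t *in, halide_buffer_t *out) {
    Halide::Runtime::Buffer<float> input(*in);
    if (input.is_bounds_query()) {
        // Report the region of `in` needed to produce `out`, instead of computing.
        for (int i = 0; i < in->dimensions; i++) {
            in->dim[i] = out->dim[i];
        }
        return 0;
    }
    // ... do the actual work ...
    return 0;
}
\endcode
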
2423 
2424  /** Convenient check to verify that all of the interesting bytes in the Buffer
2425  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2426  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2427  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2428  * the entire Buffer storage.) */
2429  void msan_check_mem_is_initialized(bool entire = false) const {
2430 #if defined(__has_feature)
2431 #if __has_feature(memory_sanitizer)
2432  if (entire) {
2433  __msan_check_mem_is_initialized(data(), size_in_bytes());
2434  } else {
2435  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2436  }
2437 #endif
2438 #endif
2439  }
2440 };
2441 
2442 } // namespace Runtime
2443 } // namespace Halide
2444 
2445 #undef HALIDE_ALLOCA
2446 
2447 #endif // HALIDE_RUNTIME_BUFFER_H