Halide  12.0.1
Halide compiler and libraries
HalideBuffer.h
1 /** \file
2  * Defines a Buffer type that wraps halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
9 #include <algorithm>
10 #include <atomic>
11 #include <cassert>
12 #include <cstdint>
13 #include <cstring>
14 #include <limits>
15 #include <memory>
16 #include <vector>
17 
18 #if defined(__has_feature)
19 #if __has_feature(memory_sanitizer)
20 #include <sanitizer/msan_interface.h>
21 #endif
22 #endif
23 
24 #include "HalideRuntime.h"
25 
26 #ifdef _MSC_VER
27 #include <malloc.h>
28 #define HALIDE_ALLOCA _alloca
29 #else
30 #define HALIDE_ALLOCA __builtin_alloca
31 #endif
32 
33 // gcc 5.1 has a false positive warning on this code
34 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
35 #pragma GCC diagnostic ignored "-Warray-bounds"
36 #endif
37 
38 namespace Halide {
39 namespace Runtime {
40 
41 // Forward-declare our Buffer class
42 template<typename T, int D>
43 class Buffer;
44 
45 // A helper to check if a parameter pack is entirely implicitly
46 // int-convertible, for use with std::enable_if
47 template<typename... Args>
48 struct AllInts : std::false_type {};
49 
50 template<>
51 struct AllInts<> : std::true_type {};
52 
53 template<typename T, typename... Args>
54 struct AllInts<T, Args...> {
55  static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
56 };
57 
58 // Floats and doubles are technically implicitly int-convertible, but
59 // doing so produces a warning we treat as an error, so just disallow
60 // it here.
61 template<typename... Args>
62 struct AllInts<float, Args...> : std::false_type {};
63 
64 template<typename... Args>
65 struct AllInts<double, Args...> : std::false_type {};
66 
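// [Example: editorial sketch, not part of HalideBuffer.h] AllInts is the trait the
// variadic Buffer constructors below use with std::enable_if to accept only
// integer-like size arguments. Two illustrative checks:
//
//   static_assert(Halide::Runtime::AllInts<int, long, char>::value, "ints are accepted");
//   static_assert(!Halide::Runtime::AllInts<int, float>::value, "floats are rejected");
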
67 // A helper to detect if there are any zeros in a container
68 namespace Internal {
69 template<typename Container>
70 bool any_zero(const Container &c) {
71  for (int i : c) {
72  if (i == 0) {
73  return true;
74  }
75  }
76  return false;
77 }
78 } // namespace Internal
79 
80 /** A struct acting as a header for allocations owned by the Buffer
81  * class itself. */
82 struct AllocationHeader {
83  void (*deallocate_fn)(void *);
84  std::atomic<int> ref_count;
85 
86  // Note that ref_count always starts at 1
87  explicit AllocationHeader(void (*deallocate_fn)(void *))
88  : deallocate_fn(deallocate_fn), ref_count(1) {
89  }
90 };
91 
92 /** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
93 enum struct BufferDeviceOwnership : int {
94  Allocated, ///> halide_device_free will be called when device ref count goes to zero
95  WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
96  Unmanaged, ///> No free routine will be called when device ref count goes to zero
97  AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
98  Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
99 };
100 
101 /** A similar struct for managing device allocations. */
102 struct DeviceRefCount {
103  // This is only ever constructed when there's something to manage,
104  // so start at one.
105  std::atomic<int> count{1};
106  BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
107 };
108 
109 /** A templated Buffer class that wraps halide_buffer_t and adds
110  * functionality. When using Halide from C++, this is the preferred
111  * way to create input and output buffers. The overhead of using this
112  * class relative to a naked halide_buffer_t is minimal - it uses another
113  * ~16 bytes on the stack, and does no dynamic allocations when using
114  * it to represent existing memory of a known maximum dimensionality.
115  *
116  * The template parameter T is the element type. For buffers where the
117  * element type is unknown, or may vary, use void or const void.
118  *
119  * D is the maximum number of dimensions that can be represented using
120  * space inside the class itself. Set it to the maximum dimensionality
121  * you expect this buffer to be. If the actual dimensionality exceeds
122  * this, heap storage is allocated to track the shape of the buffer. D
123  * defaults to 4, which should cover nearly all usage.
124  *
125  * The class optionally allocates and owns memory for the image using
126  * a shared pointer allocated with the provided allocator. If they are
127  * null, malloc and free are used. Any device-side allocation is
128  * considered as owned if and only if the host-side allocation is
129  * owned. */
130 template<typename T = void, int D = 4>
131 class Buffer {
132  /** The underlying halide_buffer_t */
133  halide_buffer_t buf = {0};
134 
135  /** Some in-class storage for shape of the dimensions. */
136  halide_dimension_t shape[D];
137 
138  /** The allocation owned by this Buffer. NULL if the Buffer does not
139  * own the memory. */
140  AllocationHeader *alloc = nullptr;
141 
142  /** A reference count for the device allocation owned by this
143  * buffer. */
144  mutable DeviceRefCount *dev_ref_count = nullptr;
145 
146  /** True if T is of type void or const void */
147  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
148 
149  /** A type function that adds a const qualifier if T is a const type. */
150  template<typename T2>
151  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
152 
153  /** T unless T is (const) void, in which case (const)
154  * uint8_t. Useful for providing return types for operator() */
155  using not_void_T = typename std::conditional<T_is_void,
156  add_const_if_T_is_const<uint8_t>,
157  T>::type;
158 
159  /** T with constness removed. Useful for return type of copy(). */
160  using not_const_T = typename std::remove_const<T>::type;
161 
162  /** The type the elements are stored as. Equal to not_void_T
163  * unless T is a pointer, in which case uint64_t. Halide stores
164  * all pointer types as uint64s internally, even on 32-bit
165  * systems. */
166  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
167 
168 public:
169  /** True if the Halide type is not void (or const void). */
170  static constexpr bool has_static_halide_type = !T_is_void;
171 
172  /** Get the Halide type of T. Callers should not use the result if
173  * has_static_halide_type is false. */
174  static halide_type_t static_halide_type() {
175  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
176  }
177 
178  /** Does this Buffer own the host memory it refers to? */
179  bool owns_host_memory() const {
180  return alloc != nullptr;
181  }
182 
183 private:
184  /** Increment the reference count of any owned allocation */
185  void incref() const {
186  if (owns_host_memory()) {
187  alloc->ref_count++;
188  }
189  if (buf.device) {
190  if (!dev_ref_count) {
191  // I seem to have a non-zero dev field but no
192  // reference count for it. I must have been given a
193  // device allocation by a Halide pipeline, and have
194  // never been copied from since. Take sole ownership
195  // of it.
196  dev_ref_count = new DeviceRefCount;
197  }
198  dev_ref_count->count++;
199  }
200  }
201 
202  // Note that this is called "cropped" but can also encompass a slice/embed
203  // operation as well.
204  struct DevRefCountCropped : DeviceRefCount {
205  Buffer<T, D> cropped_from;
206  DevRefCountCropped(const Buffer<T, D> &cropped_from)
207  : cropped_from(cropped_from) {
208  ownership = BufferDeviceOwnership::Cropped;
209  }
210  };
211 
212  /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
213  void crop_from(const Buffer<T, D> &cropped_from) {
214  assert(dev_ref_count == nullptr);
215  dev_ref_count = new DevRefCountCropped(cropped_from);
216  }
217 
218  /** Decrement the reference count of any owned allocation and free host
219  * and device memory if it hits zero. Sets alloc to nullptr. */
220  void decref(bool device_only = false) {
221  if (owns_host_memory() && !device_only) {
222  int new_count = --(alloc->ref_count);
223  if (new_count == 0) {
224  void (*fn)(void *) = alloc->deallocate_fn;
225  alloc->~AllocationHeader();
226  fn(alloc);
227  }
228  buf.host = nullptr;
229  alloc = nullptr;
230  set_host_dirty(false);
231  }
232  int new_count = 0;
233  if (dev_ref_count) {
234  new_count = --(dev_ref_count->count);
235  }
236  if (new_count == 0) {
237  if (buf.device) {
238  assert(!(alloc && device_dirty()) &&
239  "Implicitly freeing a dirty device allocation while a host allocation still lives. "
240  "Call device_free explicitly if you want to drop dirty device-side data. "
241  "Call copy_to_host explicitly if you want the data copied to the host allocation "
242  "before the device allocation is freed.");
243  if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
244  buf.device_interface->detach_native(nullptr, &buf);
245  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
246  buf.device_interface->device_and_host_free(nullptr, &buf);
247  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
248  buf.device_interface->device_release_crop(nullptr, &buf);
249  } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
250  buf.device_interface->device_free(nullptr, &buf);
251  }
252  }
253  if (dev_ref_count) {
254  if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
255  delete (DevRefCountCropped *)dev_ref_count;
256  } else {
257  delete dev_ref_count;
258  }
259  }
260  }
261  dev_ref_count = nullptr;
262  buf.device = 0;
263  buf.device_interface = nullptr;
264  }
265 
266  void free_shape_storage() {
267  if (buf.dim != shape) {
268  delete[] buf.dim;
269  buf.dim = nullptr;
270  }
271  }
272 
273  void make_shape_storage(const int dimensions) {
274  // This should usually be inlined, so if dimensions is statically known,
275  // we can skip the call to new
276  buf.dimensions = dimensions;
277  buf.dim = (dimensions <= D) ? shape : new halide_dimension_t[dimensions];
278  }
279 
280  void copy_shape_from(const halide_buffer_t &other) {
281  // All callers of this ensure that buf.dimensions == other.dimensions.
282  make_shape_storage(other.dimensions);
283  std::copy(other.dim, other.dim + other.dimensions, buf.dim);
284  }
285 
286  template<typename T2, int D2>
287  void move_shape_from(Buffer<T2, D2> &&other) {
288  if (other.shape == other.buf.dim) {
289  copy_shape_from(other.buf);
290  } else {
291  buf.dim = other.buf.dim;
292  other.buf.dim = nullptr;
293  }
294  }
295 
296  /** Initialize the shape from a halide_buffer_t. */
297  void initialize_from_buffer(const halide_buffer_t &b,
298  BufferDeviceOwnership ownership) {
299  memcpy(&buf, &b, sizeof(halide_buffer_t));
300  copy_shape_from(b);
301  if (b.device) {
302  dev_ref_count = new DeviceRefCount;
303  dev_ref_count->ownership = ownership;
304  }
305  }
306 
307  /** Initialize the shape from an array of ints */
308  void initialize_shape(const int *sizes) {
309  for (int i = 0; i < buf.dimensions; i++) {
310  buf.dim[i].min = 0;
311  buf.dim[i].extent = sizes[i];
312  if (i == 0) {
313  buf.dim[i].stride = 1;
314  } else {
315  buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
316  }
317  }
318  }
319 
320  /** Initialize the shape from a vector of extents */
321  void initialize_shape(const std::vector<int> &sizes) {
322  assert(buf.dimensions == (int)sizes.size());
323  initialize_shape(sizes.data());
324  }
325 
326  /** Initialize the shape from the static shape of an array */
327  template<typename Array, size_t N>
328  void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
329  buf.dim[next].min = 0;
330  buf.dim[next].extent = (int)N;
331  if (next == 0) {
332  buf.dim[next].stride = 1;
333  } else {
334  initialize_shape_from_array_shape(next - 1, vals[0]);
335  buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
336  }
337  }
338 
339  /** Base case for the template recursion above. */
340  template<typename T2>
341  void initialize_shape_from_array_shape(int, const T2 &) {
342  }
343 
344  /** Get the dimensionality of a multi-dimensional C array */
345  template<typename Array, size_t N>
346  static int dimensionality_of_array(Array (&vals)[N]) {
347  return dimensionality_of_array(vals[0]) + 1;
348  }
349 
350  template<typename T2>
351  static int dimensionality_of_array(const T2 &) {
352  return 0;
353  }
354 
355  /** Get the underlying halide_type_t of an array's element type. */
356  template<typename Array, size_t N>
357  static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
358  return scalar_type_of_array(vals[0]);
359  }
360 
361  template<typename T2>
362  static halide_type_t scalar_type_of_array(const T2 &) {
363  return halide_type_of<typename std::remove_cv<T2>::type>();
364  }
365 
366  /** Crop a single dimension without handling device allocation. */
367  void crop_host(int d, int min, int extent) {
368  assert(dim(d).min() <= min);
369  assert(dim(d).max() >= min + extent - 1);
370  ptrdiff_t shift = min - dim(d).min();
371  if (buf.host != nullptr) {
372  buf.host += (shift * dim(d).stride()) * type().bytes();
373  }
374  buf.dim[d].min = min;
375  buf.dim[d].extent = extent;
376  }
377 
378  /** Crop as many dimensions as are in rect, without handling device allocation. */
379  void crop_host(const std::vector<std::pair<int, int>> &rect) {
380  assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
381  int limit = (int)rect.size();
382  assert(limit <= dimensions());
383  for (int i = 0; i < limit; i++) {
384  crop_host(i, rect[i].first, rect[i].second);
385  }
386  }
387 
388  void complete_device_crop(Buffer<T, D> &result_host_cropped) const {
389  assert(buf.device_interface != nullptr);
390  if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
391  const Buffer<T, D> *cropped_from = this;
392  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
393  // is it possible to get to this point without incref having run at least once since
394  // the device field was set? (I.e. in the internal logic of crop. incref might have been
395  // called.)
396  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
397  cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
398  }
399  result_host_cropped.crop_from(*cropped_from);
400  }
401  }
402 
403  /** Slice a single dimension without handling device allocation. */
404  void slice_host(int d, int pos) {
405  assert(d >= 0 && d < dimensions());
406  assert(pos >= dim(d).min() && pos <= dim(d).max());
407  buf.dimensions--;
408  ptrdiff_t shift = pos - buf.dim[d].min;
409  if (buf.host != nullptr) {
410  buf.host += (shift * buf.dim[d].stride) * type().bytes();
411  }
412  for (int i = d; i < buf.dimensions; i++) {
413  buf.dim[i] = buf.dim[i + 1];
414  }
415  buf.dim[buf.dimensions] = {0, 0, 0};
416  }
417 
418  void complete_device_slice(Buffer<T, D> &result_host_sliced, int d, int pos) const {
419  assert(buf.device_interface != nullptr);
420  if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
421  const Buffer<T, D> *sliced_from = this;
422  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
423  // is it possible to get to this point without incref having run at least once since
424  // the device field was set? (I.e. in the internal logic of slice. incref might have been
425  // called.)
426  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
427  sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
428  }
429  // crop_from() is correct here, despite the fact that we are slicing.
430  result_host_sliced.crop_from(*sliced_from);
431  }
432  }
433 
434 public:
435  typedef T ElemType;
436 
437  /** Read-only access to the shape */
438  class Dimension {
439  const halide_dimension_t &d;
440 
441  public:
442  /** The lowest coordinate in this dimension */
443  HALIDE_ALWAYS_INLINE int min() const {
444  return d.min;
445  }
446 
447  /** The number of elements in memory you have to step over to
448  * increment this coordinate by one. */
449  HALIDE_ALWAYS_INLINE int stride() const {
450  return d.stride;
451  }
452 
453  /** The extent of the image along this dimension */
454  HALIDE_ALWAYS_INLINE int extent() const {
455  return d.extent;
456  }
457 
458  /** The highest coordinate in this dimension */
459  HALIDE_ALWAYS_INLINE int max() const {
460  return min() + extent() - 1;
461  }
462 
463  /** An iterator class, so that you can iterate over
464  * coordinates in a dimension using a range-based for loop. */
465  struct iterator {
466  int val;
467  int operator*() const {
468  return val;
469  }
470  bool operator!=(const iterator &other) const {
471  return val != other.val;
472  }
473  iterator &operator++() {
474  val++;
475  return *this;
476  }
477  };
478 
479  /** An iterator that points to the min coordinate */
480  HALIDE_ALWAYS_INLINE iterator begin() const {
481  return {min()};
482  }
483 
484  /** An iterator that points to one past the max coordinate */
485  HALIDE_ALWAYS_INLINE iterator end() const {
486  return {min() + extent()};
487  }
488 
489  explicit Dimension(const halide_dimension_t &dim)
490  : d(dim) {
491  }
492  };
493 
494  /** Access the shape of the buffer */
495  HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
496  assert(i >= 0 && i < this->dimensions());
497  return Dimension(buf.dim[i]);
498  }
499 
500  /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
501  // @{
502  int min(int i) const {
503  return dim(i).min();
504  }
505  int extent(int i) const {
506  return dim(i).extent();
507  }
508  int stride(int i) const {
509  return dim(i).stride();
510  }
511  // @}
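
// [Example: editorial sketch, not part of HalideBuffer.h] dim(i) and its range-based
// iterators give a concise way to walk every sample; the buffer name and sizes are
// hypothetical, and im(x, y, c) uses the element accessors defined later in this header.
//
//   Halide::Runtime::Buffer<float> im(640, 480, 3);
//   for (int c : im.dim(2)) {
//       for (int y : im.dim(1)) {
//           for (int x : im.dim(0)) {
//               im(x, y, c) = 0.0f;
//           }
//       }
//   }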
512 
513  /** The total number of elements this buffer represents. Equal to
514  * the product of the extents */
515  size_t number_of_elements() const {
516  return buf.number_of_elements();
517  }
518 
519  /** Get the dimensionality of the buffer. */
520  int dimensions() const {
521  return buf.dimensions;
522  }
523 
524  /** Get the type of the elements. */
525  halide_type_t type() const {
526  return buf.type;
527  }
528 
529  /** A pointer to the element with the lowest address. If all
530  * strides are positive, equal to the host pointer. */
531  T *begin() const {
532  assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
533  return (T *)buf.begin();
534  }
535 
536  /** A pointer to one beyond the element with the highest address. */
537  T *end() const {
538  assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
539  return (T *)buf.end();
540  }
541 
542  /** The total number of bytes spanned by the data in memory. */
543  size_t size_in_bytes() const {
544  return buf.size_in_bytes();
545  }
546 
547  /** Reset the Buffer to be equivalent to a default-constructed Buffer
548  * of the same static type (if any); Buffer<void> will have its runtime
549  * type reset to uint8. */
550  void reset() {
551  *this = Buffer();
552  }
553 
554  Buffer()
555  : shape() {
556  buf.type = static_halide_type();
557  make_shape_storage(0);
558  }
559 
560  /** Make a Buffer from a halide_buffer_t */
561  explicit Buffer(const halide_buffer_t &buf,
562  BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
563  assert(T_is_void || buf.type == static_halide_type());
564  initialize_from_buffer(buf, ownership);
565  }
566 
567  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
568  template<typename T2, int D2>
569  friend class Buffer;
570 
571 private:
572  template<typename T2, int D2>
573  static void static_assert_can_convert_from() {
574  static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
575  "Can't convert from a Buffer<const T> to a Buffer<T>");
576  static_assert(std::is_same<typename std::remove_const<T>::type,
577  typename std::remove_const<T2>::type>::value ||
578  T_is_void || Buffer<T2, D2>::T_is_void,
579  "type mismatch constructing Buffer");
580  }
581 
582 public:
583  /** Determine if a Buffer<T, D> can be constructed from some other Buffer type.
584  * If this can be determined at compile time, fail with a static assert; otherwise
585  * return a boolean based on runtime typing. */
586  template<typename T2, int D2>
587  static bool can_convert_from(const Buffer<T2, D2> &other) {
588  static_assert_can_convert_from<T2, D2>();
589  if (Buffer<T2, D2>::T_is_void && !T_is_void) {
590  return other.type() == static_halide_type();
591  }
592  return true;
593  }
594 
595  /** Fail an assertion at runtime or compile-time if a Buffer<T, D>
596  * cannot be constructed from some other Buffer type. */
597  template<typename T2, int D2>
598  static void assert_can_convert_from(const Buffer<T2, D2> &other) {
599  // Explicitly call static_assert_can_convert_from() here so
600  // that we always get compile-time checking, even if compiling with
601  // assertions disabled.
602  static_assert_can_convert_from<T2, D2>();
603  assert(can_convert_from(other));
604  }
605 
606  /** Copy constructor. Does not copy underlying data. */
607  Buffer(const Buffer<T, D> &other)
608  : buf(other.buf),
609  alloc(other.alloc) {
610  other.incref();
611  dev_ref_count = other.dev_ref_count;
612  copy_shape_from(other.buf);
613  }
614 
615  /** Construct a Buffer from a Buffer of different dimensionality
616  * and type. Asserts that the type matches (at runtime, if one of
617  * the types is void). Note that this constructor is
618  * implicit. This, for example, lets you pass things like
619  * Buffer<T> or Buffer<const void> to functions expecting
620  * Buffer<const T>. */
621  template<typename T2, int D2>
622  Buffer(const Buffer<T2, D2> &other)
623  : buf(other.buf),
624  alloc(other.alloc) {
625  assert_can_convert_from(other);
626  other.incref();
627  dev_ref_count = other.dev_ref_count;
628  copy_shape_from(other.buf);
629  }
630 
631  /** Move constructor */
632  Buffer(Buffer<T, D> &&other) noexcept
633  : buf(other.buf),
634  alloc(other.alloc),
635  dev_ref_count(other.dev_ref_count) {
636  other.dev_ref_count = nullptr;
637  other.alloc = nullptr;
638  move_shape_from(std::forward<Buffer<T, D>>(other));
639  other.buf = halide_buffer_t();
640  }
641 
642  /** Move-construct a Buffer from a Buffer of different
643  * dimensionality and type. Asserts that the types match (at
644  * runtime if one of the types is void). */
645  template<typename T2, int D2>
646  Buffer(Buffer<T2, D2> &&other)
647  : buf(other.buf),
648  alloc(other.alloc),
649  dev_ref_count(other.dev_ref_count) {
650  assert_can_convert_from(other);
651  other.dev_ref_count = nullptr;
652  other.alloc = nullptr;
653  move_shape_from(std::forward<Buffer<T2, D2>>(other));
654  other.buf = halide_buffer_t();
655  }
656 
657  /** Assign from another Buffer of possibly-different
658  * dimensionality and type. Asserts that the types match (at
659  * runtime if one of the types is void). */
660  template<typename T2, int D2>
661  Buffer<T, D> &operator=(const Buffer<T2, D2> &other) {
662  if ((const void *)this == (const void *)&other) {
663  return *this;
664  }
665  assert_can_convert_from(other);
666  other.incref();
667  decref();
668  dev_ref_count = other.dev_ref_count;
669  alloc = other.alloc;
670  free_shape_storage();
671  buf = other.buf;
672  copy_shape_from(other.buf);
673  return *this;
674  }
675 
676  /** Standard assignment operator */
677  Buffer<T, D> &operator=(const Buffer<T, D> &other) {
678  // The cast to void* here is just to satisfy clang-tidy
679  if ((const void *)this == (const void *)&other) {
680  return *this;
681  }
682  other.incref();
683  decref();
684  dev_ref_count = other.dev_ref_count;
685  alloc = other.alloc;
686  free_shape_storage();
687  buf = other.buf;
688  copy_shape_from(other.buf);
689  return *this;
690  }
691 
692  /** Move from another Buffer of possibly-different
693  * dimensionality and type. Asserts that the types match (at
694  * runtime if one of the types is void). */
695  template<typename T2, int D2>
696  Buffer<T, D> &operator=(Buffer<T2, D2> &&other) {
697  assert_can_convert_from(other);
698  decref();
699  alloc = other.alloc;
700  other.alloc = nullptr;
701  dev_ref_count = other.dev_ref_count;
702  other.dev_ref_count = nullptr;
703  free_shape_storage();
704  buf = other.buf;
705  move_shape_from(std::forward<Buffer<T2, D2>>(other));
706  other.buf = halide_buffer_t();
707  return *this;
708  }
709 
710  /** Standard move-assignment operator */
711  Buffer<T, D> &operator=(Buffer<T, D> &&other) noexcept {
712  decref();
713  alloc = other.alloc;
714  other.alloc = nullptr;
715  dev_ref_count = other.dev_ref_count;
716  other.dev_ref_count = nullptr;
717  free_shape_storage();
718  buf = other.buf;
719  move_shape_from(std::forward<Buffer<T, D>>(other));
720  other.buf = halide_buffer_t();
721  return *this;
722  }
723 
724  /** Check the product of the extents fits in memory. */
725  void check_overflow() {
726  size_t size = type().bytes();
727  for (int i = 0; i < dimensions(); i++) {
728  size *= dim(i).extent();
729  }
730  // We allow 2^31 or 2^63 bytes, so drop the top bit.
731  size = (size << 1) >> 1;
732  for (int i = 0; i < dimensions(); i++) {
733  size /= dim(i).extent();
734  }
735  assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
736  }
737 
738  /** Allocate memory for this Buffer. Drops the reference to any
739  * owned memory. */
740  void allocate(void *(*allocate_fn)(size_t) = nullptr,
741  void (*deallocate_fn)(void *) = nullptr) {
742  if (!allocate_fn) {
743  allocate_fn = malloc;
744  }
745  if (!deallocate_fn) {
746  deallocate_fn = free;
747  }
748 
749  // Drop any existing allocation
750  deallocate();
751 
752  // Conservatively align images to 128 bytes. This is enough
753  // alignment for all the platforms we might use.
754  size_t size = size_in_bytes();
755  const size_t alignment = 128;
756  size = (size + alignment - 1) & ~(alignment - 1);
757  void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
758  alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
759  uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
760  buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
761  }
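
// [Example: editorial sketch, not part of HalideBuffer.h] Passing a custom
// allocator/deallocator pair to allocate(); the counting_* functions are hypothetical
// and only need the signatures void *(size_t) and void (void *).
//
//   void *counting_malloc(size_t bytes) { return malloc(bytes); }
//   void counting_free(void *ptr) { free(ptr); }
//
//   Halide::Runtime::Buffer<float> im(nullptr, 256, 256);  // shape only, no storage yet
//   im.allocate(counting_malloc, counting_free);           // now owns 128-byte-aligned storage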
762 
763  /** Drop reference to any owned host or device memory, possibly
764  * freeing it, if this buffer held the last reference to
765  * it. Retains the shape of the buffer. Does nothing if this
766  * buffer did not allocate its own memory. */
767  void deallocate() {
768  decref();
769  }
770 
771  /** Drop reference to any owned device memory, possibly freeing it
772  * if this buffer held the last reference to it. Asserts that
773  * device_dirty is false. */
774  void device_deallocate() {
775  decref(true);
776  }
777 
778  /** Allocate a new image of the given size with a runtime
779  * type. Only used when you do know what size you want but you
780  * don't know statically what type the elements are. Pass zeroes
781  * to make a buffer suitable for bounds query calls. */
782  template<typename... Args,
783  typename = typename std::enable_if<AllInts<Args...>::value>::type>
784  Buffer(halide_type_t t, int first, Args... rest) {
785  if (!T_is_void) {
786  assert(static_halide_type() == t);
787  }
788  int extents[] = {first, (int)rest...};
789  buf.type = t;
790  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
791  make_shape_storage(buf_dimensions);
792  initialize_shape(extents);
793  if (!Internal::any_zero(extents)) {
794  check_overflow();
795  allocate();
796  }
797  }
798 
799  /** Allocate a new image of the given size. Pass zeroes to make a
800  * buffer suitable for bounds query calls. */
801  // @{
802 
803  // The overload with one argument is 'explicit', so that
804  // (say) int is not implicitly convertible to Buffer<int>
805  explicit Buffer(int first) {
806  static_assert(!T_is_void,
807  "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
808  int extents[] = {first};
809  buf.type = static_halide_type();
810  constexpr int buf_dimensions = 1;
811  make_shape_storage(buf_dimensions);
812  initialize_shape(extents);
813  if (first != 0) {
814  check_overflow();
815  allocate();
816  }
817  }
818 
819  template<typename... Args,
820  typename = typename std::enable_if<AllInts<Args...>::value>::type>
821  Buffer(int first, int second, Args... rest) {
822  static_assert(!T_is_void,
823  "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
824  int extents[] = {first, second, (int)rest...};
825  buf.type = static_halide_type();
826  constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
827  make_shape_storage(buf_dimensions);
828  initialize_shape(extents);
829  if (!Internal::any_zero(extents)) {
830  check_overflow();
831  allocate();
832  }
833  }
834  // @}
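
// [Example: editorial sketch, not part of HalideBuffer.h] Typical uses of the sized
// constructors above; all sizes are hypothetical.
//
//   Halide::Runtime::Buffer<uint8_t> gray(640, 480);                       // statically typed, 2D
//   Halide::Runtime::Buffer<> dynamic(halide_type_of<float>(), 640, 480);  // runtime-typed
//   Halide::Runtime::Buffer<int32_t> bounds_query(0, 0);                   // zero extents: no allocation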
835 
836  /** Allocate a new image of unknown type using a vector of ints as the size. */
837  Buffer(halide_type_t t, const std::vector<int> &sizes) {
838  if (!T_is_void) {
839  assert(static_halide_type() == t);
840  }
841  buf.type = t;
842  make_shape_storage((int)sizes.size());
843  initialize_shape(sizes);
844  if (!Internal::any_zero(sizes)) {
845  check_overflow();
846  allocate();
847  }
848  }
849 
850  /** Allocate a new image of known type using a vector of ints as the size. */
851  explicit Buffer(const std::vector<int> &sizes)
852  : Buffer(static_halide_type(), sizes) {
853  }
854 
855 private:
856  // Create a copy of the sizes vector, ordered as specified by order.
857  static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
858  assert(order.size() == sizes.size());
859  std::vector<int> ordered_sizes(sizes.size());
860  for (size_t i = 0; i < sizes.size(); ++i) {
861  ordered_sizes[i] = sizes.at(order[i]);
862  }
863  return ordered_sizes;
864  }
865 
866 public:
867  /** Allocate a new image of unknown type using a vector of ints as the size and
868  * a vector of indices indicating the storage order for each dimension. The
869  * length of the sizes vector and the storage-order vector must match. For instance,
870  * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
871  Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
872  : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
873  transpose(storage_order);
874  }
875 
876  Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
877  : Buffer(static_halide_type(), sizes, storage_order) {
878  }
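
// [Example: editorial sketch, not part of HalideBuffer.h] The storage_order form above,
// used to build the interleaved RGB layout the comment describes:
//
//   Halide::Runtime::Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
//   // rgb(x, y, c) still indexes as x, y, c, but rgb.dim(2).stride() == 1,
//   // i.e. the channel dimension is innermost in memory.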
879 
880  /** Make a Buffer that refers to a statically sized array. Does not
881  * take ownership of the data, and does not set the host_dirty flag. */
882  template<typename Array, size_t N>
883  explicit Buffer(Array (&vals)[N]) {
884  const int buf_dimensions = dimensionality_of_array(vals);
885  buf.type = scalar_type_of_array(vals);
886  buf.host = (uint8_t *)vals;
887  make_shape_storage(buf_dimensions);
888  initialize_shape_from_array_shape(buf.dimensions - 1, vals);
889  }
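
// [Example: editorial sketch, not part of HalideBuffer.h] Wrapping a static C array;
// the array name is hypothetical. The innermost array dimension becomes dimension 0.
//
//   int table[4][10];
//   Halide::Runtime::Buffer<int> view(table);   // 10 x 4, refers to table, no copy
//   // view(x, y) aliases table[y][x]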
890 
891  /** Initialize a Buffer of runtime type from a pointer and some
892  * sizes. Assumes dense row-major packing and a min coordinate of
893  * zero. Does not take ownership of the data and does not set the
894  * host_dirty flag. */
895  template<typename... Args,
896  typename = typename std::enable_if<AllInts<Args...>::value>::type>
897  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
898  if (!T_is_void) {
899  assert(static_halide_type() == t);
900  }
901  int extents[] = {first, (int)rest...};
902  buf.type = t;
903  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
904  buf.host = (uint8_t *)const_cast<void *>(data);
905  make_shape_storage(buf_dimensions);
906  initialize_shape(extents);
907  }
908 
909  /** Initialize a Buffer from a pointer and some sizes. Assumes
910  * dense row-major packing and a min coordinate of zero. Does not
911  * take ownership of the data and does not set the host_dirty flag. */
912  template<typename... Args,
913  typename = typename std::enable_if<AllInts<Args...>::value>::type>
914  explicit Buffer(T *data, int first, Args &&...rest) {
915  int extents[] = {first, (int)rest...};
916  buf.type = static_halide_type();
917  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
918  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
919  make_shape_storage(buf_dimensions);
920  initialize_shape(extents);
921  }
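
// [Example: editorial sketch, not part of HalideBuffer.h] Wrapping existing dense
// row-major memory without taking ownership; the vector is hypothetical.
//
//   std::vector<float> data(640 * 480);
//   Halide::Runtime::Buffer<float> view(data.data(), 640, 480);
//   // view(x, y) aliases data[y * 640 + x]; deallocate() is a no-op for this view.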
922 
923  /** Initialize a Buffer from a pointer and a vector of
924  * sizes. Assumes dense row-major packing and a min coordinate of
925  * zero. Does not take ownership of the data and does not set the
926  * host_dirty flag. */
927  explicit Buffer(T *data, const std::vector<int> &sizes) {
928  buf.type = static_halide_type();
929  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
930  make_shape_storage((int)sizes.size());
931  initialize_shape(sizes);
932  }
933 
934  /** Initialize a Buffer of runtime type from a pointer and a
935  * vector of sizes. Assumes dense row-major packing and a min
936  * coordinate of zero. Does not take ownership of the data and
937  * does not set the host_dirty flag. */
938  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
939  if (!T_is_void) {
940  assert(static_halide_type() == t);
941  }
942  buf.type = t;
943  buf.host = (uint8_t *)const_cast<void *>(data);
944  make_shape_storage((int)sizes.size());
945  initialize_shape(sizes);
946  }
947 
948  /** Initialize a Buffer from a pointer to the min coordinate and
949  * an array describing the shape. Does not take ownership of the
950  * data, and does not set the host_dirty flag. */
951  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
952  if (!T_is_void) {
953  assert(static_halide_type() == t);
954  }
955  buf.type = t;
956  buf.host = (uint8_t *)const_cast<void *>(data);
957  make_shape_storage(d);
958  for (int i = 0; i < d; i++) {
959  buf.dim[i] = shape[i];
960  }
961  }
962 
963  /** Initialize a Buffer from a pointer to the min coordinate and
964  * a vector describing the shape. Does not take ownership of the
965  * data, and does not set the host_dirty flag. */
966  explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
967  const std::vector<halide_dimension_t> &shape)
968  : Buffer(t, data, (int)shape.size(), shape.data()) {
969  }
970 
971  /** Initialize a Buffer from a pointer to the min coordinate and
972  * an array describing the shape. Does not take ownership of the
973  * data and does not set the host_dirty flag. */
974  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
975  buf.type = static_halide_type();
976  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
977  make_shape_storage(d);
978  for (int i = 0; i < d; i++) {
979  buf.dim[i] = shape[i];
980  }
981  }
982 
983  /** Initialize a Buffer from a pointer to the min coordinate and
984  * a vector describing the shape. Does not take ownership of the
985  * data, and does not set the host_dirty flag. */
986  explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
987  : Buffer(data, (int)shape.size(), shape.data()) {
988  }
989 
990  /** Destructor. Will release any underlying owned allocation if
991  * this is the last reference to it. Will assert fail if there are
992  * weak references to this Buffer outstanding. */
993  ~Buffer() {
994  free_shape_storage();
995  decref();
996  }
997 
998  /** Get a pointer to the raw halide_buffer_t this wraps. */
999  // @{
1000  halide_buffer_t *raw_buffer() {
1001  return &buf;
1002  }
1003 
1004  const halide_buffer_t *raw_buffer() const {
1005  return &buf;
1006  }
1007  // @}
1008 
1009  /** Provide a cast operator to halide_buffer_t *, so that
1010  * instances can be passed directly to Halide filters. */
1011  operator halide_buffer_t *() {
1012  return &buf;
1013  }
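
// [Example: editorial sketch, not part of HalideBuffer.h] The conversion operator above
// lets Buffers be passed straight to AOT-compiled pipelines; "brighten" is a hypothetical
// generated function whose parameters are halide_buffer_t *.
//
//   Halide::Runtime::Buffer<uint8_t> in(640, 480), out(640, 480);
//   int error = brighten(in, out);   // implicit conversion to halide_buffer_t *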
1014 
1015  /** Return a typed reference to this Buffer. Useful for converting
1016  * a reference to a Buffer<void> to a reference to, for example, a
1017  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1018  * Does a runtime assert if the source buffer type is void. */
1019  template<typename T2>
1020  HALIDE_ALWAYS_INLINE Buffer<T2, D> &as() & {
1021  Buffer<T2, D>::assert_can_convert_from(*this);
1022  return *((Buffer<T2, D> *)this);
1023  }
1024 
1025  /** Return a const typed reference to this Buffer. Useful for
1026  * converting a const reference to one Buffer type to a const
1027  * reference to another Buffer type. Does a runtime assert if the
1028  * source buffer type is void. */
1029  template<typename T2>
1030  HALIDE_ALWAYS_INLINE const Buffer<T2, D> &as() const & {
1031  Buffer<T2, D>::assert_can_convert_from(*this);
1032  return *((const Buffer<T2, D> *)this);
1033  }
1034 
1035  /** Returns this rval Buffer with a different type attached. Does
1036  * a dynamic type check if the source type is void. */
1037  template<typename T2>
1038  HALIDE_ALWAYS_INLINE Buffer<T2, D> as() && {
1039  Buffer<T2, D>::assert_can_convert_from(*this);
1040  return *((Buffer<T2, D> *)this);
1041  }
1042 
1043  /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1044  * to recapitulate the type argument. */
1045  // @{
1046  HALIDE_ALWAYS_INLINE
1047  Buffer<typename std::add_const<T>::type, D> &as_const() & {
1048  // Note that we can skip the assert_can_convert_from(), since T -> const T
1049  // conversion is always legal.
1050  return *((Buffer<typename std::add_const<T>::type, D> *)this);
1051  }
1052 
1053  HALIDE_ALWAYS_INLINE
1054  const Buffer<typename std::add_const<T>::type, D> &as_const() const & {
1055  return *((const Buffer<typename std::add_const<T>::type, D> *)this);
1056  }
1057 
1058  HALIDE_ALWAYS_INLINE
1059  Buffer<typename std::add_const<T>::type, D> as_const() && {
1060  return *((Buffer<typename std::add_const<T>::type, D> *)this);
1061  }
1062  // @}
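
// [Example: editorial sketch, not part of HalideBuffer.h] Re-typing a buffer without
// copying; the runtime check in as<T2>() fires if the element type does not match.
//
//   Halide::Runtime::Buffer<void> generic(halide_type_of<float>(), 16, 16);
//   Halide::Runtime::Buffer<float> &typed = generic.as<float>();        // ok: types agree
//   Halide::Runtime::Buffer<const float> &readonly = typed.as_const();  // always legal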
1063 
1064  /** Conventional names for the first three dimensions. */
1065  // @{
1066  int width() const {
1067  return (dimensions() > 0) ? dim(0).extent() : 1;
1068  }
1069  int height() const {
1070  return (dimensions() > 1) ? dim(1).extent() : 1;
1071  }
1072  int channels() const {
1073  return (dimensions() > 2) ? dim(2).extent() : 1;
1074  }
1075  // @}
1076 
1077  /** Conventional names for the min and max value of each dimension */
1078  // @{
1079  int left() const {
1080  return dim(0).min();
1081  }
1082 
1083  int right() const {
1084  return dim(0).max();
1085  }
1086 
1087  int top() const {
1088  return dim(1).min();
1089  }
1090 
1091  int bottom() const {
1092  return dim(1).max();
1093  }
1094  // @}
1095 
1096  /** Make a new image which is a deep copy of this image. Use crop
1097  * or slice followed by copy to make a copy of only a portion of
1098  * the image. The new image uses the same memory layout as the
1099  * original, with holes compacted away. Note that the returned
1100  * Buffer is always of a non-const type T (ie:
1101  *
1102  * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1103  *
1104  * which is always safe, since we are making a deep copy. (The caller
1105  * can easily cast it back to Buffer<const T> if desired, which is
1106  * always safe and free.)
1107  */
1108  Buffer<not_const_T, D> copy(void *(*allocate_fn)(size_t) = nullptr,
1109  void (*deallocate_fn)(void *) = nullptr) const {
1110  Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1111  dst.copy_from(*this);
1112  return dst;
1113  }
1114 
1115  /** Like copy(), but the copy is created in interleaved memory layout
1116  * (vs. keeping the same memory layout as the original). Requires that 'this'
1117  * has exactly 3 dimensions.
1118  */
1119  Buffer<not_const_T, D> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1120  void (*deallocate_fn)(void *) = nullptr) const {
1121  assert(dimensions() == 3);
1122  Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_interleaved(nullptr, width(), height(), channels());
1123  dst.set_min(min(0), min(1), min(2));
1124  dst.allocate(allocate_fn, deallocate_fn);
1125  dst.copy_from(*this);
1126  return dst;
1127  }
1128 
1129  /** Like copy(), but the copy is created in planar memory layout
1130  * (vs. keeping the same memory layout as the original).
1131  */
1132  Buffer<not_const_T, D> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1133  void (*deallocate_fn)(void *) = nullptr) const {
1134  std::vector<int> mins, extents;
1135  const int dims = dimensions();
1136  mins.reserve(dims);
1137  extents.reserve(dims);
1138  for (int d = 0; d < dims; ++d) {
1139  mins.push_back(dim(d).min());
1140  extents.push_back(dim(d).extent());
1141  }
1142  Buffer<not_const_T, D> dst(nullptr, extents);
1143  dst.set_min(mins);
1144  dst.allocate(allocate_fn, deallocate_fn);
1145  dst.copy_from(*this);
1146  return dst;
1147  }
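
// [Example: editorial sketch, not part of HalideBuffer.h] Deep-copy variants; the source
// image here is hypothetical and planar by default.
//
//   Halide::Runtime::Buffer<uint8_t> img(640, 480, 3);
//   Halide::Runtime::Buffer<uint8_t> deep = img.copy();                        // same layout
//   Halide::Runtime::Buffer<uint8_t> interleaved = img.copy_to_interleaved();  // channel-innermost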
1148 
1149  /** Make a copy of the Buffer which shares the underlying host and/or device
1150  * allocations with the existing Buffer. This is purely syntactic sugar for
1151  * cases where you have a const reference to a Buffer but need a temporary
1152  * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1153  * inline way to create a temporary. \code
1154  * void call_my_func(const Buffer<const uint8_t>& input) {
1155  * my_func(input.alias(), output);
1156  * }\endcode
1157  */
1158  inline Buffer<T, D> alias() const {
1159  return *this;
1160  }
1161 
1162  /** Fill a Buffer with the values at the same coordinates in
1163  * another Buffer. Restricts itself to coordinates contained
1164  * within the intersection of the two buffers. If the two Buffers
1165  * are not in the same coordinate system, you will need to
1166  * translate the argument Buffer first. E.g. if you're blitting a
1167  * sprite onto a framebuffer, you'll want to translate the sprite
1168  * to the correct location first like so: \code
1169  * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1170  */
1171  template<typename T2, int D2>
1172  void copy_from(Buffer<T2, D2> src) {
1173  static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1174  assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1175  assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1176 
1177  Buffer<T, D> dst(*this);
1178 
1179  assert(src.dimensions() == dst.dimensions());
1180 
1181  // Trim the copy to the region in common
1182  for (int i = 0; i < dimensions(); i++) {
1183  int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1184  int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1185  if (max_coord < min_coord) {
1186  // The buffers do not overlap.
1187  return;
1188  }
1189  dst.crop(i, min_coord, max_coord - min_coord + 1);
1190  src.crop(i, min_coord, max_coord - min_coord + 1);
1191  }
1192 
1193  // If T is void, we need to do runtime dispatch to an
1194  // appropriately-typed lambda. We're copying, so we only care
1195  // about the element size. (If not, this should optimize away
1196  // into a static dispatch to the right-sized copy.)
1197  if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1198  using MemType = uint8_t;
1199  auto &typed_dst = (Buffer<MemType, D> &)dst;
1200  auto &typed_src = (Buffer<const MemType, D> &)src;
1201  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1202  } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1203  using MemType = uint16_t;
1204  auto &typed_dst = (Buffer<MemType, D> &)dst;
1205  auto &typed_src = (Buffer<const MemType, D> &)src;
1206  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1207  } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1208  using MemType = uint32_t;
1209  auto &typed_dst = (Buffer<MemType, D> &)dst;
1210  auto &typed_src = (Buffer<const MemType, D> &)src;
1211  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1212  } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1213  using MemType = uint64_t;
1214  auto &typed_dst = (Buffer<MemType, D> &)dst;
1215  auto &typed_src = (Buffer<const MemType, D> &)src;
1216  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1217  } else {
1218  assert(false && "type().bytes() must be 1, 2, 4, or 8");
1219  }
1220  set_host_dirty();
1221  }
1222 
1223  /** Make an image that refers to a sub-range of this image along
1224  * the given dimension. Asserts that the crop region is within
1225  * the existing bounds: you cannot "crop outwards", even if you know there
1226  * is valid Buffer storage (e.g. because you already cropped inwards). */
1227  Buffer<T, D> cropped(int d, int min, int extent) const {
1228  // Make a fresh copy of the underlying buffer (but not a fresh
1229  // copy of the allocation, if there is one).
1230  Buffer<T, D> im = *this;
1231 
1232  // This guarantees the preexisting device ref is dropped if the
1233  // device_crop call fails and maintains the buffer in a consistent
1234  // state.
1235  im.device_deallocate();
1236 
1237  im.crop_host(d, min, extent);
1238  if (buf.device_interface != nullptr) {
1239  complete_device_crop(im);
1240  }
1241  return im;
1242  }
1243 
1244  /** Crop an image in-place along the given dimension. This does
1245  * not move any data around in memory - it just changes the min
1246  * and extent of the given dimension. */
1247  void crop(int d, int min, int extent) {
1248  // An optimization for non-device buffers. For the device case,
1249  // a temp buffer is required, so reuse the not-in-place version.
1250  // TODO(zalman|abadams): Are nop crops common enough to special
1251  // case the device part of the if to do nothing?
1252  if (buf.device_interface != nullptr) {
1253  *this = cropped(d, min, extent);
1254  } else {
1255  crop_host(d, min, extent);
1256  }
1257  }
1258 
1259  /** Make an image that refers to a sub-rectangle of this image along
1260  * the first N dimensions. Asserts that the crop region is within
1261  * the existing bounds. The cropped image may drop any device handle
1262  * if the device_interface cannot accomplish the crop in-place. */
1263  Buffer<T, D> cropped(const std::vector<std::pair<int, int>> &rect) const {
1264  // Make a fresh copy of the underlying buffer (but not a fresh
1265  // copy of the allocation, if there is one).
1266  Buffer<T, D> im = *this;
1267 
1268  // This guarantees the preexisting device ref is dropped if the
1269  // device_crop call fails and maintains the buffer in a consistent
1270  // state.
1271  im.device_deallocate();
1272 
1273  im.crop_host(rect);
1274  if (buf.device_interface != nullptr) {
1275  complete_device_crop(im);
1276  }
1277  return im;
1278  }
1279 
1280  /** Crop an image in-place along the first N dimensions. This does
1281  * not move any data around in memory, nor does it free memory. It
1282  * just rewrites the min/extent of each dimension to refer to a
1283  * subregion of the same allocation. */
1284  void crop(const std::vector<std::pair<int, int>> &rect) {
1285  // An optimization for non-device buffers. For the device case,
1286  // a temp buffer is required, so reuse the not-in-place version.
1287  // TODO(zalman|abadams): Are nop crops common enough to special
1288  // case the device part of the if to do nothing?
1289  if (buf.device_interface != nullptr) {
1290  *this = cropped(rect);
1291  } else {
1292  crop_host(rect);
1293  }
1294  }
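
// [Example: editorial sketch, not part of HalideBuffer.h] Cropping to a window that
// shares the original storage; the coordinates are hypothetical and must lie inside
// the existing bounds.
//
//   Halide::Runtime::Buffer<float> im(800, 600);
//   Halide::Runtime::Buffer<float> window = im.cropped({{100, 256}, {50, 256}});
//   // window.dim(0).min() == 100, and writes through window are visible in im.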
1295 
1296  /** Make an image which refers to the same data using
1297  * translated coordinates in the given dimension. Positive values
1298  * move the image data to the right or down relative to the
1299  * coordinate system. Drops any device handle. */
1300  Buffer<T, D> translated(int d, int dx) const {
1301  Buffer<T, D> im = *this;
1302  im.translate(d, dx);
1303  return im;
1304  }
1305 
1306  /** Translate an image in-place along one dimension by changing
1307  * how it is indexed. Does not move any data around in memory. */
1308  void translate(int d, int delta) {
1309  assert(d >= 0 && d < this->dimensions());
1310  device_deallocate();
1311  buf.dim[d].min += delta;
1312  }
1313 
1314  /** Make an image which refers to the same data translated along
1315  * the first N dimensions. */
1316  Buffer<T, D> translated(const std::vector<int> &delta) const {
1317  Buffer<T, D> im = *this;
1318  im.translate(delta);
1319  return im;
1320  }
1321 
1322  /** Translate an image along the first N dimensions by changing
1323  * how it is indexed. Does not move any data around in memory. */
1324  void translate(const std::vector<int> &delta) {
1325  device_deallocate();
1326  assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1327  int limit = (int)delta.size();
1328  assert(limit <= dimensions());
1329  for (int i = 0; i < limit; i++) {
1330  translate(i, delta[i]);
1331  }
1332  }
1333 
1334  /** Set the min coordinate of an image in the first N dimensions. */
1335  // @{
1336  void set_min(const std::vector<int> &mins) {
1337  assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1338  device_deallocate();
1339  for (size_t i = 0; i < mins.size(); i++) {
1340  buf.dim[i].min = mins[i];
1341  }
1342  }
1343 
1344  template<typename... Args>
1345  void set_min(Args... args) {
1346  set_min(std::vector<int>{args...});
1347  }
1348  // @}
1349 
1350  /** Test if a given coordinate is within the bounds of an image. */
1351  // @{
1352  bool contains(const std::vector<int> &coords) const {
1353  assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1354  for (size_t i = 0; i < coords.size(); i++) {
1355  if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1356  return false;
1357  }
1358  }
1359  return true;
1360  }
1361 
1362  template<typename... Args>
1363  bool contains(Args... args) const {
1364  return contains(std::vector<int>{args...});
1365  }
1366  // @}
1367 
1368  /** Make a buffer which refers to the same data in the same layout
1369  * using a swapped indexing order for the dimensions given. So
1370  * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1371  * strongly that A.address_of(i, j) == B.address_of(j, i). */
1372  Buffer<T, D> transposed(int d1, int d2) const {
1373  Buffer<T, D> im = *this;
1374  im.transpose(d1, d2);
1375  return im;
1376  }
1377 
1378  /** Transpose a buffer in-place by changing how it is indexed. For
1379  * example, transpose(0, 1) on a two-dimensional buffer means that
1380  * the value referred to by coordinates (i, j) is now reached at
1381  * the coordinates (j, i), and vice versa. This is done by
1382  * reordering the per-dimension metadata rather than by moving
1383  * data around in memory, so other views of the same memory will
1384  * not see the data as having been transposed. */
1385  void transpose(int d1, int d2) {
1386  assert(d1 >= 0 && d1 < this->dimensions());
1387  assert(d2 >= 0 && d2 < this->dimensions());
1388  std::swap(buf.dim[d1], buf.dim[d2]);
1389  }
1390 
1391  /** A generalized transpose: instead of swapping two dimensions,
1392  * pass a vector that lists each dimension index exactly once, in
1393  * the desired order. This does not move any data around in memory
1394  * - it just permutes how it is indexed. */
1395  void transpose(const std::vector<int> &order) {
1396  assert((int)order.size() == dimensions());
1397  if (dimensions() < 2) {
1398  // My, that was easy
1399  return;
1400  }
1401 
1402  std::vector<int> order_sorted = order;
1403  for (size_t i = 1; i < order_sorted.size(); i++) {
1404  for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1405  std::swap(order_sorted[j], order_sorted[j - 1]);
1406  transpose(j, j - 1);
1407  }
1408  }
1409  }
1410 
1411  /** Make a buffer which refers to the same data in the same
1412  * layout using a different ordering of the dimensions. */
1413  Buffer<T, D> transposed(const std::vector<int> &order) const {
1414  Buffer<T, D> im = *this;
1415  im.transpose(order);
1416  return im;
1417  }
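
// [Example: editorial sketch, not part of HalideBuffer.h] Transposition only permutes
// the per-dimension metadata:
//
//   Halide::Runtime::Buffer<float> a(4, 8);
//   Halide::Runtime::Buffer<float> b = a.transposed(0, 1);
//   // b(i, j) aliases a(j, i); b.dim(0).extent() == 8 and no data moved.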
1418 
1419  /** Make a lower-dimensional buffer that refers to one slice of
1420  * this buffer. */
1421  Buffer<T, D> sliced(int d, int pos) const {
1422  Buffer<T, D> im = *this;
1423 
1424  // This guarantees the preexisting device ref is dropped if the
1425  // device_slice call fails and maintains the buffer in a consistent
1426  // state.
1427  im.device_deallocate();
1428 
1429  im.slice_host(d, pos);
1430  if (buf.device_interface != nullptr) {
1431  complete_device_slice(im, d, pos);
1432  }
1433  return im;
1434  }
1435 
1436  /** Make a lower-dimensional buffer that refers to one slice of this
1437  * buffer at the dimension's minimum. */
1438  inline Buffer<T, D> sliced(int d) const {
1439  return sliced(d, dim(d).min());
1440  }
1441 
1442  /** Rewrite the buffer to refer to a single lower-dimensional
1443  * slice of itself along the given dimension at the given
1444  * coordinate. Does not move any data around or free the original
1445  * memory, so other views of the same data are unaffected. */
1446  void slice(int d, int pos) {
1447  // An optimization for non-device buffers. For the device case,
1448  // a temp buffer is required, so reuse the not-in-place version.
1449  // TODO(zalman|abadams): Are nop slices common enough to special
1450  // case the device part of the if to do nothing?
1451  if (buf.device_interface != nullptr) {
1452  *this = sliced(d, pos);
1453  } else {
1454  slice_host(d, pos);
1455  }
1456  }
1457 
1458  /** Slice a buffer in-place at the dimension's minimum. */
1459  inline void slice(int d) {
1460  slice(d, dim(d).min());
1461  }
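
// [Example: editorial sketch, not part of HalideBuffer.h] Slicing out one plane of a
// hypothetical 3D volume:
//
//   Halide::Runtime::Buffer<uint8_t> vol(64, 64, 16);
//   Halide::Runtime::Buffer<uint8_t> plane = vol.sliced(2, 5);  // 2D view of z == 5
//   // plane(x, y) aliases vol(x, y, 5); plane.dimensions() == 2.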
1462 
1463  /** Make a new buffer that views this buffer as a single slice in a
1464  * higher-dimensional space. The new dimension has extent one and
1465  * the given min. This operation is the opposite of slice. As an
1466  * example, the following condition is true:
1467  *
1468  \code
1469  im2 = im.embedded(1, 17);
1470  &im(x, y, c) == &im2(x, 17, y, c);
1471  \endcode
1472  */
1473  Buffer<T, D> embedded(int d, int pos = 0) const {
1474  Buffer<T, D> im(*this);
1475  im.embed(d, pos);
1476  return im;
1477  }
1478 
1479  /** Embed a buffer in-place, increasing the
1480  * dimensionality. */
1481  void embed(int d, int pos = 0) {
1482  assert(d >= 0 && d <= dimensions());
1483  add_dimension();
1484  translate(dimensions() - 1, pos);
1485  for (int i = dimensions() - 1; i > d; i--) {
1486  transpose(i, i - 1);
1487  }
1488  }
1489 
1490  /** Add a new dimension with a min of zero and an extent of
1491  * one. The stride is the extent of the outermost dimension times
1492  * its stride. The new dimension is the last dimension. This is a
1493  * special case of embed. */
1494  void add_dimension() {
1495  const int dims = buf.dimensions;
1496  buf.dimensions++;
1497  if (buf.dim != shape) {
1498  // We're already on the heap. Reallocate.
1499  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1500  for (int i = 0; i < dims; i++) {
1501  new_shape[i] = buf.dim[i];
1502  }
1503  delete[] buf.dim;
1504  buf.dim = new_shape;
1505  } else if (dims == D) {
1506  // Transition from the in-class storage to the heap
1507  make_shape_storage(buf.dimensions);
1508  for (int i = 0; i < dims; i++) {
1509  buf.dim[i] = shape[i];
1510  }
1511  } else {
1512  // We still fit in the class
1513  }
1514  buf.dim[dims] = {0, 1, 0};
1515  if (dims == 0) {
1516  buf.dim[dims].stride = 1;
1517  } else {
1518  buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1519  }
1520  }
1521 
1522  /** Add a new dimension with a min of zero, an extent of one, and
1523  * the specified stride. The new dimension is the last
1524  * dimension. This is a special case of embed. */
1525  void add_dimension_with_stride(int s) {
1526  add_dimension();
1527  buf.dim[buf.dimensions - 1].stride = s;
1528  }
1529 
1530  /** Methods for managing any GPU allocation. */
1531  // @{
1532  // Set the host dirty flag. Called by every operator()
1533  // access. Must be inlined so it can be hoisted out of loops.
1534  HALIDE_ALWAYS_INLINE
1535  void set_host_dirty(bool v = true) {
1536  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1537  buf.set_host_dirty(v);
1538  }
1539 
1540  // Check if the device allocation is dirty. Called by
1541  // set_host_dirty, which is called by every accessor. Must be
1542  // inlined so it can be hoisted out of loops.
1543  HALIDE_ALWAYS_INLINE
1544  bool device_dirty() const {
1545  return buf.device_dirty();
1546  }
1547 
1548  bool host_dirty() const {
1549  return buf.host_dirty();
1550  }
1551 
1552  void set_device_dirty(bool v = true) {
1553  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1554  buf.set_device_dirty(v);
1555  }
1556 
1557  int copy_to_host(void *ctx = nullptr) {
1558  if (device_dirty()) {
1559  return buf.device_interface->copy_to_host(ctx, &buf);
1560  }
1561  return 0;
1562  }
1563 
1564  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1565  if (host_dirty()) {
1566  return device_interface->copy_to_device(ctx, &buf, device_interface);
1567  }
1568  return 0;
1569  }
1570 
1571  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1572  return device_interface->device_malloc(ctx, &buf, device_interface);
1573  }
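
// [Example: editorial sketch, not part of HalideBuffer.h] A typical host/device round
// trip. halide_cuda_device_interface() is declared in HalideRuntimeCuda.h and assumes
// the CUDA runtime module is linked in.
//
//   Halide::Runtime::Buffer<float> im(1024, 1024);
//   im(0, 0) = 1.0f;
//   im.set_host_dirty();                                   // host now has the newest data
//   im.copy_to_device(halide_cuda_device_interface());     // copies because host_dirty()
//   // ... run a pipeline that writes im on the GPU, marking it device_dirty ...
//   im.copy_to_host();                                     // copy back before reading on the host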
1574 
1575  int device_free(void *ctx = nullptr) {
1576  if (dev_ref_count) {
1577  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1578  "Can't call device_free on an unmanaged or wrapped native device handle. "
1579  "Free the source allocation or call device_detach_native instead.");
1580  // Multiple people may be holding onto this dev field
1581  assert(dev_ref_count->count == 1 &&
1582  "Multiple Halide::Runtime::Buffer objects share this device "
1583  "allocation. Freeing it would create dangling references. "
1584  "Don't call device_free on Halide buffers that you have copied or "
1585  "passed by value.");
1586  }
1587  int ret = 0;
1588  if (buf.device_interface) {
1589  ret = buf.device_interface->device_free(ctx, &buf);
1590  }
1591  if (dev_ref_count) {
1592  delete dev_ref_count;
1593  dev_ref_count = nullptr;
1594  }
1595  return ret;
1596  }
1597 
1598  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1599  uint64_t handle, void *ctx = nullptr) {
1600  assert(device_interface);
1601  dev_ref_count = new DeviceRefCount;
1602  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1603  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1604  }
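 // Illustrative sketch (editorial addition); `iface` and `native_handle` are hypothetical
 // placeholders for a device interface and an existing device allocation:
 //
 //     Buffer<float> im((float *)nullptr, 640, 480);   // shape only, no host storage
 //     im.device_wrap_native(iface, native_handle);
 //     // ... use im on the device ...
 //     im.device_detach_native();                      // drops the wrapper, not the native handle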
1605 
1606  int device_detach_native(void *ctx = nullptr) {
1607  assert(dev_ref_count &&
1608  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1609  "Only call device_detach_native on buffers wrapping a native "
1610  "device handle via device_wrap_native. This buffer was allocated "
1611  "using device_malloc, or is unmanaged. "
1612  "Call device_free or free the original allocation instead.");
1613  // Multiple people may be holding onto this dev field
1614  assert(dev_ref_count->count == 1 &&
1615  "Multiple Halide::Runtime::Buffer objects share this device "
1616  "allocation. Freeing it could create dangling references. "
1617  "Don't call device_detach_native on Halide buffers that you "
1618  "have copied or passed by value.");
1619  int ret = 0;
1620  if (buf.device_interface) {
1621  ret = buf.device_interface->detach_native(ctx, &buf);
1622  }
1623  delete dev_ref_count;
1624  dev_ref_count = nullptr;
1625  return ret;
1626  }
1627 
1628  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1629  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1630  }
1631 
1632  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1633  if (dev_ref_count) {
1634  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1635  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1636  "Free the source allocation or call device_detach_native instead.");
1637  // Multiple people may be holding onto this dev field
1638  assert(dev_ref_count->count == 1 &&
1639  "Multiple Halide::Runtime::Buffer objects share this device "
1640  "allocation. Freeing it would create dangling references. "
1641  "Don't call device_and_host_free on Halide buffers that you have copied or "
1642  "passed by value.");
1643  }
1644  int ret = 0;
1645  if (buf.device_interface) {
1646  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1647  }
1648  if (dev_ref_count) {
1649  delete dev_ref_count;
1650  dev_ref_count = nullptr;
1651  }
1652  return ret;
1653  }
1654 
1655  int device_sync(void *ctx = nullptr) {
1656  return buf.device_sync(ctx);
1657  }
1658 
1659  bool has_device_allocation() const {
1660  return buf.device != 0;
1661  }
1662 
1663  /** Return the method by which the device field is managed. */
1664  BufferDeviceOwnership device_ownership() const {
1665  if (dev_ref_count == nullptr) {
1666  return BufferDeviceOwnership::Allocated;
1667  }
1668  return dev_ref_count->ownership;
1669  }
1670  // @}
1671 
1672  /** If you use the (x, y, c) indexing convention, then Halide
1673  * Buffers are stored planar by default. This function constructs
1674  * an interleaved RGB or RGBA image that can still be indexed
1675  * using (x, y, c). Passing it to a generator requires that the
1676  * generator has been compiled with support for interleaved (also
1677  * known as packed or chunky) memory layouts. */
1678  static Buffer<void, D> make_interleaved(halide_type_t t, int width, int height, int channels) {
1679  Buffer<void, D> im(t, channels, width, height);
1680  // Note that this is equivalent to calling transpose({2, 0, 1}),
1681  // but slightly more efficient.
1682  im.transpose(0, 1);
1683  im.transpose(1, 2);
1684  return im;
1685  }
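 // Illustrative sketch (editorial addition): in the interleaved layout the channel
 // dimension has stride 1 and x steps over all channels (using the typed overload below):
 //
 //     auto im = Buffer<uint8_t>::make_interleaved(640, 480, 3);
 //     assert(im.dim(0).stride() == 3);    // x
 //     assert(im.dim(2).stride() == 1);    // c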
1686 
1687  /** If you use the (x, y, c) indexing convention, then Halide
1688  * Buffers are stored planar by default. This function constructs
1689  * an interleaved RGB or RGBA image that can still be indexed
1690  * using (x, y, c). Passing it to a generator requires that the
1691  * generator has been compiled with support for interleaved (also
1692  * known as packed or chunky) memory layouts. */
1693  static Buffer<T, D> make_interleaved(int width, int height, int channels) {
1694  return make_interleaved(static_halide_type(), width, height, channels).template as<T, D>();
1695  }
1696 
1697  /** Wrap an existing interleaved image. */
1698  static Buffer<add_const_if_T_is_const<void>, D>
1699  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1700  Buffer<add_const_if_T_is_const<void>, D> im(t, data, channels, width, height);
1701  im.transpose(0, 1);
1702  im.transpose(1, 2);
1703  return im;
1704  }
1705 
1706  /** Wrap an existing interleaved image. */
1707  static Buffer<T, D> make_interleaved(T *data, int width, int height, int channels) {
1708  return make_interleaved(static_halide_type(), data, width, height, channels).template as<T, D>();
1709  }
1710 
1711  /** Make a zero-dimensional Buffer */
1712  static Buffer<add_const_if_T_is_const<void>, D> make_scalar(halide_type_t t) {
1713  Buffer<add_const_if_T_is_const<void>, 1> buf(t, 1);
1714  buf.slice(0, 0);
1715  return buf;
1716  }
1717 
1718  /** Make a zero-dimensional Buffer */
1719  static Buffer<T, D> make_scalar() {
1720  Buffer<T, 1> buf(1);
1721  buf.slice(0, 0);
1722  return buf;
1723  }
1724 
1725  /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1726  static Buffer<T, D> make_scalar(T *data) {
1727  Buffer<T, 1> buf(data, 1);
1728  buf.slice(0, 0);
1729  return buf;
1730  }
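 // Illustrative sketch (editorial addition): a zero-dimensional Buffer holds a single
 // value accessed with no coordinates.
 //
 //     auto s = Buffer<int>::make_scalar();
 //     s() = 7;
 //     assert(s.dimensions() == 0 && s() == 7);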
1731 
1732  /** Make a buffer with the same shape and memory nesting order as
1733  * another buffer. It may have a different type. */
1734  template<typename T2, int D2>
1735  static Buffer<T, D> make_with_shape_of(Buffer<T2, D2> src,
1736  void *(*allocate_fn)(size_t) = nullptr,
1737  void (*deallocate_fn)(void *) = nullptr) {
1738 
1739  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1740  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1741  allocate_fn, deallocate_fn);
1742  }
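 // Illustrative sketch (editorial addition): the result copies src's extents and stride
 // nesting order, but owns fresh, densely packed storage and may change element type.
 //
 //     auto rgb  = Buffer<uint8_t>::make_interleaved(640, 480, 3);
 //     auto dest = Buffer<float>::make_with_shape_of(rgb);   // float, still interleaved
 //     assert(dest.dim(2).stride() == 1);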
1743 
1744 private:
1745  static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1746  int dimensions,
1747  halide_dimension_t *shape,
1748  void *(*allocate_fn)(size_t),
1749  void (*deallocate_fn)(void *)) {
1750  // Reorder the dimensions of src to have strides in increasing order
1751  std::vector<int> swaps;
1752  for (int i = dimensions - 1; i > 0; i--) {
1753  for (int j = i; j > 0; j--) {
1754  if (shape[j - 1].stride > shape[j].stride) {
1755  std::swap(shape[j - 1], shape[j]);
1756  swaps.push_back(j);
1757  }
1758  }
1759  }
1760 
1761  // Rewrite the strides to be dense (this messes up src, which
1762  // is why we took it by value).
1763  for (int i = 0; i < dimensions; i++) {
1764  if (i == 0) {
1765  shape[i].stride = 1;
1766  } else {
1767  shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
1768  }
1769  }
1770 
1771  // Undo the dimension reordering
1772  while (!swaps.empty()) {
1773  int j = swaps.back();
1774  std::swap(shape[j - 1], shape[j]);
1775  swaps.pop_back();
1776  }
1777 
1778  // Use an explicit runtime type, and make dst a Buffer<void>, to allow
1779  // using this method with Buffer<void> for either src or dst.
1780  Buffer<> dst(dst_type, nullptr, dimensions, shape);
1781  dst.allocate(allocate_fn, deallocate_fn);
1782 
1783  return dst;
1784  }
1785 
1786  template<typename... Args>
1787  HALIDE_ALWAYS_INLINE
1788  ptrdiff_t
1789  offset_of(int d, int first, Args... rest) const {
1790  return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
1791  }
1792 
1793  HALIDE_ALWAYS_INLINE
1794  ptrdiff_t offset_of(int d) const {
1795  return 0;
1796  }
1797 
1798  template<typename... Args>
1799  HALIDE_ALWAYS_INLINE
1800  storage_T *
1801  address_of(Args... args) const {
1802  if (T_is_void) {
1803  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1804  } else {
1805  return (storage_T *)(this->buf.host) + offset_of(0, args...);
1806  }
1807  }
1808 
1809  HALIDE_ALWAYS_INLINE
1810  ptrdiff_t offset_of(const int *pos) const {
1811  ptrdiff_t offset = 0;
1812  for (int i = this->dimensions() - 1; i >= 0; i--) {
1813  offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1814  }
1815  return offset;
1816  }
1817 
1818  HALIDE_ALWAYS_INLINE
1819  storage_T *address_of(const int *pos) const {
1820  if (T_is_void) {
1821  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1822  } else {
1823  return (storage_T *)this->buf.host + offset_of(pos);
1824  }
1825  }
1826 
1827 public:
1828  /** Get a pointer to the address of the min coordinate. */
1829  T *data() const {
1830  return (T *)(this->buf.host);
1831  }
1832 
1833  /** Access elements. Use im(...) to get a reference to an element,
1834  * and use &im(...) to get the address of an element. If you pass
1835  * fewer arguments than the buffer has dimensions, the rest are
1836  * treated as their min coordinate. The non-const versions set the
1837  * host_dirty flag to true.
1838  */
1839  //@{
1840  template<typename... Args,
1841  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1842  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
1843  static_assert(!T_is_void,
1844  "Cannot use operator() on Buffer<void> types");
1845  assert(!device_dirty());
1846  return *((const not_void_T *)(address_of(first, rest...)));
1847  }
1848 
1849  HALIDE_ALWAYS_INLINE
1850  const not_void_T &
1851  operator()() const {
1852  static_assert(!T_is_void,
1853  "Cannot use operator() on Buffer<void> types");
1854  assert(!device_dirty());
1855  return *((const not_void_T *)(data()));
1856  }
1857 
1858  HALIDE_ALWAYS_INLINE
1859  const not_void_T &
1860  operator()(const int *pos) const {
1861  static_assert(!T_is_void,
1862  "Cannot use operator() on Buffer<void> types");
1863  assert(!device_dirty());
1864  return *((const not_void_T *)(address_of(pos)));
1865  }
1866 
1867  template<typename... Args,
1868  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1869  HALIDE_ALWAYS_INLINE
1870  not_void_T &
1871  operator()(int first, Args... rest) {
1872  static_assert(!T_is_void,
1873  "Cannot use operator() on Buffer<void> types");
1874  set_host_dirty();
1875  return *((not_void_T *)(address_of(first, rest...)));
1876  }
1877 
1878  HALIDE_ALWAYS_INLINE
1879  not_void_T &
1880  operator()() {
1881  static_assert(!T_is_void,
1882  "Cannot use operator() on Buffer<void> types");
1883  set_host_dirty();
1884  return *((not_void_T *)(data()));
1885  }
1886 
1887  HALIDE_ALWAYS_INLINE
1888  not_void_T &
1889  operator()(const int *pos) {
1890  static_assert(!T_is_void,
1891  "Cannot use operator() on Buffer<void> types");
1892  set_host_dirty();
1893  return *((not_void_T *)(address_of(pos)));
1894  }
1895  // @}
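 // Illustrative sketch (editorial addition) of the indexing convention described above:
 //
 //     Buffer<float> im(4, 4);
 //     im(1, 2) = 3.5f;           // non-const access sets host_dirty
 //     float *p = &im(1, 2);      // address of an element
 //     float v = im(1);           // trailing coordinates default to their min (here 0)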
1896 
1897  /** Tests that all values in this buffer are equal to val. */
1898  bool all_equal(not_void_T val) const {
1899  bool all_equal = true;
1900  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
1901  return all_equal;
1902  }
1903 
1904  Buffer<T, D> &fill(not_void_T val) {
1905  set_host_dirty();
1906  for_each_value([=](T &v) { v = val; });
1907  return *this;
1908  }
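 // Illustrative sketch (editorial addition) pairing fill() with all_equal():
 //
 //     Buffer<int> im(8, 8);
 //     im.fill(0);
 //     assert(im.all_equal(0));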
1909 
1910 private:
1911  /** Helper functions for for_each_value. */
1912  // @{
1913  template<int N>
1914  struct for_each_value_task_dim {
1915  std::ptrdiff_t stride[N];
1916  std::ptrdiff_t extent;
1917  };
1918 
1919  // Given an array of strides, and a bunch of pointers to pointers
1920  // (all of different types), advance the pointers using the
1921  // strides.
1922  template<typename Ptr, typename... Ptrs>
1923  HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
1924  ptr += *stride;
1925  advance_ptrs(stride + 1, ptrs...);
1926  }
1927 
1928  HALIDE_ALWAYS_INLINE
1929  static void advance_ptrs(const std::ptrdiff_t *) {
1930  }
1931 
1932  template<typename Fn, typename Ptr, typename... Ptrs>
1933  HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
1934  const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
1935  if (d == 0) {
1936  if (innermost_strides_are_one) {
1937  Ptr end = ptr + t[0].extent;
1938  while (ptr != end) {
1939  f(*ptr++, (*ptrs++)...);
1940  }
1941  } else {
1942  for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
1943  f(*ptr, (*ptrs)...);
1944  advance_ptrs(t[0].stride, ptr, ptrs...);
1945  }
1946  }
1947  } else {
1948  for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
1949  for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
1950  advance_ptrs(t[d].stride, ptr, ptrs...);
1951  }
1952  }
1953  }
1954 
1955  template<int N>
1956  HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
1957  const halide_buffer_t **buffers) {
1958  // Check the buffers all have clean host allocations
1959  for (int i = 0; i < N; i++) {
1960  if (buffers[i]->device) {
1961  assert(buffers[i]->host &&
1962  "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
1963  assert(!buffers[i]->device_dirty() &&
1964  "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
1965  } else {
1966  assert(buffers[i]->host &&
1967  "Buffer passed to for_each_value has no host or device allocation");
1968  }
1969  }
1970 
1971  const int dimensions = buffers[0]->dimensions;
1972 
1973  // Extract the strides in all the dimensions
1974  for (int i = 0; i < dimensions; i++) {
1975  for (int j = 0; j < N; j++) {
1976  assert(buffers[j]->dimensions == dimensions);
1977  assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
1978  buffers[j]->dim[i].min == buffers[0]->dim[i].min);
1979  const int s = buffers[j]->dim[i].stride;
1980  t[i].stride[j] = s;
1981  }
1982  t[i].extent = buffers[0]->dim[i].extent;
1983 
1984  // Order the dimensions by stride, so that the traversal is cache-coherent.
1985  // Use the last dimension for this, because this is the source in copies.
1986  // It appears to be better to optimize read order than write order.
1987  for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
1988  std::swap(t[j], t[j - 1]);
1989  }
1990  }
1991 
1992  // flatten dimensions where possible to make a larger inner
1993  // loop for autovectorization.
1994  int d = dimensions;
1995  for (int i = 1; i < d; i++) {
1996  bool flat = true;
1997  for (int j = 0; j < N; j++) {
1998  flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
1999  }
2000  if (flat) {
2001  t[i - 1].extent *= t[i].extent;
2002  for (int j = i; j < d; j++) {
2003  t[j] = t[j + 1];
2004  }
2005  i--;
2006  d--;
2007  t[d].extent = 1;
2008  }
2009  }
2010 
2011  bool innermost_strides_are_one = true;
2012  if (dimensions > 0) {
2013  for (int i = 0; i < N; i++) {
2014  innermost_strides_are_one &= (t[0].stride[i] == 1);
2015  }
2016  }
2017 
2018  return innermost_strides_are_one;
2019  }
2020 
2021  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2022  void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2023  if (dimensions() > 0) {
2024  Buffer<>::for_each_value_task_dim<N> *t =
2025  (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
2026  // Move the preparatory code into a non-templated helper to
2027  // save code size.
2028  const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2029  bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);
2030 
2031  Buffer<>::for_each_value_helper(f, dimensions() - 1,
2032  innermost_strides_are_one,
2033  t,
2034  data(), (other_buffers.data())...);
2035  } else {
2036  f(*data(), (*other_buffers.data())...);
2037  }
2038  }
2039  // @}
2040 
2041 public:
2042  /** Call a function on every value in the buffer, and the
2043  * corresponding values in some number of other buffers of the
2044  * same size. The function should take a reference, const
2045  * reference, or value of the correct type for each buffer. This
2046  * effectively lifts a function of scalars to an element-wise
2047  * function of buffers. This produces code that the compiler can
2048  * autovectorize. This is slightly cheaper than for_each_element,
2049  * because it does not need to track the coordinates.
2050  *
2051  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2052  * 'this' or the other-buffers arguments) will allow mutation of the
2053  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2054  * a mutable reference for the lambda argument of a Buffer<const T>
2055  * will result in a compilation error. */
2056  // @{
2057  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2058  HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2059  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2060  return *this;
2061  }
2062 
2063  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2064  HALIDE_ALWAYS_INLINE
2065  Buffer<T, D> &
2066  for_each_value(Fn &&f, Args &&...other_buffers) {
2067  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2068  return *this;
2069  }
2070  // @}
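 // Illustrative sketch (editorial addition): lifting a scalar saturating add over three
 // same-shaped hypothetical buffers; the first lambda argument is this buffer's value.
 //
 //     Buffer<uint8_t> a(128, 128), b(128, 128), out(128, 128);
 //     out.for_each_value([](uint8_t &o, uint8_t x, uint8_t y) {
 //         o = (uint8_t)std::min(255, x + y);
 //     }, a, b);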
2071 
2072 private:
2073  // Helper functions for for_each_element
2074  struct for_each_element_task_dim {
2075  int min, max;
2076  };
2077 
2078  /** If f is callable with this many args, call it. The first
2079  * argument is just to make the overloads distinct. Actual
2080  * overload selection is done using the enable_if. */
2081  template<typename Fn,
2082  typename... Args,
2083  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2084  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2085  f(args...);
2086  }
2087 
2088  /** If the above overload is impossible, we add an outer loop over
2089  * an additional argument and try again. */
2090  template<typename Fn,
2091  typename... Args>
2092  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2093  for (int i = t[d].min; i <= t[d].max; i++) {
2094  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2095  }
2096  }
2097 
2098  /** Determine the minimum number of arguments a callable can take
2099  * using the same trick. */
2100  template<typename Fn,
2101  typename... Args,
2102  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2103  HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2104  return (int)(sizeof...(Args));
2105  }
2106 
2107  /** The recursive version is only enabled up to a recursion limit
2108  * of 256. This catches callables that aren't callable with any
2109  * number of ints. */
2110  template<typename Fn,
2111  typename... Args>
2112  HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2113  static_assert(sizeof...(args) <= 256,
2114  "Callable passed to for_each_element must accept either a const int *,"
2115  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2116  return num_args(0, std::forward<Fn>(f), 0, args...);
2117  }
2118 
2119  /** A version where the callable takes a position array instead,
2120  * with compile-time recursion on the dimensionality. This
2121  * overload is preferred to the one below using the same int vs
2122  * double trick as above, but is impossible once d hits -1 using
2123  * std::enable_if. */
2124  template<int d,
2125  typename Fn,
2126  typename = typename std::enable_if<(d >= 0)>::type>
2127  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2128  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2129  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2130  }
2131  }
2132 
2133  /** Base case for recursion above. */
2134  template<int d,
2135  typename Fn,
2136  typename = typename std::enable_if<(d < 0)>::type>
2137  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2138  f(pos);
2139  }
2140 
2141  /** A run-time-recursive version (instead of
2142  * compile-time-recursive) that requires the callable to take a
2143  * pointer to a position array instead. Dispatches to the
2144  * compile-time-recursive version once the dimensionality gets
2145  * small. */
2146  template<typename Fn>
2147  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2148  if (d == -1) {
2149  f(pos);
2150  } else if (d == 0) {
2151  // Once the dimensionality gets small enough, dispatch to
2152  // a compile-time-recursive version for better codegen of
2153  // the inner loops.
2154  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2155  } else if (d == 1) {
2156  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2157  } else if (d == 2) {
2158  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2159  } else if (d == 3) {
2160  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2161  } else {
2162  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2163  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2164  }
2165  }
2166  }
2167 
2168  /** We now have two overloads for for_each_element. This one
2169  * triggers if the callable takes a const int *.
2170  */
2171  template<typename Fn,
2172  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2173  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2174  int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
2175  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2176  }
2177 
2178  /** This one triggers otherwise. It treats the callable as
2179  * something that takes some number of ints. */
2180  template<typename Fn>
2181  HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2182  int args = num_args(0, std::forward<Fn>(f));
2183  assert(dims >= args);
2184  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2185  }
2186 
2187  template<typename Fn>
2188  void for_each_element_impl(Fn &&f) const {
2189  for_each_element_task_dim *t =
2190  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2191  for (int i = 0; i < dimensions(); i++) {
2192  t[i].min = dim(i).min();
2193  t[i].max = dim(i).max();
2194  }
2195  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2196  }
2197 
2198 public:
2199  /** Call a function at each site in a buffer. This is likely to be
2200  * much slower than using Halide code to populate a buffer, but is
2201  * convenient for tests. If the function has more arguments than the
2202  * buffer has dimensions, the remaining arguments will be zero. If it
2203  * has fewer arguments than the buffer has dimensions then the last
2204  * few dimensions of the buffer are not iterated over. For example,
2205  * the following code exploits this to set a floating point RGB image
2206  * to red:
2207 
2208  \code
2209  Buffer<float, 3> im(100, 100, 3);
2210  im.for_each_element([&](int x, int y) {
2211  im(x, y, 0) = 1.0f;
2212  im(x, y, 1) = 0.0f;
2213  im(x, y, 2) = 0.0f;
2214  });
2215  \endcode
2216 
2217  * The compiled code is equivalent to writing a nested for loop,
2218  * and compilers are capable of optimizing it in the same way.
2219  *
2220  * If the callable can be called with an int * as the sole argument,
2221  * that version is called instead. Each location in the buffer is
2222  * passed to it in a coordinate array. This version is higher-overhead
2223  * than the variadic version, but is useful for writing generic code
2224  * that accepts buffers of arbitrary dimensionality. For example, the
2225  * following sets the value at all sites in an arbitrary-dimensional
2226  * buffer to their first coordinate:
2227 
2228  \code
2229  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2230  \endcode
2231 
2232  * It is also possible to use for_each_element to iterate over entire
2233  * rows or columns by cropping the buffer to a single column or row
2234  * respectively and iterating over elements of the result. For example,
2235  * to set the diagonal of the image to 1 by iterating over the columns:
2236 
2237  \code
2238  Buffer<float, 3> im(100, 100, 3);
2239  im.sliced(1, 0).for_each_element([&](int x, int c) {
2240  im(x, x, c) = 1.0f;
2241  });
2242  \endcode
2243 
2244  * Or, assuming the memory layout is known to be dense per row, one can
2245  * memset each row of an image like so:
2246 
2247  \code
2248  Buffer<float, 3> im(100, 100, 3);
2249  im.sliced(0, 0).for_each_element([&](int y, int c) {
2250  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2251  });
2252  \endcode
2253 
2254  */
2255  // @{
2256  template<typename Fn>
2257  HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_element(Fn &&f) const {
2258  for_each_element_impl(f);
2259  return *this;
2260  }
2261 
2262  template<typename Fn>
2263  HALIDE_ALWAYS_INLINE
2264  Buffer<T, D> &
2265  for_each_element(Fn &&f) {
2266  for_each_element_impl(f);
2267  return *this;
2268  }
2269  // @}
2270 
2271 private:
2272  template<typename Fn>
2273  struct FillHelper {
2274  Fn f;
2275  Buffer<T, D> *buf;
2276 
2277  template<typename... Args,
2278  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2279  void operator()(Args... args) {
2280  (*buf)(args...) = f(args...);
2281  }
2282 
2283  FillHelper(Fn &&f, Buffer<T, D> *buf)
2284  : f(std::forward<Fn>(f)), buf(buf) {
2285  }
2286  };
2287 
2288 public:
2289  /** Fill a buffer by evaluating a callable at every site. The
2290  * callable should look much like a callable passed to
2291  * for_each_element, but it should return the value that should be
2292  * stored to the coordinate corresponding to the arguments. */
2293  template<typename Fn,
2294  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2295  Buffer<T, D> &fill(Fn &&f) {
2296  // We'll go via for_each_element. We need a variadic wrapper lambda.
2297  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2298  return for_each_element(wrapper);
2299  }
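 // Illustrative sketch (editorial addition): filling a hypothetical gradient image from
 // a callable of the coordinates.
 //
 //     Buffer<float> im(100, 100);
 //     im.fill([](int x, int y) { return (float)(x + y); });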
2300 
2301  /** Check if an input buffer passed to an extern stage is a bounds
2302  * query. Compared to doing the host pointer check directly,
2303  * this both adds clarity to code and will facilitate moving to
2304  * another representation for bounds query arguments. */
2305  bool is_bounds_query() const {
2306  return buf.is_bounds_query();
2307  }
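 // Illustrative sketch (editorial addition) of the usual pattern in an extern stage,
 // where `input` stands in for a halide_buffer_t* argument passed by Halide:
 //
 //     Buffer<float> in(*input);
 //     if (in.is_bounds_query()) {
 //         // Fill in input->dim[i].min/extent with the region this stage needs,
 //         // then return; Halide calls the stage again with allocated data.
 //         return 0;
 //     }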
2308 
2309  /** Convenient check to verify that all of the interesting bytes in the Buffer
2310  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2311  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2312  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2313  * the entire Buffer storage.) */
2314  void msan_check_mem_is_initialized(bool entire = false) const {
2315 #if defined(__has_feature)
2316 #if __has_feature(memory_sanitizer)
2317  if (entire) {
2318  __msan_check_mem_is_initialized(data(), size_in_bytes());
2319  } else {
2320  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2321  }
2322 #endif
2323 #endif
2324  }
2325 };
2326 
2327 } // namespace Runtime
2328 } // namespace Halide
2329 
2330 #undef HALIDE_ALLOCA
2331 
2332 #endif // HALIDE_RUNTIME_BUFFER_H