#ifndef HALIDE_RUNTIME_BUFFER_H
#define HALIDE_RUNTIME_BUFFER_H
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#endif
#ifdef _MSC_VER
#define HALIDE_ALLOCA _alloca
#else
#define HALIDE_ALLOCA __builtin_alloca
#endif
// gcc 5.1 has a false positive warning on this code
#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
template<typename T, int D>
class Buffer;
template<typename... Args>
struct AllInts : std::false_type {};
template<typename T, typename... Args>
struct AllInts<T, Args...> {
    static const bool value = std::is_convertible<T, int>::value &&
                              AllInts<Args...>::value;
};
template<typename... Args>
struct AllInts<float, Args...> : std::false_type {};

template<typename... Args>
struct AllInts<double, Args...> : std::false_type {};
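// Illustrative sketch (not from the original source): AllInts accepts
// parameter packs that are all convertible to int and rejects packs
// containing floats or doubles, so coordinate arguments cannot silently
// truncate.
static_assert(AllInts<int, long, char>::value, "all-integer packs are accepted");
static_assert(!AllInts<int, float>::value, "packs containing float are rejected");
static_assert(!AllInts<double>::value, "packs containing double are rejected");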
template<typename Container>
bool any_zero(const Container &c) {
    for (int i : c) {
        if (i == 0) {
            return true;
        }
    }
    return false;
}
template<typename T = void, int D = 4>
class Buffer {
static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
template<typename T2>
using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
using not_void_T = typename std::conditional<T_is_void,
                                             add_const_if_T_is_const<uint8_t>,
                                             T>::type;
using not_const_T = typename std::remove_const<T>::type;
using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
static halide_type_t static_halide_type() {
    return halide_type_of<typename std::remove_cv<not_void_T>::type>();
}

// Does this Buffer own the host memory it refers to?
bool owns_host_memory() const {
    return alloc != nullptr;
}

// Increment the reference counts of any owned allocations.
void incref() const {
    if (!owns_host_memory()) {
        return;
    }
    alloc->ref_count++;
    if (buf.device) {
        if (!dev_ref_count) {
            // We have a device allocation but no reference count for
            // it; take sole ownership of it.
            dev_ref_count = new DeviceRefCount;
        }
        dev_ref_count->count++;
    }
}
struct DevRefCountCropped : DeviceRefCount {
    Buffer<T, D> cropped_from;
    DevRefCountCropped(const Buffer<T, D> &cropped_from)
        : cropped_from(cropped_from) {
        ownership = BufferDeviceOwnership::Cropped;
    }
};
void crop_from(const Buffer<T, D> &cropped_from) {
    assert(dev_ref_count == nullptr);
    dev_ref_count = new DevRefCountCropped(cropped_from);
}
// Decrement the reference count of any owned allocation and free host
// and device memory if it hits zero.
void decref(bool device_only = false) {
    if (owns_host_memory() && !device_only) {
        int new_count = --(alloc->ref_count);
        if (new_count == 0) {
            void (*fn)(void *) = alloc->deallocate_fn;
            alloc->~AllocationHeader();
            fn(alloc);
        }
        buf.host = nullptr;
        alloc = nullptr;
    }
    int new_count = 0;
    if (dev_ref_count) {
        new_count = --(dev_ref_count->count);
    }
    if (new_count == 0) {
        if (buf.device) {
            assert(!(alloc && device_dirty()) &&
                   "Implicitly freeing a dirty device allocation while a host allocation still lives. "
                   "Call device_free explicitly if you want to drop dirty device-side data. "
                   "Call copy_to_host explicitly if you want the data copied to the host allocation "
                   "before the device allocation is freed.");
            // Dispatch on how the device allocation is owned.
            if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
                buf.device_interface->detach_native(nullptr, &buf);
            } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
                buf.device_interface->device_and_host_free(nullptr, &buf);
            } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                buf.device_interface->device_release_crop(nullptr, &buf);
            } else {
                buf.device_interface->device_free(nullptr, &buf);
            }
        }
        if (dev_ref_count) {
            if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                delete (DevRefCountCropped *)dev_ref_count;
            } else {
                delete dev_ref_count;
            }
        }
    }
    dev_ref_count = nullptr;
    buf.device = 0;
    buf.device_interface = nullptr;
}
void free_shape_storage() {
    if (buf.dim != shape) {
        delete[] buf.dim;
        buf.dim = nullptr;
    }
}
void make_shape_storage(const int dimensions) {
    // Use the in-class storage when it is large enough; otherwise heap-allocate.
    buf.dimensions = dimensions;
    buf.dim = (dimensions <= D) ? shape : new halide_dimension_t[dimensions];
}
template<typename T2, int D2>
void move_shape_from(Buffer<T2, D2> &&other) {
    if (other.shape == other.buf.dim) {
        copy_shape_from(other.buf);
    } else {
        buf.dim = other.buf.dim;
        other.buf.dim = nullptr;
    }
}
void initialize_from_buffer(const halide_buffer_t &b,
                            BufferDeviceOwnership ownership) {
    buf = b;
    copy_shape_from(b);
    if (b.device) {
        dev_ref_count = new DeviceRefCount;
        dev_ref_count->ownership = ownership;
    }
}
void initialize_shape(const int *sizes) {
    for (int i = 0; i < buf.dimensions; i++) {
        buf.dim[i].min = 0;
        buf.dim[i].extent = sizes[i];
        if (i == 0) {
            buf.dim[i].stride = 1;
        } else {
            buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
        }
    }
}
void initialize_shape(const std::vector<int> &sizes) {
    assert(buf.dimensions == (int)sizes.size());
    initialize_shape(sizes.data());
}
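// Worked example of the stride recurrence above (illustrative): extents
// {640, 480, 3} produce strides {1, 640, 307200}, i.e. a dense planar
// layout in which incrementing dimension 0 moves one element in memory.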
template<typename Array, size_t N>
void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
    buf.dim[next].min = 0;
    buf.dim[next].extent = (int)N;
    if (next == 0) {
        buf.dim[next].stride = 1;
    } else {
        initialize_shape_from_array_shape(next - 1, vals[0]);
        buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
    }
}
template<typename T2>
void initialize_shape_from_array_shape(int, const T2 &) {
}
template<typename Array, size_t N>
static int dimensionality_of_array(Array (&vals)[N]) {
    return dimensionality_of_array(vals[0]) + 1;
}
template<typename T2>
static int dimensionality_of_array(const T2 &) {
    return 0;
}
template<typename Array, size_t N>
static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
    return scalar_type_of_array(vals[0]);
}
template<typename T2>
static halide_type_t scalar_type_of_array(const T2 &) {
    return halide_type_of<typename std::remove_cv<T2>::type>();
}
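// Illustrative sketch of the array-shape helpers: given `int vals[4][3]`,
// dimensionality_of_array(vals) == 2 and scalar_type_of_array(vals) ==
// halide_type_of<int>(), so Buffer<int> b(vals) wraps a two-dimensional
// buffer whose innermost (stride-1) dimension is the array's last extent, 3.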
void crop_host(int d, int min, int extent) {
    assert(dim(d).min() <= min);
    assert(dim(d).max() >= min + extent - 1);
    ptrdiff_t shift = min - dim(d).min();
    if (buf.host != nullptr) {
        buf.host += (shift * dim(d).stride()) * type().bytes();
    }
    buf.dim[d].min = min;
    buf.dim[d].extent = extent;
}
void crop_host(const std::vector<std::pair<int, int>> &rect) {
    int limit = (int)rect.size();
    assert(limit <= dimensions());
    for (int i = 0; i < limit; i++) {
        crop_host(i, rect[i].first, rect[i].second);
    }
}
void complete_device_crop(Buffer<T, D> &result_host_cropped) const {
    assert(buf.device_interface != nullptr);
    if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
        const Buffer<T, D> *cropped_from = this;
        // If this buffer is itself a crop, the new crop must hold a
        // reference to the original allocation, not the intermediate one.
        if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
            cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
        }
        result_host_cropped.crop_from(*cropped_from);
    }
}
void slice_host(int d, int pos) {
    assert(d >= 0 && d < dimensions());
    assert(pos >= dim(d).min() && pos <= dim(d).max());
    buf.dimensions--;
    ptrdiff_t shift = pos - buf.dim[d].min;
    if (buf.host != nullptr) {
        buf.host += (shift * buf.dim[d].stride) * type().bytes();
    }
    for (int i = d; i < buf.dimensions; i++) {
        buf.dim[i] = buf.dim[i + 1];
    }
    buf.dim[buf.dimensions] = {0, 0, 0};
}
void complete_device_slice(Buffer<T, D> &result_host_sliced, int d, int pos) const {
    assert(buf.device_interface != nullptr);
    if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
        const Buffer<T, D> *sliced_from = this;
        if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
            sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
        }
        // crop_from() is correct here: it just retains the source allocation.
        result_host_sliced.crop_from(*sliced_from);
    }
}
// The total number of elements this buffer represents.
size_t number_of_elements() const {
    return buf.number_of_elements();
}

// Get the dimensionality of the buffer.
int dimensions() const {
    return buf.dimensions;
}

// A pointer to the element with the lowest address.
T *begin() const {
    assert(buf.host != nullptr);
    return (T *)buf.begin();
}

// A pointer to one beyond the element with the highest address.
T *end() const {
    assert(buf.host != nullptr);
    return (T *)buf.end();
}

// The total number of bytes spanned by the data in memory.
size_t size_in_bytes() const {
    return buf.size_in_bytes();
}
Buffer() {
    buf.type = static_halide_type();
    make_shape_storage(0);
}

// Make a Buffer from a halide_buffer_t.
explicit Buffer(const halide_buffer_t &buf,
                BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
    initialize_from_buffer(buf, ownership);
}
// Give Buffers access to the members of Buffers of different
// dimensionalities and types.
template<typename T2, int D2>
friend class Buffer;
template<typename T2, int D2>
static void static_assert_can_convert_from() {
    static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
                  "Can't convert from a Buffer<const T> to a Buffer<T>");
    static_assert(std::is_same<typename std::remove_const<T>::type,
                               typename std::remove_const<T2>::type>::value ||
                      T_is_void || Buffer<T2, D2>::T_is_void,
                  "type mismatch constructing Buffer");
}
template<typename T2, int D2>
static bool can_convert_from(const Buffer<T2, D2> &other) {
    static_assert_can_convert_from<T2, D2>();
    if (Buffer<T2, D2>::T_is_void && !T_is_void) {
        return other.type() == static_halide_type();
    }
    return true;
}
template<typename T2, int D2>
static void assert_can_convert_from(const Buffer<T2, D2> &other) {
    // Call static_assert_can_convert_from() unconditionally so we get
    // compile-time checking even when assertions are disabled.
    static_assert_can_convert_from<T2, D2>();
    assert(can_convert_from(other));
}
// Copy constructor. Retains a reference to the shared allocation.
Buffer(const Buffer<T, D> &other)
    : buf(other.buf),
      alloc(other.alloc) {
    other.incref();
    dev_ref_count = other.dev_ref_count;
    copy_shape_from(other.buf);
}

// Construct a Buffer from a Buffer of different dimensionality and type.
template<typename T2, int D2>
Buffer(const Buffer<T2, D2> &other)
    : buf(other.buf),
      alloc(other.alloc) {
    assert_can_convert_from(other);
    other.incref();
    dev_ref_count = other.dev_ref_count;
    copy_shape_from(other.buf);
}
// Move constructor.
Buffer(Buffer<T, D> &&other) noexcept
    : buf(other.buf),
      alloc(other.alloc),
      dev_ref_count(other.dev_ref_count) {
    other.dev_ref_count = nullptr;
    other.alloc = nullptr;
    move_shape_from(std::move(other));
    other.buf = halide_buffer_t();
}

// Move-construct a Buffer from a Buffer of different dimensionality and type.
template<typename T2, int D2>
Buffer(Buffer<T2, D2> &&other)
    : buf(other.buf),
      alloc(other.alloc),
      dev_ref_count(other.dev_ref_count) {
    assert_can_convert_from(other);
    other.dev_ref_count = nullptr;
    other.alloc = nullptr;
    move_shape_from(std::move(other));
    other.buf = halide_buffer_t();
}
// Assign from another Buffer of possibly-different dimensionality and type.
template<typename T2, int D2>
Buffer<T, D> &operator=(const Buffer<T2, D2> &other) {
    if ((const void *)this == (const void *)&other) {
        return *this;
    }
    assert_can_convert_from(other);
    other.incref();
    decref();
    dev_ref_count = other.dev_ref_count;
    alloc = other.alloc;
    free_shape_storage();
    buf = other.buf;
    copy_shape_from(other.buf);
    return *this;
}

// Standard assignment operator.
Buffer<T, D> &operator=(const Buffer<T, D> &other) {
    if ((const void *)this == (const void *)&other) {
        return *this;
    }
    other.incref();
    decref();
    dev_ref_count = other.dev_ref_count;
    alloc = other.alloc;
    free_shape_storage();
    buf = other.buf;
    copy_shape_from(other.buf);
    return *this;
}
// Move from another Buffer of possibly-different dimensionality and type.
template<typename T2, int D2>
Buffer<T, D> &operator=(Buffer<T2, D2> &&other) {
    assert_can_convert_from(other);
    decref();
    alloc = other.alloc;
    other.alloc = nullptr;
    dev_ref_count = other.dev_ref_count;
    other.dev_ref_count = nullptr;
    free_shape_storage();
    buf = other.buf;
    move_shape_from(std::move(other));
    other.buf = halide_buffer_t();
    return *this;
}

// Standard move-assignment operator.
Buffer<T, D> &operator=(Buffer<T, D> &&other) noexcept {
    decref();
    alloc = other.alloc;
    other.alloc = nullptr;
    dev_ref_count = other.dev_ref_count;
    other.dev_ref_count = nullptr;
    free_shape_storage();
    buf = other.buf;
    move_shape_from(std::move(other));
    other.buf = halide_buffer_t();
    return *this;
}
// Check the product of the extents fits in memory: multiply the extents
// into size, drop the top bit (2^31 or 2^63 bytes are allowed), then
// divide the extents back out. Recovering exactly type().bytes() proves
// no overflow occurred.
void check_overflow() {
    size_t size = type().bytes();
    for (int i = 0; i < dimensions(); i++) {
        size *= dim(i).extent();
    }
    size = (size << 1) >> 1;
    for (int i = 0; i < dimensions(); i++) {
        size /= dim(i).extent();
    }
    assert(size == (size_t)type().bytes() &&
           "Error: Overflow computing total size of buffer.");
}
// Allocate memory for this Buffer. Drops ownership of any existing memory.
void allocate(void *(*allocate_fn)(size_t) = nullptr,
              void (*deallocate_fn)(void *) = nullptr) {
    if (!allocate_fn) {
        allocate_fn = malloc;
    }
    if (!deallocate_fn) {
        deallocate_fn = free;
    }
    // Conservatively align images to 128 bytes. This is enough
    // alignment for all the platforms we might use.
    size_t size = size_in_bytes();
    const size_t alignment = 128;
    size = (size + alignment - 1) & ~(alignment - 1);
    void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
    alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
    uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
    buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
}
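// Worked example of the rounding above, assuming alignment = 128
// (illustrative): size = 1000 -> (1000 + 127) & ~127 = 1024. The extra
// alignment - 1 bytes requested from allocate_fn guarantee that an
// aligned host pointer can be carved out of the raw allocation.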
// Allocate a new image of the given size with a runtime type.
template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
Buffer(halide_type_t t, int first, Args... rest) {
    if (!T_is_void) {
        assert(static_halide_type() == t);
    }
    int extents[] = {first, (int)rest...};
    buf.type = t;
    constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
    make_shape_storage(buf_dimensions);
    initialize_shape(extents);
    if (!any_zero(extents)) {
        check_overflow();
        allocate();
    }
}
// Allocate a new image of the given size.
explicit Buffer(int first) {
    static_assert(!T_is_void,
                  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
    int extents[] = {first};
    buf.type = static_halide_type();
    constexpr int buf_dimensions = 1;
    make_shape_storage(buf_dimensions);
    initialize_shape(extents);
    if (first != 0) {
        check_overflow();
        allocate();
    }
}
template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
Buffer(int first, int second, Args... rest) {
    static_assert(!T_is_void,
                  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
    int extents[] = {first, second, (int)rest...};
    buf.type = static_halide_type();
    constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
    make_shape_storage(buf_dimensions);
    initialize_shape(extents);
    if (!any_zero(extents)) {
        check_overflow();
        allocate();
    }
}
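// Usage sketch (illustrative): allocate a planar 640x480 RGB image and
// address it with (x, y, c); dimension 0 has stride 1.
//   Halide::Runtime::Buffer<uint8_t> im(640, 480, 3);
//   im(10, 20, 2) = 255;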
// Allocate a new image of unknown type using a vector of ints as the size.
Buffer(halide_type_t t, const std::vector<int> &sizes) {
    if (!T_is_void) {
        assert(static_halide_type() == t);
    }
    buf.type = t;
    make_shape_storage((int)sizes.size());
    initialize_shape(sizes);
    if (!any_zero(sizes)) {
        check_overflow();
        allocate();
    }
}
// Allocate a new image of known type using a vector of ints as the size.
explicit Buffer(const std::vector<int> &sizes)
    : Buffer(static_halide_type(), sizes) {
}
static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes,
                                           const std::vector<int> &order) {
    assert(order.size() == sizes.size());
    std::vector<int> ordered_sizes(sizes.size());
    for (size_t i = 0; i < sizes.size(); ++i) {
        ordered_sizes[i] = sizes.at(order[i]);
    }
    return ordered_sizes;
}
// Allocate a new image using a vector of ints as the size and a vector of
// indices indicating the storage order of each dimension.
Buffer(halide_type_t t, const std::vector<int> &sizes,
       const std::vector<int> &storage_order)
    : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
    transpose(storage_order);
}

Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
    : Buffer(static_halide_type(), sizes, storage_order) {
}
// Make a Buffer that refers to a statically sized array.
template<typename Array, size_t N>
explicit Buffer(Array (&vals)[N]) {
    const int buf_dimensions = dimensionality_of_array(vals);
    buf.type = scalar_type_of_array(vals);
    buf.host = (uint8_t *)vals;
    make_shape_storage(buf_dimensions);
    initialize_shape_from_array_shape(buf.dimensions - 1, vals);
}
// Initialize a Buffer of runtime type from a pointer and some sizes.
template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
    if (!T_is_void) {
        assert(static_halide_type() == t);
    }
    int extents[] = {first, (int)rest...};
    buf.type = t;
    constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
    buf.host = (uint8_t *)const_cast<void *>(data);
    make_shape_storage(buf_dimensions);
    initialize_shape(extents);
}
// Initialize a Buffer from a pointer and some sizes.
template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
explicit Buffer(T *data, int first, Args &&...rest) {
    int extents[] = {first, (int)rest...};
    buf.type = static_halide_type();
    constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
    buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
    make_shape_storage(buf_dimensions);
    initialize_shape(extents);
}
// Initialize a Buffer from a pointer and a vector of sizes.
explicit Buffer(T *data, const std::vector<int> &sizes) {
    buf.type = static_halide_type();
    buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
    make_shape_storage((int)sizes.size());
    initialize_shape(sizes);
}
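// Usage sketch (illustrative): wrap existing memory without copying. The
// Buffer does not take ownership, so `data` must outlive the Buffer.
//   float *data = get_pixels();  // hypothetical source of 640*480 floats
//   Halide::Runtime::Buffer<float> im(data, {640, 480});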
// Initialize a Buffer of runtime type from a pointer and a vector of sizes.
explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                const std::vector<int> &sizes) {
    if (!T_is_void) {
        assert(static_halide_type() == t);
    }
    buf.type = t;
    buf.host = (uint8_t *)const_cast<void *>(data);
    make_shape_storage((int)sizes.size());
    initialize_shape(sizes);
}
// Initialize a Buffer from a pointer to the min coordinate and an array
// describing the shape.
explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                int d, const halide_dimension_t *shape) {
    if (!T_is_void) {
        assert(static_halide_type() == t);
    }
    buf.type = t;
    buf.host = (uint8_t *)const_cast<void *>(data);
    make_shape_storage(d);
    for (int i = 0; i < d; i++) {
        buf.dim[i] = shape[i];
    }
}
// Initialize a Buffer from a pointer to the min coordinate and a vector
// describing the shape.
explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                const std::vector<halide_dimension_t> &shape)
    : Buffer(t, data, (int)shape.size(), shape.data()) {
}
explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
    buf.type = static_halide_type();
    buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
    make_shape_storage(d);
    for (int i = 0; i < d; i++) {
        buf.dim[i] = shape[i];
    }
}
explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
    : Buffer(data, (int)shape.size(), shape.data()) {
}
~Buffer() {
    free_shape_storage();
    decref();
}
// Return a typed reference to this Buffer: lvalue, const-lvalue and
// rvalue overloads.
template<typename T2>
HALIDE_ALWAYS_INLINE Buffer<T2, D> &as() & {
    Buffer<T2, D>::assert_can_convert_from(*this);
    return *((Buffer<T2, D> *)this);
}

template<typename T2>
HALIDE_ALWAYS_INLINE const Buffer<T2, D> &as() const & {
    Buffer<T2, D>::assert_can_convert_from(*this);
    return *((const Buffer<T2, D> *)this);
}

template<typename T2>
HALIDE_ALWAYS_INLINE Buffer<T2, D> as() && {
    Buffer<T2, D>::assert_can_convert_from(*this);
    return *((Buffer<T2, D> *)this);
}
// Make a new image which is a deep copy of this image.
Buffer<not_const_T, D> copy(void *(*allocate_fn)(size_t) = nullptr,
                            void (*deallocate_fn)(void *) = nullptr) const {
    Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
    dst.copy_from(*this);
    return dst;
}
// Like copy(), but the copy is created in interleaved memory layout.
Buffer<not_const_T, D> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
                                           void (*deallocate_fn)(void *) = nullptr) const {
    assert(dimensions() == 3);
    Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_interleaved(nullptr, width(), height(), channels());
    dst.allocate(allocate_fn, deallocate_fn);
    dst.copy_from(*this);
    return dst;
}
// Like copy(), but the copy is created in planar memory layout.
Buffer<not_const_T, D> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
                                      void (*deallocate_fn)(void *) = nullptr) const {
    std::vector<int> mins, extents;
    const int dims = dimensions();
    mins.reserve(dims);
    extents.reserve(dims);
    for (int d = 0; d < dims; ++d) {
        mins.push_back(dim(d).min());
        extents.push_back(dim(d).extent());
    }
    Buffer<not_const_T, D> dst(nullptr, extents);
    dst.set_min(mins);
    dst.allocate(allocate_fn, deallocate_fn);
    dst.copy_from(*this);
    return dst;
}
// Fill a Buffer with the values at the same coordinates in another Buffer.
template<typename T2, int D2>
void copy_from(Buffer<T2, D2> src) {
    static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
    assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
    assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");

    Buffer<T, D> dst(*this);

    // Trim the copy to the region in common.
    for (int i = 0; i < dimensions(); i++) {
        int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
        int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
        if (max_coord < min_coord) {
            // The buffers do not overlap.
            return;
        }
        dst.crop(i, min_coord, max_coord - min_coord + 1);
        src.crop(i, min_coord, max_coord - min_coord + 1);
    }

    // Dispatch the per-element copy on the element size in bytes.
    if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
        using MemType = uint8_t;
        auto &typed_dst = (Buffer<MemType, D> &)dst;
        auto &typed_src = (Buffer<const MemType, D2> &)src;
        typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
    } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
        using MemType = uint16_t;
        auto &typed_dst = (Buffer<MemType, D> &)dst;
        auto &typed_src = (Buffer<const MemType, D2> &)src;
        typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
    } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
        using MemType = uint32_t;
        auto &typed_dst = (Buffer<MemType, D> &)dst;
        auto &typed_src = (Buffer<const MemType, D2> &)src;
        typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
    } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
        using MemType = uint64_t;
        auto &typed_dst = (Buffer<MemType, D> &)dst;
        auto &typed_src = (Buffer<const MemType, D2> &)src;
        typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
    } else {
        assert(false && "type().bytes() must be 1, 2, 4, or 8");
    }
    set_host_dirty();
}
// Make an image that refers to a sub-range of this image along the given
// dimension.
Buffer<T, D> cropped(int d, int min, int extent) const {
    // Make a fresh copy that shares the device allocation.
    Buffer<T, D> im = *this;
    // Drop the device ref so the crop below can take ownership of a
    // fresh device crop.
    im.device_deallocate();
    im.crop_host(d, min, extent);
    if (buf.device_interface != nullptr) {
        complete_device_crop(im);
    }
    return im;
}

// Crop an image in-place along the given dimension.
void crop(int d, int min, int extent) {
    if (buf.device_interface != nullptr) {
        device_deallocate();
    }
    crop_host(d, min, extent);
}

// Make an image that refers to a sub-rectangle of this image along the
// first N dimensions.
Buffer<T, D> cropped(const std::vector<std::pair<int, int>> &rect) const {
    Buffer<T, D> im = *this;
    im.device_deallocate();
    im.crop_host(rect);
    if (buf.device_interface != nullptr) {
        complete_device_crop(im);
    }
    return im;
}

// Crop an image in-place along the first N dimensions.
void crop(const std::vector<std::pair<int, int>> &rect) {
    if (buf.device_interface != nullptr) {
        device_deallocate();
    }
    crop_host(rect);
}
// Translate an image in-place along one dimension by changing how it is
// indexed.
void translate(int d, int delta) {
    assert(d >= 0 && d < this->dimensions());
    device_deallocate();
    buf.dim[d].min += delta;
}

// Translate an image along the first N dimensions.
void translate(const std::vector<int> &delta) {
    device_deallocate();
    int limit = (int)delta.size();
    assert(limit <= dimensions());
    for (int i = 0; i < limit; i++) {
        translate(i, delta[i]);
    }
}

// Set the min coordinate of an image in the first N dimensions.
void set_min(const std::vector<int> &mins) {
    assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
    device_deallocate();
    for (size_t i = 0; i < mins.size(); i++) {
        buf.dim[i].min = mins[i];
    }
}

template<typename... Args>
void set_min(Args... args) {
    set_min(std::vector<int>{args...});
}
// Test if a given coordinate is within the bounds of an image.
bool contains(const std::vector<int> &coords) const {
    assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
    for (size_t i = 0; i < coords.size(); i++) {
        if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
            return false;
        }
    }
    return true;
}

template<typename... Args>
bool contains(Args... args) const {
    return contains(std::vector<int>{args...});
}
// Transpose a buffer in-place by changing how it is indexed.
void transpose(int d1, int d2) {
    assert(d1 >= 0 && d1 < this->dimensions());
    assert(d2 >= 0 && d2 < this->dimensions());
    std::swap(buf.dim[d1], buf.dim[d2]);
}

// A generalized transpose: pass a vector that lists each dimension index
// once, in the desired order. Implemented as an insertion sort that
// applies each adjacent swap to the buffer as it goes.
void transpose(const std::vector<int> &order) {
    assert((int)order.size() == dimensions());
    if (dimensions() < 2) {
        return;
    }
    std::vector<int> order_sorted = order;
    for (size_t i = 1; i < order_sorted.size(); i++) {
        for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
            std::swap(order_sorted[j], order_sorted[j - 1]);
            transpose(j, j - 1);
        }
    }
}
// Make a lower-dimensional buffer that refers to one slice of this buffer.
Buffer<T, D> sliced(int d, int pos) const {
    Buffer<T, D> im = *this;
    // Drop the device ref so the slice below can take ownership of a
    // fresh device slice.
    im.device_deallocate();
    im.slice_host(d, pos);
    if (buf.device_interface != nullptr) {
        complete_device_slice(im, d, pos);
    }
    return im;
}

// Rewrite the buffer to refer to a single lower-dimensional slice of
// itself along the given dimension at the given coordinate.
void slice(int d, int pos) {
    if (buf.device_interface != nullptr) {
        device_deallocate();
    }
    slice_host(d, pos);
}
// Add a new dimension with a min of zero and an extent of one.
void add_dimension() {
    const int dims = buf.dimensions;
    buf.dimensions++;
    if (buf.dim != shape) {
        // We're already on the heap. Reallocate.
        halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
        for (int i = 0; i < dims; i++) {
            new_shape[i] = buf.dim[i];
        }
        delete[] buf.dim;
        buf.dim = new_shape;
    } else if (dims == D) {
        // Transition from the in-class storage to the heap.
        make_shape_storage(buf.dimensions);
        for (int i = 0; i < dims; i++) {
            buf.dim[i] = shape[i];
        }
    }
    buf.dim[dims] = {0, 1, 0};
    if (dims == 0) {
        buf.dim[dims].stride = 1;
    } else {
        buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
    }
}

// Add a new dimension with a min of zero, an extent of one, and the
// specified stride.
void add_dimension_with_stride(int s) {
    add_dimension();
    buf.dim[buf.dimensions - 1].stride = s;
}
// Methods for managing any GPU allocation.
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v = true) {
    assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
    buf.set_host_dirty(v);
}

HALIDE_ALWAYS_INLINE bool device_dirty() const {
    return buf.device_dirty();
}

HALIDE_ALWAYS_INLINE bool host_dirty() const {
    return buf.host_dirty();
}

HALIDE_ALWAYS_INLINE void set_device_dirty(bool v = true) {
    assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
    buf.set_device_dirty(v);
}

int copy_to_host(void *ctx = nullptr) {
    if (device_dirty()) {
        return buf.device_interface->copy_to_host(ctx, &buf);
    }
    return 0;
}
int device_free(void *ctx = nullptr) {
    if (dev_ref_count) {
        assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
               "Can't call device_free on an unmanaged or wrapped native device handle. "
               "Free the source allocation or call device_detach_native instead.");
        // Multiple people may be holding onto this dev field.
        assert(dev_ref_count->count == 1 &&
               "Multiple Halide::Runtime::Buffer objects share this device "
               "allocation. Freeing it would create dangling references. "
               "Don't call device_free on Halide buffers that you have copied or "
               "passed by value.");
    }
    int ret = 0;
    if (buf.device_interface) {
        ret = buf.device_interface->device_free(ctx, &buf);
    }
    if (dev_ref_count) {
        delete dev_ref_count;
        dev_ref_count = nullptr;
    }
    return ret;
}
int device_wrap_native(const struct halide_device_interface_t *device_interface,
                       uint64_t handle, void *ctx = nullptr) {
    assert(device_interface);
    assert(dev_ref_count == nullptr);
    dev_ref_count = new DeviceRefCount;
    dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
    return device_interface->wrap_native(ctx, &buf, handle, device_interface);
}
int device_detach_native(void *ctx = nullptr) {
    assert(dev_ref_count &&
           dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
           "Only call device_detach_native on buffers wrapping a native "
           "device handle via device_wrap_native. This buffer was allocated "
           "using device_malloc, or is unmanaged. "
           "Call device_free or free the original allocation instead.");
    // Multiple people may be holding onto this dev field.
    assert(dev_ref_count->count == 1 &&
           "Multiple Halide::Runtime::Buffer objects share this device "
           "allocation. Freeing it could create dangling references. "
           "Don't call device_detach_native on Halide buffers that you "
           "have copied or passed by value.");
    int ret = 0;
    if (buf.device_interface) {
        ret = buf.device_interface->detach_native(ctx, &buf);
    }
    delete dev_ref_count;
    dev_ref_count = nullptr;
    return ret;
}
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
    if (dev_ref_count) {
        assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
               "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
               "Free the source allocation or call device_detach_native instead.");
        // Multiple people may be holding onto this dev field.
        assert(dev_ref_count->count == 1 &&
               "Multiple Halide::Runtime::Buffer objects share this device "
               "allocation. Freeing it would create dangling references. "
               "Don't call device_and_host_free on Halide buffers that you have copied or "
               "passed by value.");
    }
    int ret = 0;
    if (buf.device_interface) {
        ret = buf.device_interface->device_and_host_free(ctx, &buf);
    }
    if (dev_ref_count) {
        delete dev_ref_count;
        dev_ref_count = nullptr;
    }
    return ret;
}
int device_sync(void *ctx = nullptr) {
    return buf.device_sync(ctx);
}

bool has_device_allocation() const {
    return buf.device != 0;
}

// Return the method by which the device field is managed.
BufferDeviceOwnership device_ownership() const {
    if (dev_ref_count == nullptr) {
        return BufferDeviceOwnership::Unmanaged;
    }
    return dev_ref_count->ownership;
}
// Make a buffer with the same shape and memory nesting order as another
// buffer.
template<typename T2, int D2>
static Buffer<T, D> make_with_shape_of(Buffer<T2, D2> src,
                                       void *(*allocate_fn)(size_t) = nullptr,
                                       void (*deallocate_fn)(void *) = nullptr) {
    // Use the runtime type if T is void, otherwise the static type.
    const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
    return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
                                               allocate_fn, deallocate_fn);
}
static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
                                          int dimensions,
                                          halide_dimension_t *shape,
                                          void *(*allocate_fn)(size_t),
                                          void (*deallocate_fn)(void *)) {
    // Reorder the dimensions to have strides in increasing order,
    // recording the swaps made.
    std::vector<int> swaps;
    for (int i = dimensions - 1; i > 0; i--) {
        for (int j = i; j > 0; j--) {
            if (shape[j - 1].stride > shape[j].stride) {
                std::swap(shape[j - 1], shape[j]);
                swaps.push_back(j);
            }
        }
    }
    // Rewrite the strides to be dense, then undo the dimension reordering.
    for (int i = 0; i < dimensions; i++) {
        shape[i].stride = (i == 0) ? 1 : shape[i - 1].extent * shape[i - 1].stride;
    }
    while (!swaps.empty()) {
        int j = swaps.back();
        std::swap(shape[j - 1], shape[j]);
        swaps.pop_back();
    }
    Buffer<> dst(dst_type, nullptr, dimensions, shape);
    dst.allocate(allocate_fn, deallocate_fn);
    return dst;
}
template<typename... Args>
HALIDE_ALWAYS_INLINE ptrdiff_t offset_of(int d, int first, Args... rest) const {
    return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
}

HALIDE_ALWAYS_INLINE ptrdiff_t offset_of(int) const {
    return 0;
}

template<typename... Args>
HALIDE_ALWAYS_INLINE storage_T *address_of(Args... args) const {
    if (T_is_void) {
        return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
    } else {
        return (storage_T *)(this->buf.host) + offset_of(0, args...);
    }
}

HALIDE_ALWAYS_INLINE ptrdiff_t offset_of(const int *pos) const {
    ptrdiff_t offset = 0;
    for (int i = this->dimensions() - 1; i >= 0; i--) {
        offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
    }
    return offset;
}

HALIDE_ALWAYS_INLINE storage_T *address_of(const int *pos) const {
    if (T_is_void) {
        return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
    } else {
        return (storage_T *)this->buf.host + offset_of(pos);
    }
}

// Get a pointer to the address of the min coordinate.
T *data() const {
    return (T *)(this->buf.host);
}
// Access elements. Use im(...) to get a reference to an element, and use
// &im(...) to get the address of an element.
template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((const not_void_T *)(address_of(first, rest...)));
}

HALIDE_ALWAYS_INLINE const not_void_T &operator()() const {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((const not_void_T *)(data()));
}

HALIDE_ALWAYS_INLINE const not_void_T &operator()(const int *pos) const {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((const not_void_T *)(address_of(pos)));
}

template<typename... Args,
         typename = typename std::enable_if<AllInts<Args...>::value>::type>
HALIDE_ALWAYS_INLINE not_void_T &operator()(int first, Args... rest) {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((not_void_T *)(address_of(first, rest...)));
}

HALIDE_ALWAYS_INLINE not_void_T &operator()() {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((not_void_T *)(data()));
}

HALIDE_ALWAYS_INLINE not_void_T &operator()(const int *pos) {
    static_assert(!T_is_void, "Cannot use operator() on Buffer<void> types");
    return *((not_void_T *)(address_of(pos)));
}
// Tests that all values in this buffer are equal to val.
bool all_equal(not_void_T val) const {
    bool all_equal = true;
    for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
    return all_equal;
}
template<int N>
struct for_each_value_task_dim {
    int extent;
    int stride[N];
};
// Given an array of strides, and a bunch of pointers to pointers
// (all of different types), advance them all forwards by their
// respective strides.
template<typename Ptr, typename... Ptrs>
HALIDE_ALWAYS_INLINE static void advance_ptrs(const int *stride, Ptr &ptr, Ptrs &...ptrs) {
    ptr += *stride;
    advance_ptrs(stride + 1, ptrs...);
}

HALIDE_ALWAYS_INLINE static void advance_ptrs(const int *) {
}
template<typename Fn, typename Ptr, typename... Ptrs>
HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
                                                      const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
    if (d == 0) {
        if (innermost_strides_are_one) {
            Ptr end = ptr + t[0].extent;
            while (ptr != end) {
                f(*ptr++, (*ptrs++)...);
            }
        } else {
            for (int i = t[0].extent; i != 0; i--) {
                f(*ptr, (*ptrs)...);
                advance_ptrs(t[0].stride, ptr, ptrs...);
            }
        }
    } else {
        for (int i = t[d].extent; i != 0; i--) {
            for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
            advance_ptrs(t[d].stride, ptr, ptrs...);
        }
    }
}
template<int N>
HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
                                                    const halide_buffer_t **buffers) {
    // Check the buffers are appropriate for this call.
    for (int i = 0; i < N; i++) {
        if (buffers[i]->device) {
            assert(buffers[i]->host &&
                   "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
            assert(!buffers[i]->device_dirty() &&
                   "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
        } else {
            assert(buffers[i]->host &&
                   "Buffer passed to for_each_value has no host or device allocation");
        }
    }

    const int dimensions = buffers[0]->dimensions;

    // Extract the strides in all the dimensions, ordering the dimensions
    // by stride so the traversal is as cache-coherent as possible.
    for (int i = 0; i < dimensions; i++) {
        for (int j = 0; j < N; j++) {
            assert(buffers[j]->dimensions == dimensions);
            assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
                   buffers[j]->dim[i].min == buffers[0]->dim[i].min);
            const int s = buffers[j]->dim[i].stride;
            t[i].stride[j] = s;
        }
        t[i].extent = buffers[0]->dim[i].extent;
        for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
            std::swap(t[j], t[j - 1]);
        }
    }

    // Fuse adjacent dimensions that form a contiguous run in every buffer.
    int d = dimensions;
    for (int i = 1; i < d; i++) {
        bool flat = true;
        for (int j = 0; j < N; j++) {
            flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
        }
        if (flat) {
            t[i - 1].extent *= t[i].extent;
            for (int j = i; j < d; j++) {
                t[j] = t[j + 1];
            }
            i--;
            d--;
        }
    }

    bool innermost_strides_are_one = true;
    if (dimensions > 0) {
        for (int i = 0; i < N; i++) {
            innermost_strides_are_one &= (t[0].stride[i] == 1);
        }
    }
    return innermost_strides_are_one;
}
template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
    if (dimensions() > 0) {
        Buffer<>::for_each_value_task_dim<N> *t =
            (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
        // Move the preparatory code into a non-templated helper to save
        // code size.
        const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
        bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);
        Buffer<>::for_each_value_helper(f, dimensions() - 1,
                                        innermost_strides_are_one,
                                        t,
                                        data(), (other_buffers.data())...);
    } else {
        f(*data(), (*other_buffers.data())...);
    }
}
// Call a function on every value in the buffer, and the corresponding
// values in some number of other buffers of the same size.
template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_value(Fn &&f, Args &&...other_buffers) const {
    for_each_value_impl(f, std::forward<Args>(other_buffers)...);
    return *this;
}

template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
HALIDE_ALWAYS_INLINE Buffer<T, D> &for_each_value(Fn &&f, Args &&...other_buffers) {
    for_each_value_impl(f, std::forward<Args>(other_buffers)...);
    return *this;
}
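// Usage sketch (illustrative): the callable receives this buffer's value
// (by reference when non-const) followed by the other buffers' values.
//   Halide::Runtime::Buffer<float> a(640, 480), b(640, 480), out(640, 480);
//   out.for_each_value([](float &o, float av, float bv) { o = av + bv; }, a, b);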
struct for_each_element_task_dim {
    int min, max;
};
// If f is callable with this many args, call it. The first argument is
// just to make the overloads distinct. Actual implementation is in the
// second method.
template<typename Fn, typename... Args,
         typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
    f(args...);
}

// If the above overload is impossible, we add an outer loop over an
// additional argument and try again.
template<typename Fn, typename... Args>
HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
    for (int i = t[d].min; i <= t[d].max; i++) {
        for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
    }
}

// Determine the maximum number of int arguments the callable accepts.
template<typename Fn, typename... Args,
         typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
    return (int)(sizeof...(Args));
}

template<typename Fn, typename... Args>
HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
    static_assert(sizeof...(args) <= 256,
                  "Callable passed to for_each_element must accept either a const int *,"
                  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
    return num_args(0, std::forward<Fn>(f), 0, args...);
}
template<int d, typename Fn,
         typename = typename std::enable_if<(d >= 0)>::type>
HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
    for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
        for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
    }
}

// Base case: all coordinates set; call the function with the position array.
template<int d, typename Fn,
         typename = typename std::enable_if<(d < 0)>::type>
HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
    f(pos);
}
template<typename Fn>
static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
    if (d == -1) {
        f(pos);
    } else if (d == 0) {
        // Once the dimensionality gets small enough, dispatch to a
        // compile-time-sized version of the loop for the innermost
        // dimensions to help the compiler optimize.
        for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
    } else if (d == 1) {
        for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
    } else if (d == 2) {
        for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
    } else if (d == 3) {
        for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
    } else {
        for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
            for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
        }
    }
}
// Used if the callable takes a const int *.
template<typename Fn,
         typename = decltype(std::declval<Fn>()((const int *)nullptr))>
static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
    int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
    for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
}
// Otherwise the callable takes individual ints.
template<typename Fn>
HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
    int args = num_args(0, std::forward<Fn>(f));
    assert(dims >= args);
    for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
}
template<typename Fn>
void for_each_element_impl(Fn &&f) const {
    for_each_element_task_dim *t =
        (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
    for (int i = 0; i < dimensions(); i++) {
        t[i].min = dim(i).min();
        t[i].max = dim(i).max();
    }
    for_each_element(0, dimensions(), t, std::forward<Fn>(f));
}
// Call a function at each site in a buffer.
template<typename Fn>
HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_element(Fn &&f) const {
    for_each_element_impl(f);
    return *this;
}

template<typename Fn>
HALIDE_ALWAYS_INLINE Buffer<T, D> &for_each_element(Fn &&f) {
    for_each_element_impl(f);
    return *this;
}
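// Usage sketch (illustrative): the callable may take individual int
// coordinates or a const int * position array, whichever it accepts.
//   Halide::Runtime::Buffer<float> im(100, 100);
//   im.for_each_element([&](int x, int y) { im(x, y) = x + y; });
//   im.for_each_element([&](const int *pos) { im(pos) *= 2.0f; });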
template<typename Fn>
struct FillHelper {
    Fn f;
    Buffer<T, D> *buf;

    template<typename... Args,
             typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
    void operator()(Args... args) {
        (*buf)(args...) = f(args...);
    }

    FillHelper(Fn &&f, Buffer<T, D> *buf)
        : f(std::forward<Fn>(f)), buf(buf) {
    }
};
// Fill a buffer by evaluating a callable at every site.
template<typename Fn,
         typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
Buffer<T, D> &fill(Fn &&f) {
    // We'll go via for_each_element, which requires a variadic wrapper.
    FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
    return for_each_element(wrapper);
}
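// Usage sketch (illustrative): fill with a function of position; the
// helper above forwards each coordinate tuple to f and writes the result
// back into the buffer.
//   Halide::Runtime::Buffer<int> im(64, 64);
//   im.fill([](int x, int y) { return x + y; });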
// Check if an input buffer passed to an extern stage is querying bounds.
bool is_bounds_query() const {
    return buf.is_bounds_query();
}
// Convenient check to verify that all of the interesting bytes in the
// Buffer are initialized under MSAN.
void msan_check_mem_is_initialized(bool entire = false) const {
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
    if (entire) {
        __msan_check_mem_is_initialized(data(), size_in_bytes());
    } else {
        for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
    }
#endif
#endif
}
};  // class Buffer
#undef HALIDE_ALLOCA

#endif  // HALIDE_RUNTIME_BUFFER_H