Halide
12.0.1
Halide compiler and libraries
|
Routines specific to the Halide Cuda runtime. More...
#include "HalideRuntime.h"
Go to the source code of this file.
Macros | |
#define | HALIDE_RUNTIME_CUDA |
Functions | |
const struct halide_device_interface_t * | halide_cuda_device_interface () |
int | halide_cuda_initialize_kernels (void *user_context, void **state_ptr, const char *src, int size) |
These are forward declared here to allow clients to override the Halide Cuda runtime. More... | |
int | halide_cuda_run (void *user_context, void *state_ptr, const char *entry_name, int blocksX, int blocksY, int blocksZ, int threadsX, int threadsY, int threadsZ, int shared_mem_bytes, size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) |
void | halide_cuda_finalize_kernels (void *user_context, void *state_ptr) |
int | halide_cuda_wrap_device_ptr (void *user_context, struct halide_buffer_t *buf, uint64_t device_ptr) |
Set the underlying cuda device pointer for a buffer. More... | |
int | halide_cuda_detach_device_ptr (void *user_context, struct halide_buffer_t *buf) |
Disconnect this halide_buffer_t from the device pointer it was previously wrapped around. More... | |
uintptr_t | halide_cuda_get_device_ptr (void *user_context, struct halide_buffer_t *buf) |
Return the underlying device pointer for a halide_buffer_t. More... | |
int | halide_cuda_release_unused_device_allocations (void *user_context) |
Release any currently-unused device allocations back to the cuda driver. More... | |
Routines specific to the Halide Cuda runtime.
Definition in file HalideRuntimeCuda.h.
#define HALIDE_RUNTIME_CUDA |
Definition at line 19 of file HalideRuntimeCuda.h.
const struct halide_device_interface_t* halide_cuda_device_interface | ( | ) |
Referenced by Halide::PyTorch::wrap().
int halide_cuda_initialize_kernels | ( | void * | user_context, |
void ** | state_ptr, | ||
const char * | src, | ||
int | size | ||
) |
These are forward declared here to allow clients to override the Halide Cuda runtime.
Do not call them.
int halide_cuda_run | ( | void * | user_context, |
void * | state_ptr, | ||
const char * | entry_name, | ||
int | blocksX, | ||
int | blocksY, | ||
int | blocksZ, | ||
int | threadsX, | ||
int | threadsY, | ||
int | threadsZ, | ||
int | shared_mem_bytes, | ||
size_t | arg_sizes[], | ||
void * | args[], | ||
int8_t | arg_is_buffer[] | ||
) |
void halide_cuda_finalize_kernels | ( | void * | user_context, |
void * | state_ptr | ||
) |
int halide_cuda_wrap_device_ptr | ( | void * | user_context, |
struct halide_buffer_t * | buf, | ||
uint64_t | device_ptr | ||
) |
Set the underlying cuda device pointer for a buffer.
The device pointer should be allocated using cuMemAlloc or similar and must have an extent large enough to cover that specified by the halide_buffer_t extent fields. The device field of the halide_buffer_t must be NULL when this routine is called. This call can fail due to being passed an invalid device pointer. The device and host dirty bits are left unmodified.
int halide_cuda_detach_device_ptr | ( | void * | user_context, |
struct halide_buffer_t * | buf | ||
) |
Disconnect this halide_buffer_t from the device pointer it was previously wrapped around.
Should only be called for a halide_buffer_t that halide_cuda_wrap_device_ptr was previously called on. The device field of the halide_buffer_t will be NULL on return.
uintptr_t halide_cuda_get_device_ptr | ( | void * | user_context, |
struct halide_buffer_t * | buf | ||
) |
Return the underlying device pointer for a halide_buffer_t.
This buffer must be valid on a Cuda device, or not have any associated device memory. If there is no device memory (device field is NULL), this returns 0.
int halide_cuda_release_unused_device_allocations | ( | void * | user_context | ) |
Release any currently-unused device allocations back to the cuda driver.
See halide_reuse_device_allocations.