25 class OutputImageParam;
48 const std::string &
name()
const {
78 std::vector<Var> dim_vars;
82 void split(
const std::string &old,
const std::string &outer,
const std::string &inner,
84 void remove(
const std::string &var);
87 const std::vector<Internal::StorageDim> &storage_dims()
const {
88 return function.schedule().storage_dims();
91 Stage &compute_with(
LoopLevel loop_level,
const std::map<std::string, LoopAlignStrategy> &align);
95 : function(std::move(f)), definition(std::move(d)), stage_index(stage_index) {
98 dim_vars.reserve(
function.args().size());
99 for (
const auto &arg :
function.args()) {
100 dim_vars.emplace_back(arg);
358 const Expr &xfactor,
const Expr &yfactor,
361 const std::vector<VarOrRVar> &outers,
362 const std::vector<VarOrRVar> &inners,
363 const std::vector<Expr> &factors,
364 const std::vector<TailStrategy> &tails);
366 const std::vector<VarOrRVar> &outers,
367 const std::vector<VarOrRVar> &inners,
368 const std::vector<Expr> &factors,
371 const std::vector<VarOrRVar> &inners,
372 const std::vector<Expr> &factors,
376 template<
typename... Args>
379 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(args)...};
380 return reorder(collected_args);
417 const Expr &x_size,
const Expr &y_size,
423 const Expr &x_size,
const Expr &y_size,
430 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
435 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
447 return prefetch(f, var, var, offset, strategy);
452 return prefetch(param, var, var, offset, strategy);
458 return prefetch(image.parameter(), var, var, offset, strategy);
467 return prefetch(image.parameter(), at, from, std::move(offset), strategy);
496 int implicit_placeholder_pos;
498 std::vector<Expr> args;
499 std::vector<Expr> args_with_implicit_vars(
const std::vector<Expr> &e)
const;
504 template<
typename BinaryOp>
505 Stage func_ref_update(
const Tuple &e,
int init_val);
510 template<
typename BinaryOp>
511 Stage func_ref_update(
Expr e,
int init_val);
515 int placeholder_pos = -1,
int count = 0);
517 int placeholder_pos = -1,
int count = 0);
615 std::vector<Expr> args;
620 Tuple values_with_undefs(
const Expr &e)
const;
714 std::pair<int, int> add_implicit_vars(std::vector<Var> &)
const;
715 std::pair<int, int> add_implicit_vars(std::vector<Expr> &)
const;
726 Func &reorder_storage(
const std::vector<Var> &dims,
size_t start);
728 void invalidate_cache();
748 template<
typename T,
int Dims>
843 std::vector<int32_t> sizes = {},
902 const std::vector<int32_t> &sizes,
915 void compile_to_bitcode(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
938 void compile_to_object(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
951 void compile_to_header(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name =
"",
960 void compile_to_assembly(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
971 const std::vector<Argument> &,
972 const std::string &fn_name =
"",
979 const std::vector<Argument> &
args,
993 const std::string &fn_name =
"",
1001 const std::string &fn_name =
"",
1012 const std::vector<Argument> &
args,
1013 const std::vector<Target> &targets);
1029 const std::vector<Argument> &
args,
1030 const std::vector<Target> &targets,
1031 const std::vector<std::string> &suffixes);
1042 void compile_to(
const std::map<OutputFileType, std::string> &output_files,
1043 const std::vector<Argument> &
args,
1044 const std::string &fn_name,
1063 void (*
free)(
void *,
void *));
1091 template<typename T>
1170 std::vector<RVar>
rvars(
int idx = 0)
const;
1188 const std::vector<ExternFuncArgument> &params,
Type t,
1198 const std::vector<ExternFuncArgument> &params,
1199 const std::vector<Type> &types,
int dimensionality,
1206 const std::vector<ExternFuncArgument> &params,
1207 const std::vector<Type> &types,
int dimensionality,
1216 const std::vector<ExternFuncArgument> &params,
Type t,
1217 const std::vector<Var> &arguments,
1220 define_extern(function_name, params, std::vector<Type>{t}, arguments,
1221 mangling, device_api);
1225 const std::vector<ExternFuncArgument> &params,
1226 const std::vector<Type> &types,
1227 const std::vector<Var> &arguments,
1256 template<
typename... Args>
1259 std::vector<Var> collected_args{std::forward<Args>(
args)...};
1273 template<
typename... Args>
1276 std::vector<Expr> collected_args{x, std::forward<Args>(
args)...};
1277 return (*
this)(collected_args);
1543 const Expr &xfactor,
const Expr &yfactor,
1550 const Expr &xfactor,
const Expr &yfactor,
1555 const std::vector<VarOrRVar> &outers,
1556 const std::vector<VarOrRVar> &inners,
1557 const std::vector<Expr> &factors,
1558 const std::vector<TailStrategy> &tails);
1562 const std::vector<VarOrRVar> &outers,
1563 const std::vector<VarOrRVar> &inners,
1564 const std::vector<Expr> &factors,
1569 const std::vector<VarOrRVar> &inners,
1570 const std::vector<Expr> &factors,
1577 template<
typename... Args>
1580 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(
args)...};
1581 return reorder(collected_args);
1916 const Expr &x_size,
const Expr &y_size,
1922 const Expr &x_size,
const Expr &y_size,
1929 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
1934 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
1977 return prefetch(f, var, var, offset, strategy);
1982 return prefetch(param, var, var, offset, strategy);
1984 template<
typename T>
1988 return prefetch<T>(image, var, var, offset, strategy);
2073 template<
typename T>
2076 return prefetch(image.parameter(), at, from, std::move(offset), strategy);
2098 template<
typename... Args>
2101 std::vector<Var> collected_args{x, y, std::forward<Args>(
args)...};
2102 return reorder_storage(collected_args);
2526 namespace Internal {
2528 template<
typename Last>
2530 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2532 <<
"Can't evaluate expression "
2533 << t[idx] <<
" of type " << t[idx].type()
2534 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2537 template<
typename First,
typename Second,
typename... Rest>
2539 check_types<First>(t, idx);
2543 template<
typename Last>
2545 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2549 template<
typename First,
typename Second,
typename... Rest>
2551 assign_results<First>(r, idx, first);
2560 template<
typename T>
2563 <<
"Can't evaluate expression "
2564 << e <<
" of type " << e.
type()
2565 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2573 template<
typename T>
2575 return evaluate<T>(
nullptr, e);
2579 template<
typename First,
typename... Rest>
2590 template<
typename First,
typename... Rest>
2592 evaluate<First, Rest...>(
nullptr, std::move(t), std::forward<First>(first), std::forward<Rest...>(rest...));
2595 namespace Internal {
2614 template<
typename T>
2617 <<
"Can't evaluate expression "
2618 << e <<
" of type " << e.
type()
2619 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2630 template<
typename First,
typename... Rest>
Defines a type used for expressing the type signature of a generated halide pipeline.
#define internal_assert(c)
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
Defines the struct representing lifetime and dependencies of a JIT compiled halide pipeline.
Defines Module, an IR container that fully describes a Halide program.
Classes for declaring scalar parameters to halide pipelines.
Defines the front-end class representing an entire Halide imaging pipeline.
Defines the front-end syntax for reduction domains and reduction variables.
Defines the structure that describes a Halide target.
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Defines the Var - the front-end variable.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Helper class for identifying purpose of an Expr passed to memoize.
EvictionKey(const Expr &expr=Expr())
void print_loop_nest()
Write out the loop nests specified by the schedule for this Function.
Func & unroll(const VarOrRVar &var)
Mark a dimension to be completely unrolled.
bool is_extern() const
Is this function an external stage? That is, was it defined using define_extern?
FuncRef operator()(std::vector< Expr >) const
Either calls to the function, or the left-hand-side of an update definition (see RDom).
Func & hexagon(const VarOrRVar &x=Var::outermost())
Schedule for execution on Hexagon.
Func(const std::string &name)
Declare a new undefined function with the given name.
void compile_to_multitarget_object_files(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets, const std::vector< std::string > &suffixes)
Like compile_to_multitarget_static_library(), except that the object files are all output as object f...
Func & align_extent(const Var &var, Expr modulus)
Expand the region computed so that the extent is a multiple of 'modulus'.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, FuncRef >::type operator()(Args &&...args) const
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Split two dimensions at once by the given factors, and then reorder the resulting dimensions to be xi...
void specialize_fail(const std::string &message)
Add a specialization to a Func that always terminates execution with a call to halide_error().
Func & memoize(const EvictionKey &eviction_key=EvictionKey())
Use the halide_memoization_cache_...
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to text assembly equivalent to the object file generated by compile_...
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & allow_race_conditions()
Specify that race conditions are permitted for this Func, which enables parallelizing over RVars even...
bool has_update_definition() const
Does this function have at least one update definition?
void compile_jit(const Target &target=get_jit_target_from_environment())
Eagerly jit compile the function to machine code.
Func & bound_storage(const Var &dim, const Expr &bound)
Bound the extent of a Func's storage, but not extent of its compute.
Func()
Declare a new undefined function with an automatically-generated unique name.
Func & async()
Produce this Func asynchronously in a separate thread.
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
void realize(Pipeline::RealizationArg outputs, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Evaluate this function into an existing allocated buffer or buffers.
void set_custom_trace(int(*trace_fn)(void *, const halide_trace_event_t *))
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & compute_root()
Compute all of this function once ahead of time.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Generalized tiling, reusing the previous names as the outer names.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices and thread indices.
Func & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Schedule the iteration over the initial definition of this function to be fused with another stage's...
void compile_to_lowered_stmt(const std::string &filename, const std::vector< Argument > &args, StmtOutputFormat fmt=Text, const Target &target=get_target_from_environment())
Write out an internal representation of lowered code.
void compile_to_c(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Statically compile this function to C source code.
Func & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
Join two dimensions into a single fused dimension.
Func & fold_storage(const Var &dim, const Expr &extent, bool fold_forward=true)
Store realizations of this function in a circular buffer of a given extent.
Func & store_at(LoopLevel loop_level)
Equivalent to the version of store_at that takes a Var, but schedules storage at a given LoopLevel.
Stage update(int idx=0)
Get a handle on an update step for the purposes of scheduling it.
Func & reorder_storage(const Var &x, const Var &y)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Expr, Args... >::value, FuncRef >::type operator()(const Expr &x, Args &&...args) const
bool defined() const
Does this function have at least a pure definition?
Func & compute_at(LoopLevel loop_level)
Schedule a function to be computed within the iteration over a given LoopLevel.
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this initial Stage of this Func.
Func & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices.
Func & store_at(const Func &f, const Var &var)
Allocate storage for this function within f's loop over var.
Func copy_to_host()
Declare that this function should be implemented by a call to halide_buffer_copy with a NULL target d...
Func & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension into inner and outer subdimensions with the given names, where the inner dimension ...
Func & prefetch(const Func &f, const VarOrRVar &var, int offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Prefetch data written to or read from a Func or an ImageParam by a subsequent loop iteration,...
Func & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
std::vector< Argument > infer_arguments() const
Infer the arguments to the Func, sorted into a canonical order: all buffers (sorted alphabetically by...
void compile_to_header(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Emit a header file with the given filename for this function.
Func & prefetch(const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
std::vector< Var > args() const
Get the pure arguments.
Func(const Expr &e)
Declare a new function with an automatically-generated unique name, and define it to return the given...
Func & add_trace_tag(const std::string &trace_tag)
Add a string of arbitrary text that will be passed through to trace inspection code if the Func is reali...
int dimensions() const
The dimensionality (number of arguments) of this function.
void realize(JITUserContext *context, Pipeline::RealizationArg outputs, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Same as above, but takes a custom user-provided context to be passed to runtime functions.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, Func & >::type reorder_storage(const Var &x, const Var &y, Args &&...args)
void set_custom_do_par_for(int(*custom_do_par_for)(void *, int(*)(void *, int, uint8_t *), int, int, uint8_t *))
Func & align_bounds(const Var &var, Expr modulus, Expr remainder=0)
Expand the region computed so that the min coordinate is congruent to 'remainder' modulo 'modulus',...
std::string source_location() const
Get the source location of the pure definition of this Func.
Func & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Func & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
void infer_input_bounds(const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
For a given size of output, or a given output buffer, determine the bounds required of all unbound Im...
Func & store_root()
Equivalent to Func::store_at, but schedules storage outside the outermost loop.
int outputs() const
Get the number of outputs of this Func.
void set_custom_allocator(void *(*malloc)(void *, size_t), void(*free)(void *, void *))
Tuple update_values(int idx=0) const
Get the right-hand-side of an update definition for functions that return multiple values.
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm bitcode, with the given filename (which should probably end ...
int num_update_definitions() const
How many update definitions does this function have?
Func & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Rename a dimension.
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
A more general form of tile, which defines tiles of any dimensionality.
Func & prefetch(const Internal::Parameter &param, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Func & bound_extent(const Var &var, Expr extent)
Bound the extent of a Func's realization, but not its min.
Func & trace_stores()
Trace all stores to the buffer backing this Func by emitting calls to halide_trace.
Func & set_estimates(const Region &estimates)
Set (min, extent) estimates for all dimensions in the Func at once; this is equivalent to calling set...
Stage specialize(const Expr &condition)
Specialize a Func.
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function...
void set_custom_do_task(int(*custom_do_task)(void *, int(*)(void *, int, uint8_t *), int, uint8_t *))
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
The generalized tile, with a single tail strategy to apply to all vars.
Func & reorder_storage(const std::vector< Var > &dims)
Specify how the storage for the function is laid out.
Func & compute_at(const Func &f, const RVar &var)
Schedule a function to be computed within the iteration over some dimension of an update domain.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Short-hand for tiling a domain and mapping the tile indices to GPU block indices and the coordinates ...
const std::vector< Expr > & update_args(int idx=0) const
Get the left-hand-side of the update definition.
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & store_at(const Func &f, const RVar &var)
Equivalent to the version of store_at that takes a Var, but schedules storage within the loop over a ...
Realization realize(std::vector< int32_t > sizes={}, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
HALIDE_NO_USER_CODE_INLINE Func(Buffer< T, Dims > &im)
Construct a new Func to wrap a Buffer.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Func & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given task_size, and then parallelize the outer dimension.
JITHandlers & jit_handlers()
Get a struct containing the currently set custom functions used by JIT.
Expr value() const
The right-hand-side value of the pure definition of this function.
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
A shorter form of tile, which reuses the old variable names as the new outer dimensions.
void set_error_handler(void(*handler)(void *, const char *))
Deprecated variants of the above that use a void pointer instead of a JITUserContext pointer.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func clone_in(const std::vector< Func > &fs)
Module compile_to_module(const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Store an internal representation of lowered code as a self contained Module suitable for further comp...
void infer_input_bounds(JITUserContext *context, const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
Versions of infer_input_bounds that take a custom user context to pass to runtime functions.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
void set_custom_print(void(*handler)(void *, const char *))
Func in()
Create and return a global identity wrapper, which wraps all calls to this Func by any other Func.
Func & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then vectorize the inner dimension.
OutputImageParam output_buffer() const
Get a handle on the output buffer for this Func.
Expr update_value(int idx=0) const
Get the right-hand-side of an update definition.
Func & bound(const Var &var, Expr min, Expr extent)
Statically declare that the range over which a function should be evaluated is given by the second an...
void compile_to(const std::map< OutputFileType, std::string > &output_files, const std::vector< Argument > &args, const std::string &fn_name, const Target &target=get_target_from_environment())
Compile and generate multiple target files with single call.
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
void add_custom_lowering_pass(T *pass)
Add a custom pass to be used during lowering.
Func in(const std::vector< Func > &fs)
Create and return an identity wrapper shared by all the Funcs in 'fs'.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
void infer_input_bounds(JITUserContext *context, Pipeline::RealizationArg outputs, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
Func & parallel(const VarOrRVar &var)
Mark a dimension to be traversed in parallel.
Func & serial(const VarOrRVar &var)
Mark a dimension to be traversed serially.
Func & prefetch(const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
prefetch() is a more fine-grained version of prefetch(), which allows specification of different vars...
const std::string & name() const
The name of this function, either given during construction, or automatically generated.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, Type t, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Add an extern definition for this Func.
Func & align_storage(const Var &dim, const Expr &alignment)
Pad the storage extent of a particular dimension of realizations of this function up to be a multiple...
void compile_to_file(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to object file and header pair, with the given arguments.
Func & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU thread indices.
void add_custom_lowering_pass(Internal::IRMutator *pass, std::function< void()> deleter)
Add a custom pass to be used during lowering, with the function that will be called to delete it also...
void clear_custom_lowering_passes()
Remove all previously-set custom lowering passes.
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm assembly, with the given filename (which should probably end...
void compile_to_multitarget_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets)
Compile to static-library file and header pair once for each target; each resulting function will be ...
Func & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
The given dimension corresponds to the lanes in a GPU warp.
std::vector< OutputImageParam > output_buffers() const
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & store_in(MemoryType memory_type)
Set the type of memory this Func should be stored in.
Realization realize(JITUserContext *context, std::vector< int32_t > sizes={}, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Same as above, but takes a custom user-provided context to be passed to runtime functions.
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func clone_in(const Func &f)
Similar to Func::in; however, instead of replacing the call to this Func with an identity Func that r...
std::vector< RVar > rvars(int idx=0) const
Get the RVars of the reduction domain for an update definition, if there is one.
Func & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide to run this stage using a single gpu thread and block.
Func(Internal::Function f)
Construct a new Func to wrap an existing, already-defined Function object.
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to an object file, with the given filename (which should probably en...
const std::string & extern_function_name() const
Get the name of the extern function called for an extern definition.
Func & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Func & trace_realizations()
Trace all realizations of this Func by emitting calls to halide_trace.
Tuple values() const
The values returned by this function.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & compute_inline()
Aggressively inline all uses of this function.
const std::vector< Type > & output_types() const
Get the types of the outputs of this Func.
Func copy_to_device(DeviceAPI d=DeviceAPI::Default_GPU)
Declare that this function should be implemented by a call to halide_buffer_copy with the given targe...
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, Type t, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Func & reorder(const std::vector< VarOrRVar > &vars)
Reorder variables to have the given nesting order, from innermost out.
Func & atomic(bool override_associativity_test=false)
Issue atomic updates for this Func.
const std::vector< CustomLoweringPass > & custom_lowering_passes()
Get the custom lowering passes.
void debug_to_file(const std::string &filename)
When this function is compiled, include code that dumps its values to a file after it is realized,...
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func in(const Func &f)
Creates and returns a new identity Func that wraps this Func.
void compile_to_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to static-library file and header pair, with the given arguments.
Func & set_estimate(const Var &var, const Expr &min, const Expr &extent)
Statically declare the range over which the function will be evaluated in the general case.
Func & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then unroll the inner dimension.
void infer_input_bounds(Pipeline::RealizationArg outputs, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
Func & trace_loads()
Trace all loads from this Func by emitting calls to halide_trace.
FuncRef operator()(std::vector< Var >) const
Construct either the left-hand-side of a definition, or a call to a function that happens to only co...
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, int dimensionality, NameMangling mangling)
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Stage operator*=(const FuncRef &)
FuncTupleElementRef operator[](int) const
When a FuncRef refers to a function that provides multiple outputs, you can access each output as an ...
Stage operator-=(const FuncRef &)
size_t size() const
How many outputs does the function this refers to produce.
Internal::Function function() const
What function is this calling?
Stage operator+=(Expr)
Define a stage that adds the given expression to this Func.
Stage operator-=(Expr)
Define a stage that adds the negative of the given expression to this Func.
Stage operator*=(Expr)
Define a stage that multiplies this Func by the given expression.
Stage operator-=(const Tuple &)
Stage operator/=(Expr)
Define a stage that divides this Func by the given expression.
Stage operator+=(const FuncRef &)
Stage operator=(const Expr &)
Use this as the left-hand-side of a definition or an update definition (see RDom).
Stage operator=(const FuncRef &)
FuncRef(Internal::Function, const std::vector< Var > &, int placeholder_pos=-1, int count=0)
Stage operator+=(const Tuple &)
FuncRef(const Internal::Function &, const std::vector< Expr > &, int placeholder_pos=-1, int count=0)
Stage operator/=(const FuncRef &)
Stage operator*=(const Tuple &)
Stage operator/=(const Tuple &)
Stage operator=(const Tuple &)
Use this as the left-hand-side of a definition or an update definition for a Func with multiple outpu...
A fragment of front-end syntax of the form f(x, y, z)[index], where x, y, z are Vars or Exprs.
int index() const
Return index to the function outputs.
Stage operator+=(const Expr &e)
Define a stage that adds the given expression to Tuple component 'idx' of this Func.
Stage operator*=(const Expr &e)
Define a stage that multiplies Tuple component 'idx' of this Func by the given expression.
Stage operator/=(const Expr &e)
Define a stage that divides Tuple component 'idx' of this Func by the given expression.
Stage operator=(const Expr &e)
Use this as the left-hand-side of an update definition of Tuple component 'idx' of a Func (see RDom).
Stage operator=(const FuncRef &e)
Stage operator-=(const Expr &e)
Define a stage that adds the negative of the given expression to Tuple component 'idx' of this Func.
FuncTupleElementRef(const FuncRef &ref, const std::vector< Expr > &args, int idx)
An Image parameter to a halide pipeline.
A Function definition which can represent either an init or an update definition.
const StageSchedule & schedule() const
Get the default (no-specialization) stage-specific schedule associated with this definition.
const std::vector< Expr > & args() const
Get the default (no-specialization) arguments (left-hand-side) of the definition.
bool defined() const
Definition objects are nullable.
A reference-counted handle to Halide's internal representation of a function.
A base class for passes over the IR which modify it (e.g.
A reference-counted handle to a parameter to a halide pipeline.
A schedule for a single stage of a Halide pipeline.
A reference to a site in a Halide statement at the top of the body of a particular for loop.
A handle on the output buffer of a pipeline.
static const ParamMap & empty_map()
A const ref to an empty ParamMap.
A class representing a Halide pipeline.
A multi-dimensional domain over which to iterate.
A reduction variable represents a single dimension of a reduction domain (RDom).
const std::string & name() const
The name of this reduction variable.
A Realization is a vector of references to existing Buffer objects.
A single definition of a Func.
Stage & prefetch(const T &image, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
std::string name() const
Return the name of this stage, e.g.
Stage & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & prefetch(const Func &f, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & hexagon(const VarOrRVar &x=Var::outermost())
Func rfactor(const RVar &r, const Var &v)
Stage & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Stage & vectorize(const VarOrRVar &var)
Stage & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & prefetch(const Func &f, const VarOrRVar &var, int offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Stage & prefetch(const Internal::Parameter &param, const VarOrRVar &at, const VarOrRVar &from, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & unroll(const VarOrRVar &var)
Stage & parallel(const VarOrRVar &var)
Stage & allow_race_conditions()
Stage & serial(const VarOrRVar &var)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
Stage specialize(const Expr &condition)
Stage & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Schedule the iteration over this stage to be fused with another stage 's' from outermost loop to a gi...
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Scheduling calls that control how the domain of this stage is traversed.
Stage & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
Stage(Internal::Function f, Internal::Definition d, size_t stage_index)
Stage & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Func rfactor(std::vector< std::pair< RVar, Var >> preserved)
Calling rfactor() on an associative update definition of a Func will split the update into an intermedia...
Stage & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this Stage.
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & reorder(const std::vector< VarOrRVar > &vars)
Stage & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
void specialize_fail(const std::string &message)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Stage & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Stage & atomic(bool override_associativity_test=false)
std::string source_location() const
Attempt to get the source file and line where this stage was defined by parsing the process's own deb...
Stage & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
std::string dump_argument_list() const
Return a string describing the current var list taking into account all the splits,...
void unscheduled()
Assert that this stage has intentionally been given no schedule, and suppress the warning about unsch...
Create a small array of Exprs for defining and calling functions with multiple outputs.
A Halide variable, to be used when defining functions.
const std::string & name() const
Get the name of a Var.
static Var outermost()
A Var that represents the location outside the outermost loop.
void schedule_scalar(Func f)
void assign_results(Realization &r, int idx, Last last)
void check_types(const Tuple &t, int idx)
ForType
An enum describing a type of loop traversal.
std::vector< Var > make_argument_list(int dimensionality)
Make a list of unique arguments for definitions with unnamed arguments.
WEAK halide_do_task_t custom_do_task
WEAK halide_do_par_for_t custom_do_par_for
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
class HALIDE_ATTRIBUTE_DEPRECATED("Use OutputFileType instead of Output") Output
PrefetchBoundStrategy
Different ways to handle accesses outside the original extents in a prefetch.
@ GuardWithIf
Guard the prefetch with if-guards that ignore the prefetch if any of the prefetched region ever goes...
HALIDE_NO_USER_CODE_INLINE T evaluate_may_gpu(const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
TailStrategy
Different ways to handle a tail case in a split when the factor does not provably divide the extent.
@ Auto
For pure definitions use ShiftInwards.
LoopAlignStrategy
Different ways to handle the case when the start/end of the loops of stages computed with (fused) are...
@ Auto
By default, LoopAlignStrategy is set to NoAlign.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
NameMangling
An enum to specify calling convention for extern stages.
@ Default
Match whatever is specified in the Target.
Target get_jit_target_from_environment()
Return the target that Halide will use for jit-compilation.
DeviceAPI
An enum describing a type of device API.
@ Host
Used to denote for loops that run on the same device as the containing code.
Target get_target_from_environment()
Return the target that Halide will use.
StmtOutputFormat
Used to determine if the output printed to file should be as a normal string or as an HTML file which...
std::vector< Range > Region
A multi-dimensional box.
Expr max(const FuncRef &a, const FuncRef &b)
MemoryType
An enum describing different address spaces to be used with Func::store_in.
HALIDE_NO_USER_CODE_INLINE T evaluate(JITUserContext *ctx, const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
unsigned __INT8_TYPE__ uint8_t
A fragment of Halide syntax.
HALIDE_ALWAYS_INLINE Type type() const
Get the type of this expression node.
An argument to an extern-defined Func.
A set of custom overrides of runtime functions.
A context to be passed to Pipeline::realize.
A struct representing a target machine and os to generate code for.
bool has_gpu_feature() const
Is a fully featured GPU compute runtime enabled? I.e.
bool has_feature(Feature f) const
Types in the halide type system.
A class that can represent Vars or RVars.
const std::string & name() const
VarOrRVar(const std::string &n, bool r)
VarOrRVar(const ImplicitVar< N > &u)