25 class OutputImageParam;
48 const std::string &
name()
const {
78 std::vector<Var> dim_vars;
82 void split(
const std::string &old,
const std::string &outer,
const std::string &inner,
84 void remove(
const std::string &var);
87 const std::vector<Internal::StorageDim> &storage_dims()
const {
88 return function.schedule().storage_dims();
91 Stage &compute_with(
LoopLevel loop_level,
const std::map<std::string, LoopAlignStrategy> &align);
95 : function(std::move(f)), definition(std::move(d)), stage_index(stage_index) {
99 dim_vars.reserve(
function.args().size());
100 for (
const auto &arg :
function.args()) {
101 dim_vars.emplace_back(arg);
359 const Expr &xfactor,
const Expr &yfactor,
362 const std::vector<VarOrRVar> &outers,
363 const std::vector<VarOrRVar> &inners,
364 const std::vector<Expr> &factors,
365 const std::vector<TailStrategy> &tails);
367 const std::vector<VarOrRVar> &outers,
368 const std::vector<VarOrRVar> &inners,
369 const std::vector<Expr> &factors,
372 const std::vector<VarOrRVar> &inners,
373 const std::vector<Expr> &factors,
377 template<
typename... Args>
380 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(args)...};
381 return reorder(collected_args);
418 const Expr &x_size,
const Expr &y_size,
424 const Expr &x_size,
const Expr &y_size,
431 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
436 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
451 return prefetch(image.parameter(), var, offset, strategy);
474 int implicit_placeholder_pos;
476 std::vector<Expr> args;
477 std::vector<Expr> args_with_implicit_vars(
const std::vector<Expr> &e)
const;
482 template<
typename BinaryOp>
483 Stage func_ref_update(
const Tuple &e,
int init_val);
488 template<
typename BinaryOp>
489 Stage func_ref_update(
Expr e,
int init_val);
493 int placeholder_pos = -1,
int count = 0);
495 int placeholder_pos = -1,
int count = 0);
593 std::vector<Expr> args;
598 Tuple values_with_undefs(
const Expr &e)
const;
692 std::pair<int, int> add_implicit_vars(std::vector<Var> &)
const;
693 std::pair<int, int> add_implicit_vars(std::vector<Expr> &)
const;
704 Func &reorder_storage(
const std::vector<Var> &dims,
size_t start);
706 void invalidate_cache();
827 template<typename T, typename = typename std::enable_if<std::is_same<T,
int>::
value>::type>
831 return realize(std::vector<int32_t>{x_size}, target, param_map);
881 void compile_to_bitcode(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
904 void compile_to_object(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
917 void compile_to_header(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name =
"",
926 void compile_to_assembly(
const std::string &filename,
const std::vector<Argument> &,
const std::string &fn_name,
937 const std::vector<Argument> &,
938 const std::string &fn_name =
"",
945 const std::vector<Argument> &
args,
959 const std::string &fn_name =
"",
967 const std::string &fn_name =
"",
978 const std::vector<Argument> &
args,
979 const std::vector<Target> &targets);
995 const std::vector<Argument> &
args,
996 const std::vector<Target> &targets,
997 const std::vector<std::string> &suffixes);
1008 void compile_to(
const std::map<Output, std::string> &output_files,
1009 const std::vector<Argument> &
args,
1010 const std::string &fn_name,
1046 void (*
free)(
void *,
void *));
1129 template<
typename T>
1208 std::vector<RVar>
rvars(
int idx = 0)
const;
1226 const std::vector<ExternFuncArgument> &params,
Type t,
1236 const std::vector<ExternFuncArgument> &params,
1237 const std::vector<Type> &types,
int dimensionality,
1244 const std::vector<ExternFuncArgument> &params,
1245 const std::vector<Type> &types,
int dimensionality,
1254 const std::vector<ExternFuncArgument> &params,
Type t,
1255 const std::vector<Var> &arguments,
1258 define_extern(function_name, params, std::vector<Type>{t}, arguments,
1259 mangling, device_api);
1263 const std::vector<ExternFuncArgument> &params,
1264 const std::vector<Type> &types,
1265 const std::vector<Var> &arguments,
1294 template<
typename... Args>
1297 std::vector<Var> collected_args{std::forward<Args>(
args)...};
1311 template<
typename... Args>
1314 std::vector<Expr> collected_args{x, std::forward<Args>(
args)...};
1315 return (*
this)(collected_args);
1581 const Expr &xfactor,
const Expr &yfactor,
1588 const Expr &xfactor,
const Expr &yfactor,
1593 const std::vector<VarOrRVar> &outers,
1594 const std::vector<VarOrRVar> &inners,
1595 const std::vector<Expr> &factors,
1596 const std::vector<TailStrategy> &tails);
1600 const std::vector<VarOrRVar> &outers,
1601 const std::vector<VarOrRVar> &inners,
1602 const std::vector<Expr> &factors,
1607 const std::vector<VarOrRVar> &inners,
1608 const std::vector<Expr> &factors,
1615 template<
typename... Args>
1618 std::vector<VarOrRVar> collected_args{x, y, std::forward<Args>(
args)...};
1619 return reorder(collected_args);
1954 const Expr &x_size,
const Expr &y_size,
1960 const Expr &x_size,
const Expr &y_size,
1967 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
1972 const Expr &x_size,
const Expr &y_size,
const Expr &z_size,
2016 template<
typename T>
2019 return prefetch(image.parameter(), var, offset, strategy);
2041 template<
typename... Args>
2044 std::vector<Var> collected_args{x, y, std::forward<Args>(
args)...};
2045 return reorder_storage(collected_args);
2461 namespace Internal {
2463 template<
typename Last>
2465 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2467 <<
"Can't evaluate expression "
2468 << t[idx] <<
" of type " << t[idx].type()
2469 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2472 template<
typename First,
typename Second,
typename... Rest>
2474 check_types<First>(t, idx);
2478 template<
typename Last>
2480 using T =
typename std::remove_pointer<typename std::remove_reference<Last>::type>::type;
2484 template<
typename First,
typename Second,
typename... Rest>
2486 assign_results<First>(r, idx, first);
2495 template<
typename T>
2498 <<
"Can't evaluate expression "
2499 << e <<
" of type " << e.
type()
2500 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2508 template<
typename First,
typename... Rest>
2518 namespace Internal {
2537 template<
typename T>
2540 <<
"Can't evaluate expression "
2541 << e <<
" of type " << e.
type()
2542 <<
" as a scalar of type " << type_of<T>() <<
"\n";
2553 template<
typename First,
typename... Rest>
Defines a type used for expressing the type signature of a generated halide pipeline.
#define internal_assert(c)
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
#define HALIDE_ATTRIBUTE_DEPRECATED(x)
#define HALIDE_ALWAYS_INLINE
Defines the struct representing lifetime and dependencies of a JIT compiled halide pipeline.
Defines Module, an IR container that fully describes a Halide program.
Classes for declaring scalar parameters to halide pipelines.
Defines the front-end class representing an entire Halide imaging pipeline.
Defines the front-end syntax for reduction domains and reduction variables.
Defines the structure that describes a Halide target.
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Defines the Var - the front-end variable.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Helper class for identifying purpose of an Expr passed to memoize.
EvictionKey(const Expr &expr=Expr())
Func & prefetch(const Func &f, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Prefetch data written to or read from a Func or an ImageParam by a subsequent loop iteration,...
void print_loop_nest()
Write out the loop nests specified by the schedule for this Function.
Func & unroll(const VarOrRVar &var)
Mark a dimension to be completely unrolled.
bool is_extern() const
Is this function an external stage? That is, was it defined using define_extern?
FuncRef operator()(std::vector< Expr >) const
Either calls to the function, or the left-hand-side of an update definition (see RDom).
Func & hexagon(const VarOrRVar &x=Var::outermost())
Schedule for execution on Hexagon.
Func(const std::string &name)
Declare a new undefined function with the given name.
void compile_to_multitarget_object_files(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets, const std::vector< std::string > &suffixes)
Like compile_to_multitarget_static_library(), except that the object files are all output as object f...
Func & align_extent(const Var &var, Expr modulus)
Expand the region computed so that the extent is a multiple of 'modulus'.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, FuncRef >::type operator()(Args &&...args) const
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Split two dimensions at once by the given factors, and then reorder the resulting dimensions to be xi...
void specialize_fail(const std::string &message)
Add a specialization to a Func that always terminates execution with a call to halide_error().
Func & memoize(const EvictionKey &eviction_key=EvictionKey())
Use the halide_memoization_cache_...
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to text assembly equivalent to the object file generated by compile_...
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & allow_race_conditions()
Specify that race conditions are permitted for this Func, which enables parallelizing over RVars even...
bool has_update_definition() const
Does this function have at least one update definition?
void compile_jit(const Target &target=get_jit_target_from_environment())
Eagerly jit compile the function to machine code.
Func()
Declare a new undefined function with an automatically-generated unique name.
Func & async()
Produce this Func asynchronously in a separate thread.
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
void realize(Pipeline::RealizationArg outputs, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Evaluate this function into an existing allocated buffer or buffers.
void set_custom_trace(int(*trace_fn)(void *, const halide_trace_event_t *))
Set custom routines to call when tracing is enabled.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & compute_root()
Compute all of this function once ahead of time.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Generalized tiling, reusing the previous names as the outer names.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices and thread indices.
Func & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Schedule the iteration over the initial definition of this function to be fused with another stage 's...
void compile_to_lowered_stmt(const std::string &filename, const std::vector< Argument > &args, StmtOutputFormat fmt=Text, const Target &target=get_target_from_environment())
Write out an internal representation of lowered code.
void compile_to_c(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Statically compile this function to C source code.
Func & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
Join two dimensions into a single fused dimension.
Func & fold_storage(const Var &dim, const Expr &extent, bool fold_forward=true)
Store realizations of this function in a circular buffer of a given extent.
Func & store_at(LoopLevel loop_level)
Equivalent to the version of store_at that takes a Var, but schedules storage at a given LoopLevel.
Stage update(int idx=0)
Get a handle on an update step for the purposes of scheduling it.
Func & reorder_storage(const Var &x, const Var &y)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Expr, Args... >::value, FuncRef >::type operator()(const Expr &x, Args &&...args) const
bool defined() const
Does this function have at least a pure definition.
Func & compute_at(LoopLevel loop_level)
Schedule a function to be computed within the iteration over a given LoopLevel.
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this initial Stage of this Func.
Func & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU block indices.
Func & store_at(const Func &f, const Var &var)
Allocate storage for this function within f's loop over var.
Func copy_to_host()
Declare that this function should be implemented by a call to halide_buffer_copy with a NULL target d...
Func & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension into inner and outer subdimensions with the given names, where the inner dimension ...
Func & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
std::vector< Argument > infer_arguments() const
Infer the arguments to the Func, sorted into a canonical order: all buffers (sorted alphabetically by...
void compile_to_header(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name="", const Target &target=get_target_from_environment())
Emit a header file with the given filename for this function.
std::vector< Var > args() const
Get the pure arguments.
Func(const Expr &e)
Declare a new function with an automatically-generated unique name, and define it to return the given...
Func & add_trace_tag(const std::string &trace_tag)
Add a string of arbitrary text that will be passed thru to trace inspection code if the Func is reali...
int dimensions() const
The dimensionality (number of arguments) of this function.
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< Var, Args... >::value, Func & >::type reorder_storage(const Var &x, const Var &y, Args &&...args)
void set_custom_do_par_for(int(*custom_do_par_for)(void *, int(*)(void *, int, uint8_t *), int, int, uint8_t *))
Set a custom parallel for loop launcher.
Func & align_bounds(const Var &var, Expr modulus, Expr remainder=0)
Expand the region computed so that the min coordinate is congruent to 'remainder' modulo 'modulus',...
std::string source_location() const
Get the source location of the pure definition of this Func.
Func & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Func & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
void infer_input_bounds(const std::vector< int32_t > &sizes, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
For a given size of output, or a given output buffer, determine the bounds required of all unbound Im...
Func & store_root()
Equivalent to Func::store_at, but schedules storage outside the outermost loop.
int outputs() const
Get the number of outputs of this Func.
void set_custom_allocator(void *(*malloc)(void *, size_t), void(*free)(void *, void *))
Set a custom malloc and free for halide to use.
Tuple update_values(int idx=0) const
Get the right-hand-side of an update definition for functions that returns multiple values.
void compile_to_bitcode(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm bitcode, with the given filename (which should probably end ...
int num_update_definitions() const
How many update definitions does this function have?
Func & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Rename a dimension.
Func & vectorize(const VarOrRVar &var)
Mark a dimension to be computed all-at-once as a single vector.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
A more general form of tile, which defines tiles of any dimensionality.
Func & bound_extent(const Var &var, Expr extent)
Bound the extent of a Func's realization, but not its min.
Func & trace_stores()
Trace all stores to the buffer backing this Func by emitting calls to halide_trace.
Func & set_estimates(const Region &estimates)
Set (min, extent) estimates for all dimensions in the Func at once; this is equivalent to calling set...
Stage specialize(const Expr &condition)
Specialize a Func.
Func & compute_at(const Func &f, const Var &var)
Compute this function as needed for each unique value of the given var for the given calling function...
void set_custom_do_task(int(*custom_do_task)(void *, int(*)(void *, int, uint8_t *), int, uint8_t *))
Set a custom task handler to be called by the parallel for loop.
Func & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
The generalized tile, with a single tail strategy to apply to all vars.
Func & reorder_storage(const std::vector< Var > &dims)
Specify how the storage for the function is laid out.
Func & compute_at(const Func &f, const RVar &var)
Schedule a function to be computed within the iteration over some dimension of an update domain.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Short-hand for tiling a domain and mapping the tile indices to GPU block indices and the coordinates ...
const std::vector< Expr > & update_args(int idx=0) const
Get the left-hand-side of the update definition.
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & store_at(const Func &f, const RVar &var)
Equivalent to the version of store_at that takes a Var, but schedules storage within the loop over a ...
Realization realize(std::vector< int32_t > sizes={}, const Target &target=Target(), const ParamMap &param_map=ParamMap::empty_map())
Evaluate this function over some rectangular domain and return the resulting buffer or buffers.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Func & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given task_size, and then parallelize the outer dimension.
Expr value() const
The right-hand-side value of the pure definition of this function.
Func & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
A shorter form of tile, which reuses the old variable names as the new outer dimensions.
void set_error_handler(void(*handler)(void *, const char *))
Set the error handler function that will be called in the case of runtime errors during halide pipelines.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func clone_in(const std::vector< Func > &fs)
Module compile_to_module(const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Store an internal representation of lowered code as a self contained Module suitable for further comp...
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
void set_custom_print(void(*handler)(void *, const char *))
Set the function called to print messages from the runtime.
Func in()
Create and return a global identity wrapper, which wraps all calls to this Func by any other Func.
Func & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then vectorize the inner dimension.
OutputImageParam output_buffer() const
Get a handle on the output buffer for this Func.
Expr update_value(int idx=0) const
Get the right-hand-side of an update definition.
Func & bound(const Var &var, Expr min, Expr extent)
Statically declare that the range over which a function should be evaluated is given by the second an...
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
void add_custom_lowering_pass(T *pass)
Add a custom pass to be used during lowering.
Func in(const std::vector< Func > &fs)
Create and return an identity wrapper shared by all the Funcs in 'fs'.
void compile_to(const std::map< Output, std::string > &output_files, const std::vector< Argument > &args, const std::string &fn_name, const Target &target=get_target_from_environment())
Compile and generate multiple target files with single call.
Func & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & parallel(const VarOrRVar &var)
Mark a dimension to be traversed in parallel.
Func & serial(const VarOrRVar &var)
Mark a dimension to be traversed serially.
const Internal::JITHandlers & jit_handlers()
Get a struct containing the currently set custom functions used by JIT.
const std::string & name() const
The name of this function, either given during construction, or automatically generated.
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, Type t, int dimensionality, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Add an extern definition for this Func.
Func & align_storage(const Var &dim, const Expr &alignment)
Pad the storage extent of a particular dimension of realizations of this function up to be a multiple...
void compile_to_file(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to object file and header pair, with the given arguments.
Func & prefetch(const T &image, VarOrRVar var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Func & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide that the following dimensions correspond to GPU thread indices.
void add_custom_lowering_pass(Internal::IRMutator *pass, std::function< void()> deleter)
Add a custom pass to be used during lowering, with the function that will be called to delete it also...
void clear_custom_lowering_passes()
Remove all previously-set custom lowering passes.
void compile_to_llvm_assembly(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to llvm assembly, with the given filename (which should probably end...
void compile_to_multitarget_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::vector< Target > &targets)
Compile to static-library file and header pair once for each target; each resulting function will be ...
Func & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
The given dimension corresponds to the lanes in a GPU warp.
std::vector< OutputImageParam > output_buffers() const
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & store_in(MemoryType memory_type)
Set the type of memory this Func should be stored in.
HALIDE_NO_USER_CODE_INLINE Func(Buffer< T > &im)
Construct a new Func to wrap a Buffer.
void compile_to_assembly(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
Func clone_in(const Func &f)
Similar to Func::in; however, instead of replacing the call to this Func with an identity Func that r...
std::vector< RVar > rvars(int idx=0) const
Get the RVars of the reduction domain for an update definition, if there is one.
Func & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Tell Halide to run this stage using a single gpu thread and block.
Func(Internal::Function f)
Construct a new Func to wrap an existing, already-defined Function object.
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const std::string &fn_name, const Target &target=get_target_from_environment())
Statically compile this function to an object file, with the given filename (which should probably en...
const std::string & extern_function_name() const
Get the name of the extern function called for an extern definition.
Func & prefetch(const Internal::Parameter &param, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Func & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Func & trace_realizations()
Trace all realizations of this Func by emitting calls to halide_trace.
Tuple values() const
The values returned by this function.
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func & compute_inline()
Aggressively inline all uses of this function.
const std::vector< Type > & output_types() const
Get the types of the outputs of this Func.
Func copy_to_device(DeviceAPI d=DeviceAPI::Default_GPU)
Declare that this function should be implemented by a call to halide_buffer_copy with the given targe...
Func & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
void compile_to_object(const std::string &filename, const std::vector< Argument > &, const Target &target=get_target_from_environment())
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, Type t, const std::vector< Var > &arguments, NameMangling mangling=NameMangling::Default, DeviceAPI device_api=DeviceAPI::Host)
Func & reorder(const std::vector< VarOrRVar > &vars)
Reorder variables to have the given nesting order, from innermost out.
Func & atomic(bool override_associativity_test=false)
Issue atomic updates for this Func.
const std::vector< CustomLoweringPass > & custom_lowering_passes()
Get the custom lowering passes.
void debug_to_file(const std::string &filename)
When this function is compiled, include code that dumps its values to a file after it is realized,...
Func & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Func in(const Func &f)
Creates and returns a new identity Func that wraps this Func.
void compile_to_static_library(const std::string &filename_prefix, const std::vector< Argument > &args, const std::string &fn_name="", const Target &target=get_target_from_environment())
Compile to static-library file and header pair, with the given arguments.
Func & set_estimate(const Var &var, const Expr &min, const Expr &extent)
Statically declare the range over which the function will be evaluated in the general case.
Func & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Split a dimension by the given factor, then unroll the inner dimension.
void infer_input_bounds(Pipeline::RealizationArg outputs, const Target &target=get_jit_target_from_environment(), const ParamMap &param_map=ParamMap::empty_map())
Func & trace_loads()
Trace all loads from this Func by emitting calls to halide_trace.
FuncRef operator()(std::vector< Var >) const
Construct either the left-hand-side of a definition, or a call to a function that happens to only co...
void define_extern(const std::string &function_name, const std::vector< ExternFuncArgument > &params, const std::vector< Type > &types, int dimensionality, NameMangling mangling)
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Stage operator*=(const FuncRef &)
FuncTupleElementRef operator[](int) const
When a FuncRef refers to a function that provides multiple outputs, you can access each output as an ...
Stage operator-=(const FuncRef &)
size_t size() const
How many outputs does the function this refers to produce.
Internal::Function function() const
What function is this calling?
Stage operator+=(Expr)
Define a stage that adds the given expression to this Func.
Stage operator-=(Expr)
Define a stage that adds the negative of the given expression to this Func.
Stage operator*=(Expr)
Define a stage that multiplies this Func by the given expression.
Stage operator-=(const Tuple &)
Stage operator/=(Expr)
Define a stage that divides this Func by the given expression.
Stage operator+=(const FuncRef &)
Stage operator=(const Expr &)
Use this as the left-hand-side of a definition or an update definition (see RDom).
Stage operator=(const FuncRef &)
FuncRef(Internal::Function, const std::vector< Var > &, int placeholder_pos=-1, int count=0)
Stage operator+=(const Tuple &)
FuncRef(const Internal::Function &, const std::vector< Expr > &, int placeholder_pos=-1, int count=0)
Stage operator/=(const FuncRef &)
Stage operator*=(const Tuple &)
Stage operator/=(const Tuple &)
Stage operator=(const Tuple &)
Use this as the left-hand-side of a definition or an update definition for a Func with multiple outpu...
A fragment of front-end syntax of the form f(x, y, z)[index], where x, y, z are Vars or Exprs.
int index() const
Return index to the function outputs.
Stage operator+=(const Expr &e)
Define a stage that adds the given expression to Tuple component 'idx' of this Func.
Stage operator*=(const Expr &e)
Define a stage that multiplies Tuple component 'idx' of this Func by the given expression.
Stage operator/=(const Expr &e)
Define a stage that divides Tuple component 'idx' of this Func by the given expression.
Stage operator=(const Expr &e)
Use this as the left-hand-side of an update definition of Tuple component 'idx' of a Func (see RDom).
Stage operator=(const FuncRef &e)
Stage operator-=(const Expr &e)
Define a stage that adds the negative of the given expression to Tuple component 'idx' of this Func.
FuncTupleElementRef(const FuncRef &ref, const std::vector< Expr > &args, int idx)
An Image parameter to a halide pipeline.
A Function definition which can either represent an init or an update definition.
const StageSchedule & schedule() const
Get the default (no-specialization) stage-specific schedule associated with this definition.
const std::vector< Expr > & args() const
Get the default (no-specialization) arguments (left-hand-side) of the definition.
bool defined() const
Definition objects are nullable.
A reference-counted handle to Halide's internal representation of a function.
A base class for passes over the IR which modify it (e.g.
A reference-counted handle to a parameter to a halide pipeline.
A schedule for a single stage of a Halide pipeline.
bool & touched()
This flag is set to true if the dims list has been manipulated by the user (or if a ScheduleHandle wa...
A reference to a site in a Halide statement at the top of the body of a particular for loop.
A handle on the output buffer of a pipeline.
static const ParamMap & empty_map()
A const ref to an empty ParamMap.
A class representing a Halide pipeline.
A multi-dimensional domain over which to iterate.
A reduction variable represents a single dimension of a reduction domain (RDom).
const std::string & name() const
The name of this reduction variable.
A Realization is a vector of references to existing Buffer objects.
A single definition of a Func.
std::string name() const
Return the name of this stage, e.g.
Stage & rename(const VarOrRVar &old_name, const VarOrRVar &new_name)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &thread_x, const VarOrRVar &thread_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
HALIDE_NO_USER_CODE_INLINE std::enable_if< Internal::all_are_convertible< VarOrRVar, Args... >::value, Stage & >::type reorder(const VarOrRVar &x, const VarOrRVar &y, Args &&...args)
Stage & gpu(const VarOrRVar &block_x, const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & hexagon(const VarOrRVar &x=Var::outermost())
Func rfactor(const RVar &r, const Var &v)
Stage & compute_with(const Stage &s, const VarOrRVar &var, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Stage & vectorize(const VarOrRVar &var)
Stage & gpu_single_thread(DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & compute_with(LoopLevel loop_level, LoopAlignStrategy align=LoopAlignStrategy::Auto)
Stage & unroll(const VarOrRVar &var)
Stage & parallel(const VarOrRVar &var)
Stage & allow_race_conditions()
Stage & serial(const VarOrRVar &var)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &bx, const VarOrRVar &by, const VarOrRVar &bz, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &bx, const VarOrRVar &tx, const Expr &x_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & prefetch(const T &image, VarOrRVar var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &outers, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, const std::vector< TailStrategy > &tails)
Stage specialize(const Expr &condition)
Stage & compute_with(LoopLevel loop_level, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Schedule the iteration over this stage to be fused with another stage 's' from outermost loop to a gi...
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xo, const VarOrRVar &yo, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & split(const VarOrRVar &old, const VarOrRVar &outer, const VarOrRVar &inner, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Scheduling calls that control how the domain of this stage is traversed.
Stage & fuse(const VarOrRVar &inner, const VarOrRVar &outer, const VarOrRVar &fused)
Stage(Internal::Function f, Internal::Definition d, size_t stage_index)
Stage & vectorize(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Func rfactor(std::vector< std::pair< RVar, Var >> preserved)
Calling rfactor() on an associative update definition of a Func will split the update into an intermedia...
Stage & parallel(const VarOrRVar &var, const Expr &task_size, TailStrategy tail=TailStrategy::Auto)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, const VarOrRVar &block_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
const Internal::StageSchedule & get_schedule() const
Return the current StageSchedule associated with this Stage.
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &z, const VarOrRVar &tx, const VarOrRVar &ty, const VarOrRVar &tz, const Expr &x_size, const Expr &y_size, const Expr &z_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & reorder(const std::vector< VarOrRVar > &vars)
Stage & gpu_blocks(const VarOrRVar &block_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_blocks(const VarOrRVar &block_x, const VarOrRVar &block_y, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const std::vector< VarOrRVar > &previous, const std::vector< VarOrRVar > &inners, const std::vector< Expr > &factors, TailStrategy tail=TailStrategy::Auto)
void specialize_fail(const std::string &message)
Stage & gpu_threads(const VarOrRVar &thread_x, const VarOrRVar &thread_y, const VarOrRVar &thread_z, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &xi, const VarOrRVar &yi, const Expr &xfactor, const Expr &yfactor, TailStrategy tail=TailStrategy::Auto)
Stage & compute_with(const Stage &s, const VarOrRVar &var, const std::vector< std::pair< VarOrRVar, LoopAlignStrategy >> &align)
Stage & unroll(const VarOrRVar &var, const Expr &factor, TailStrategy tail=TailStrategy::Auto)
Stage & prefetch(const Func &f, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Stage & atomic(bool override_associativity_test=false)
std::string source_location() const
Attempt to get the source file and line where this stage was defined by parsing the process's own deb...
Stage & gpu_threads(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_lanes(const VarOrRVar &thread_x, DeviceAPI device_api=DeviceAPI::Default_GPU)
Stage & gpu_tile(const VarOrRVar &x, const VarOrRVar &y, const VarOrRVar &tx, const VarOrRVar &ty, const Expr &x_size, const Expr &y_size, TailStrategy tail=TailStrategy::Auto, DeviceAPI device_api=DeviceAPI::Default_GPU)
std::string dump_argument_list() const
Return a string describing the current var list taking into account all the splits,...
Stage & prefetch(const Internal::Parameter &param, const VarOrRVar &var, Expr offset=1, PrefetchBoundStrategy strategy=PrefetchBoundStrategy::GuardWithIf)
Create a small array of Exprs for defining and calling functions with multiple outputs.
A Halide variable, to be used when defining functions.
const std::string & name() const
Get the name of a Var.
static Var outermost()
A Var that represents the location outside the outermost loop.
void schedule_scalar(Func f)
void assign_results(Realization &r, int idx, Last last)
void check_types(const Tuple &t, int idx)
ForType
An enum describing a type of loop traversal.
std::vector< Var > make_argument_list(int dimensionality)
Make a list of unique arguments for definitions with unnamed arguments.
WEAK halide_do_task_t custom_do_task
WEAK halide_do_par_for_t custom_do_par_for
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
HALIDE_NO_USER_CODE_INLINE T evaluate(const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
PrefetchBoundStrategy
Different ways to handle accesses outside the original extents in a prefetch.
@ GuardWithIf
Guard the prefetch with if-guards that ignore the prefetch if any of the prefetched region ever goes...
HALIDE_NO_USER_CODE_INLINE T evaluate_may_gpu(const Expr &e)
JIT-Compile and run enough code to evaluate a Halide expression.
TailStrategy
Different ways to handle a tail case in a split when the factor does not provably divide the extent.
@ Auto
For pure definitions use ShiftInwards.
LoopAlignStrategy
Different ways to handle the case when the start/end of the loops of stages computed with (fused) are...
@ Auto
By default, LoopAlignStrategy is set to NoAlign.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
NameMangling
An enum to specify calling convention for extern stages.
@ Default
Match whatever is specified in the Target.
Target get_jit_target_from_environment()
Return the target that Halide will use for jit-compilation.
DeviceAPI
An enum describing a type of device API.
@ Host
Used to denote for loops that run on the same device as the containing code.
Target get_target_from_environment()
Return the target that Halide will use.
StmtOutputFormat
Used to determine if the output printed to file should be as a normal string or as an HTML file which...
std::vector< Range > Region
A multi-dimensional box.
Expr max(const FuncRef &a, const FuncRef &b)
MemoryType
An enum describing different address spaces to be used with Func::store_in.
unsigned __INT8_TYPE__ uint8_t
A fragment of Halide syntax.
HALIDE_ALWAYS_INLINE Type type() const
Get the type of this expression node.
An argument to an extern-defined Func.
A struct representing a target machine and os to generate code for.
bool has_gpu_feature() const
Is a fully featured GPU compute runtime enabled? I.e.
bool has_feature(Feature f) const
Types in the halide type system.
A class that can represent Vars or RVars.
const std::string & name() const
VarOrRVar(const std::string &n, bool r)
VarOrRVar(const ImplicitVar< N > &u)