1 #ifndef HALIDE_CODEGEN_LLVM_H
2 #define HALIDE_CODEGEN_LLVM_H
15 class IRBuilderDefaultInserter;
17 template<
typename,
typename>
24 class ExecutionEngine;
47 struct ExternSignature;
81 const std::string &suffix,
82 const std::vector<std::pair<std::string, ExternSignature>> &externs);
105 const std::string &extern_name,
const std::vector<LoweredArgument> &args);
106 virtual void end_func(
const std::vector<LoweredArgument> &args);
113 virtual std::string
mcpu()
const = 0;
115 virtual std::string
mabi()
const;
142 llvm::Function *
function;
181 bool must_succeed =
true)
const;
291 llvm::Constant *
create_binary_blob(
const std::vector<char> &data,
const std::string &name,
bool constant =
true);
400 bool zero_initialize =
false,
401 const std::string &name =
"");
446 llvm::Function *
get_llvm_intrin(
const Type &ret_type,
const std::string &name,
const std::vector<Type> &arg_types,
bool scalars_are_vectors =
false);
447 llvm::Function *
get_llvm_intrin(llvm::Type *ret_type,
const std::string &name,
const std::vector<llvm::Type *> &arg_types);
449 llvm::Function *
declare_intrin_overload(
const std::string &name,
const Type &ret_type,
const std::string &impl_name, std::vector<Type> arg_types,
bool scalars_are_vectors =
false);
464 const std::string &name, std::vector<Expr>);
466 llvm::Function *
intrin, std::vector<Expr>);
468 const std::string &name, std::vector<llvm::Value *>);
470 llvm::Function *
intrin, std::vector<llvm::Value *>);
475 virtual llvm::Value *
slice_vector(llvm::Value *vec,
int start,
int extent);
482 const std::vector<int> &indices);
527 std::map<std::string, llvm::Constant *> string_constants;
532 llvm::BasicBlock *destructor_block;
538 bool llvm_large_code_model;
545 llvm::Function *embed_metadata_getter(
const std::string &metadata_getter_name,
546 const std::string &function_name,
const std::vector<LoweredArgument> &args,
547 const std::map<std::string, std::string> &metadata_name_map);
550 llvm::Constant *embed_constant_expr(
Expr e, llvm::Type *t);
551 llvm::Constant *embed_constant_scalar_value_t(
const Expr &e);
553 llvm::Function *add_argv_wrapper(llvm::Function *fn,
const std::string &name,
bool result_in_argv =
false);
555 llvm::Value *codegen_dense_vector_load(
const Type &type,
const std::string &name,
const Expr &base,
557 llvm::Value *vpred =
nullptr,
bool slice_to_native =
true);
558 llvm::Value *codegen_dense_vector_load(
const Load *load, llvm::Value *vpred =
nullptr,
bool slice_to_native =
true);
560 virtual void codegen_predicated_vector_load(
const Load *op);
561 virtual void codegen_predicated_vector_store(
const Store *op);
563 void codegen_atomic_rmw(
const Store *op);
565 void init_codegen(
const std::string &name,
bool any_strict_float =
false);
566 std::unique_ptr<llvm::Module> finish_codegen();
569 template<
typename Op>
570 bool try_to_fold_vector_reduce(
const Expr &a,
Expr b);
577 llvm::LLVMContext &context);
Defines the base class for things that recursively walk over the IR.
Defines Module, an IR container that fully describes a Halide program.
Defines the Scope class, which is used for keeping track of names in a scope while traversing IR.
Defines the structure that describes a Halide target.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
A code generator abstract base class.
void visit(const Let *) override
static std::unique_ptr< CodeGen_LLVM > new_for_target(const Target &target, llvm::LLVMContext &context)
Create an instance of CodeGen_LLVM suitable for the target.
llvm::StructType * metadata_t_type
virtual void end_func(const std::vector< LoweredArgument > &args)
llvm::StructType * argument_t_type
void visit(const Select *) override
void visit(const Or *) override
void visit(const AssertStmt *) override
bool emit_atomic_stores
Emit atomic store instructions?
void do_parallel_tasks(const std::vector< ParallelTask > &tasks)
void sym_pop(const std::string &name)
Remove an entry from the symbol table, revealing any previous entries with the same name.
virtual void init_context()
Grab all the context specific internal state.
virtual llvm::Value * concat_vectors(const std::vector< llvm::Value * > &)
Concatenate a bunch of llvm vectors.
llvm::StructType * device_interface_t_type
void visit(const Mul *) override
llvm::StructType * semaphore_t_type
llvm::Value * make_halide_type_t(const Type &)
Turn a Halide Type into an llvm::Value representing a constant halide_type_t.
virtual bool supports_atomic_add(const Type &t) const
llvm::Constant * create_binary_blob(const std::vector< char > &data, const std::string &name, bool constant=true)
Put a binary blob in the module as a global variable and return a pointer to it.
std::vector< LoweredArgument > current_function_args
void return_with_error_code(llvm::Value *error_code)
Return from the pipeline with the given error code.
virtual llvm::Value * shuffle_vectors(llvm::Value *a, llvm::Value *b, const std::vector< int > &indices)
Create an LLVM shuffle vectors instruction.
llvm::StructType * semaphore_acquire_t_type
static std::unique_ptr< llvm::Module > compile_trampolines(const Target &target, llvm::LLVMContext &context, const std::string &suffix, const std::vector< std::pair< std::string, ExternSignature >> &externs)
llvm::Value * call_intrin(const Type &t, int intrin_lanes, llvm::Function *intrin, std::vector< Expr >)
llvm::Value * call_intrin(const Type &t, int intrin_lanes, const std::string &name, std::vector< Expr >)
Generate a call to a vector intrinsic or runtime inlined function.
llvm::StructType * parallel_task_t_type
void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot)
Call a destructor early.
llvm::Value * get_user_context() const
The user_context argument.
void visit(const Min *) override
void visit(const For *) override
void visit(const Not *) override
void visit(const Sub *) override
void sym_push(const std::string &name, llvm::Value *value)
Add an entry to the symbol table, hiding previous entries with the same name.
llvm::StructType * type_t_type
void add_external_code(const Module &halide_module)
Add external_code entries to llvm module.
DestructorType
Some destructors should always be called.
llvm::Value * codegen(const Expr &)
Emit code that evaluates an expression, and return the llvm representation of the result of the expression.
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index)
Halide::Target target
The target we're generating code for.
virtual void begin_func(LinkageType linkage, const std::string &simple_name, const std::string &extern_name, const std::vector< LoweredArgument > &args)
Helper functions for compiling Halide functions to llvm functions.
llvm::Value * call_intrin(llvm::Type *t, int intrin_lanes, const std::string &name, std::vector< llvm::Value * >)
llvm::StructType * scalar_value_t_type
std::unique_ptr< llvm::Module > module
virtual void prepare_for_early_exit()
If we have to bail out of a pipeline midway, this should inject the appropriate target-specific clean...
void visit(const Max *) override
void codegen(const Stmt &)
Emit code that runs a statement.
virtual void compile_buffer(const Buffer<> &buffer)
llvm::Function * declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector< Type > arg_types, bool scalars_are_vectors=false)
Declare an intrinsic function that participates in overload resolution.
virtual std::string mcpu() const =0
What should be passed as -mcpu, -mattrs, and related for compilation.
virtual bool promote_indices() const
Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?
void visit(const GE *) override
void visit(const Variable *) override
void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector< Type > arg_types)
llvm::Value * shuffle_vectors(llvm::Value *v, const std::vector< int > &indices)
Shorthand for shuffling a vector with an undef vector.
void visit(const Atomic *) override
void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code=nullptr)
Codegen an assertion.
virtual Type upgrade_type_for_storage(const Type &) const
Return the type that a given Halide type should be stored/loaded from memory as.
llvm::Function * get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector< llvm::Type * > &arg_types)
llvm::Value * sym_get(const std::string &name, bool must_succeed=true) const
Fetch an entry from the symbol table.
void visit(const Shuffle *) override
llvm::BasicBlock * get_destructor_block()
Retrieves the block containing the error handling code.
llvm::MDNode * very_likely_branch
void visit(const Allocate *) override=0
Generate code for an allocate node.
virtual llvm::Value * slice_vector(llvm::Value *vec, int start, int extent)
Take a slice of lanes out of an llvm vector.
void visit(const Prefetch *) override
void visit(const Provide *) override
These IR nodes should have been removed during lowering.
void visit(const Fork *) override
virtual Type upgrade_type_for_arithmetic(const Type &) const
Return the type in which arithmetic should be done for the given storage type.
llvm::FunctionType * signature_to_type(const ExternSignature &signature)
Given a Halide ExternSignature, return the equivalent llvm::FunctionType.
const Target & get_target() const
The target we're generating code for.
void visit(const Div *) override
llvm::StructType * halide_buffer_t_type
llvm::IRBuilder< llvm::ConstantFolder, llvm::IRBuilderDefaultInserter > * builder
void visit(const EQ *) override
virtual int native_vector_bits() const =0
What's the natural vector bit-width to use for loads, stores, etc.
llvm::StructType * dimension_t_type
virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name)
Compile a specific halide declaration into the llvm Module.
llvm::LLVMContext * context
void get_parallel_tasks(const Stmt &s, std::vector< ParallelTask > &tasks, std::pair< std::string, int > prefix)
llvm::Value * register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when)
virtual std::string get_allocation_name(const std::string &n)
Get a unique name for the actual block of memory that an allocate node uses.
virtual bool use_soft_float_abi() const =0
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index)
Generate a pointer into a named buffer at a given index, of a given type.
virtual std::string mabi() const
void visit(const Evaluate *) override
std::map< std::string, std::vector< Intrinsic > > intrinsics
Mapping of intrinsic functions to the various overloads implementing it.
virtual std::unique_ptr< llvm::Module > compile(const Module &module)
Takes a halide Module and compiles it to an llvm Module.
void visit(const LE *) override
void visit(const NE *) override
void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index)
Mark a load or store with type-based-alias-analysis metadata so that llvm knows it can reorder loads ...
void visit(const And *) override
virtual bool use_pic() const
void scalarize(const Expr &)
Codegen a vector Expr by codegenning each lane and combining.
void visit(const StringImm *) override
virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init)
Compile a horizontal reduction that starts with an explicit initial value.
void codegen_asserts(const std::vector< const AssertStmt * > &asserts)
Codegen a block of asserts with pure conditions.
size_t get_requested_alloca_total() const
llvm::Value * create_alloca_at_entry(llvm::Type *type, int n, bool zero_initialize=false, const std::string &name="")
Perform an alloca at the function entrypoint.
virtual Type upgrade_type_for_argument_passing(const Type &) const
Return the type that a Halide type should be passed in and out of functions as.
llvm::Value * create_broadcast(llvm::Value *, int lanes)
Widen an llvm scalar into an llvm vector with the given number of lanes.
void visit(const GT *) override
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, Expr index)
void visit(const Cast *) override
CodeGen_LLVM(const Target &t)
void visit(const Ramp *) override
void visit(const Broadcast *) override
void visit(const Mod *) override
void visit(const Call *) override
static void initialize_llvm()
Initialize internal llvm state for the enabled targets.
void do_as_parallel_task(const Stmt &s)
std::set< std::string > external_buffer
Which buffers came in from the outside world (and so we can't guarantee their alignment)
void set_context(llvm::LLVMContext &context)
Tell the code generator which LLVM context to use.
void visit(const Store *) override
llvm::Constant * create_string_constant(const std::string &str)
Put a string constant in the module as a global variable and return a pointer to it.
void optimize_module()
Run all of llvm's optimization passes on the module.
llvm::Value * call_intrin(llvm::Type *t, int intrin_lanes, llvm::Function *intrin, std::vector< llvm::Value * >)
llvm::Value * call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector< Expr > &args)
Call an overloaded intrinsic function.
void visit(const ProducerConsumer *) override
virtual llvm::Type * llvm_type_of(const Type &) const
Get the llvm type equivalent to the given halide type in the current context.
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index)
void visit(const LT *) override
void visit(const Load *) override
std::pair< llvm::Function *, int > find_vector_runtime_function(const std::string &name, int lanes)
Go looking for a vector version of a runtime function.
virtual llvm::Value * interleave_vectors(const std::vector< llvm::Value * > &)
Implementation of the intrinsic call to interleave_vectors.
bool inside_atomic_mutex_node
Are we inside an atomic node that uses mutex locks? This is used for detecting deadlocks from nested ...
void visit(const FloatImm *) override
void visit(const IntImm *) override
Generate code for various IR nodes.
llvm::MDNode * strict_fp_math_md
virtual void init_module()
Initialize the CodeGen_LLVM internal state to compile a fresh module.
virtual std::string mattrs() const =0
void visit(const Realize *) override
void visit(const VectorReduce *) override
void visit(const IfThenElse *) override
size_t requested_alloca_total
A (very) conservative guess at the size of all alloca() storage requested (including alignment paddin...
void visit(const Acquire *) override
Expr wild_u1x_
Some wildcard variables used for peephole optimizations in subclasses.
llvm::Function * get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector< Type > &arg_types, bool scalars_are_vectors=false)
Get an LLVM intrinsic declaration.
llvm::StructType * pseudostack_slot_t_type
llvm::MDNode * default_fp_math_md
bool sym_exists(const std::string &name) const
Test if an item exists in the symbol table.
void visit(const Free *) override=0
Generate code for a free node.
void visit(const Add *) override
void visit(const Block *) override
void visit(const UIntImm *) override
void visit(const LetStmt *) override
llvm::Type * void_t
Some useful llvm types.
A base class for algorithms that need to recursively walk over the IR.
virtual void visit(const IntImm *)
A reference-counted handle to a parameter to a halide pipeline.
HALIDE_ALWAYS_INLINE auto intrin(Call::IntrinsicOp intrinsic_op, Args... args) noexcept -> Intrin< decltype(pattern_arg(args))... >
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
LinkageType
Type of linkage a function in a lowered Halide module can have.
@ Internal
Not visible externally, similar to 'static' linkage in C.
std::unique_ptr< llvm::Module > codegen_llvm(const Module &module, llvm::LLVMContext &context)
Given a Halide module, generate an llvm::Module.
A fragment of Halide syntax.
The sum of two expressions.
Allocate a scratch area called with the given name, type, and size.
Logical and - are both expressions true.
If the 'condition' is false, then evaluate and return the message, which should be a call to an error...
Lock all the Store nodes in the body statement.
A sequence of statements to be executed in-order.
A vector with 'lanes' elements, in which every element is 'value'.
The actual IR nodes begin here.
Description of an intrinsic function overload.
Intrinsic(Type result_type, std::vector< Type > arg_types, llvm::Function *impl)
std::vector< Type > arg_types
Codegen a call to do_parallel_tasks.
std::vector< SemAcquire > semaphores
The ratio of two expressions.
Is the first expression equal to the second.
Evaluate and discard an expression, presumably because it has some side-effect.
Floating point constants.
A pair of statements executed concurrently.
Free the resources associated with the given buffer.
Is the first expression greater than or equal to the second.
Is the first expression greater than the second.
Is the first expression less than or equal to the second.
Is the first expression less than the second.
A let expression, like you might find in a functional language.
The statement form of a let node.
Load a value from a named symbol if predicate is true.
Definition of a lowered function.
The greater of two values.
The lesser of two values.
The result of modulus_remainder analysis.
The product of two expressions.
Is the first expression not equal to the second.
Logical not - true if the expression is false.
Logical or - is at least one of the expressions true.
Represent a multi-dimensional region of a Func or an ImageParam that needs to be prefetched.
This node is a helpful annotation to do with permissions.
This defines the value of a function at a multi-dimensional location.
A linear ramp vector node.
Allocate a multi-dimensional buffer of the given type and size.
Construct a new vector by taking elements from another sequence of vectors.
A reference-counted handle to a statement node.
Store a 'value' to the buffer called 'name' at a given 'index' if 'predicate' is true.
The difference of two expressions.
Unsigned integer constants.
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associati...
A struct representing a target machine and os to generate code for.
Types in the halide type system.