Halide  12.0.1
Halide compiler and libraries
CodeGen_LLVM.h
Go to the documentation of this file.
1 #ifndef HALIDE_CODEGEN_LLVM_H
2 #define HALIDE_CODEGEN_LLVM_H
3 
4 /** \file
5  *
6  * Defines the base-class for all architecture-specific code
7  * generators that use llvm.
8  */
9 
10 namespace llvm {
11 class Value;
12 class Module;
13 class Function;
14 class FunctionType;
15 class IRBuilderDefaultInserter;
16 class ConstantFolder;
17 template<typename, typename>
18 class IRBuilder;
19 class LLVMContext;
20 class Type;
21 class StructType;
22 class Instruction;
23 class CallInst;
24 class ExecutionEngine;
25 class AllocaInst;
26 class Constant;
27 class Triple;
28 class MDNode;
29 class NamedMDNode;
30 class DataLayout;
31 class BasicBlock;
32 class GlobalVariable;
33 } // namespace llvm
34 
35 #include <map>
36 #include <memory>
37 #include <string>
38 #include <vector>
39 
40 #include "IRVisitor.h"
41 #include "Module.h"
42 #include "Scope.h"
43 #include "Target.h"
44 
45 namespace Halide {
46 
47 struct ExternSignature;
48 
49 namespace Internal {
50 
51 /** A code generator abstract base class. Actual code generators
52  * (e.g. CodeGen_X86) inherit from this. This class is responsible
53  * for taking a Halide Stmt and producing llvm bitcode, machine
54  * code in an object file, or machine code accessible through a
55  * function pointer.
56  */
57 class CodeGen_LLVM : public IRVisitor {
58 public:
59  /** Create an instance of CodeGen_LLVM suitable for the target. */
60  static std::unique_ptr<CodeGen_LLVM> new_for_target(const Target &target, llvm::LLVMContext &context);
61 
62  ~CodeGen_LLVM() override;
63 
64  /** Takes a halide Module and compiles it to an llvm Module. */
65  virtual std::unique_ptr<llvm::Module> compile(const Module &module);
66 
67  /** Returns a const reference to the target we're generating code for. */
68  const Target &get_target() const {
69  return target;
70  }
71 
72  /** Tell the code generator which LLVM context to use. */
73  void set_context(llvm::LLVMContext &context);
74 
75  /** Initialize internal llvm state for the enabled targets. */
76  static void initialize_llvm();
77 
78  static std::unique_ptr<llvm::Module> compile_trampolines(
79  const Target &target,
80  llvm::LLVMContext &context,
81  const std::string &suffix,
82  const std::vector<std::pair<std::string, ExternSignature>> &externs);
83 
84  size_t get_requested_alloca_total() const {
86  }
87 
88 protected:
89  CodeGen_LLVM(const Target &t);
90 
91  /** Compile a specific halide declaration into the llvm Module. */
92  // @{
93  virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name);
94  virtual void compile_buffer(const Buffer<> &buffer);
95  // @}
96 
97  /** Helper functions for compiling Halide functions to llvm
98  * functions. begin_func performs all the work necessary to begin
99  * generating code for a function with a given argument list with
100  * the IRBuilder. A call to begin_func should be followed by a
101  * call to end_func with the same arguments, to generate the
102  * appropriate cleanup code. */
103  // @{
104  virtual void begin_func(LinkageType linkage, const std::string &simple_name,
105  const std::string &extern_name, const std::vector<LoweredArgument> &args);
106  virtual void end_func(const std::vector<LoweredArgument> &args);
107  // @}
108 
109  /** What should be passed as -mcpu, -mattrs, and related for
110  * compilation. The architecture-specific code generator should
111  * define these. */
112  // @{
113  virtual std::string mcpu() const = 0;
114  virtual std::string mattrs() const = 0;
115  virtual std::string mabi() const;
116  virtual bool use_soft_float_abi() const = 0;
117  virtual bool use_pic() const;
118  // @}
119 
120  /** Should indexing math be promoted to 64-bit on platforms with
121  * 64-bit pointers? This base implementation returns true; it is virtual, so subclasses can override it. */
122  virtual bool promote_indices() const {
123  return true;
124  }
125 
126  /** What's the natural vector bit-width to use for loads, stores, etc. */
127  virtual int native_vector_bits() const = 0;
128 
129  /** Return the type in which arithmetic should be done for the
130  * given storage type. */
131  virtual Type upgrade_type_for_arithmetic(const Type &) const;
132 
133  /** Return the type that a given Halide type should be
134  * stored/loaded from memory as. */
135  virtual Type upgrade_type_for_storage(const Type &) const;
136 
137  /** Return the type that a Halide type should be passed in and out
138  * of functions as. */
140 
141  std::unique_ptr<llvm::Module> module;
142  llvm::Function *function;
143  llvm::LLVMContext *context;
145  llvm::Value *value;
146  llvm::MDNode *very_likely_branch;
147  llvm::MDNode *default_fp_math_md;
148  llvm::MDNode *strict_fp_math_md;
149  std::vector<LoweredArgument> current_function_args;
150  //@}
151 
152  /** The target we're generating code for */
154 
155  /** Grab all the context specific internal state. */
156  virtual void init_context();
157  /** Initialize the CodeGen_LLVM internal state to compile a fresh
158  * module. This allows reuse of one CodeGen_LLVM object to compile
159  * multiple related modules (e.g. multiple device kernels). */
160  virtual void init_module();
161 
162  /** Add external_code entries to llvm module. */
163  void add_external_code(const Module &halide_module);
164 
165  /** Run all of llvm's optimization passes on the module. */
167 
168  /** Add an entry to the symbol table, hiding previous entries with
169  * the same name. Call this when new values come into scope. */
170  void sym_push(const std::string &name, llvm::Value *value);
171 
172  /** Remove an entry from the symbol table, revealing any previous
173  * entries with the same name. Call this when values go out of
174  * scope. */
175  void sym_pop(const std::string &name);
176 
177  /** Fetch an entry from the symbol table. If the symbol is not
178  * found, it either errors out (if the second arg is true), or
179  * returns nullptr. */
180  llvm::Value *sym_get(const std::string &name,
181  bool must_succeed = true) const;
182 
183  /** Test if an item exists in the symbol table. */
184  bool sym_exists(const std::string &name) const;
185 
186  /** Given a Halide ExternSignature, return the equivalent llvm::FunctionType. */
187  llvm::FunctionType *signature_to_type(const ExternSignature &signature);
188 
189  /** Some useful llvm types */
190  // @{
191  llvm::Type *void_t, *i1_t, *i8_t, *i16_t, *i32_t, *i64_t, *f16_t, *f32_t, *f64_t;
192  llvm::StructType *halide_buffer_t_type,
203 
204  // @}
205 
206  /** Some wildcard variables used for peephole optimizations in
207  * subclasses */
208  // @{
212 
213  // Wildcards for scalars.
217  // @}
218 
219  /** Emit code that evaluates an expression, and return the llvm
220  * representation of the result of the expression. */
221  llvm::Value *codegen(const Expr &);
222 
223  /** Emit code that runs a statement. */
224  void codegen(const Stmt &);
225 
226  /** Codegen a vector Expr by codegenning each lane and combining. */
227  void scalarize(const Expr &);
228 
229  /** Some destructors should always be called. Others should only
230  * be called if the pipeline is exiting with an error code. */
234 
235  /* Call this at the location of object creation to register how an
236  * object should be destroyed. This does three things:
237  * 1) Emits code here that puts the object in a unique
238  * null-initialized stack slot
239  * 2) Adds an instruction to the destructor block that calls the
240  * destructor on that stack slot if it's not null.
241  * 3) Returns that stack slot, so you can neuter the destructor
242  * (by storing null to the stack slot) or destroy the object early
243  * (by calling trigger_destructor).
244  */
245  llvm::Value *register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when);
246 
247  /** Call a destructor early. Pass in the value returned by register_destructor. */
248  void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot);
249 
250  /** Retrieves the block containing the error handling
251  * code. Creates it if it doesn't already exist for this
252  * function. */
253  llvm::BasicBlock *get_destructor_block();
254 
255  /** Codegen an assertion. If false, returns the error code (if not
256  * null), or evaluates and returns the message, which must be an
257  * Int(32) expression. */
258  // @{
259  void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code = nullptr);
260  // @}
261 
262  /** Codegen a block of asserts with pure conditions */
263  void codegen_asserts(const std::vector<const AssertStmt *> &asserts);
264 
265  /** Codegen a call to do_parallel_tasks */
266  struct ParallelTask {
268  struct SemAcquire {
271  };
272  std::vector<SemAcquire> semaphores;
273  std::string loop_var;
276  std::string name;
277  };
279  void get_parallel_tasks(const Stmt &s, std::vector<ParallelTask> &tasks, std::pair<std::string, int> prefix);
280  void do_parallel_tasks(const std::vector<ParallelTask> &tasks);
281  void do_as_parallel_task(const Stmt &s);
282 
283  /** Return from the pipeline with the given error code. Will run
284  * the destructor block. */
285  void return_with_error_code(llvm::Value *error_code);
286 
287  /** Put a string constant in the module as a global variable and return a pointer to it. */
288  llvm::Constant *create_string_constant(const std::string &str);
289 
290  /** Put a binary blob in the module as a global variable and return a pointer to it. */
291  llvm::Constant *create_binary_blob(const std::vector<char> &data, const std::string &name, bool constant = true);
292 
293  /** Widen an llvm scalar into an llvm vector with the given number of lanes. */
294  llvm::Value *create_broadcast(llvm::Value *, int lanes);
295 
296  /** Generate a pointer into a named buffer at a given index, of a
297  * given type. The index counts according to the scalar type of
298  * the type passed in. */
299  // @{
300  llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index);
301  llvm::Value *codegen_buffer_pointer(const std::string &buffer, Type type, Expr index);
302  llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index);
303  llvm::Value *codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index);
304  // @}
305 
306  /** Turn a Halide Type into an llvm::Value representing a constant halide_type_t */
307  llvm::Value *make_halide_type_t(const Type &);
308 
309  /** Mark a load or store with type-based-alias-analysis metadata
310  * so that llvm knows it can reorder loads and stores across
311  * different buffers */
312  void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index);
313 
314  /** Get a unique name for the actual block of memory that an
315  * allocate node uses. Used so that alias analysis understands
316  * when multiple Allocate nodes share the same memory. This base implementation returns n unchanged. */
317  virtual std::string get_allocation_name(const std::string &n) {
318  return n;
319  }
320 
321  using IRVisitor::visit;
322 
323  /** Generate code for various IR nodes. These can be overridden by
324  * architecture-specific code to perform peephole
325  * optimizations. The result of each is stored in \ref value */
326  // @{
327  void visit(const IntImm *) override;
328  void visit(const UIntImm *) override;
329  void visit(const FloatImm *) override;
330  void visit(const StringImm *) override;
331  void visit(const Cast *) override;
332  void visit(const Variable *) override;
333  void visit(const Add *) override;
334  void visit(const Sub *) override;
335  void visit(const Mul *) override;
336  void visit(const Div *) override;
337  void visit(const Mod *) override;
338  void visit(const Min *) override;
339  void visit(const Max *) override;
340  void visit(const EQ *) override;
341  void visit(const NE *) override;
342  void visit(const LT *) override;
343  void visit(const LE *) override;
344  void visit(const GT *) override;
345  void visit(const GE *) override;
346  void visit(const And *) override;
347  void visit(const Or *) override;
348  void visit(const Not *) override;
349  void visit(const Select *) override;
350  void visit(const Load *) override;
351  void visit(const Ramp *) override;
352  void visit(const Broadcast *) override;
353  void visit(const Call *) override;
354  void visit(const Let *) override;
355  void visit(const LetStmt *) override;
356  void visit(const AssertStmt *) override;
357  void visit(const ProducerConsumer *) override;
358  void visit(const For *) override;
359  void visit(const Acquire *) override;
360  void visit(const Store *) override;
361  void visit(const Block *) override;
362  void visit(const Fork *) override;
363  void visit(const IfThenElse *) override;
364  void visit(const Evaluate *) override;
365  void visit(const Shuffle *) override;
366  void visit(const VectorReduce *) override;
367  void visit(const Prefetch *) override;
368  void visit(const Atomic *) override;
369  // @}
370 
371  /** Generate code for an allocate node. It has no default
372  * implementation - it must be handled in an architecture-specific
373  * way. */
374  void visit(const Allocate *) override = 0;
375 
376  /** Generate code for a free node. It has no default
377  * implementation and must be handled in an architecture-specific
378  * way. */
379  void visit(const Free *) override = 0;
380 
381  /** These IR nodes should have been removed during
382  * lowering. CodeGen_LLVM will error out if they are present */
383  // @{
384  void visit(const Provide *) override;
385  void visit(const Realize *) override;
386  // @}
387 
388  /** If we have to bail out of a pipeline midway, this should
389  * inject the appropriate target-specific cleanup code. This base implementation is empty (a no-op). */
390  virtual void prepare_for_early_exit() {
391  }
392 
393  /** Get the llvm type equivalent to the given halide type in the
394  * current context. */
395  virtual llvm::Type *llvm_type_of(const Type &) const;
396 
397  /** Perform an alloca at the function entrypoint. Will be cleaned
398  * on function exit. */
399  llvm::Value *create_alloca_at_entry(llvm::Type *type, int n,
400  bool zero_initialize = false,
401  const std::string &name = "");
402 
403  /** A (very) conservative guess at the size of all alloca() storage requested
404  * (including alignment padding). It's currently meant only to be used as
405  * a very coarse way to ensure there is enough stack space when testing
406  * on the WebAssembly backend.
407  *
408  * It is *not* meant to be a useful proxy for "stack space needed", for a
409  * number of reasons:
410  * - allocas with non-overlapping lifetimes will share space
411  * - on some backends, LLVM may promote register-sized allocas into registers
412  * - while this accounts for alloca() calls we know about, it doesn't attempt
413  * to account for stack spills, function call overhead, etc.
414  */
416 
417  /** Which buffers came in from the outside world (and so we can't
418  * guarantee their alignment) */
419  std::set<std::string> external_buffer;
420 
421  /** The user_context argument. May be a constant null if the
422  * function is being compiled without a user context. */
423  llvm::Value *get_user_context() const;
424 
425  /** Implementation of the intrinsic call to
426  * interleave_vectors. This implementation allows for interleaving
427  * an arbitrary number of vectors.*/
428  virtual llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &);
429 
430  /** Description of an intrinsic function overload. Overloads are resolved
431  * using both argument and return types. The scalar types of the arguments
432  * and return type must match exactly for an overload resolution to succeed. */
433  struct Intrinsic {
435  std::vector<Type> arg_types;
436  llvm::Function *impl;
437 
438  Intrinsic(Type result_type, std::vector<Type> arg_types, llvm::Function *impl)
439  : result_type(result_type), arg_types(std::move(arg_types)), impl(impl) {
440  }
441  };
442  /** Mapping of intrinsic functions to the various overloads implementing it. */
443  std::map<std::string, std::vector<Intrinsic>> intrinsics;
444 
445  /** Get an LLVM intrinsic declaration. If it doesn't exist, it will be created. */
446  llvm::Function *get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector<Type> &arg_types, bool scalars_are_vectors = false);
447  llvm::Function *get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector<llvm::Type *> &arg_types);
448  /** Declare an intrinsic function that participates in overload resolution. */
449  llvm::Function *declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector<Type> arg_types, bool scalars_are_vectors = false);
450  void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector<Type> arg_types);
451  /** Call an overloaded intrinsic function. Returns nullptr if no suitable overload is found. */
452  llvm::Value *call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector<Expr> &args);
453 
454  /** Generate a call to a vector intrinsic or runtime inlined
455  * function. The arguments are sliced up into vectors of the width
456  * given by 'intrin_lanes', the intrinsic is called on each
457  * piece, then the results (if any) are concatenated back together
458  * into the original type 't'. For the version that takes an
459  * llvm::Type *, the type may be void, so the vector width of the
460  * arguments must be specified explicitly as
461  * 'called_lanes'. */
462  // @{
463  llvm::Value *call_intrin(const Type &t, int intrin_lanes,
464  const std::string &name, std::vector<Expr>);
465  llvm::Value *call_intrin(const Type &t, int intrin_lanes,
466  llvm::Function *intrin, std::vector<Expr>);
467  llvm::Value *call_intrin(llvm::Type *t, int intrin_lanes,
468  const std::string &name, std::vector<llvm::Value *>);
469  llvm::Value *call_intrin(llvm::Type *t, int intrin_lanes,
470  llvm::Function *intrin, std::vector<llvm::Value *>);
471  // @}
472 
473  /** Take a slice of lanes out of an llvm vector. Pads with undefs
474  * if you ask for more lanes than the vector has. */
475  virtual llvm::Value *slice_vector(llvm::Value *vec, int start, int extent);
476 
477  /** Concatenate a bunch of llvm vectors. Must be of the same type. */
478  virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &);
479 
480  /** Create an LLVM shuffle vectors instruction. */
481  virtual llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
482  const std::vector<int> &indices);
483  /** Shorthand for shuffling a vector with an undef vector. */
484  llvm::Value *shuffle_vectors(llvm::Value *v, const std::vector<int> &indices);
485 
486  /** Go looking for a vector version of a runtime function. Will
487  * return the best match. Matches in the following order:
488  *
489  * 1) The requested vector width.
490  *
491  * 2) The width which is the smallest power of two
492  * greater than or equal to the vector width.
493  *
494  * 3) All the factors of 2) greater than one, in decreasing order.
495  *
496  * 4) The smallest power of two not yet tried.
497  *
498  * So for a 5-wide vector, it tries: 5, 8, 4, 2, 16.
499  *
500  * If there's no match, returns (nullptr, 0).
501  */
502  std::pair<llvm::Function *, int> find_vector_runtime_function(const std::string &name, int lanes);
503 
504  virtual bool supports_atomic_add(const Type &t) const;
505 
506  /** Compile a horizontal reduction that starts with an explicit
507  * initial value. There are lots of complex ways to peephole
508  * optimize this pattern, especially with the proliferation of
509  * dot-product instructions, and they can usefully share logic
510  * across backends. */
511  virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init);
512 
513  /** Are we inside an atomic node that uses mutex locks?
514  This is used for detecting deadlocks from nested atomics & illegal vectorization. */
516 
517  /** Emit atomic store instructions? */
519 
520 private:
521  /** All the values in scope at the current code location during
522  * codegen. Use sym_push and sym_pop to access. */
523  Scope<llvm::Value *> symbol_table;
524 
525  /** String constants already emitted to the module. Tracked to
526  * prevent emitting the same string many times. */
527  std::map<std::string, llvm::Constant *> string_constants;
528 
529  /** A basic block to branch to on error that triggers all
530  * destructors. As destructors are registered, code gets added
531  * to this block. */
532  llvm::BasicBlock *destructor_block;
533 
534  /** Turn off all unsafe math flags in scopes while this is set. */
535  bool strict_float;
536 
537  /** Use the LLVM large code model when this is set. */
538  bool llvm_large_code_model;
539 
540  /** Embed an instance of halide_filter_metadata_t in the code, using
541  * the given name (by convention, this should be ${FUNCTIONNAME}_metadata)
542  * as extern "C" linkage. Note that the return value is a function-returning-
543  * pointer-to-constant-data.
544  */
545  llvm::Function *embed_metadata_getter(const std::string &metadata_getter_name,
546  const std::string &function_name, const std::vector<LoweredArgument> &args,
547  const std::map<std::string, std::string> &metadata_name_map);
548 
549  /** Embed a constant expression as a global variable. */
550  llvm::Constant *embed_constant_expr(Expr e, llvm::Type *t);
551  llvm::Constant *embed_constant_scalar_value_t(const Expr &e);
552 
553  llvm::Function *add_argv_wrapper(llvm::Function *fn, const std::string &name, bool result_in_argv = false);
554 
555  llvm::Value *codegen_dense_vector_load(const Type &type, const std::string &name, const Expr &base,
556  const Buffer<> &image, const Parameter &param, const ModulusRemainder &alignment,
557  llvm::Value *vpred = nullptr, bool slice_to_native = true);
558  llvm::Value *codegen_dense_vector_load(const Load *load, llvm::Value *vpred = nullptr, bool slice_to_native = true);
559 
560  virtual void codegen_predicated_vector_load(const Load *op);
561  virtual void codegen_predicated_vector_store(const Store *op);
562 
563  void codegen_atomic_rmw(const Store *op);
564 
565  void init_codegen(const std::string &name, bool any_strict_float = false);
566  std::unique_ptr<llvm::Module> finish_codegen();
567 
568  /** A helper routine for generating folded vector reductions. */
569  template<typename Op>
570  bool try_to_fold_vector_reduce(const Expr &a, Expr b);
571 };
572 
573 } // namespace Internal
574 
575 /** Given a Halide module, generate an llvm::Module. */
576 std::unique_ptr<llvm::Module> codegen_llvm(const Module &module,
577  llvm::LLVMContext &context);
578 
579 } // namespace Halide
580 
581 #endif
Defines the base class for things that recursively walk over the IR.
Defines Module, an IR container that fully describes a Halide program.
Defines the Scope class, which is used for keeping track of names in a scope while traversing IR.
Defines the structure that describes a Halide target.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Definition: Buffer.h:115
A code generator abstract base class.
Definition: CodeGen_LLVM.h:57
void visit(const Let *) override
static std::unique_ptr< CodeGen_LLVM > new_for_target(const Target &target, llvm::LLVMContext &context)
Create an instance of CodeGen_LLVM suitable for the target.
llvm::StructType * metadata_t_type
Definition: CodeGen_LLVM.h:195
virtual void end_func(const std::vector< LoweredArgument > &args)
llvm::StructType * argument_t_type
Definition: CodeGen_LLVM.h:196
void visit(const Select *) override
void visit(const Or *) override
void visit(const AssertStmt *) override
bool emit_atomic_stores
Emit atomic store instructions?
Definition: CodeGen_LLVM.h:518
void do_parallel_tasks(const std::vector< ParallelTask > &tasks)
void sym_pop(const std::string &name)
Remove an entry from the symbol table, revealing any previous entries with the same name.
virtual void init_context()
Grab all the context specific internal state.
virtual llvm::Value * concat_vectors(const std::vector< llvm::Value * > &)
Concatenate a bunch of llvm vectors.
llvm::StructType * device_interface_t_type
Definition: CodeGen_LLVM.h:198
void visit(const Mul *) override
llvm::StructType * semaphore_t_type
Definition: CodeGen_LLVM.h:200
llvm::Value * make_halide_type_t(const Type &)
Turn a Halide Type into an llvm::Value representing a constant halide_type_t.
virtual bool supports_atomic_add(const Type &t) const
llvm::Constant * create_binary_blob(const std::vector< char > &data, const std::string &name, bool constant=true)
Put a binary blob in the module as a global variable and return a pointer to it.
std::vector< LoweredArgument > current_function_args
Definition: CodeGen_LLVM.h:149
void return_with_error_code(llvm::Value *error_code)
Return from the pipeline with the given error code.
virtual llvm::Value * shuffle_vectors(llvm::Value *a, llvm::Value *b, const std::vector< int > &indices)
Create an LLVM shuffle vectors instruction.
llvm::StructType * semaphore_acquire_t_type
Definition: CodeGen_LLVM.h:201
static std::unique_ptr< llvm::Module > compile_trampolines(const Target &target, llvm::LLVMContext &context, const std::string &suffix, const std::vector< std::pair< std::string, ExternSignature >> &externs)
llvm::Value * call_intrin(const Type &t, int intrin_lanes, llvm::Function *intrin, std::vector< Expr >)
llvm::Value * call_intrin(const Type &t, int intrin_lanes, const std::string &name, std::vector< Expr >)
Generate a call to a vector intrinsic or runtime inlined function.
llvm::StructType * parallel_task_t_type
Definition: CodeGen_LLVM.h:202
void trigger_destructor(llvm::Function *destructor_fn, llvm::Value *stack_slot)
Call a destructor early.
llvm::Value * get_user_context() const
The user_context argument.
void visit(const Min *) override
void visit(const For *) override
void visit(const Not *) override
void visit(const Sub *) override
void sym_push(const std::string &name, llvm::Value *value)
Add an entry to the symbol table, hiding previous entries with the same name.
llvm::StructType * type_t_type
Definition: CodeGen_LLVM.h:193
void add_external_code(const Module &halide_module)
Add external_code entries to llvm module.
DestructorType
Some destructors should always be called.
Definition: CodeGen_LLVM.h:231
llvm::Value * codegen(const Expr &)
Emit code that evaluates an expression, and return the llvm representation of the result of the expre...
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, Expr index)
Halide::Target target
The target we're generating code for.
Definition: CodeGen_LLVM.h:153
virtual void begin_func(LinkageType linkage, const std::string &simple_name, const std::string &extern_name, const std::vector< LoweredArgument > &args)
Helper functions for compiling Halide functions to llvm functions.
llvm::Value * call_intrin(llvm::Type *t, int intrin_lanes, const std::string &name, std::vector< llvm::Value * >)
llvm::StructType * scalar_value_t_type
Definition: CodeGen_LLVM.h:197
std::unique_ptr< llvm::Module > module
Definition: CodeGen_LLVM.h:141
virtual void prepare_for_early_exit()
If we have to bail out of a pipeline midway, this should inject the appropriate target-specific clean...
Definition: CodeGen_LLVM.h:390
void visit(const Max *) override
void codegen(const Stmt &)
Emit code that runs a statement.
virtual void compile_buffer(const Buffer<> &buffer)
llvm::Function * declare_intrin_overload(const std::string &name, const Type &ret_type, const std::string &impl_name, std::vector< Type > arg_types, bool scalars_are_vectors=false)
Declare an intrinsic function that participates in overload resolution.
virtual std::string mcpu() const =0
What should be passed as -mcpu, -mattrs, and related for compilation.
virtual bool promote_indices() const
Should indexing math be promoted to 64-bit on platforms with 64-bit pointers?
Definition: CodeGen_LLVM.h:122
void visit(const GE *) override
void visit(const Variable *) override
void declare_intrin_overload(const std::string &name, const Type &ret_type, llvm::Function *impl, std::vector< Type > arg_types)
llvm::Value * shuffle_vectors(llvm::Value *v, const std::vector< int > &indices)
Shorthand for shuffling a vector with an undef vector.
void visit(const Atomic *) override
void create_assertion(llvm::Value *condition, const Expr &message, llvm::Value *error_code=nullptr)
Codegen an assertion.
virtual Type upgrade_type_for_storage(const Type &) const
Return the type that a given Halide type should be stored/loaded from memory as.
llvm::Function * get_llvm_intrin(llvm::Type *ret_type, const std::string &name, const std::vector< llvm::Type * > &arg_types)
llvm::Value * sym_get(const std::string &name, bool must_succeed=true) const
Fetch an entry from the symbol table.
void visit(const Shuffle *) override
llvm::BasicBlock * get_destructor_block()
Retrieves the block containing the error handling code.
void visit(const Allocate *) override=0
Generate code for an allocate node.
virtual llvm::Value * slice_vector(llvm::Value *vec, int start, int extent)
Take a slice of lanes out of an llvm vector.
void visit(const Prefetch *) override
void visit(const Provide *) override
These IR nodes should have been removed during lowering.
void visit(const Fork *) override
virtual Type upgrade_type_for_arithmetic(const Type &) const
Return the type in which arithmetic should be done for the given storage type.
llvm::FunctionType * signature_to_type(const ExternSignature &signature)
Given a Halide ExternSignature, return the equivalent llvm::FunctionType.
const Target & get_target() const
The target we're generating code for.
Definition: CodeGen_LLVM.h:68
void visit(const Div *) override
llvm::StructType * halide_buffer_t_type
Definition: CodeGen_LLVM.h:192
llvm::IRBuilder< llvm::ConstantFolder, llvm::IRBuilderDefaultInserter > * builder
Definition: CodeGen_LLVM.h:144
void visit(const EQ *) override
virtual int native_vector_bits() const =0
What's the natural vector bit-width to use for loads, stores, etc.
llvm::StructType * dimension_t_type
Definition: CodeGen_LLVM.h:194
virtual void compile_func(const LoweredFunc &func, const std::string &simple_name, const std::string &extern_name)
Compile a specific halide declaration into the llvm Module.
llvm::LLVMContext * context
Definition: CodeGen_LLVM.h:143
void get_parallel_tasks(const Stmt &s, std::vector< ParallelTask > &tasks, std::pair< std::string, int > prefix)
llvm::Value * register_destructor(llvm::Function *destructor_fn, llvm::Value *obj, DestructorType when)
virtual std::string get_allocation_name(const std::string &n)
Get a unique name for the actual block of memory that an allocate node uses.
Definition: CodeGen_LLVM.h:317
virtual bool use_soft_float_abi() const =0
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, llvm::Value *index)
Generate a pointer into a named buffer at a given index, of a given type.
virtual std::string mabi() const
void visit(const Evaluate *) override
std::map< std::string, std::vector< Intrinsic > > intrinsics
Mapping of intrinsic functions to the various overloads implementing it.
Definition: CodeGen_LLVM.h:443
virtual std::unique_ptr< llvm::Module > compile(const Module &module)
Takes a halide Module and compiles it to an llvm Module.
void visit(const LE *) override
void visit(const NE *) override
void add_tbaa_metadata(llvm::Instruction *inst, std::string buffer, const Expr &index)
Mark a load or store with type-based-alias-analysis metadata so that llvm knows it can reorder loads ...
void visit(const And *) override
virtual bool use_pic() const
void scalarize(const Expr &)
Codegen a vector Expr by codegenning each lane and combining.
void visit(const StringImm *) override
virtual void codegen_vector_reduce(const VectorReduce *op, const Expr &init)
Compile a horizontal reduction that starts with an explicit initial value.
void codegen_asserts(const std::vector< const AssertStmt * > &asserts)
Codegen a block of asserts with pure conditions.
size_t get_requested_alloca_total() const
Definition: CodeGen_LLVM.h:84
llvm::Value * create_alloca_at_entry(llvm::Type *type, int n, bool zero_initialize=false, const std::string &name="")
Perform an alloca at the function entrypoint.
virtual Type upgrade_type_for_argument_passing(const Type &) const
Return the type that a Halide type should be passed in and out of functions as.
llvm::Value * create_broadcast(llvm::Value *, int lanes)
Widen an llvm scalar into an llvm vector with the given number of lanes.
void visit(const GT *) override
llvm::Value * codegen_buffer_pointer(const std::string &buffer, Type type, Expr index)
void visit(const Cast *) override
void visit(const Ramp *) override
void visit(const Broadcast *) override
void visit(const Mod *) override
void visit(const Call *) override
static void initialize_llvm()
Initialize internal llvm state for the enabled targets.
void do_as_parallel_task(const Stmt &s)
std::set< std::string > external_buffer
Which buffers came in from the outside world (and so we can't guarantee their alignment)
Definition: CodeGen_LLVM.h:419
void set_context(llvm::LLVMContext &context)
Tell the code generator which LLVM context to use.
void visit(const Store *) override
llvm::Constant * create_string_constant(const std::string &str)
Put a string constant in the module as a global variable and return a pointer to it.
void optimize_module()
Run all of llvm's optimization passes on the module.
llvm::Value * call_intrin(llvm::Type *t, int intrin_lanes, llvm::Function *intrin, std::vector< llvm::Value * >)
llvm::Value * call_overloaded_intrin(const Type &result_type, const std::string &name, const std::vector< Expr > &args)
Call an overloaded intrinsic function.
void visit(const ProducerConsumer *) override
virtual llvm::Type * llvm_type_of(const Type &) const
Get the llvm type equivalent to the given halide type in the current context.
llvm::Value * codegen_buffer_pointer(llvm::Value *base_address, Type type, llvm::Value *index)
void visit(const LT *) override
void visit(const Load *) override
std::pair< llvm::Function *, int > find_vector_runtime_function(const std::string &name, int lanes)
Go looking for a vector version of a runtime function.
virtual llvm::Value * interleave_vectors(const std::vector< llvm::Value * > &)
Implementation of the intrinsic call to interleave_vectors.
bool inside_atomic_mutex_node
Are we inside an atomic node that uses mutex locks? This is used for detecting deadlocks from nested atomics and illegal vectorization.
Definition: CodeGen_LLVM.h:515
void visit(const FloatImm *) override
void visit(const IntImm *) override
Generate code for various IR nodes.
virtual void init_module()
Initialize the CodeGen_LLVM internal state to compile a fresh module.
virtual std::string mattrs() const =0
void visit(const Realize *) override
void visit(const VectorReduce *) override
void visit(const IfThenElse *) override
size_t requested_alloca_total
A (very) conservative guess at the size of all alloca() storage requested (including alignment padding).
Definition: CodeGen_LLVM.h:415
void visit(const Acquire *) override
Expr wild_u1x_
Some wildcard variables used for peephole optimizations in subclasses.
Definition: CodeGen_LLVM.h:209
llvm::Function * get_llvm_intrin(const Type &ret_type, const std::string &name, const std::vector< Type > &arg_types, bool scalars_are_vectors=false)
Get an LLVM intrinsic declaration.
llvm::StructType * pseudostack_slot_t_type
Definition: CodeGen_LLVM.h:199
bool sym_exists(const std::string &name) const
Test if an item exists in the symbol table.
void visit(const Free *) override=0
Generate code for a free node.
void visit(const Add *) override
void visit(const Block *) override
void visit(const UIntImm *) override
void visit(const LetStmt *) override
llvm::Type * void_t
Some useful llvm types.
Definition: CodeGen_LLVM.h:191
A base class for algorithms that need to recursively walk over the IR.
Definition: IRVisitor.h:19
virtual void visit(const IntImm *)
A reference-counted handle to a parameter to a halide pipeline.
Definition: Parameter.h:29
A halide module.
Definition: Module.h:135
HALIDE_ALWAYS_INLINE auto intrin(Call::IntrinsicOp intrinsic_op, Args... args) noexcept -> Intrin< decltype(pattern_arg(args))... >
Definition: IRMatch.h:1503
This file defines the class FunctionDAG, which is our representation of a Halide pipeline, and contains methods that use Halide's bounds tools to query properties of it.
LinkageType
Type of linkage a function in a lowered Halide module can have.
Definition: Module.h:48
@ Internal
Not visible externally, similar to 'static' linkage in C.
std::unique_ptr< llvm::Module > codegen_llvm(const Module &module, llvm::LLVMContext &context)
Given a Halide module, generate an llvm::Module.
A fragment of Halide syntax.
Definition: Expr.h:256
The sum of two expressions.
Definition: IR.h:38
Allocate a scratch area called with the given name, type, and size.
Definition: IR.h:352
Logical and - are both expressions true.
Definition: IR.h:157
If the 'condition' is false, then evaluate and return the message, which should be a call to an error function.
Definition: IR.h:276
Lock all the Store nodes in the body statement.
Definition: IR.h:853
A sequence of statements to be executed in-order.
Definition: IR.h:417
A vector with 'lanes' elements, in which every element is 'value'.
Definition: IR.h:241
A function call.
Definition: IR.h:464
The actual IR nodes begin here.
Definition: IR.h:29
Description of an intrinsic function overload.
Definition: CodeGen_LLVM.h:433
Intrinsic(Type result_type, std::vector< Type > arg_types, llvm::Function *impl)
Definition: CodeGen_LLVM.h:438
Codegen a call to do_parallel_tasks.
Definition: CodeGen_LLVM.h:266
The ratio of two expressions.
Definition: IR.h:65
Is the first expression equal to the second.
Definition: IR.h:103
Evaluate and discard an expression, presumably because it has some side-effect.
Definition: IR.h:450
Floating point constants.
Definition: Expr.h:234
A for loop.
Definition: IR.h:730
A pair of statements executed concurrently.
Definition: IR.h:431
Free the resources associated with the given buffer.
Definition: IR.h:388
Is the first expression greater than or equal to the second.
Definition: IR.h:148
Is the first expression greater than the second.
Definition: IR.h:139
An if-then-else block.
Definition: IR.h:440
Integer constants.
Definition: Expr.h:216
Is the first expression less than or equal to the second.
Definition: IR.h:130
Is the first expression less than the second.
Definition: IR.h:121
A let expression, like you might find in a functional language.
Definition: IR.h:253
The statement form of a let node.
Definition: IR.h:264
Load a value from a named symbol if predicate is true.
Definition: IR.h:199
Definition of a lowered function.
Definition: Module.h:96
The greater of two values.
Definition: IR.h:94
The lesser of two values.
Definition: IR.h:85
The remainder of a / b.
Definition: IR.h:76
The result of modulus_remainder analysis.
The product of two expressions.
Definition: IR.h:56
Is the first expression not equal to the second.
Definition: IR.h:112
Logical not - true if the expression is false.
Definition: IR.h:175
Logical or - is at least one of the expressions true.
Definition: IR.h:166
Represent a multi-dimensional region of a Func or an ImageParam that needs to be prefetched.
Definition: IR.h:830
This node is a helpful annotation to do with permissions.
Definition: IR.h:297
This defines the value of a function at a multi-dimensional location.
Definition: IR.h:336
A linear ramp vector node.
Definition: IR.h:229
Allocate a multi-dimensional buffer of the given type and size.
Definition: IR.h:402
A ternary operator.
Definition: IR.h:186
Construct a new vector by taking elements from another sequence of vectors.
Definition: IR.h:761
A reference-counted handle to a statement node.
Definition: Expr.h:413
Store a 'value' to the buffer called 'name' at a given 'index' if 'predicate' is true.
Definition: IR.h:315
String constants.
Definition: Expr.h:243
The difference of two expressions.
Definition: IR.h:47
Unsigned integer constants.
Definition: Expr.h:225
A named variable.
Definition: IR.h:683
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associative binary operator.
Definition: IR.h:871
A struct representing a target machine and os to generate code for.
Definition: Target.h:19
Types in the halide type system.
Definition: Type.h:269