Halide 12.0.1 — Halide compiler and libraries
Schedule.h (source listing)
1 #ifndef HALIDE_SCHEDULE_H
2 #define HALIDE_SCHEDULE_H
3 
4 /** \file
5  * Defines the internal representation of the schedule for a function
6  */
7 
8 #include <map>
9 #include <string>
10 #include <utility>
11 #include <vector>
12 
13 #include "DeviceAPI.h"
14 #include "Expr.h"
15 #include "FunctionPtr.h"
16 #include "Parameter.h"
17 #include "PrefetchDirective.h"
18 
19 namespace Halide {
20 
21 class Func;
22 struct VarOrRVar;
23 
24 namespace Internal {
25 class Function;
26 struct FunctionContents;
27 struct LoopLevelContents;
28 } // namespace Internal
29 
/** Different ways to handle a tail case in a split when the
 * factor does not provably divide the extent. */
enum class TailStrategy {
    /** Round up the extent to be a multiple of the split
     * factor. Not legal for RVars, as it would change the meaning
     * of the algorithm. Pros: generates the simplest, fastest
     * code. Cons: if used on a stage that reads from the input or
     * writes to the output, constrains the input or output size
     * to be a multiple of the split factor. */
    RoundUp,

    /** Guard the inner loop with an if statement that prevents
     * evaluation beyond the original extent. Always legal. The if
     * statement is treated like a boundary condition, and
     * factored out into a loop epilogue if possible. Pros: no
     * redundant re-evaluation; does not constrain input or
     * output sizes. Cons: increases code size due to separate
     * tail-case handling; vectorization will scalarize in the tail
     * case to handle the if statement. */
    GuardWithIf,

    /** Guard the inner loop with an if statement that prevents
     * evaluation beyond the original extent, with a hint that the
     * if statement should be implemented with predicated operations.
     * Always legal. The if statement is treated like a boundary
     * condition, and factored out into a loop epilogue if possible.
     * Pros: no redundant re-evaluation; does not constrain input or
     * output sizes. Cons: increases code size due to separate
     * tail-case handling. */
    Predicate,

    /** Prevent evaluation beyond the original extent by shifting
     * the tail case inwards, re-evaluating some points near the
     * end. Only legal for pure variables in pure definitions. If
     * the inner loop is very simple, the tail case is treated
     * like a boundary condition and factored out into an
     * epilogue.
     *
     * This is a good trade-off between several factors. Like
     * RoundUp, it supports vectorization well, because the inner
     * loop is always a fixed size with no data-dependent
     * branching. It increases code size slightly for inner loops
     * due to the epilogue handling, but not for outer loops
     * (e.g. loops over tiles). If used on a stage that reads from
     * an input or writes to an output, this strategy only requires
     * that the input/output extent be at least the split factor,
     * instead of a multiple of the split factor as with RoundUp. */
    ShiftInwards,

    /** For pure definitions use ShiftInwards. For pure vars in
     * update definitions use RoundUp. For RVars in update
     * definitions use GuardWithIf. */
    Auto
};
84 
/** Different ways to handle the case when the start/end of the loops of stages
 * computed with (fused) are not aligned. */
enum class LoopAlignStrategy {
    /** Shift the start of the fused loops to align. */
    AlignStart,

    /** Shift the end of the fused loops to align. */
    AlignEnd,

    /** compute_with will make no attempt to align the start/end of the
     * fused loops. */
    NoAlign,

    /** By default, LoopAlignStrategy is set to NoAlign. */
    Auto
};
101 
102 /** A reference to a site in a Halide statement at the top of the
103  * body of a particular for loop. Evaluating a region of a halide
104  * function is done by generating a loop nest that spans its
105  * dimensions. We schedule the inputs to that function by
106  * recursively injecting realizations for them at particular sites
107  * in this loop nest. A LoopLevel identifies such a site. The site
108  * can either be a loop nest within all stages of a function
109  * or it can refer to a loop nest within a particular function's
110  * stage (initial definition or updates).
111  *
112  * Note that a LoopLevel is essentially a pointer to an underlying value;
113  * all copies of a LoopLevel refer to the same site, so mutating one copy
114  * (via the set() method) will effectively mutate all copies:
115  \code
116  Func f;
117  Var x;
118  LoopLevel a(f, x);
119  // Both a and b refer to LoopLevel(f, x)
120  LoopLevel b = a;
121  // Now both a and b refer to LoopLevel::root()
122  a.set(LoopLevel::root());
123  \endcode
124  * This is quite useful when splitting Halide code into utility libraries, as it allows
125  * a library to schedule code according to a caller's specifications, even if the caller
126  * hasn't fully defined its pipeline yet:
127  \code
128  Func demosaic(Func input,
129  LoopLevel intermed_compute_at,
130  LoopLevel intermed_store_at,
131  LoopLevel output_compute_at) {
132  Func intermed = ...;
133  Func output = ...;
134  intermed.compute_at(intermed_compute_at).store_at(intermed_store_at);
135  output.compute_at(output_compute_at);
136  return output;
137  }
138 
139  void process() {
140  // Note that these LoopLevels are all undefined when we pass them to demosaic()
141  LoopLevel intermed_compute_at, intermed_store_at, output_compute_at;
142  Func input = ...;
143  Func demosaiced = demosaic(input, intermed_compute_at, intermed_store_at, output_compute_at);
144  Func output = ...;
145 
146  // We need to ensure all LoopLevels have a well-defined value prior to lowering:
147  intermed_compute_at.set(LoopLevel(output, y));
148  intermed_store_at.set(LoopLevel(output, y));
149  output_compute_at.set(LoopLevel(output, x));
150  }
151  \endcode
152  */
153 class LoopLevel {
155 
157  : contents(std::move(c)) {
158  }
159  LoopLevel(const std::string &func_name, const std::string &var_name,
160  bool is_rvar, int stage_index, bool locked = false);
161 
162 public:
163  /** Return the index of the function stage associated with this loop level.
164  * Asserts if undefined */
165  int stage_index() const;
166 
167  /** Identify the loop nest corresponding to some dimension of some function */
168  // @{
169  LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index = -1);
170  LoopLevel(const Func &f, const VarOrRVar &v, int stage_index = -1);
171  // @}
172 
173  /** Construct an undefined LoopLevel. Calling any method on an undefined
174  * LoopLevel (other than set()) will assert. */
176 
177  /** Construct a special LoopLevel value that implies
178  * that a function should be inlined away. */
179  static LoopLevel inlined();
180 
181  /** Construct a special LoopLevel value which represents the
182  * location outside of all for loops. */
183  static LoopLevel root();
184 
185  /** Mutate our contents to match the contents of 'other'. */
186  void set(const LoopLevel &other);
187 
188  // All the public methods below this point are meant only for internal
189  // use by Halide, rather than user code; hence, they are deliberately
190  // documented with plain comments (rather than Doxygen) to avoid being
191  // present in user documentation.
192 
193  // Lock this LoopLevel.
195 
196  // Return the Func name. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
197  std::string func() const;
198 
199  // Return the VarOrRVar. Asserts if the LoopLevel is_root() or is_inlined() or !defined().
200  VarOrRVar var() const;
201 
202  // Return true iff the LoopLevel is defined. (Only LoopLevels created
203  // with the default ctor are undefined.)
204  bool defined() const;
205 
206  // Test if a loop level corresponds to inlining the function.
207  bool is_inlined() const;
208 
209  // Test if a loop level is 'root', which describes the site
210  // outside of all for loops.
211  bool is_root() const;
212 
213  // Return a string of the form func.var -- note that this is safe
214  // to call for root or inline LoopLevels, but asserts if !defined().
215  std::string to_string() const;
216 
217  // Compare this loop level against the variable name of a for
218  // loop, to see if this loop level refers to the site
219  // immediately inside this loop. Asserts if !defined().
220  bool match(const std::string &loop) const;
221 
222  bool match(const LoopLevel &other) const;
223 
224  // Check if two loop levels are exactly the same.
225  bool operator==(const LoopLevel &other) const;
226 
227  bool operator!=(const LoopLevel &other) const {
228  return !(*this == other);
229  }
230 
231 private:
232  void check_defined() const;
233  void check_locked() const;
234  void check_defined_and_locked() const;
235 };
236 
239  /** Contains alignment strategies for the fused dimensions (indexed by the
240  * dimension name). If not in the map, use the default alignment strategy
241  * to align the fused dimension (see \ref LoopAlignStrategy::Auto).
242  */
243  std::map<std::string, LoopAlignStrategy> align;
244 
246  : level(LoopLevel::inlined().lock()) {
247  }
248  FuseLoopLevel(const LoopLevel &level, const std::map<std::string, LoopAlignStrategy> &align)
249  : level(level), align(align) {
250  }
251 };
252 
253 namespace Internal {
254 
255 class IRMutator;
256 struct ReductionVariable;
257 
258 struct Split {
259  std::string old_var, outer, inner;
261  bool exact; // Is it required that the factor divides the extent
262  // of the old var. True for splits of RVars. Forces
263  // tail strategy to be GuardWithIf.
265 
266  enum SplitType { SplitVar = 0,
270 
271  // If split_type is Rename, then this is just a renaming of the
272  // old_var to the outer and not a split. The inner var should
273  // be ignored, and factor should be one. Renames are kept in
274  // the same list as splits so that ordering between them is
275  // respected.
276 
277  // If split type is Purify, this replaces the old_var RVar to
278  // the outer Var. The inner var should be ignored, and factor
279  // should be one.
280 
281  // If split_type is Fuse, then this does the opposite of a
282  // split, it joins the outer and inner into the old_var.
284 
285  bool is_rename() const {
286  return split_type == RenameVar;
287  }
288  bool is_split() const {
289  return split_type == SplitVar;
290  }
291  bool is_fuse() const {
292  return split_type == FuseVars;
293  }
294  bool is_purify() const {
295  return split_type == PurifyRVar;
296  }
297 };
298 
/** Each Dim below has a dim_type, which tells you what
 * transformations are legal on it. When you combine two Dims of
 * distinct DimTypes (e.g. with Stage::fuse), the combined result has
 * the greater enum value of the two types. */
enum class DimType {
    /** This dim originated from a Var. You can evaluate a Func at
     * distinct values of this Var in any order over an interval
     * that's at least as large as the interval required. In pure
     * definitions you can even redundantly re-evaluate points. */
    PureVar = 0,

    /** The dim originated from an RVar. You can evaluate a Func at
     * distinct values of this RVar in any order (including in
     * parallel) over exactly the interval specified in the
     * RDom. PureRVars can also be reordered arbitrarily in the dims
     * list, as there are no data hazards between the evaluation of
     * the Func at distinct values of the RVar.
     *
     * The most common case where an RVar is considered pure is RVars
     * that are used in a way which obeys all the syntactic
     * constraints that a Var does, e.g:
     *
     \code
     RDom r(0, 100);
     f(r.x) = f(r.x) + 5;
     \endcode
     *
     * Other cases where RVars are pure are where the sites being
     * written to by the Func evaluated at one value of the RVar
     * couldn't possibly collide with the sites being written or read
     * by the Func at a distinct value of the RVar. For example, r.x
     * is pure in the following three definitions:
     *
     \code

     // This definition writes to even coordinates and reads from the
     // same site (which no other value of r.x is writing to) and odd
     // sites (which no other value of r.x is writing to):
     f(2*r.x) = max(f(2*r.x), f(2*r.x + 7));

     // This definition writes to scanline zero and reads from the the
     // same site and scanline one:
     f(r.x, 0) += f(r.x, 1);

     // This definition reads and writes over non-overlapping ranges:
     f(r.x + 100) += f(r.x);
     \endcode
     *
     * To give two counterexamples, r.x is not pure in the following
     * definitions:
     *
     \code
     // The same site is written by distinct values of the RVar
     // (write-after-write hazard):
     f(r.x / 2) += f(r.x);

     // One value of r.x reads from a site that another value of r.x
     // is writing to (read-after-write hazard):
     f(r.x) += f(r.x + 1);
     \endcode
     */
    PureRVar,

    /** The dim originated from an RVar. You must evaluate a Func at
     * distinct values of this RVar in increasing order over precisely
     * the interval specified in the RDom. ImpureRVars may not be
     * reordered with respect to other ImpureRVars.
     *
     * All RVars are impure by default. Those for which we can prove
     * no data hazards exist get promoted to PureRVar. There are two
     * instances in which ImpureRVars may be parallelized or reordered
     * even in the presence of hazards:
     *
     * 1) In the case of an update definition that has been proven to be
     * an associative and commutative reduction, reordering of
     * ImpureRVars is allowed, and parallelizing them is allowed if
     * the update has been made atomic.
     *
     * 2) ImpureRVars can also be reordered and parallelized if
     * Func::allow_race_conditions() has been set. This is the escape
     * hatch for when there are no hazards but the checks above failed
     * to prove that (RDom::where can encode arbitrary facts about
     * non-linear integer arithmetic, which is undecidable), or for
     * when you don't actually care about the non-determinism
     * introduced by data hazards (e.g. in the algorithm HOGWILD!).
     */
    ImpureRVar,
};
387 
388 /** The Dim struct represents one loop in the schedule's
389  * representation of a loop nest. */
390 struct Dim {
391  /** Name of the loop variable */
392  std::string var;
393 
394  /** How are the loop values traversed (e.g. unrolled, vectorized, parallel) */
396 
397  /** On what device does the body of the loop execute (e.g. Host, GPU, Hexagon) */
399 
400  /** The DimType tells us what transformations are legal on this
401  * loop (see the DimType enum above). */
403 
404  /** Can this loop be evaluated in any order (including in
405  * parallel)? Equivalently, are there no data hazards between
406  * evaluations of the Func at distinct values of this var? */
407  bool is_pure() const {
409  }
410 
411  /** Did this loop originate from an RVar (in which case the bounds
412  * of the loops are algorithmically meaningful)? */
413  bool is_rvar() const {
415  }
416 
417  /** Could multiple iterations of this loop happen at the same
418  * time, with reads and writes interleaved in arbitrary ways
419  * according to the memory model of the underlying compiler and
420  * machine? */
421  bool is_unordered_parallel() const {
423  }
424 
425  /** Could multiple iterations of this loop happen at the same
426  * time? Vectorized and GPULanes loop types are parallel but not
427  * unordered, because the loop iterations proceed together in
428  * lockstep with some well-defined outcome if there are hazards. */
429  bool is_parallel() const {
431  }
432 };
433 
434 /** A bound on a loop, typically from Func::bound */
435 struct Bound {
436  /** The loop var being bounded */
437  std::string var;
438 
439  /** Declared min and extent of the loop. min may be undefined if
440  * Func::bound_extent was used. */
442 
443  /** If defined, the number of iterations will be a multiple of
444  * "modulus", and the first iteration will be at a value congruent
445  * to "remainder" modulo "modulus". Set by Func::align_bounds and
446  * Func::align_extent. */
448 };
449 
450 /** Properties of one axis of the storage of a Func */
451 struct StorageDim {
452  /** The var in the pure definition corresponding to this axis */
453  std::string var;
454 
455  /** The bounds allocated (not computed) must be a multiple of
456  * "alignment". Set by Func::align_storage. */
458 
459  /** If the Func is explicitly folded along this axis (with
460  * Func::fold_storage) this gives the extent of the circular
461  * buffer used, and whether it is used in increasing order
462  * (fold_forward = true) or decreasing order (fold_forward =
463  * false). */
466 };
467 
/** This represents two stages with fused loop nests from outermost to
 * a specific loop level. The loops to compute func_1(stage_1) are
 * fused with the loops to compute func_2(stage_2) from outermost to
 * loop level var_name and the computation from stage_1 of func_1
 * occurs first.
 */
struct FusedPair {
    std::string func_1;
    std::string func_2;
    size_t stage_1;
    size_t stage_2;
    std::string var_name;

    FusedPair() = default;
    FusedPair(const std::string &f1, size_t s1, const std::string &f2,
              size_t s2, const std::string &var)
        : func_1(f1), func_2(f2), stage_1(s1), stage_2(s2), var_name(var) {
    }

    bool operator==(const FusedPair &other) const {
        return (func_1 == other.func_1) && (func_2 == other.func_2) &&
               (stage_1 == other.stage_1) && (stage_2 == other.stage_2) &&
               (var_name == other.var_name);
    }
    // Lexicographic order (func_1, func_2, var_name, stage_1, stage_2),
    // so FusedPairs can be used as keys in ordered containers.
    bool operator<(const FusedPair &other) const {
        if (func_1 != other.func_1) {
            return func_1 < other.func_1;
        }
        if (func_2 != other.func_2) {
            return func_2 < other.func_2;
        }
        if (var_name != other.var_name) {
            return var_name < other.var_name;
        }
        if (stage_1 != other.stage_1) {
            return stage_1 < other.stage_1;
        }
        return stage_2 < other.stage_2;
    }
};
508 
509 struct FuncScheduleContents;
510 struct StageScheduleContents;
511 struct FunctionContents;
512 
513 /** A schedule for a Function of a Halide pipeline. This schedule is
514  * applied to all stages of the Function. Right now this interface is
515  * basically a struct, offering mutable access to its innards.
516  * In the future it may become more encapsulated. */
519 
520 public:
522  : contents(std::move(c)) {
523  }
524  FuncSchedule(const FuncSchedule &other) = default;
526 
527  /** Return a deep copy of this FuncSchedule. It recursively deep copies all
528  * called functions, schedules, specializations, and reduction domains. This
529  * method takes a map of <old FunctionContents, deep-copied version> as input
530  * and would use the deep-copied FunctionContents from the map if exists
531  * instead of creating a new deep-copy to avoid creating deep-copies of the
532  * same FunctionContents multiple times.
533  */
535  std::map<FunctionPtr, FunctionPtr> &copied_map) const;
536 
537  /** This flag is set to true if the schedule is memoized. */
538  // @{
539  bool &memoized();
540  bool memoized() const;
541  // @}
542 
543  /** This flag is set to true if the schedule is memoized and has an attached
544  * eviction key. */
545  // @{
548  // @}
549 
550  /** Is the production of this Function done asynchronously */
551  bool &async();
552  bool async() const;
553 
554  /** The list and order of dimensions used to store this
555  * function. The first dimension in the vector corresponds to the
556  * innermost dimension for storage (i.e. which dimension is
557  * tightly packed in memory) */
558  // @{
559  const std::vector<StorageDim> &storage_dims() const;
560  std::vector<StorageDim> &storage_dims();
561  // @}
562 
563  /** The memory type (heap/stack/shared/etc) used to back this Func. */
564  // @{
567  // @}
568 
569  /** You may explicitly bound some of the dimensions of a function,
570  * or constrain them to lie on multiples of a given factor. See
571  * \ref Func::bound and \ref Func::align_bounds and \ref Func::align_extent. */
572  // @{
573  const std::vector<Bound> &bounds() const;
574  std::vector<Bound> &bounds();
575  // @}
576 
577  /** You may explicitly specify an estimate of some of the function
578  * dimensions. See \ref Func::set_estimate */
579  // @{
580  const std::vector<Bound> &estimates() const;
581  std::vector<Bound> &estimates();
582  // @}
583 
584  /** Mark calls of a function by 'f' to be replaced with its identity
585  * wrapper or clone during the lowering stage. If the string 'f' is empty,
586  * it means replace all calls to the function by all other functions
587  * (excluding itself) in the pipeline with the global identity wrapper.
588  * See \ref Func::in and \ref Func::clone_in for more details. */
589  // @{
590  const std::map<std::string, Internal::FunctionPtr> &wrappers() const;
591  std::map<std::string, Internal::FunctionPtr> &wrappers();
592  void add_wrapper(const std::string &f,
593  const Internal::FunctionPtr &wrapper);
594  // @}
595 
596  /** At what sites should we inject the allocation and the
597  * computation of this function? The store_level must be outside
598  * of or equal to the compute_level. If the compute_level is
599  * inline, the store_level is meaningless. See \ref Func::store_at
600  * and \ref Func::compute_at */
601  // @{
602  const LoopLevel &store_level() const;
603  const LoopLevel &compute_level() const;
606  // @}
607 
608  /** Pass an IRVisitor through to all Exprs referenced in the
609  * Schedule. */
610  void accept(IRVisitor *) const;
611 
612  /** Pass an IRMutator through to all Exprs referenced in the
613  * Schedule. */
614  void mutate(IRMutator *);
615 };
616 
617 /** A schedule for a single stage of a Halide pipeline. Right now this
618  * interface is basically a struct, offering mutable access to its
619  * innards. In the future it may become more encapsulated. */
622 
623 public:
625  : contents(std::move(c)) {
626  }
627  StageSchedule(const StageSchedule &other) = default;
629 
630  /** Return a copy of this StageSchedule. */
632 
633  /** This flag is set to true if the dims list has been manipulated
634  * by the user (or if a ScheduleHandle was created that could have
635  * been used to manipulate it). It controls the warning that
636  * occurs if you schedule the vars of the pure step but not the
637  * update steps. */
638  // @{
639  bool &touched();
640  bool touched() const;
641  // @}
642 
643  /** RVars of reduction domain associated with this schedule if there is any. */
644  // @{
645  const std::vector<ReductionVariable> &rvars() const;
646  std::vector<ReductionVariable> &rvars();
647  // @}
648 
649  /** The traversal of the domain of a function can have some of its
650  * dimensions split into sub-dimensions. See \ref Func::split */
651  // @{
652  const std::vector<Split> &splits() const;
653  std::vector<Split> &splits();
654  // @}
655 
656  /** The list and ordering of dimensions used to evaluate this
657  * function, after all splits have taken place. The first
658  * dimension in the vector corresponds to the innermost for loop,
659  * and the last is the outermost. Also specifies what type of for
660  * loop to use for each dimension. Does not specify the bounds on
661  * each dimension. These get inferred from how the function is
662  * used, what the splits are, and any optional bounds in the list below. */
663  // @{
664  const std::vector<Dim> &dims() const;
665  std::vector<Dim> &dims();
666  // @}
667 
668  /** You may perform prefetching in some of the dimensions of a
669  * function. See \ref Func::prefetch */
670  // @{
671  const std::vector<PrefetchDirective> &prefetches() const;
672  std::vector<PrefetchDirective> &prefetches();
673  // @}
674 
675  /** Innermost loop level of fused loop nest for this function stage.
676  * Fusion runs from outermost to this loop level. The stages being fused
677  * should not have producer/consumer relationship. See \ref Func::compute_with
678  * and \ref Func::compute_with */
679  // @{
680  const FuseLoopLevel &fuse_level() const;
682  // @}
683 
684  /** List of function stages that are to be fused with this function stage
685  * from the outermost loop to a certain loop level. Those function stages
686  * are to be computed AFTER this function stage at the last fused loop level.
687  * This list is populated when realization_order() is called. See
688  * \ref Func::compute_with */
689  // @{
690  const std::vector<FusedPair> &fused_pairs() const;
691  std::vector<FusedPair> &fused_pairs();
692 
693  /** Are race conditions permitted? */
694  // @{
695  bool allow_race_conditions() const;
697  // @}
698 
699  /** Use atomic update? */
700  // @{
701  bool atomic() const;
702  bool &atomic();
703  // @}
704 
705  /** Atomic updates are only allowed on associative reductions.
706  * We try to prove the associativity, but the user can override
707  * the associativity test and suppress compiler error if the prover
708  * fails to recognize the associativity or the user does not care. */
709  // @{
712  // @}
713 
714  /** Pass an IRVisitor through to all Exprs referenced in the
715  * Schedule. */
716  void accept(IRVisitor *) const;
717 
718  /** Pass an IRMutator through to all Exprs referenced in the
719  * Schedule. */
720  void mutate(IRMutator *);
721 };
722 
723 } // namespace Internal
724 } // namespace Halide
725 
726 #endif
Defines DeviceAPI.
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
Defines the internal representation of parameters to halide piplines.
Defines the PrefetchDirective struct.
A halide function.
Definition: Func.h:681
A schedule for a Function of a Halide pipeline.
Definition: Schedule.h:517
const std::vector< Bound > & bounds() const
You may explicitly bound some of the dimensions of a function, or constrain them to lie on multiples ...
const std::map< std::string, Internal::FunctionPtr > & wrappers() const
Mark calls of a function by 'f' to be replaced with its identity wrapper or clone during the lowering...
void add_wrapper(const std::string &f, const Internal::FunctionPtr &wrapper)
std::vector< Bound > & estimates()
void mutate(IRMutator *)
Pass an IRMutator through to all Exprs referenced in the Schedule.
const std::vector< StorageDim > & storage_dims() const
The list and order of dimensions used to store this function.
void accept(IRVisitor *) const
Pass an IRVisitor through to all Exprs referenced in the Schedule.
std::map< std::string, Internal::FunctionPtr > & wrappers()
const LoopLevel & compute_level() const
MemoryType memory_type() const
The memory type (heap/stack/shared/etc) used to back this Func.
FuncSchedule(const FuncSchedule &other)=default
const std::vector< Bound > & estimates() const
You may explicitly specify an estimate of some of the function dimensions.
Expr & memoize_eviction_key()
This flag is set to true if the schedule is memoized and has an attached eviction key.
FuncSchedule(IntrusivePtr< FuncScheduleContents > c)
Definition: Schedule.h:521
bool & async()
Is the production of this Function done asynchronously.
const LoopLevel & store_level() const
At what sites should we inject the allocation and the computation of this function?...
FuncSchedule deep_copy(std::map< FunctionPtr, FunctionPtr > &copied_map) const
Return a deep copy of this FuncSchedule.
std::vector< Bound > & bounds()
std::vector< StorageDim > & storage_dims()
bool & memoized()
This flag is set to true if the schedule is memoized.
A reference-counted handle to Halide's internal representation of a function.
Definition: Function.h:38
A base class for passes over the IR which modify it (e.g.
Definition: IRMutator.h:26
A base class for algorithms that need to recursively walk over the IR.
Definition: IRVisitor.h:19
A schedule for a single stage of a Halide pipeline.
Definition: Schedule.h:620
const std::vector< FusedPair > & fused_pairs() const
List of function stages that are to be fused with this function stage from the outermost loop to a ce...
StageSchedule(IntrusivePtr< StageScheduleContents > c)
Definition: Schedule.h:624
StageSchedule get_copy() const
Return a copy of this StageSchedule.
std::vector< Dim > & dims()
bool & touched()
This flag is set to true if the dims list has been manipulated by the user (or if a ScheduleHandle wa...
std::vector< ReductionVariable > & rvars()
const std::vector< Split > & splits() const
The traversal of the domain of a function can have some of its dimensions split into sub-dimensions.
bool allow_race_conditions() const
Are race conditions permitted?
const FuseLoopLevel & fuse_level() const
Innermost loop level of fused loop nest for this function stage.
std::vector< PrefetchDirective > & prefetches()
bool atomic() const
Use atomic update?
void mutate(IRMutator *)
Pass an IRMutator through to all Exprs referenced in the Schedule.
const std::vector< PrefetchDirective > & prefetches() const
You may perform prefetching in some of the dimensions of a function.
void accept(IRVisitor *) const
Pass an IRVisitor through to all Exprs referenced in the Schedule.
std::vector< Split > & splits()
std::vector< FusedPair > & fused_pairs()
const std::vector< ReductionVariable > & rvars() const
RVars of reduction domain associated with this schedule if there is any.
const std::vector< Dim > & dims() const
The list and ordering of dimensions used to evaluate this function, after all splits have taken place...
bool override_atomic_associativity_test() const
Atomic updates are only allowed on associative reductions.
StageSchedule(const StageSchedule &other)=default
A reference to a site in a Halide statement at the top of the body of a particular for loop.
Definition: Schedule.h:153
VarOrRVar var() const
std::string to_string() const
static LoopLevel root()
Construct a special LoopLevel value which represents the location outside of all for loops.
LoopLevel(const Internal::Function &f, const VarOrRVar &v, int stage_index=-1)
Identify the loop nest corresponding to some dimension of some function.
static LoopLevel inlined()
Construct a special LoopLevel value that implies that a function should be inlined away.
LoopLevel(const Func &f, const VarOrRVar &v, int stage_index=-1)
bool operator==(const LoopLevel &other) const
std::string func() const
int stage_index() const
Return the index of the function stage associated with this loop level.
LoopLevel()
Construct an undefined LoopLevel.
void set(const LoopLevel &other)
Mutate our contents to match the contents of 'other'.
bool match(const LoopLevel &other) const
bool is_root() const
bool is_inlined() const
LoopLevel & lock()
bool operator!=(const LoopLevel &other) const
Definition: Schedule.h:227
bool defined() const
bool match(const std::string &loop) const
DimType
Each Dim below has a dim_type, which tells you what transformations are legal on it.
Definition: Schedule.h:303
@ ImpureRVar
The dim originated from an RVar.
@ PureRVar
The dim originated from an RVar.
@ PureVar
This dim originated from a Var.
ForType
An enum describing a type of loop traversal.
Definition: Expr.h:395
bool is_unordered_parallel(ForType for_type)
Check if for_type executes for loop iterations in parallel and unordered.
bool is_parallel(ForType for_type)
Returns true if for_type executes for loop iterations in parallel.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
TailStrategy
Different ways to handle a tail case in a split when the factor does not provably divide the extent.
Definition: Schedule.h:32
@ Auto
For pure definitions use ShiftInwards.
@ RoundUp
Round up the extent to be a multiple of the split factor.
@ Predicate
Guard the inner loop with an if statement that prevents evaluation beyond the original extent, with a hint that the if statement should be implemented with predicated operations.
@ GuardWithIf
Guard the inner loop with an if statement that prevents evaluation beyond the original extent.
@ ShiftInwards
Prevent evaluation beyond the original extent by shifting the tail case inwards, re-evaluating some points near the end.
LoopAlignStrategy
Different ways to handle the case when the start/end of the loops of stages computed with (fused) are not aligned.
Definition: Schedule.h:87
@ Auto
By default, LoopAlignStrategy is set to NoAlign.
@ NoAlign
compute_with will make no attempt to align the start/end of the fused loops.
@ AlignEnd
Shift the end of the fused loops to align.
@ AlignStart
Shift the start of the fused loops to align.
DeviceAPI
An enum describing a type of device API.
Definition: DeviceAPI.h:15
MemoryType
An enum describing different address spaces to be used with Func::store_in.
Definition: Expr.h:346
A fragment of Halide syntax.
Definition: Expr.h:256
std::map< std::string, LoopAlignStrategy > align
Contains alignment strategies for the fused dimensions (indexed by the dimension name).
Definition: Schedule.h:243
FuseLoopLevel(const LoopLevel &level, const std::map< std::string, LoopAlignStrategy > &align)
Definition: Schedule.h:248
A bound on a loop, typically from Func::bound.
Definition: Schedule.h:435
Expr min
Declared min and extent of the loop.
Definition: Schedule.h:441
std::string var
The loop var being bounded.
Definition: Schedule.h:437
Expr modulus
If defined, the number of iterations will be a multiple of "modulus", and the first iteration will be at a value congruent to "remainder" modulo "modulus".
Definition: Schedule.h:447
The Dim struct represents one loop in the schedule's representation of a loop nest.
Definition: Schedule.h:390
std::string var
Name of the loop variable.
Definition: Schedule.h:392
DimType dim_type
The DimType tells us what transformations are legal on this loop (see the DimType enum above).
Definition: Schedule.h:402
bool is_rvar() const
Did this loop originate from an RVar (in which case the bounds of the loops are algorithmically meaningful)?
Definition: Schedule.h:413
ForType for_type
How are the loop values traversed (e.g. unrolled, vectorized, parallel).
Definition: Schedule.h:395
DeviceAPI device_api
On what device does the body of the loop execute (e.g. Host, GPU, Hexagon).
Definition: Schedule.h:398
bool is_parallel() const
Could multiple iterations of this loop happen at the same time? Vectorized and GPULanes loop types are considered parallel.
Definition: Schedule.h:429
bool is_unordered_parallel() const
Could multiple iterations of this loop happen at the same time, with reads and writes interleaved in arbitrary order?
Definition: Schedule.h:421
bool is_pure() const
Can this loop be evaluated in any order (including in parallel)? Equivalently, are there no data hazards between evaluations of the Func at distinct values of this var?
Definition: Schedule.h:407
A possibly-weak pointer to a Halide function.
Definition: FunctionPtr.h:27
This represents two stages with fused loop nests from outermost to a specific loop level.
Definition: Schedule.h:474
bool operator==(const FusedPair &other) const
Definition: Schedule.h:487
FusedPair(const std::string &f1, size_t s1, const std::string &f2, size_t s2, const std::string &var)
Definition: Schedule.h:482
bool operator<(const FusedPair &other) const
Definition: Schedule.h:492
bool is_fuse() const
Definition: Schedule.h:291
bool is_rename() const
Definition: Schedule.h:285
bool is_split() const
Definition: Schedule.h:288
TailStrategy tail
Definition: Schedule.h:264
std::string old_var
Definition: Schedule.h:259
bool is_purify() const
Definition: Schedule.h:294
Properties of one axis of the storage of a Func.
Definition: Schedule.h:451
std::string var
The var in the pure definition corresponding to this axis.
Definition: Schedule.h:453
Expr alignment
The bounds allocated (not computed) must be a multiple of "alignment".
Definition: Schedule.h:457
Expr fold_factor
If the Func is explicitly folded along this axis (with Func::fold_storage) this gives the extent of the circular buffer used. Otherwise it is undefined.
Definition: Schedule.h:464
A class that can represent Vars or RVars.
Definition: Func.h:30