FunctionDAG.h
/** This file defines the class FunctionDAG, which is our
 * representation of a Halide pipeline, and contains methods that use
 * Halide's bounds tools to query properties of it. */

#ifndef FUNCTION_DAG_H
#define FUNCTION_DAG_H

#include <algorithm>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "Errors.h"
#include "Featurization.h"
#include "Halide.h"

namespace Halide {
namespace Internal {
namespace Autoscheduler {

using std::map;
using std::pair;
using std::string;
using std::unique_ptr;
using std::vector;

// First we have various utility classes.

// An optional rational type used when analyzing memory dependencies.
struct OptionalRational {
    bool exists = false;
    int64_t numerator = 0, denominator = 0;

    OptionalRational() = default;
    OptionalRational(bool e, int64_t n, int64_t d)
        : exists(e), numerator(n), denominator(d) {
    }

    void operator+=(const OptionalRational &other) {
        if (!exists || !other.exists) {
            exists = false;
            return;
        }
        if (denominator == other.denominator) {
            numerator += other.numerator;
            return;
        }

        int64_t l = lcm(denominator, other.denominator);
        numerator *= l / denominator;
        denominator = l;
        numerator += other.numerator * (l / other.denominator);
        int64_t g = gcd(numerator, denominator);
        numerator /= g;
        denominator /= g;
    }

    OptionalRational operator*(const OptionalRational &other) const {
        if ((*this) == 0) {
            return *this;
        }
        if (other == 0) {
            return other;
        }
        int64_t num = numerator * other.numerator;
        int64_t den = denominator * other.denominator;
        bool e = exists && other.exists;
        return OptionalRational{e, num, den};
    }

    // Because this type is optional (exists may be false), we don't
    // have a total ordering. These methods all return false when the
    // operands are not comparable, so a < b is not the same as
    // !(a >= b).
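    //
    // Illustrative example (not part of the original header) of why these
    // comparisons do not form a total order. A value with exists == false is
    // incomparable with everything, so a < 1 and a >= 1 can both be false:
    //
    //     OptionalRational a;               // does not exist
    //     OptionalRational b{true, 1, 2};   // 1/2
    //     (a < 1)  == false;                // a is not comparable to anything
    //     (a >= 1) == false;                // ...so this is also false
    //     (b < 1)  == true;                 // 1/2 < 1
    //     (b * a).exists == false;          // unknown stride stays unknown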
    bool operator<(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator < x * denominator;
        } else {
            return numerator > x * denominator;
        }
    }

    bool operator<=(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator <= x * denominator;
        } else {
            return numerator >= x * denominator;
        }
    }

    bool operator>(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) <= x);
    }

    bool operator>=(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) < x);
    }

    bool operator==(int x) const {
        return exists && (numerator == (x * denominator));
    }

    bool operator==(const OptionalRational &other) const {
        return (exists == other.exists) && (numerator * other.denominator == denominator * other.numerator);
    }
};

// A LoadJacobian records the derivative of the coordinate accessed in
// some producer w.r.t. the loops of the consumer.
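//
// For example (illustrative only, not part of the original header): if the
// consumer stage has loops over (x, y) and contains the call
//
//     in(2 * x + 7, y / 3)
//
// then the load into `in` has the Jacobian
//
//     [ d(coord 0)/dx  d(coord 0)/dy ]   [ 2    0  ]
//     [ d(coord 1)/dx  d(coord 1)/dy ] = [ 0   1/3 ]
//
// stored as a matrix of OptionalRational coefficients. A coefficient with
// exists == false means the derivative is unknown, e.g. a data-dependent
// index such as in(lookup(x), y).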
class LoadJacobian {
    vector<vector<OptionalRational>> coeffs;
    int64_t c;

public:
    LoadJacobian(vector<vector<OptionalRational>> &&matrix, int64_t c = 1)
        : coeffs(matrix), c(c) {
    }

    size_t producer_storage_dims() const {
        return coeffs.size();
    }

    size_t consumer_loop_dims() const {
        if (coeffs.empty() || coeffs[0].empty()) {
            // The producer is scalar, and we don't know how
            // many consumer loops there are.
            return 0;
        }
        return coeffs[0].size();
    }

    OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
        if (coeffs.empty()) {
            // The producer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(producer_storage_dim < (int)coeffs.size());
        const auto &p = coeffs[producer_storage_dim];
        if (p.empty()) {
            // The consumer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(consumer_loop_dim < (int)p.size());
        return p[consumer_loop_dim];
    }

    // To avoid redundantly re-recording copies of the same
    // load Jacobian, we keep a count of how many times a
    // load with this Jacobian occurs.
    int64_t count() const {
        return c;
    }

    // Try to merge another LoadJacobian into this one, increasing the
    // count if the coefficients match.
    bool merge(const LoadJacobian &other) {
        if (other.coeffs.size() != coeffs.size()) {
            return false;
        }
        for (size_t i = 0; i < coeffs.size(); i++) {
            if (other.coeffs[i].size() != coeffs[i].size()) {
                return false;
            }
            for (size_t j = 0; j < coeffs[i].size(); j++) {
                if (!(other.coeffs[i][j] == coeffs[i][j])) {
                    return false;
                }
            }
        }
        c += other.count();
        return true;
    }

    // Multiply Jacobians, used to look at memory dependencies through
    // inlined functions.
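    //
    // Illustrative example (not part of the original header): if g is inlined
    // into f, f accesses g at stride 2 in x, and g accesses `in` at stride 3
    // in its own first argument, then the composite stride of the load from
    // `in` with respect to f's loop is the matrix product, i.e. 6:
    //
    //     LoadJacobian g_to_in({{OptionalRational{true, 3, 1}}});
    //     LoadJacobian f_to_g({{OptionalRational{true, 2, 1}}});
    //     LoadJacobian f_to_in = g_to_in * f_to_g;   // single coefficient 6/1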
    LoadJacobian operator*(const LoadJacobian &other) const {
        vector<vector<OptionalRational>> matrix;
        internal_assert(consumer_loop_dims() == other.producer_storage_dims());
        matrix.resize(producer_storage_dims());
        for (size_t i = 0; i < producer_storage_dims(); i++) {
            matrix[i].resize(other.consumer_loop_dims());
            for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
                matrix[i][j] = OptionalRational{true, 0, 1};
                for (size_t k = 0; k < consumer_loop_dims(); k++) {
                    matrix[i][j] += (*this)(i, k) * other(k, j);
                }
            }
        }
        LoadJacobian result(std::move(matrix), count() * other.count());
        return result;
    }

    void dump(const char *prefix) const;
};

// Classes to represent a concrete set of bounds for a Func. A Span is
// single-dimensional, and a Bound is a multi-dimensional box. For
// each dimension we track the estimated size, and also whether or not
// the size is known to be constant at compile-time. For each Func we
// track three different types of bounds:

// 1) The region required by consumers of the Func, which determines
// 2) The region actually computed, which in turn determines
// 3) The min and max of all loops in the loop nest.

// 3 in turn determines the region required of the inputs to a Func,
// which determines their region computed, and hence their loop nest,
// and so on back up the Function DAG from outputs back to inputs.
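//
// A concrete example (illustrative only, not part of the original header):
// suppose a consumer g(x) = f(x - 1) + f(x + 1) is estimated to produce the
// region [0, 99]. Then:
//   1) the region required of f is [-1, 100];
//   2) if f is a simple pointwise Func, its region computed equals its
//      region required, [-1, 100];
//   3) so f's pure loop over x runs over [-1, 100], and that loop in turn
//      determines the region required of whatever f itself reads.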

class Span {
    int64_t min_, max_;
    bool constant_extent_;

public:
    int64_t min() const {
        return min_;
    }
    int64_t max() const {
        return max_;
    }
    int64_t extent() const {
        return max_ - min_ + 1;
    }
    bool constant_extent() const {
        return constant_extent_;
    }

    void union_with(const Span &other) {
        min_ = std::min(min_, other.min());
        max_ = std::max(max_, other.max());
        constant_extent_ = constant_extent_ && other.constant_extent();
    }

    void set_extent(int64_t e) {
        max_ = min_ + e - 1;
    }

    void translate(int64_t x) {
        min_ += x;
        max_ += x;
    }

    Span(int64_t a, int64_t b, bool c)
        : min_(a), max_(b), constant_extent_(c) {
    }
    Span() = default;
    Span(const Span &other) = default;
    static Span empty_span() {
        return Span(INT64_MAX, INT64_MIN, true);
    }
};

// Bounds objects are created and destroyed very frequently while
// exploring scheduling options, so we have a custom allocator and
// memory pool. Much like IR nodes, we treat them as immutable once
// created and wrapped in a Bound object so that they can be shared
// safely across scheduling alternatives.
struct BoundContents {
    mutable RefCount ref_count;

    class Layout;
    const Layout *layout = nullptr;

    Span *data() const {
        // This struct is a header
        return (Span *)(const_cast<BoundContents *>(this) + 1);
    }

    Span &region_required(int i) {
        return data()[i];
    }

    Span &region_computed(int i) {
        return data()[i + layout->computed_offset];
    }

    Span &loops(int i, int j) {
        return data()[j + layout->loop_offset[i]];
    }

    const Span &region_required(int i) const {
        return data()[i];
    }

    const Span &region_computed(int i) const {
        return data()[i + layout->computed_offset];
    }

    const Span &loops(int i, int j) const {
        return data()[j + layout->loop_offset[i]];
    }

    BoundContents *make_copy() const {
        auto *b = layout->make();
        size_t bytes = sizeof(data()[0]) * layout->total_size;
        memcpy(b->data(), data(), bytes);
        return b;
    }

    void validate() const;

    // We're frequently going to need to make these concrete bounds
    // arrays. It makes things more efficient if we figure out the
    // memory layout of those data structures once ahead of time, and
    // make each individual instance just use that. Note that this is
    // not thread-safe.
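    //
    // Illustrative layout (not part of the original header) for a Func with
    // 2 dimensions and 2 stages whose loop nests have 2 and 3 loops:
    //
    //     data()[0 .. 1]                  region_required (one Span per dim)
    //     data()[computed_offset .. +1]   region_computed (one Span per dim)
    //     data()[loop_offset[0] .. +1]    loops of stage 0
    //     data()[loop_offset[1] .. +2]    loops of stage 1
    //
    // total_size is the sum of all of these, and the Spans themselves live
    // immediately after the BoundContents header (see data() above).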
    class Layout {
        // A memory pool of free BoundContents objects with this layout
        mutable std::vector<BoundContents *> pool;

        // All the blocks of memory allocated
        mutable std::vector<void *> blocks;

        mutable size_t num_live = 0;

        void allocate_some_more() const;

    public:
        // number of Span to allocate
        int total_size;

        // region_required has size func->dimensions() and comes first in the memory layout

        // region_computed comes next at the following index
        int computed_offset;

        // the loop for each stage starts at the following index
        std::vector<int> loop_offset;

        Layout() = default;
        ~Layout();

        Layout(const Layout &) = delete;
        void operator=(const Layout &) = delete;
        Layout(Layout &&) = delete;
        void operator=(Layout &&) = delete;

        // Make a BoundContents object with this layout
        BoundContents *make() const;

        // Release a BoundContents object with this layout back to the pool
        void release(const BoundContents *b) const;
    };
};

using Bound = IntrusivePtr<const BoundContents>;

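// Illustrative usage (not part of the original header): a fresh set of
// bounds is allocated from the pool via a Func's precomputed layout and then
// wrapped in a reference-counted Bound so it can be shared freely:
//
//     BoundContents *b = node->make_bound();    // see Node::make_bound below
//     b->region_required(0) = Span(0, 99, true);
//     Bound bound(b);                           // treated as immutable from here on
//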
// A representation of the function DAG. The nodes and edges are both
// in reverse realization order, so if you want to walk backwards up
// the DAG, just iterate the nodes or edges in-order.
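//
// For example (illustrative only, not part of the original header), a
// backwards walk over the pipeline, from the outputs towards the inputs, is
// simply:
//
//     for (const auto &n : dag.nodes) {
//         // outputs are visited first, pure inputs last
//     }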
struct FunctionDAG {

    // An edge is a producer-consumer relationship
    struct Edge;

    struct SymbolicInterval {
        Halide::Var min;
        Halide::Var max;
    };

    // A Node represents a single Func
    struct Node {
        // A pointer back to the owning DAG
        FunctionDAG *dag;

        // The Halide Func this represents
        Function func;

        // The number of bytes per point stored.
        double bytes_per_point;

        // The min/max variables used to denote a symbolic region of
        // this Func. Used in the cost above, and in the Edges below.
        vector<SymbolicInterval> region_required;

        // A concrete region required from a bounds estimate. Only
        // defined for outputs.
        vector<Span> estimated_region_required;

        // The region computed of a Func, in terms of the region
        // required. For simple Funcs this is identical to the
        // region_required. However, in some Funcs computing one
        // output requires computing other outputs too. You can't
        // really ask for a single output pixel from something blurred
        // with an IIR without computing the others, for example.
        struct RegionComputedInfo {
            // The min and max in their full symbolic glory. We use
            // these in the general case.
            Interval in;

            // Analysis used to accelerate common cases
            bool equals_required = false, equals_union_of_required_with_constants = false;
            int64_t c_min = 0, c_max = 0;
        };
        vector<RegionComputedInfo> region_computed;
        bool region_computed_all_common_cases = false;

        // Expand a region required into a region computed, using the
        // symbolic intervals above.
        void required_to_computed(const Span *required, Span *computed) const;

        // Metadata about one symbolic loop in a Func's default loop nest.
        struct Loop {
            string var;
            bool pure, rvar;
            Expr min, max;

            // Which pure dimension does this loop correspond to? Invalid if it's an rvar
            int pure_dim;

            // Precomputed metadata to accelerate common cases:

            // If true, the loop bounds are just the region computed in the given dimension
            bool equals_region_computed = false;
            int region_computed_dim = 0;

            // If true, the loop bounds are a constant with the given min and max
            bool bounds_are_constant = false;
            int64_t c_min = 0, c_max = 0;

            // A persistent fragment of source for getting this Var
            // from its owner Func. Used for printing source code
            // equivalent to a computed schedule.
            string accessor;
        };

        // Get the loop nest shape as a function of the region computed
        void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;
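        // Illustrative usage of the two helpers above (not part of the
        // original header), assuming `required`, `computed`, and `loop` are
        // caller-allocated arrays of the appropriate sizes:
        //
        //     Span required[2] = {Span(0, 99, true), Span(0, 99, true)};
        //     Span computed[2], loop[2];
        //     node->required_to_computed(required, computed);
        //     node->loop_nest_for_region(0, computed, loop);
        //
        // `loop` now holds the bounds of stage 0's loop nest when the node
        // computes the region `computed`.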

        // One stage of a Func
        struct Stage {
            // The owning Node
            Node *node;

            // Which stage of the Func is this. 0 = pure.
            int index;

            // The loop nest that computes this stage, from innermost out.
            vector<Loop> loop;
            bool loop_nest_all_common_cases = false;

            // The vectorization width that will be used for
            // compute. Corresponds to the natural width for the
            // narrowest type used.
            int vector_size;

            // The featurization of the compute done
            PipelineFeatures features;

            // The actual Halide front-end stage object
            Halide::Stage stage;

            // The name for scheduling (e.g. "foo.update(3)")
            string name;

            // Ids for perfect hashing on stages.
            int id, max_id;

            vector<Edge *> incoming_edges;

            vector<bool> dependencies;
            bool downstream_of(const Node &n) const {
                return dependencies[n.id];
            };

            Stage(Halide::Stage s)
                : stage(std::move(s)) {
            }
        };
        vector<Stage> stages;

        vector<Edge *> outgoing_edges;

        // Max vector size across the stages
        int vector_size;

        // A unique ID for this node, allocated consecutively starting
        // at zero for each pipeline.
        int id, max_id;

        // Just func->dimensions(), but we ask for it so many times
        // that it's worth avoiding the function call into
        // libHalide.
        int dimensions;

        // Is a single pointwise call to another Func
        bool is_wrapper;

        // We represent the input buffers as nodes, though we do not attempt to schedule them.
        bool is_input;

        // Is one of the pipeline outputs
        bool is_output;

        // Only uses pointwise calls
        bool is_pointwise;

        // Only uses pointwise calls + clamping on all indices
        bool is_boundary_condition;

        std::unique_ptr<BoundContents::Layout> bounds_memory_layout;

        BoundContents *make_bound() const {
            return bounds_memory_layout->make();
        }
    };

    // A representation of a producer-consumer relationship
    struct Edge {
        struct BoundInfo {
            // The symbolic expression for the bound in this dimension
            Expr expr;

            // Fields below are the results of additional analysis
            // used to evaluate this bound more quickly.
            int64_t coeff, constant;
            int64_t consumer_dim;
            bool affine, uses_max;

            BoundInfo(const Expr &e, const Node::Stage &consumer);
        };

        // Memory footprint on producer required by consumer.
        vector<pair<BoundInfo, BoundInfo>> bounds;

        FunctionDAG::Node *producer;
        FunctionDAG::Node::Stage *consumer;

        // The number of calls the consumer makes to the producer, per
        // point in the loop nest of the consumer.
        int calls;

        bool all_bounds_affine;

        vector<LoadJacobian> load_jacobians;

        bool all_load_jacobian_coefficients_exist() const;

        // Given a loop nest of the consumer stage, expand a region
        // required of the producer to be large enough to include all
        // points required.
        void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
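
        // Illustrative usage (not part of the original header): to compute
        // the producer region implied by a concrete consumer loop nest,
        // start from an empty box and let the edge grow it:
        //
        //     Span producer_required[2] = {Span::empty_span(), Span::empty_span()};
        //     edge->expand_footprint(consumer_loop, producer_required);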
    };

    vector<Node> nodes;
    vector<Edge> edges;

    // Create the function DAG, and do all the dependency and cost
    // analysis. This is done once up-front before the tree search.
    FunctionDAG(const vector<Function> &outputs, const MachineParams &params, const Target &target);
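    // Illustrative construction (not part of the original header), assuming
    // the caller already has the pipeline's output Functions, a MachineParams
    // describing the machine, and a codegen Target:
    //
    //     FunctionDAG dag(outputs, params, target);
    //     dag.dump();   // print the nodes, edges, and featurization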

    void dump() const;
    std::ostream &dump(std::ostream &os) const;

private:
    // Compute the featurization for the entire DAG
    void featurize();

    template<typename OS>
    void dump_internal(OS &os) const;

public:
    // This class uses a lot of internal pointers, so we'll make it uncopyable/unmovable.
    FunctionDAG(const FunctionDAG &other) = delete;
    FunctionDAG &operator=(const FunctionDAG &other) = delete;
    FunctionDAG(FunctionDAG &&other) = delete;
    FunctionDAG &operator=(FunctionDAG &&other) = delete;
};

} // namespace Autoscheduler
} // namespace Internal
} // namespace Halide

#endif // FUNCTION_DAG_H