Halide  12.0.1
Halide compiler and libraries
Float16.h
Go to the documentation of this file.
1 #ifndef HALIDE_FLOAT16_H
2 #define HALIDE_FLOAT16_H
3 
5 #include <cstdint>
6 #include <string>
7 
8 namespace Halide {
9 
10 /** Class that provides a type that implements half precision
11  * floating point (IEEE754 2008 binary16) in software.
12  *
13  * This type is enforced to be 16-bits wide and maintains no state
14  * other than the raw IEEE754 binary16 bits so that it can be passed
15  * to code that checks a type's size and used for halide_buffer_t allocation.
16  * */
17 struct float16_t {
18 
19  static const int mantissa_bits = 10;
20  static const uint16_t sign_mask = 0x8000;
21  static const uint16_t exponent_mask = 0x7c00;
22  static const uint16_t mantissa_mask = 0x03ff;
23 
24  /// \name Constructors
25  /// @{
26 
27  /** Construct from a float, double, or int using
28  * round-to-nearest-ties-to-even. Out-of-range values become +/-
29  * infinity.
30  */
31  // @{
32  explicit float16_t(float value);
33  explicit float16_t(double value);
34  explicit float16_t(int value);
35  // @}
36 
37  /** Construct a float16_t with the bits initialised to 0. This represents
38  * positive zero.*/
39  float16_t() = default;
40 
41  /// @}
42 
43  // Use explicit to avoid accidentally raising the precision
44  /** Cast to float */
45  explicit operator float() const;
46  /** Cast to double */
47  explicit operator double() const;
48  /** Cast to int */
49  explicit operator int() const;
50 
51  /** Get a new float16_t that represents a special value */
52  // @{
53  static float16_t make_zero();
57  static float16_t make_nan();
58  // @}
59 
60  /** Get a new float16_t with the given raw bits
61  *
62  * \param bits The bits conformant to IEEE754 binary16
63  */
65 
66  /** Return a new float16_t with a negated sign bit*/
68 
69  /** Arithmetic operators. */
70  // @{
76  return (*this = *this + rhs);
77  }
79  return (*this = *this - rhs);
80  }
82  return (*this = *this * rhs);
83  }
85  return (*this = *this / rhs);
86  }
87  // @}
88 
89  /** Comparison operators */
90  // @{
91  bool operator==(float16_t rhs) const;
92  bool operator!=(float16_t rhs) const {
93  return !(*this == rhs);
94  }
95  bool operator>(float16_t rhs) const;
96  bool operator<(float16_t rhs) const;
97  bool operator>=(float16_t rhs) const {
98  return (*this > rhs) || (*this == rhs);
99  }
100  bool operator<=(float16_t rhs) const {
101  return (*this < rhs) || (*this == rhs);
102  }
103  // @}
104 
105  /** Properties */
106  // @{
107  bool is_nan() const;
108  bool is_infinity() const;
109  bool is_negative() const;
110  bool is_zero() const;
111  // @}
112 
113  /** Returns the bits that represent this float16_t.
114  *
115  * An alternative method to access the bits is to cast a pointer
116  * to this instance as a pointer to a uint16_t.
117  **/
118  uint16_t to_bits() const;
119 
120 private:
121  // The raw bits.
122  uint16_t data = 0;
123 };
124 
125 static_assert(sizeof(float16_t) == 2, "float16_t should occupy two bytes");
126 
127 } // namespace Halide
128 
129 template<>
130 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<Halide::float16_t>() {
131  return halide_type_t(halide_type_float, 16);
132 }
133 
134 namespace Halide {
135 
136 /** Class that provides a type that implements half precision
137  * floating point using the bfloat16 format.
138  *
139  * This type is enforced to be 16-bits wide and maintains no state
140  * other than the raw bits so that it can be passed to code that checks
141  * a type's size and used for halide_buffer_t allocation. */
142 struct bfloat16_t {
143 
144  static const int mantissa_bits = 7;
145  static const uint16_t sign_mask = 0x8000;
146  static const uint16_t exponent_mask = 0x7f80;
147  static const uint16_t mantissa_mask = 0x007f;
148 
150 
151  /// \name Constructors
152  /// @{
153 
154  /** Construct from a float, double, or int using
155  * round-to-nearest-ties-to-even. Out-of-range values become +/-
156  * infinity.
157  */
158  // @{
159  explicit bfloat16_t(float value);
160  explicit bfloat16_t(double value);
161  explicit bfloat16_t(int value);
162  // @}
163 
164  /** Construct a bfloat16_t with the bits initialised to 0. This represents
165  * positive zero.*/
166  bfloat16_t() = default;
167 
168  /// @}
169 
170  // Use explicit to avoid accidentally raising the precision
171  /** Cast to float */
172  explicit operator float() const;
173  /** Cast to double */
174  explicit operator double() const;
175  /** Cast to int */
176  explicit operator int() const;
177 
178  /** Get a new bfloat16_t that represents a special value */
179  // @{
185  // @}
186 
187  /** Get a new bfloat16_t with the given raw bits
188  *
189  * \param bits The bits conformant to the bfloat16 format
190  */
192 
193  /** Return a new bfloat16_t with a negated sign bit*/
195 
196  /** Arithmetic operators. */
197  // @{
203  return (*this = *this + rhs);
204  }
206  return (*this = *this - rhs);
207  }
209  return (*this = *this * rhs);
210  }
212  return (*this = *this / rhs);
213  }
214  // @}
215 
216  /** Comparison operators */
217  // @{
218  bool operator==(bfloat16_t rhs) const;
219  bool operator!=(bfloat16_t rhs) const {
220  return !(*this == rhs);
221  }
222  bool operator>(bfloat16_t rhs) const;
223  bool operator<(bfloat16_t rhs) const;
224  bool operator>=(bfloat16_t rhs) const {
225  return (*this > rhs) || (*this == rhs);
226  }
227  bool operator<=(bfloat16_t rhs) const {
228  return (*this < rhs) || (*this == rhs);
229  }
230  // @}
231 
232  /** Properties */
233  // @{
234  bool is_nan() const;
235  bool is_infinity() const;
236  bool is_negative() const;
237  bool is_zero() const;
238  // @}
239 
240  /** Returns the bits that represent this bfloat16_t.
241  *
242  * An alternative method to access the bits is to cast a pointer
243  * to this instance as a pointer to a uint16_t.
244  **/
245  uint16_t to_bits() const;
246 
247 private:
248  // The raw bits.
249  uint16_t data = 0;
250 };
251 
252 static_assert(sizeof(bfloat16_t) == 2, "bfloat16_t should occupy two bytes");
253 
254 } // namespace Halide
255 
256 template<>
257 HALIDE_ALWAYS_INLINE halide_type_t halide_type_of<Halide::bfloat16_t>() {
258  return halide_type_t(halide_type_bfloat, 16);
259 }
260 
261 #endif
This file declares the routines used by Halide internally in its runtime.
@ halide_type_float
IEEE floating point numbers.
@ halide_type_bfloat
floating point numbers in the bfloat format
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:38
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
unsigned __INT16_TYPE__ uint16_t
Class that provides a type that implements half precision floating point using the bfloat16 format.
Definition: Float16.h:142
bfloat16_t operator-=(bfloat16_t rhs)
Definition: Float16.h:205
bool operator>(bfloat16_t rhs) const
bool operator<=(bfloat16_t rhs) const
Definition: Float16.h:227
static const bfloat16_t zero
Definition: Float16.h:149
bfloat16_t operator-() const
Return a new bfloat16_t with a negated sign bit.
bool operator<(bfloat16_t rhs) const
static const uint16_t exponent_mask
Definition: Float16.h:146
bfloat16_t operator*(bfloat16_t rhs) const
bool is_infinity() const
static const uint16_t sign_mask
Definition: Float16.h:145
uint16_t to_bits() const
Returns the bits that represent this bfloat16_t.
static const bfloat16_t infinity
Definition: Float16.h:149
static const uint16_t mantissa_mask
Definition: Float16.h:147
bool operator==(bfloat16_t rhs) const
Comparison operators.
static const bfloat16_t nan
Definition: Float16.h:149
bfloat16_t operator+(bfloat16_t rhs) const
Arithmetic operators.
bfloat16_t operator*=(bfloat16_t rhs)
Definition: Float16.h:208
static const bfloat16_t negative_zero
Definition: Float16.h:149
bfloat16_t operator-(bfloat16_t rhs) const
bfloat16_t operator/=(bfloat16_t rhs)
Definition: Float16.h:211
bool is_negative() const
bfloat16_t()=default
Construct a bfloat16_t with the bits initialised to 0.
bfloat16_t operator/(bfloat16_t rhs) const
static bfloat16_t make_zero()
Get a new bfloat16_t that represents a special value.
static bfloat16_t make_negative_zero()
static const int mantissa_bits
Definition: Float16.h:144
static bfloat16_t make_infinity()
bfloat16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static bfloat16_t make_from_bits(uint16_t bits)
Get a new bfloat16_t with the given raw bits.
bool is_zero() const
bfloat16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool is_nan() const
Properties.
static bfloat16_t make_negative_infinity()
bfloat16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator!=(bfloat16_t rhs) const
Definition: Float16.h:219
static const bfloat16_t negative_infinity
Definition: Float16.h:149
bfloat16_t operator+=(bfloat16_t rhs)
Definition: Float16.h:202
bool operator>=(bfloat16_t rhs) const
Definition: Float16.h:224
static bfloat16_t make_nan()
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in software.
Definition: Float16.h:17
static float16_t make_infinity()
float16_t operator/(float16_t rhs) const
bool is_negative() const
float16_t(double value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
static const uint16_t sign_mask
Definition: Float16.h:20
float16_t operator+(float16_t rhs) const
Arithmetic operators.
bool is_zero() const
float16_t(int value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
bool operator>=(float16_t rhs) const
Definition: Float16.h:97
static float16_t make_zero()
Get a new float16_t that represents a special value.
uint16_t to_bits() const
Returns the bits that represent this float16_t.
bool operator<(float16_t rhs) const
static const uint16_t mantissa_mask
Definition: Float16.h:22
bool operator==(float16_t rhs) const
Comparison operators.
static float16_t make_negative_zero()
static float16_t make_from_bits(uint16_t bits)
Get a new float16_t with the given raw bits.
float16_t operator/=(float16_t rhs)
Definition: Float16.h:84
static float16_t make_nan()
float16_t(float value)
Construct from a float, double, or int using round-to-nearest-ties-to-even.
float16_t()=default
Construct a float16_t with the bits initialised to 0.
static const uint16_t exponent_mask
Definition: Float16.h:21
float16_t operator-(float16_t rhs) const
bool is_nan() const
Properties.
static float16_t make_negative_infinity()
static const int mantissa_bits
Definition: Float16.h:19
bool is_infinity() const
float16_t operator-=(float16_t rhs)
Definition: Float16.h:78
float16_t operator*(float16_t rhs) const
float16_t operator-() const
Return a new float16_t with a negated sign bit.
bool operator!=(float16_t rhs) const
Definition: Float16.h:92
bool operator<=(float16_t rhs) const
Definition: Float16.h:100
bool operator>(float16_t rhs) const
float16_t operator*=(float16_t rhs)
Definition: Float16.h:81
float16_t operator+=(float16_t rhs)
Definition: Float16.h:75
A runtime tag for a type in the halide type system.