36 #ifndef EIGEN_HALF_CUDA_H 37 #define EIGEN_HALF_CUDA_H 39 #if __cplusplus > 199711L 40 #define EIGEN_EXPLICIT_CAST(tgt_type) explicit operator tgt_type() 42 #define EIGEN_EXPLICIT_CAST(tgt_type) operator tgt_type() 50 #if !defined(EIGEN_HAS_CUDA_FP16) 54 EIGEN_DEVICE_FUNC __half() {}
55 explicit EIGEN_DEVICE_FUNC __half(
unsigned short raw) : x(raw) {}
61 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(
unsigned short x);
62 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(
float ff);
63 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
float half_to_float(__half h);
66 struct half :
public __half {
67 EIGEN_DEVICE_FUNC half() {}
69 EIGEN_DEVICE_FUNC half(
const __half& h) : __half(h) {}
70 EIGEN_DEVICE_FUNC half(
const half& h) : __half(h) {}
72 explicit EIGEN_DEVICE_FUNC half(
bool b)
73 : __half(raw_uint16_to_half(b ? 0x3c00 : 0)) {}
75 explicit EIGEN_DEVICE_FUNC half(
const T& val)
76 : __half(float_to_half_rtne(static_cast<float>(val))) {}
77 explicit EIGEN_DEVICE_FUNC half(
float f)
78 : __half(float_to_half_rtne(f)) {}
80 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
bool)
const {
82 return (x & 0x7fff) != 0;
84 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
signed char)
const {
85 return static_cast<signed char>(half_to_float(*
this));
87 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned char)
const {
88 return static_cast<unsigned char>(half_to_float(*
this));
90 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
short)
const {
91 return static_cast<short>(half_to_float(*
this));
93 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned short)
const {
94 return static_cast<unsigned short>(half_to_float(*
this));
96 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
int)
const {
97 return static_cast<int>(half_to_float(*
this));
99 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned int)
const {
100 return static_cast<unsigned int>(half_to_float(*
this));
102 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
long)
const {
103 return static_cast<long>(half_to_float(*
this));
105 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned long)
const {
106 return static_cast<unsigned long>(half_to_float(*
this));
108 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
long long)
const {
109 return static_cast<long long>(half_to_float(*
this));
111 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
unsigned long long)
const {
112 return static_cast<unsigned long long>(half_to_float(*
this));
114 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
float)
const {
115 return half_to_float(*
this);
117 EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(
double)
const {
118 return static_cast<double>(half_to_float(*
this));
121 EIGEN_DEVICE_FUNC half& operator=(
const half& other) {
127 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 134 __device__ half operator + (
const half& a,
const half& b) {
137 __device__ half
operator * (
const half& a,
const half& b) {
140 __device__ half operator - (
const half& a,
const half& b) {
143 __device__ half operator / (
const half& a,
const half& b) {
144 float num = __half2float(a);
145 float denom = __half2float(b);
146 return __float2half(num / denom);
148 __device__ half operator - (
const half& a) {
151 __device__ half& operator += (half& a,
const half& b) {
155 __device__ half& operator *= (half& a,
const half& b) {
159 __device__ half& operator -= (half& a,
const half& b) {
163 __device__ half& operator /= (half& a,
const half& b) {
167 __device__
bool operator == (
const half& a,
const half& b) {
170 __device__
bool operator != (
const half& a,
const half& b) {
173 __device__
bool operator < (
const half& a,
const half& b) {
176 __device__
bool operator <= (
const half& a,
const half& b) {
179 __device__
bool operator > (
const half& a,
const half& b) {
182 __device__
bool operator >= (
const half& a,
const half& b) {
186 #else // Emulate support for half floats 191 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (
const half& a,
const half& b) {
192 return half(
float(a) +
float(b));
194 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
operator * (
const half& a,
const half& b) {
195 return half(
float(a) *
float(b));
197 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (
const half& a,
const half& b) {
198 return half(
float(a) -
float(b));
200 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (
const half& a,
const half& b) {
201 return half(
float(a) /
float(b));
203 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator - (
const half& a) {
205 result.x = a.x ^ 0x8000;
208 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator += (half& a,
const half& b) {
209 a = half(
float(a) +
float(b));
212 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator *= (half& a,
const half& b) {
213 a = half(
float(a) *
float(b));
216 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator -= (half& a,
const half& b) {
217 a = half(
float(a) -
float(b));
220 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a,
const half& b) {
221 a = half(
float(a) /
float(b));
224 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator == (
const half& a,
const half& b) {
225 return float(a) == float(b);
227 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator != (
const half& a,
const half& b) {
228 return float(a) != float(b);
230 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator < (
const half& a,
const half& b) {
231 return float(a) < float(b);
233 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator <= (
const half& a,
const half& b) {
234 return float(a) <= float(b);
236 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator > (
const half& a,
const half& b) {
237 return float(a) > float(b);
239 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool operator >= (
const half& a,
const half& b) {
240 return float(a) >= float(b);
243 #endif // Emulate support for half floats 247 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (
const half& a,
Index b) {
248 return half(static_cast<float>(a) / static_cast<float>(b));
256 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(
unsigned short x) {
267 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(
float ff) {
268 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 269 return __float2half(ff);
271 #elif defined(EIGEN_HAS_FP16_C) 273 h.x = _cvtss_sh(ff, 0);
279 const FP32 f32infty = { 255 << 23 };
280 const FP32 f16max = { (127 + 16) << 23 };
281 const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
282 unsigned int sign_mask = 0x80000000u;
284 o.x =
static_cast<unsigned short>(0x0u);
286 unsigned int sign = f.u & sign_mask;
294 if (f.u >= f16max.u) {
295 o.x = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;
297 if (f.u < (113 << 23)) {
301 f.f += denorm_magic.f;
304 o.x =
static_cast<unsigned short>(f.u - denorm_magic.u);
306 unsigned int mant_odd = (f.u >> 13) & 1;
309 f.u += ((
unsigned int)(15 - 127) << 23) + 0xfff;
313 o.x =
static_cast<unsigned short>(f.u >> 13);
317 o.x |=
static_cast<unsigned short>(sign >> 16);
322 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
float half_to_float(__half h) {
323 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 324 return __half2float(h);
326 #elif defined(EIGEN_HAS_FP16_C) 327 return _cvtsh_ss(h.x);
330 const FP32 magic = { 113 << 23 };
331 const unsigned int shifted_exp = 0x7c00 << 13;
334 o.u = (h.x & 0x7fff) << 13;
335 unsigned int exp = shifted_exp & o.u;
336 o.u += (127 - 15) << 23;
339 if (exp == shifted_exp) {
340 o.u += (128 - 16) << 23;
341 }
else if (exp == 0) {
346 o.u |= (h.x & 0x8000) << 16;
353 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (
isinf)(
const half& a) {
354 return (a.x & 0x7fff) == 0x7c00;
356 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (
isnan)(
const half& a) {
357 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 360 return (a.x & 0x7fff) > 0x7c00;
363 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (
isfinite)(
const half& a) {
364 return !(
isinf EIGEN_NOT_A_MACRO (a)) && !(
isnan EIGEN_NOT_A_MACRO (a));
367 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
abs(
const half& a) {
369 result.x = a.x & 0x7FFF;
372 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
exp(
const half& a) {
373 return half(::expf(
float(a)));
375 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
log(
const half& a) {
376 return half(::logf(
float(a)));
378 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
log10(
const half& a) {
379 return half(::log10f(
float(a)));
381 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
sqrt(
const half& a) {
382 return half(::sqrtf(
float(a)));
384 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(
const half& a,
const half& b) {
385 return half(::powf(
float(a),
float(b)));
387 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
sin(
const half& a) {
388 return half(::sinf(
float(a)));
390 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
cos(
const half& a) {
391 return half(::cosf(
float(a)));
393 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
tan(
const half& a) {
394 return half(::tanf(
float(a)));
396 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
tanh(
const half& a) {
397 return half(::tanhf(
float(a)));
399 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
floor(
const half& a) {
400 return half(::floorf(
float(a)));
402 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half
ceil(
const half& a) {
403 return half(::ceilf(
float(a)));
406 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(
const half& a,
const half& b) {
407 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 408 return __hlt(b, a) ? b : a;
410 const float f1 =
static_cast<float>(a);
411 const float f2 =
static_cast<float>(b);
412 return f2 < f1 ? b : a;
415 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(
const half& a,
const half& b) {
416 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 417 return __hlt(a, b) ? b : a;
419 const float f1 =
static_cast<float>(a);
420 const float f2 =
static_cast<float>(b);
421 return f1 < f2 ? b : a;
425 EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os,
const half& v) {
426 os << static_cast<float>(v);
433 using half_impl::half;
438 struct random_default_impl<half_impl::half, false, false>
440 static inline half run(
const half& x,
const half& y)
442 return x + (y-x) * half(
float(std::rand()) / float(RAND_MAX));
444 static inline half run()
446 return run(half(-1.f), half(1.f));
450 template<>
struct is_arithmetic<half_impl::half> {
enum { value =
true }; };
454 template<>
struct NumTraits<
Eigen::half_impl::half>
455 : GenericNumTraits<Eigen::half_impl::half>
457 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half epsilon() {
458 return half_impl::raw_uint16_to_half(0x0800);
460 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half dummy_precision() {
return half_impl::half(1e-2f); }
461 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half highest() {
462 return half_impl::raw_uint16_to_half(0x7bff);
464 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half lowest() {
465 return half_impl::raw_uint16_to_half(0xfbff);
467 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half infinity() {
468 return half_impl::raw_uint16_to_half(0x7c00);
470 EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Eigen::half_impl::half quiet_NaN() {
471 return half_impl::raw_uint16_to_half(0x7c01);
478 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half fabsh(
const Eigen::half& a) {
480 result.x = a.x & 0x7FFF;
483 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(
const Eigen::half& a) {
484 return Eigen::half(::expf(
float(a)));
486 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(
const Eigen::half& a) {
487 return Eigen::half(::logf(
float(a)));
489 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(
const Eigen::half& a) {
490 return Eigen::half(::sqrtf(
float(a)));
492 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(
const Eigen::half& a,
const Eigen::half& b) {
493 return Eigen::half(::powf(
float(a),
float(b)));
495 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(
const Eigen::half& a) {
496 return Eigen::half(::floorf(
float(a)));
498 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ceilh(
const Eigen::half& a) {
499 return Eigen::half(::ceilf(
float(a)));
504 #if __cplusplus > 199711L 506 struct hash<
Eigen::half> {
507 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t operator()(
const Eigen::half& a)
const {
508 return static_cast<std::size_t
>(a.x);
517 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 518 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var,
int laneMask,
int width=warpSize) {
519 return static_cast<Eigen::half
>(__shfl_xor(static_cast<float>(var), laneMask, width));
524 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 525 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(
const Eigen::half* ptr) {
526 return Eigen::internal::raw_uint16_to_half(
527 __ldg(reinterpret_cast<const unsigned short*>(ptr)));
531 #endif // EIGEN_HALF_CUDA_H const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_isfinite_op< typename Derived::Scalar >, const Derived > isfinite(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)
Namespace containing all symbols from the Eigen library.
Definition: Core:271
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_ceil_op< typename Derived::Scalar >, const Derived > ceil(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_isnan_op< typename Derived::Scalar >, const Derived > isnan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Product< MatrixDerived, PermutationDerived, AliasFreeProduct > operator*(const MatrixBase< MatrixDerived > &matrix, const PermutationBase< PermutationDerived > &permutation)
Definition: PermutationMatrix.h:543
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: XprHelper.h:35
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_floor_op< typename Derived::Scalar >, const Derived > floor(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_isinf_op< typename Derived::Scalar >, const Derived > isinf(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_abs_op< typename Derived::Scalar >, const Derived > abs(const Eigen::ArrayBase< Derived > &x)
Definition: Eigen_Colamd.h:50
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sign_op< typename Derived::Scalar >, const Derived > sign(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)