#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

namespace Eigen {

// Generic evaluator for tensors that own their storage (e.g. Tensor,
// TensorMap, TensorFixedSize).
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;

  // NumDimensions is -1 for tensors of variable rank; clamp to 0 coordinates.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned = Derived::IsAligned,
    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(const_cast<Scalar*>(m.data())), m_dims(m.dimensions()), m_device(device)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
    if (dest) {
      m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
      return false;
    }
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    eigen_assert(m_data);
    return m_data[index];
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        internal::unpacket_traits<PacketReturnType>::size);
  }
  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }

 protected:
  Scalar* m_data;
  Dimensions m_dims;
  const Device& m_device;
};
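
// Illustrative usage sketch (editor's addition, not part of the upstream
// header): evaluators are normally created and driven by the executor
// machinery (TensorExecutor), but the protocol can be exercised by hand
// roughly like this:
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   if (eval.evalSubExprsIfNeeded(NULL)) {  // true: coefficients are ready
//     float first = eval.coeff(0);          // scalar access
//   }
//   eval.cleanup();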
namespace {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
}
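
// Note (editor's addition): on devices of compute capability 3.5 and above,
// the specializations above use __ldg() so that read-only tensor data is
// fetched through the GPU's read-only (texture) cache, which is safe here
// because these evaluators never write through the pointer. A rough
// standalone equivalent, shown only for illustration:
//
//   __device__ float read_only_load(const float* p) {
//   #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
//     return __ldg(p);  // cached, read-only load
//   #else
//     return *p;        // plain load on older architectures
//   #endif
//   }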
// Default evaluator for rvalues.
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;

  // NumDimensions is -1 for tensors of variable rank; clamp to 0 coordinates.
  static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                               internal::traits<Derived>::NumDimensions : 0;

  enum {
    IsAligned = Derived::IsAligned,
    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
    Layout = Derived::Layout,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(m.data()), m_dims(m.dimensions()), m_device(device)
  { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
      m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data);
    return loadConstant(m_data + index);
  }
  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                        : m_dims.IndexOfRowMajor(coords);
    return loadConstant(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        internal::unpacket_traits<PacketReturnType>::size);
  }
  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }

 protected:
  const Scalar* m_data;
  Dimensions m_dims;
  const Device& m_device;
};
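
// Illustrative sketch (editor's addition): the const specialization above is
// what the executor instantiates for read-only subexpressions, e.g. the
// right-hand side of an assignment:
//
//   Eigen::Tensor<float, 2> src(2, 3), dst(2, 3);
//   src.setRandom();
//   dst = src;  // builds a TensorEvaluator<const Tensor<float, 2>, Device>
//               // for src; its coeff() reads through loadConstant().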
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC
  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.template packetOp<Index, PacketReturnType>(index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        internal::unpacket_traits<PacketReturnType>::size);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
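
// Illustrative sketch (editor's addition): a nullary expression produces
// coefficients from its functor alone, with no input tensor data:
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t = t.constant(1.5f);  // coeff(i) simply returns m_functor(i), here a
//                          // scalar_constant_op that always yields 1.5f.
//   t = t.random();        // likewise, but with a random-number functor.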
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess &
                   internal::functor_traits<UnaryOp>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

 private:
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
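
// Illustrative sketch (editor's addition): a unary expression applies its
// functor to each coefficient (or packet) of the nested expression:
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3);
//   a.setRandom();
//   b = a.square();  // coeff(i) == m_functor(m_argImpl.coeff(i)); the packet
//                    // path is used only when both the argument evaluator
//                    // and the functor report PacketAccess.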
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  enum {
    IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned &
                TensorEvaluator<RightArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess &
                   TensorEvaluator<RightArgType, Device>::PacketAccess &
                   internal::functor_traits<BinaryOp>::PacketAccess,
    Layout = TensorEvaluator<LeftArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
                         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) ||
                         internal::traits<XprType>::NumDimensions <= 1),
                        YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use the right impl instead if its dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

 private:
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
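
// Illustrative sketch (editor's addition): a binary expression pulls one
// coefficient (or packet) from each side and combines them with the functor:
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), c(2, 3);
//   a.setRandom();
//   b.setRandom();
//   c = a + b;  // coeff(i) == m_functor(m_leftImpl.coeff(i),
//               //                       m_rightImpl.coeff(i))
//               // with m_functor a scalar_sum_op<float>.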
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  enum {
    IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned &
                TensorEvaluator<Arg2Type, Device>::IsAligned &
                TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &
                   TensorEvaluator<Arg2Type, Device>::PacketAccess &
                   TensorEvaluator<Arg3Type, Device>::PacketAccess &
                   internal::functor_traits<TernaryOp>::PacketAccess,
    Layout = TensorEvaluator<Arg1Type, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
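
// Illustrative sketch (editor's addition): ternary expressions combine three
// arguments coefficient-wise; betainc() is one expression that lowers to this
// node (example values for illustration only):
//
//   Eigen::Tensor<float, 2> a(2, 3), b(2, 3), x(2, 3), r(2, 3);
//   a.setConstant(0.5f); b.setConstant(2.0f); x.setConstant(0.25f);
//   r = Eigen::betainc(a, b, x);  // coeff(i) == m_functor(arg1.coeff(i),
//                                 //                       arg2.coeff(i),
//                                 //                       arg3.coeff(i))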
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
                TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                   TensorEvaluator<ElseArgType, Device>::PacketAccess &
                   internal::packet_traits<Scalar>::HasBlend,
    Layout = TensorEvaluator<IfArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
          .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }
  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
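
// Illustrative sketch (editor's addition): select() evaluates the condition
// per coefficient and picks from the "then" or "else" expression; the packet
// path gathers the conditions into a Selector and blends whole packets:
//
//   Eigen::Tensor<bool, 2> cond(2, 3);
//   Eigen::Tensor<float, 2> t(2, 3), e(2, 3), r(2, 3);
//   t.setRandom(); e.setRandom(); cond.setConstant(true);
//   r = cond.select(t, e);  // scalar path: cond ? then : else per coefficient
//                           // packet path: internal::pblend on whole packets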
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H