// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H

namespace Eigen {

/** \class TensorConversionOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor conversion class. This class makes it possible to vectorize
  * type casting operations when the number of scalars per packet in the source
  * and the destination type differ.
  */
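//
// A minimal usage sketch (illustrative, assuming the usual
// TensorBase::cast<NewType>() entry point, which returns a TensorConversionOp):
//
//   Eigen::Tensor<double, 2> d(32, 32);
//   d.setRandom();
//   // The cast is evaluated lazily during the assignment below, and is
//   // vectorized when the packet types of double and float allow it.
//   Eigen::Tensor<float, 2> f = d.cast<float>();
//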
namespace internal {
template<typename TargetType, typename XprType>
struct traits<TensorConversionOp<TargetType, XprType> >
{
  // Type promotion to handle the case where the types of the lhs and the rhs are different.
  typedef TargetType Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
  enum { Flags = 0 };
};

template<typename TargetType, typename XprType>
struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
{
  typedef const TensorConversionOp<TargetType, XprType>& type;
};

template<typename TargetType, typename XprType>
struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
{
  typedef TensorConversionOp<TargetType, XprType> type;
};

}  // end namespace internal


template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
  }

 private:
  const TensorEvaluator& m_impl;
};


template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};

template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};
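// Illustrative note on the ratios (supplied by internal::type_casting_traits):
// with SSE, for example, casting double -> float consumes two Packet2d source
// packets per Packet4f result, which selects the <2, 1> specialization above:
//
//   Packet4f out = internal::pcast<Packet2d, Packet4f>(src1, src2);
//
// The opposite float -> double direction is the <1, 2> case handled below.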
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore.
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {
      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
      typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
      typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
      internal::scalar_cast_op<SrcType, TgtType> converter;
      EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
      for (int i = 0; i < TgtPacketSize; ++i) {
        values[i] = converter(m_impl.coeff(index+i));
      }
      TgtPacket rslt = internal::pload<TgtPacket>(values);
      return rslt;
    }
  }

 private:
  const TensorEvaluator& m_impl;
  const typename TensorEvaluator::Index m_maxIndex;
};

template<typename TargetType, typename XprType>
class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorConversionOp>::Index Index;
    typedef typename internal::nested<TensorConversionOp>::type Nested;
    typedef Scalar CoeffReturnType;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
        : m_xpr(xpr) {}

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};

template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
    impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
};

template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
    return impl.evalSubExprsIfNeeded(data);
  }
};
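// Illustrative note: when SameType is true the conversion is an identity, so
// the sub-expression may evaluate directly into the caller-provided buffer,
// and its return value tells the caller whether it still needs to pull
// coefficients itself. When the types differ, the destination buffer has the
// wrong scalar type for the sub-expression, so the latter evaluates on its
// own and the cast is applied per coefficient (or per packet) afterwards.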

// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
  typedef TensorConversionOp<TargetType, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef TargetType Scalar;
  typedef TargetType CoeffReturnType;
  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_impl(op.expression(), device)
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
  {
    return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
  {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    return converter(m_impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
                              internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
    return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
    if (vectorized) {
      const double SrcCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const double TgtCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
             TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
    } else {
      return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
    }
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

 protected:
  template <int LoadMode, bool ActuallyVectorize>
  struct PacketConv {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      internal::scalar_cast_op<SrcType, TargetType> converter;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = converter(impl.coeff(index+i));
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  template <int LoadMode>
  struct PacketConv<LoadMode, true> {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
                      SrcCoeffRatio, TgtCoeffRatio> converter(impl);
      return converter.template packet<LoadMode>(index);
    }
  };

  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H