// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H

namespace Eigen {

/** \class TensorInflation
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor inflation class.
  *
  * Inflation expands a tensor by inserting strides[i]-1 implicit zeros between
  * consecutive coefficients along dimension i, so a dimension of size d becomes
  * (d - 1) * strides[i] + 1. For example, a 1-D tensor [1, 2, 3] inflated with
  * a stride of 2 reads as [1, 0, 2, 0, 3].
  */
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorInflationOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Strides, typename XprType>
struct eval<TensorInflationOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorInflationOp<Strides, XprType>& type;
};

template<typename Strides, typename XprType>
struct nested<TensorInflationOp<Strides, XprType>, 1, typename eval<TensorInflationOp<Strides, XprType> >::type>
{
  typedef TensorInflationOp<Strides, XprType> type;
};

}  // end namespace internal

template<typename Strides, typename XprType>
class TensorInflationOp : public TensorBase<TensorInflationOp<Strides, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorInflationOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorInflationOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorInflationOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorInflationOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides)
        : m_xpr(expr), m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_strides;
};
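
// A minimal usage sketch (not part of the implementation), assuming the
// TensorBase::inflate() accessor that constructs this expression: inflating a
// 1-D tensor of size 3 with a stride of 2 inserts one zero between each pair
// of input coefficients.
//
//   Eigen::Tensor<float, 1> input(3);
//   input.setValues({1.f, 2.f, 3.f});
//   Eigen::array<Eigen::DenseIndex, 1> strides{{2}};
//   Eigen::Tensor<float, 1> inflated = input.inflate(strides);
//   // inflated has size (3 - 1) * 2 + 1 = 5 and reads {1, 0, 2, 0, 3}.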

// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
{
  typedef TensorInflationOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_strides(op.strides())
  {
    m_dimensions = m_impl.dimensions();
    // Expand each dimension to the inflated dimension.
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1;
    }

    // Remember the strides for fast division.
    for (int i = 0; i < NumDims; ++i) {
      m_fastStrides[i] = internal::TensorIntDivisor<Index>(m_strides[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
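
  // Note on the mapping below: an output coordinate c along dimension i comes
  // from the input only when c is a multiple of m_strides[i], in which case the
  // input coordinate is c / m_strides[i]; every other output position is a
  // zero-filled hole. For example, with a stride of 2, output coordinate 3
  // fails the test 3 / 2 * 2 == 3 and is a hole, while output coordinate 4 maps
  // to input coordinate 2. The divisions go through the precomputed
  // TensorIntDivisor objects in m_fastStrides to keep them cheap.
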
  // Computes the input index given the output index. Returns true if the output
  // index doesn't fall into a hole.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const
  {
    eigen_assert(index < dimensions().TotalSize());
    *inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[0] * m_strides[0]) {
        return false;
      }
      *inputIndex += index / m_strides[0];
      return true;
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
          return false;
        }
        *inputIndex += idx / m_strides[i] * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) {
        return false;
      }
      *inputIndex += index / m_strides[NumDims - 1];
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
    if (getInputIndex(index, &inputIndex)) {
      return m_impl.coeff(inputIndex);
    } else {
      return Scalar(0);
    }
  }

  // TODO(yangke): optimize this function so that we can detect and produce
  // all-zero packets
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = NumDims * (3 * TensorOpCost::DivCost<Index>() +
                                           3 * TensorOpCost::MulCost<Index>() +
                                           2 * TensorOpCost::AddCost<Index>());
    const double input_size = m_impl.dimensions().TotalSize();
    const double output_size = m_dimensions.TotalSize();
    if (output_size == 0)
      return TensorOpCost();
    return m_impl.costPerCoeff(vectorized) +
        TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0,
                     compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Strides m_strides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastStrides;
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H