// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

/** \class TensorChippingOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor.
  *
  */
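// Usage sketch (illustrative only; assumes a rank-3 float tensor):
//
//   Eigen::Tensor<float, 3> input(2, 3, 5);
//   input.setRandom();
//   // chip(offset, dim) fixes dimension `dim` at index `offset` and drops it,
//   // so the expression below has rank 2 and dimensions (2, 5).
//   Eigen::Tensor<float, 2> slice = input.chip(1, 1);
//   // When the chipped dimension is known at compile time, the template form
//   // yields a TensorChippingOp with a static DimId instead of Dynamic.
//   Eigen::Tensor<float, 2> slice_static = input.chip<1>(1);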

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions - 1;
  static const int Layout = XprTraits::Layout;
};

template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType>& type;
};

template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};


}  // end namespace internal



template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
 public:
  typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
      : m_xpr(expr), m_offset(offset), m_dim(dim) {
  }

  EIGEN_DEVICE_FUNC
  const Index offset() const { return m_offset; }
  EIGEN_DEVICE_FUNC
  const Index dim() const { return m_dim.actualDim(); }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
  {
    typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

 protected:
  typename XprType::Nested m_xpr;
  const Index m_offset;
  const internal::DimensionId<DimId> m_dim;
};


// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;


  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // Cross the stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      cost += TensorOpCost::AddCost<Index>();
    } else {
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
    if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
         (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
        result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }

 protected:
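  // Maps a coefficient index of the rank-(NumDims) chipped expression back to
  // a coefficient index of the rank-(NumInputDims) input expression. In the
  // general case, with idx = index / m_stride and rem = index % m_stride,
  //   inputIndex = idx * m_inputStride + m_inputOffset + rem.
  // The first two branches below are fast paths that avoid the integer
  // division when the chipped dimension is the innermost or the outermost
  // one in memory.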
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device& m_device;
};


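// Write-path usage sketch (illustrative; assumes rank-3 and rank-2 float tensors):
//
//   Eigen::Tensor<float, 3> buffer(2, 3, 5);
//   Eigen::Tensor<float, 2> plane(2, 5);
//   plane.setRandom();
//   // Assigning through a chip overwrites the slice of `buffer` obtained by
//   // fixing dimension 1 at offset 0; this goes through the lvalue evaluator
//   // below via coeffRef()/writePacket().
//   buffer.chip(0, 1) = plane;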
// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static const int NumDims = NumInputDims-1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)

    if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
        (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
               (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // Cross the stride boundary. Fall back to the slow path.
        EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
};


}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H