// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  *
  * Reinterprets the coefficients of the input expression as a tensor with the
  * given dimensions. The total number of coefficients must match that of the
  * input expression; no data is copied or reordered.
  */
namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

} // end namespace internal



template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
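
// Example usage (an illustrative sketch, not part of the library; the tensor
// names and sizes below are made up):
//
//   Eigen::Tensor<float, 2> input(4, 6);
//   input.setRandom();
//   // View the same 24 coefficients as a rank-3 tensor; nothing is copied.
//   Eigen::array<Eigen::Index, 3> new_dims{{2, 3, 4}};
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);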


// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};
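
// A reshaped expression is also writable (see the lvalue evaluator above), so
// it can appear on the left-hand side of an assignment. Illustrative sketch
// with made-up names:
//
//   Eigen::Tensor<float, 3> dst(2, 3, 4);
//   Eigen::Tensor<float, 2> src(6, 4);
//   src.setRandom();
//   Eigen::array<Eigen::Index, 2> flat{{6, 4}};
//   // Writes through the reshaped view into dst's underlying storage.
//   dst.reshape(flat) = src;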


/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  *
  * Extracts a rectangular block of coefficients from the input expression,
  * described by a start index and a size in each dimension.
  */
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

} // end namespace internal



template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }


  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
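
// Example usage (an illustrative sketch, not part of the library; the tensor
// name and dimensions below are made up):
//
//   Eigen::Tensor<float, 2> input(5, 7);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{3, 4}};
//   // The 3x4 block of `input` whose top-left corner is at (1, 2).
//   Eigen::Tensor<float, 2> block = input.slice(offsets, extents);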


// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }
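
  // Worked example (hypothetical numbers, column-major): slicing a 5x7 input
  // with startIndices (1,2) and sizes (3,4) gives m_inputStrides == {1, 5} and
  // m_outputStrides == {1, 3}. m_fastOutputStrides caches TensorIntDivisor
  // objects for the output strides so that srcCoeff() below can replace the
  // integer divisions with cheaper multiply/shift sequences.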

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};
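
// How srcCoeff() maps a linear output index back into the input, on a worked,
// hypothetical example: for a column-major 5x7 input sliced with offsets (1,2)
// and sizes (3,4), output index 4 has slice coordinates (1,1), i.e. input
// coordinates (1+1, 1+2) = (2,3), which is linear input index 2 + 3*5 = 17.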

// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};
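
// Because the lvalue evaluator above provides coeffRef()/writePacket(), a
// slice can be assigned to in place. Illustrative sketch with made-up names:
//
//   Eigen::Tensor<float, 2> canvas(5, 7);
//   canvas.setZero();
//   Eigen::Tensor<float, 2> patch(3, 4);
//   patch.setConstant(1.0f);
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{3, 4}};
//   // Overwrites the 3x4 block of `canvas` starting at (1, 2).
//   canvas.slice(offsets, extents) = patch;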



/** \class TensorStridingSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor strided slicing class.
  *
  * Extracts a slice described, for each dimension, by a start index, a stop
  * index and a stride; negative strides walk the dimension backwards.
  */
namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

} // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
        const XprType& expr, const StartIndices& startIndices,
        const StopIndices& stopIndices, const Strides& strides)
      : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
        m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
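
// Example usage (an illustrative sketch; assumes the TensorBase::stridedSlice()
// entry point and made-up names):
//
//   Eigen::Tensor<float, 1> v(10);
//   v.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{1}};
//   Eigen::array<Eigen::Index, 1> stop{{8}};
//   Eigen::array<Eigen::Index, 1> stride{{2}};
//   // Elements 1, 3, 5 and 7 of `v`: every other element of the range [1, 8).
//   Eigen::Tensor<float, 1> every_other = v.stridedSlice(start, stop, stride);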

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by assert */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
    bool degenerate = false;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        m_dimensions[i] = interval / m_strides[i]
                          + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max, value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};
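
// Negative strides walk a dimension backwards; the clamping performed in the
// constructor above makes the reversed bounds work. Illustrative sketch with
// made-up names:
//
//   Eigen::Tensor<float, 1> v(5);
//   v.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{4}};
//   Eigen::array<Eigen::Index, 1> stop{{-1}};
//   Eigen::array<Eigen::Index, 1> stride{{-1}};
//   // All five elements of `v`, in reverse order.
//   Eigen::Tensor<float, 1> reversed = v.stridedSlice(start, stop, stride);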

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H