// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H

namespace Eigen {

/** \class TensorBroadcasting
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor broadcasting class.
  *
  *
  */
namespace internal {
template<typename Broadcast, typename XprType>
struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

template<typename Broadcast, typename XprType>
struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
{
  typedef const TensorBroadcastingOp<Broadcast, XprType>& type;
};

template<typename Broadcast, typename XprType>
struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
{
  typedef TensorBroadcastingOp<Broadcast, XprType> type;
};

template <typename Dims>
struct is_input_scalar {
  static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
  static const bool value = true;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::size_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
  static const bool value = (Sizes<Indices...>::total_size == 1);
};
#endif

} // end namespace internal


template<typename Broadcast, typename XprType>
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
      : m_xpr(expr), m_broadcast(broadcast) {}

  EIGEN_DEVICE_FUNC
  const Broadcast& broadcast() const { return m_broadcast; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  protected:
  typename XprType::Nested m_xpr;
  const Broadcast m_broadcast;
};


// Eval as rvalue
template<typename Broadcast, typename ArgType, typename Device>
struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
  typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = true,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_broadcast(op.broadcast()), m_impl(op.expression(), device)
  {
    // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
    // and store the result in a scalar. Instead one should reshape the scalar into an N-D
    // tensor with N >= 1 of 1 element first and then broadcast.
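    //
    // A hedged usage sketch (illustrative only, not part of the original file),
    // assuming the usual TensorBase::broadcast() entry point that builds this op:
    //   Eigen::Tensor<float, 2> t(2, 3);
    //   Eigen::array<int, 2> bcast{{2, 3}};
    //   Eigen::Tensor<float, 2> r = t.broadcast(bcast);  // r is 4x9: t tiled 2x along dim 0, 3x along dim 1
    // Each output dimension computed below is input_dims[i] * broadcast[i].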
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    const InputDimensions& input_dims = m_impl.dimensions();
    const Broadcast& broadcast = op.broadcast();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i] * broadcast[i];
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims-2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return m_impl.coeff(0);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return coeffColMajor(index);
    } else {
      return coeffRowMajor(index);
    }
  }

  // TODO: attempt to speed this up. The integer divisions and modulo are slow
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
  {
    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[0]);
      }
    }
    return m_impl.coeff(inputIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
  {
    Index inputIndex = 0;
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims-1]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[NumDims-1]);
      }
    }
    return m_impl.coeff(inputIndex);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(0));
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor<LoadMode>(index);
    } else {
      return packetRowMajor<LoadMode>(index);
    }
  }

  // Ignore the LoadMode and always use unaligned loads since we can't guarantee
  // the alignment at compile time.
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[0];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
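    //
    // Fast path: when the whole packet fits inside the innermost input dimension,
    // a single unaligned packet load suffices; otherwise the coefficients are
    // gathered one by one through coeffColMajor() below.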
    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {
        values[i] = coeffColMajor(originalIndex+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims-1]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[NumDims-1];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
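    //
    // Same fast-path check as in packetColMajor(), but against the innermost
    // dimension in row-major order (NumDims-1), falling back to coeffRowMajor().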
    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {
        values[i] = coeffRowMajor(originalIndex+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double compute_cost = TensorOpCost::AddCost<Index>();
    if (NumDims > 0) {
      for (int i = NumDims - 1; i > 0; --i) {
        compute_cost += TensorOpCost::DivCost<Index>();
        if (internal::index_statically_eq<Broadcast>(i, 1)) {
          compute_cost +=
              TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
        } else {
          if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
            compute_cost += TensorOpCost::MulCost<Index>() +
                            TensorOpCost::ModCost<Index>() +
                            TensorOpCost::AddCost<Index>();
          }
        }
        compute_cost +=
            TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  Broadcast functor() const { return m_broadcast; }

 protected:
  const Broadcast m_broadcast;
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H