// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
//  * implement other kind of vectorization
//  * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

// Compile-time selection of the traversal strategy (scalar, linearly
// vectorized, or per-inner-vector "slice" vectorized) and of the unrolling
// strategy (complete or none) used to reduce the expression Derived with the
// binary functor Func. The redux_impl specializations in Part 3 dispatch on
// the Traversal and Unrolling enums computed here.
template<typename Func, typename Derived>
struct redux_traits
{
public:
  // Best SIMD packet type for Derived's scalar given its compile-time size.
  typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // Upper bound on the number of coefficients along the inner
    // (storage-contiguous) dimension.
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    // Vectorization requires packet access on both the expression and the functor.
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    // Linear vectorization additionally needs linear (1D-indexed) access.
    MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
    // Slice vectorization is only attempted when each inner vector can hold
    // at least three packets.
    MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    // Prefer linear vectorization, then slice vectorization, else scalar traversal.
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated cost of the whole reduction; dynamic sizes get HugeCost so
    // they are never unrolled.
    Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
         : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a PacketSize-times larger unrolling budget
    // since one unrolled step then covers a whole packet.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dump the decisions above to stderr to help debug vectorization choices.
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

// Fully unrolled scalar reduction of the coefficients [Start, Start+Length):
// the range is recursively split in halves, yielding a balanced binary
// reduction tree rather than a linear fold.
template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};

// Base case: a single coefficient, with the linear index Start converted to
// (outer, inner) coordinates at compile time.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This is actually dead code and will never be called. It is required
// to prevent false warnings regarding failed inlining though
// for 0 length run() will never be called at all.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};

/*** vectorization ***/

// Fully unrolled packet-wise reduction: combines the Length packets starting
// at packet index Start into a single packet with func.packetOp, again as a
// balanced binary tree. The caller performs the final horizontal predux and
// handles any leftover scalar coefficients.
template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

// Base case: load the single packet located at linear coefficient index
// Start*PacketSize, converted to (outer, inner) coordinates at compile time.
template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * redux_traits<Func, Derived>::PacketSize,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// redux_impl dispatches on the (Traversal, Unrolling) pair computed by
// redux_traits; each specialization below implements one combination.
template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

// Scalar traversal, no unrolling: plain coefficient-by-coefficient loops
// over the outer/inner dimensions.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    // Seed the accumulator with the first coefficient, fold in the rest of
    // the first inner vector, then the remaining outer vectors.
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};

// Scalar traversal, complete unrolling: delegate to the recursive scalar
// unroller over the whole compile-time size.
template<typename Func, typename Derived>
struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
{};

// Linear vectorized traversal, no unrolling: the bulk of the data is
// processed packet-wise using two independent accumulators, while the
// unaligned head and the scalar tail are folded in coefficient-wise.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // Runtime-aligned loads are only possible with direct access and a
      // scalar-aligned packet type; otherwise assume unaligned...
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      // ...but never below the expression's statically-known alignment.
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    // Number of coefficients coverable by pairs of packets, resp. by packets.
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize  = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        // Merge the two accumulators, then the odd leftover packet, if any.
        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator to a scalar.
      res = func.predux(packet_res0);

      // Scalar head before the first aligned coefficient...
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      // ...and scalar tail after the last full packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};

// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Each inner vector is processed packet-wise (unaligned loads) as far as it
// goes; the remainder of every inner vector is folded in coefficient-wise.
template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename redux_traits<Func, Derived>::PacketType PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = redux_traits<Func, Derived>::PacketSize
    };
    // Portion of each inner vector coverable by whole packets.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed with the first packet; the (j==0, i==0) iteration is skipped below.
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      // Scalar tail of each inner vector.
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};

// Linear vectorized traversal with complete unrolling: the packet part is
// fully unrolled at compile time via redux_vec_unroller, and the (at most
// PacketSize-1) leftover scalars are folded in with the scalar unroller.
template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;

  typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
  enum {
    PacketSize = redux_traits<Func, Derived>::PacketSize,
    Size = Derived::SizeAtCompileTime,
    // Portion of the expression coverable by whole packets.
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      // Expression smaller than one packet: pure scalar unrolled reduction.
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};

// evaluator adaptor
// Wraps an expression's evaluator behind the interface the redux_impl
// specializations expect: size queries plus coeff/packet access by linear
// index or by (outer, inner) coordinates.
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  // Size queries are forwarded to the expression itself.
  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  // Coefficient and packet access are forwarded to the evaluator.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  // (outer, inner) accessors map to (row, col) according to the storage order.
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/


/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a BinaryOp is the type of the functor \a func which must be
  * an associative operator. Both current C++98 and C++11 functor styles are handled.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
  */
template<typename Derived>
template<typename Func>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  // Wrap the expression in a redux_evaluator and let redux_impl pick the
  // traversal/unrolling strategy through its default template arguments.
  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}

/** \returns the minimum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}

/** \returns the maximum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}

/** \returns the sum of all coefficients of \c *this
  *
  * If \c *this is empty, then the value 0 is returned.
  *
  * \sa trace(), prod(), mean()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  // Empty expressions return the additive identity instead of tripping the
  // non-empty assertion inside redux().
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}

/** \returns the mean of all coefficients of *this
  *
  * \sa trace(), prod(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
#ifdef __INTEL_COMPILER
  // Silence ICC diagnostic 2259 ("non-pointer conversion may lose significant
  // bits") triggered by the Scalar conversions around the division below.
  #pragma warning push
  #pragma warning ( disable : 2259 )
#endif
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
  #pragma warning pop
#endif
}

/** \returns the product of all coefficients of *this
  *
  * Example: \include MatrixBase_prod.cpp
  * Output: \verbinclude MatrixBase_prod.out
  *
  * \sa sum(), mean(), trace()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  // Empty expressions return the multiplicative identity instead of tripping
  // the non-empty assertion inside redux().
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}

} // end namespace Eigen

#endif // EIGEN_REDUX_H