• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 //  * implement other kind of vectorization
20 //  * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
// Computes, entirely at compile time, the strategy used by DenseBase::redux()
// for a given functor and expression evaluator: which traversal to use
// (default scalar, linear vectorized, or slice vectorized) and whether the
// reduction loop should be fully unrolled.
template<typename Func, typename Evaluator>
struct redux_traits
{
public:
    typedef typename find_best_packet<typename Evaluator::Scalar,Evaluator::SizeAtCompileTime>::type PacketType;
  enum {
    PacketSize = unpacket_traits<PacketType>::size,
    // "Inner" is the contiguous storage direction of the expression,
    // "outer" the other one, regardless of row/column-major order.
    InnerMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxColsAtCompileTime
                 : Evaluator::MaxRowsAtCompileTime,
    OuterMaxSize = int(Evaluator::IsRowMajor)
                 ? Evaluator::MaxRowsAtCompileTime
                 : Evaluator::MaxColsAtCompileTime,
    // Upper bound on the number of packet operations a slice-vectorized
    // traversal could perform, or Dynamic when it cannot be bounded at
    // compile time (0 means slice vectorization is provably useless).
    SliceVectorizedWork = int(InnerMaxSize)==Dynamic ? Dynamic
                        : int(OuterMaxSize)==Dynamic ? (int(InnerMaxSize)>=int(PacketSize) ? Dynamic : 0)
                        : (int(InnerMaxSize)/int(PacketSize)) * int(OuterMaxSize)
  };

  enum {
    // Vectorization requires packet access on both the expression and the functor.
    MightVectorize = (int(Evaluator::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = bool(MightVectorize) && (int(Evaluator::Flags)&LinearAccessBit),
    // Only worth slicing when at least a few packet ops are possible.
    MaySliceVectorize  = bool(MightVectorize) && (int(SliceVectorizedWork)==Dynamic || int(SliceVectorizedWork)>=3)
  };

public:
  enum {
    // Linear vectorization is preferred over slice vectorization,
    // which is preferred over the plain scalar traversal.
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    // Estimated cost of the whole reduction: one coefficient read per element
    // plus one functor application per combine step (size-1 of them).
    Cost = Evaluator::SizeAtCompileTime == Dynamic ? HugeCost
         : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + (Evaluator::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    // Vectorized traversals get a proportionally larger unrolling budget.
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  // Dumps the decisions above to std::cerr; compiled only in debug-assign builds.
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Evaluator::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(OuterMaxSize)
    EIGEN_DEBUG_VAR(SliceVectorizedWork)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
    EIGEN_DEBUG_VAR(UnrollingLimit)
    std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
    std::cerr << std::endl;
  }
#endif
};
91 
92 /***************************************************************************
93 * Part 2 : unrollers
94 ***************************************************************************/
95 
96 /*** no vectorization ***/
97 
98 template<typename Func, typename Evaluator, int Start, int Length>
99 struct redux_novec_unroller
100 {
101   enum {
102     HalfLength = Length/2
103   };
104 
105   typedef typename Evaluator::Scalar Scalar;
106 
107   EIGEN_DEVICE_FUNC
runredux_novec_unroller108   static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func& func)
109   {
110     return func(redux_novec_unroller<Func, Evaluator, Start, HalfLength>::run(eval,func),
111                 redux_novec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::run(eval,func));
112   }
113 };
114 
115 template<typename Func, typename Evaluator, int Start>
116 struct redux_novec_unroller<Func, Evaluator, Start, 1>
117 {
118   enum {
119     outer = Start / Evaluator::InnerSizeAtCompileTime,
120     inner = Start % Evaluator::InnerSizeAtCompileTime
121   };
122 
123   typedef typename Evaluator::Scalar Scalar;
124 
125   EIGEN_DEVICE_FUNC
126   static EIGEN_STRONG_INLINE Scalar run(const Evaluator &eval, const Func&)
127   {
128     return eval.coeffByOuterInner(outer, inner);
129   }
130 };
131 
132 // This is actually dead code and will never be called. It is required
133 // to prevent false warnings regarding failed inlining though
134 // for 0 length run() will never be called at all.
135 template<typename Func, typename Evaluator, int Start>
136 struct redux_novec_unroller<Func, Evaluator, Start, 0>
137 {
138   typedef typename Evaluator::Scalar Scalar;
139   EIGEN_DEVICE_FUNC
140   static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); }
141 };
142 
143 /*** vectorization ***/
144 
145 template<typename Func, typename Evaluator, int Start, int Length>
146 struct redux_vec_unroller
147 {
148   template<typename PacketType>
149   EIGEN_DEVICE_FUNC
150   static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func& func)
151   {
152     enum {
153       PacketSize = unpacket_traits<PacketType>::size,
154       HalfLength = Length/2
155     };
156 
157     return func.packetOp(
158             redux_vec_unroller<Func, Evaluator, Start, HalfLength>::template run<PacketType>(eval,func),
159             redux_vec_unroller<Func, Evaluator, Start+HalfLength, Length-HalfLength>::template run<PacketType>(eval,func) );
160   }
161 };
162 
163 template<typename Func, typename Evaluator, int Start>
164 struct redux_vec_unroller<Func, Evaluator, Start, 1>
165 {
166   template<typename PacketType>
167   EIGEN_DEVICE_FUNC
168   static EIGEN_STRONG_INLINE PacketType run(const Evaluator &eval, const Func&)
169   {
170     enum {
171       PacketSize = unpacket_traits<PacketType>::size,
172       index = Start * PacketSize,
173       outer = index / int(Evaluator::InnerSizeAtCompileTime),
174       inner = index % int(Evaluator::InnerSizeAtCompileTime),
175       alignment = Evaluator::Alignment
176     };
177     return eval.template packetByOuterInner<alignment,PacketType>(outer, inner);
178   }
179 };
180 
181 /***************************************************************************
182 * Part 3 : implementation of all cases
183 ***************************************************************************/
184 
// Dispatcher for the reduction kernels: the default template arguments pick
// the traversal and unrolling strategies computed by redux_traits, and the
// specializations below implement each (Traversal, Unrolling) combination.
template<typename Func, typename Evaluator,
         int Traversal = redux_traits<Func, Evaluator>::Traversal,
         int Unrolling = redux_traits<Func, Evaluator>::Unrolling
>
struct redux_impl;
190 
191 template<typename Func, typename Evaluator>
192 struct redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>
193 {
194   typedef typename Evaluator::Scalar Scalar;
195 
196   template<typename XprType>
197   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
198   Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
199   {
200     eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
201     Scalar res;
202     res = eval.coeffByOuterInner(0, 0);
203     for(Index i = 1; i < xpr.innerSize(); ++i)
204       res = func(res, eval.coeffByOuterInner(0, i));
205     for(Index i = 1; i < xpr.outerSize(); ++i)
206       for(Index j = 0; j < xpr.innerSize(); ++j)
207         res = func(res, eval.coeffByOuterInner(i, j));
208     return res;
209   }
210 };
211 
212 template<typename Func, typename Evaluator>
213 struct redux_impl<Func,Evaluator, DefaultTraversal, CompleteUnrolling>
214   : redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime>
215 {
216   typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
217   typedef typename Evaluator::Scalar Scalar;
218   template<typename XprType>
219   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
220   Scalar run(const Evaluator &eval, const Func& func, const XprType& /*xpr*/)
221   {
222     return Base::run(eval,func);
223   }
224 };
225 
// Vectorized reduction over a linearly-accessible expression: the bulk of
// the coefficients is processed with packets, while the unaligned head and
// the tail remainder are folded in scalarly.
template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename XprType>
  static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    const Index size = xpr.size();

    const Index packetSize = redux_traits<Func, Evaluator>::PacketSize;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      // With direct access and scalar-alignable packets, full packet alignment
      // can be reached by skipping a scalar prefix; otherwise rely on the
      // static alignment the evaluator already guarantees.
      alignment0 = (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Evaluator::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(xpr);
    // Largest multiples of 2*packetSize / packetSize fitting after alignedStart.
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2 = alignedStart + alignedSize2;
    const Index alignedEnd  = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = eval.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        // Two independent accumulators let consecutive packetOp calls overlap.
        PacketScalar packet_res1 = eval.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, eval.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        // At most one full packet remains between alignedEnd2 and alignedEnd.
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, eval.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      // Horizontal reduction of the packet accumulator to a scalar...
      res = func.predux(packet_res0);

      // ...then fold in the scalar prefix preceding the aligned region...
      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,eval.coeff(index));

      // ...and the scalar remainder past the last full packet.
      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,eval.coeff(index));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = eval.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,eval.coeff(index));
    }

    return res;
  }
};
284 
// NOTE: for SliceVectorizedTraversal we simply bypass unrolling
// Slice vectorization: each inner vector (row or column, whichever is
// contiguous) is reduced with packets over its first packetedInnerSize
// coefficients; the per-vector scalar remainders are folded in afterwards.
template<typename Func, typename Evaluator, int Unrolling>
struct redux_impl<Func, Evaluator, SliceVectorizedTraversal, Unrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;

  template<typename XprType>
  EIGEN_DEVICE_FUNC static Scalar run(const Evaluator &eval, const Func& func, const XprType& xpr)
  {
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    const Index innerSize = xpr.innerSize();
    const Index outerSize = xpr.outerSize();
    enum {
      packetSize = redux_traits<Func, Evaluator>::PacketSize
    };
    // Largest multiple of packetSize that fits inside one inner vector.
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      // Seed the accumulator with the very first packet; the j==0 iteration
      // below therefore starts at i=packetSize to avoid reusing it.
      PacketType packet_res = eval.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, eval.template packetByOuterInner<Unaligned,PacketType>(j,i));

      // Reduce the packet accumulator, then fold in each inner vector's
      // scalar remainder (coefficients past the last full packet).
      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, eval.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything.
         // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    {
      res = redux_impl<Func, Evaluator, DefaultTraversal, NoUnrolling>::run(eval, func, xpr);
    }

    return res;
  }
};
324 
// Fully-unrolled vectorized reduction: the packet part is handled by
// redux_vec_unroller and the (compile-time-known) scalar remainder by
// redux_novec_unroller.
template<typename Func, typename Evaluator>
struct redux_impl<Func, Evaluator, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;

  typedef typename redux_traits<Func, Evaluator>::PacketType PacketType;
  enum {
    PacketSize = redux_traits<Func, Evaluator>::PacketSize,
    Size = Evaluator::SizeAtCompileTime,
    // Number of coefficients covered by whole packets.
    VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize)
  };

  template<typename XprType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  Scalar run(const Evaluator &eval, const Func& func, const XprType &xpr)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(xpr)
    eigen_assert(xpr.rows()>0 && xpr.cols()>0 && "you are using an empty matrix");
    // All branches below are on compile-time constants, so only one of them
    // survives code generation.
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Evaluator, 0, Size / PacketSize>::template run<PacketType>(eval,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Evaluator, VectorizedSize, Size-VectorizedSize>::run(eval,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Evaluator, 0, Size>::run(eval,func);
    }
  }
};
354 
355 // evaluator adaptor
356 template<typename _XprType>
357 class redux_evaluator : public internal::evaluator<_XprType>
358 {
359   typedef internal::evaluator<_XprType> Base;
360 public:
361   typedef _XprType XprType;
362   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
363   explicit redux_evaluator(const XprType &xpr) : Base(xpr) {}
364 
365   typedef typename XprType::Scalar Scalar;
366   typedef typename XprType::CoeffReturnType CoeffReturnType;
367   typedef typename XprType::PacketScalar PacketScalar;
368 
369   enum {
370     MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
371     MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
372     // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
373     Flags = Base::Flags & ~DirectAccessBit,
374     IsRowMajor = XprType::IsRowMajor,
375     SizeAtCompileTime = XprType::SizeAtCompileTime,
376     InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime
377   };
378 
379   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
380   CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
381   { return Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
382 
383   template<int LoadMode, typename PacketType>
384   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
385   PacketType packetByOuterInner(Index outer, Index inner) const
386   { return Base::template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
387 
388 };
389 
390 } // end namespace internal
391 
392 /***************************************************************************
393 * Part 4 : public API
394 ***************************************************************************/
395 
396 
397 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
398   *
399   * The template parameter \a BinaryOp is the type of the functor \a func which must be
400   * an associative operator. Both current C++98 and C++11 functor styles are handled.
401   *
402   * \warning the matrix must be not empty, otherwise an assertion is triggered.
403   *
404   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
405   */
406 template<typename Derived>
407 template<typename Func>
408 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
409 DenseBase<Derived>::redux(const Func& func) const
410 {
411   eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
412 
413   typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
414   ThisEvaluator thisEval(derived());
415 
416   // The initial expression is passed to the reducer as an additional argument instead of
417   // passing it as a member of redux_evaluator to help
418   return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func, derived());
419 }
420 
421 /** \returns the minimum of all coefficients of \c *this.
422   * In case \c *this contains NaN, NaNPropagation determines the behavior:
423   *   NaNPropagation == PropagateFast : undefined
424   *   NaNPropagation == PropagateNaN : result is NaN
425   *   NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN
426   * \warning the matrix must be not empty, otherwise an assertion is triggered.
427   */
428 template<typename Derived>
429 template<int NaNPropagation>
430 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
431 DenseBase<Derived>::minCoeff() const
432 {
433   return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar, NaNPropagation>());
434 }
435 
436 /** \returns the maximum of all coefficients of \c *this.
437   * In case \c *this contains NaN, NaNPropagation determines the behavior:
438   *   NaNPropagation == PropagateFast : undefined
439   *   NaNPropagation == PropagateNaN : result is NaN
440   *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
441   * \warning the matrix must be not empty, otherwise an assertion is triggered.
442   */
443 template<typename Derived>
444 template<int NaNPropagation>
445 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
446 DenseBase<Derived>::maxCoeff() const
447 {
448   return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar, NaNPropagation>());
449 }
450 
451 /** \returns the sum of all coefficients of \c *this
452   *
453   * If \c *this is empty, then the value 0 is returned.
454   *
455   * \sa trace(), prod(), mean()
456   */
457 template<typename Derived>
458 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
459 DenseBase<Derived>::sum() const
460 {
461   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
462     return Scalar(0);
463   return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
464 }
465 
466 /** \returns the mean of all coefficients of *this
467 *
468 * \sa trace(), prod(), sum()
469 */
470 template<typename Derived>
471 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
472 DenseBase<Derived>::mean() const
473 {
474 #ifdef __INTEL_COMPILER
475   #pragma warning push
476   #pragma warning ( disable : 2259 )
477 #endif
478   return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
479 #ifdef __INTEL_COMPILER
480   #pragma warning pop
481 #endif
482 }
483 
484 /** \returns the product of all coefficients of *this
485   *
486   * Example: \include MatrixBase_prod.cpp
487   * Output: \verbinclude MatrixBase_prod.out
488   *
489   * \sa sum(), mean(), trace()
490   */
491 template<typename Derived>
492 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
493 DenseBase<Derived>::prod() const
494 {
495   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
496     return Scalar(1);
497   return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
498 }
499 
/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  // Delegates to diagonal().sum(); an empty diagonal therefore yields 0.
  return derived().diagonal().sum();
}
512 
513 } // end namespace Eigen
514 
515 #endif // EIGEN_REDUX_H
516