• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_REDUX_H
12 #define EIGEN_REDUX_H
13 
14 namespace Eigen {
15 
16 namespace internal {
17 
18 // TODO
19 //  * implement other kind of vectorization
20 //  * factorize code
21 
22 /***************************************************************************
23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
24 ***************************************************************************/
25 
26 template<typename Func, typename Derived>
27 struct redux_traits
28 {
29 public:
30   enum {
31     PacketSize = packet_traits<typename Derived::Scalar>::size,
32     InnerMaxSize = int(Derived::IsRowMajor)
33                  ? Derived::MaxColsAtCompileTime
34                  : Derived::MaxRowsAtCompileTime
35   };
36 
37   enum {
38     MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
39                   && (functor_traits<Func>::PacketAccess),
40     MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
41     MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
42   };
43 
44 public:
45   enum {
46     Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
47               : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
48                                         : int(DefaultTraversal)
49   };
50 
51 public:
52   enum {
53     Cost = (  Derived::SizeAtCompileTime == Dynamic
54            || Derived::CoeffReadCost == Dynamic
55            || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
56            ) ? Dynamic
57            : Derived::SizeAtCompileTime * Derived::CoeffReadCost
58                + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
59     UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
60   };
61 
62 public:
63   enum {
64     Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
65               ? CompleteUnrolling
66               : NoUnrolling
67   };
68 };
69 
70 /***************************************************************************
71 * Part 2 : unrollers
72 ***************************************************************************/
73 
74 /*** no vectorization ***/
75 
76 template<typename Func, typename Derived, int Start, int Length>
77 struct redux_novec_unroller
78 {
79   enum {
80     HalfLength = Length/2
81   };
82 
83   typedef typename Derived::Scalar Scalar;
84 
runredux_novec_unroller85   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
86   {
87     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
88                 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
89   }
90 };
91 
92 template<typename Func, typename Derived, int Start>
93 struct redux_novec_unroller<Func, Derived, Start, 1>
94 {
95   enum {
96     outer = Start / Derived::InnerSizeAtCompileTime,
97     inner = Start % Derived::InnerSizeAtCompileTime
98   };
99 
100   typedef typename Derived::Scalar Scalar;
101 
102   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
103   {
104     return mat.coeffByOuterInner(outer, inner);
105   }
106 };
107 
108 // This is actually dead code and will never be called. It is required
109 // to prevent false warnings regarding failed inlining though
110 // for 0 length run() will never be called at all.
111 template<typename Func, typename Derived, int Start>
112 struct redux_novec_unroller<Func, Derived, Start, 0>
113 {
114   typedef typename Derived::Scalar Scalar;
115   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
116 };
117 
118 /*** vectorization ***/
119 
120 template<typename Func, typename Derived, int Start, int Length>
121 struct redux_vec_unroller
122 {
123   enum {
124     PacketSize = packet_traits<typename Derived::Scalar>::size,
125     HalfLength = Length/2
126   };
127 
128   typedef typename Derived::Scalar Scalar;
129   typedef typename packet_traits<Scalar>::type PacketScalar;
130 
131   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
132   {
133     return func.packetOp(
134             redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
135             redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
136   }
137 };
138 
139 template<typename Func, typename Derived, int Start>
140 struct redux_vec_unroller<Func, Derived, Start, 1>
141 {
142   enum {
143     index = Start * packet_traits<typename Derived::Scalar>::size,
144     outer = index / int(Derived::InnerSizeAtCompileTime),
145     inner = index % int(Derived::InnerSizeAtCompileTime),
146     alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
147   };
148 
149   typedef typename Derived::Scalar Scalar;
150   typedef typename packet_traits<Scalar>::type PacketScalar;
151 
152   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
153   {
154     return mat.template packetByOuterInner<alignment>(outer, inner);
155   }
156 };
157 
158 /***************************************************************************
159 * Part 3 : implementation of all cases
160 ***************************************************************************/
161 
162 template<typename Func, typename Derived,
163          int Traversal = redux_traits<Func, Derived>::Traversal,
164          int Unrolling = redux_traits<Func, Derived>::Unrolling
165 >
166 struct redux_impl;
167 
168 template<typename Func, typename Derived>
169 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
170 {
171   typedef typename Derived::Scalar Scalar;
172   typedef typename Derived::Index Index;
173   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
174   {
175     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
176     Scalar res;
177     res = mat.coeffByOuterInner(0, 0);
178     for(Index i = 1; i < mat.innerSize(); ++i)
179       res = func(res, mat.coeffByOuterInner(0, i));
180     for(Index i = 1; i < mat.outerSize(); ++i)
181       for(Index j = 0; j < mat.innerSize(); ++j)
182         res = func(res, mat.coeffByOuterInner(i, j));
183     return res;
184   }
185 };
186 
187 template<typename Func, typename Derived>
188 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
189   : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
190 {};
191 
192 template<typename Func, typename Derived>
193 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
194 {
195   typedef typename Derived::Scalar Scalar;
196   typedef typename packet_traits<Scalar>::type PacketScalar;
197   typedef typename Derived::Index Index;
198 
199   static Scalar run(const Derived& mat, const Func& func)
200   {
201     const Index size = mat.size();
202     eigen_assert(size && "you are using an empty matrix");
203     const Index packetSize = packet_traits<Scalar>::size;
204     const Index alignedStart = internal::first_aligned(mat);
205     enum {
206       alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
207                 ? Aligned : Unaligned
208     };
209     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
210     const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
211     const Index alignedEnd2 = alignedStart + alignedSize2;
212     const Index alignedEnd  = alignedStart + alignedSize;
213     Scalar res;
214     if(alignedSize)
215     {
216       PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
217       if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
218       {
219         PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
220         for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
221         {
222           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
223           packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
224         }
225 
226         packet_res0 = func.packetOp(packet_res0,packet_res1);
227         if(alignedEnd>alignedEnd2)
228           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
229       }
230       res = func.predux(packet_res0);
231 
232       for(Index index = 0; index < alignedStart; ++index)
233         res = func(res,mat.coeff(index));
234 
235       for(Index index = alignedEnd; index < size; ++index)
236         res = func(res,mat.coeff(index));
237     }
238     else // too small to vectorize anything.
239          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
240     {
241       res = mat.coeff(0);
242       for(Index index = 1; index < size; ++index)
243         res = func(res,mat.coeff(index));
244     }
245 
246     return res;
247   }
248 };
249 
250 template<typename Func, typename Derived>
251 struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
252 {
253   typedef typename Derived::Scalar Scalar;
254   typedef typename packet_traits<Scalar>::type PacketScalar;
255   typedef typename Derived::Index Index;
256 
257   static Scalar run(const Derived& mat, const Func& func)
258   {
259     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
260     const Index innerSize = mat.innerSize();
261     const Index outerSize = mat.outerSize();
262     enum {
263       packetSize = packet_traits<Scalar>::size
264     };
265     const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
266     Scalar res;
267     if(packetedInnerSize)
268     {
269       PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
270       for(Index j=0; j<outerSize; ++j)
271         for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
272           packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
273 
274       res = func.predux(packet_res);
275       for(Index j=0; j<outerSize; ++j)
276         for(Index i=packetedInnerSize; i<innerSize; ++i)
277           res = func(res, mat.coeffByOuterInner(j,i));
278     }
279     else // too small to vectorize anything.
280          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
281     {
282       res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
283     }
284 
285     return res;
286   }
287 };
288 
289 template<typename Func, typename Derived>
290 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
291 {
292   typedef typename Derived::Scalar Scalar;
293   typedef typename packet_traits<Scalar>::type PacketScalar;
294   enum {
295     PacketSize = packet_traits<Scalar>::size,
296     Size = Derived::SizeAtCompileTime,
297     VectorizedSize = (Size / PacketSize) * PacketSize
298   };
299   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
300   {
301     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
302     Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
303     if (VectorizedSize != Size)
304       res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
305     return res;
306   }
307 };
308 
309 } // end namespace internal
310 
311 /***************************************************************************
312 * Part 4 : public API
313 ***************************************************************************/
314 
315 
316 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
317   *
318   * The template parameter \a BinaryOp is the type of the functor \a func which must be
319   * an associative operator. Both current STL and TR1 functor styles are handled.
320   *
321   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
322   */
323 template<typename Derived>
324 template<typename Func>
325 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
326 DenseBase<Derived>::redux(const Func& func) const
327 {
328   typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
329   return internal::redux_impl<Func, ThisNested>
330             ::run(derived(), func);
331 }
332 
333 /** \returns the minimum of all coefficients of \c *this.
334   * \warning the result is undefined if \c *this contains NaN.
335   */
336 template<typename Derived>
337 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
338 DenseBase<Derived>::minCoeff() const
339 {
340   return this->redux(Eigen::internal::scalar_min_op<Scalar>());
341 }
342 
343 /** \returns the maximum of all coefficients of \c *this.
344   * \warning the result is undefined if \c *this contains NaN.
345   */
346 template<typename Derived>
347 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
348 DenseBase<Derived>::maxCoeff() const
349 {
350   return this->redux(Eigen::internal::scalar_max_op<Scalar>());
351 }
352 
353 /** \returns the sum of all coefficients of *this
354   *
355   * \sa trace(), prod(), mean()
356   */
357 template<typename Derived>
358 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
359 DenseBase<Derived>::sum() const
360 {
361   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
362     return Scalar(0);
363   return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
364 }
365 
366 /** \returns the mean of all coefficients of *this
367 *
368 * \sa trace(), prod(), sum()
369 */
370 template<typename Derived>
371 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
372 DenseBase<Derived>::mean() const
373 {
374   return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
375 }
376 
377 /** \returns the product of all coefficients of *this
378   *
379   * Example: \include MatrixBase_prod.cpp
380   * Output: \verbinclude MatrixBase_prod.out
381   *
382   * \sa sum(), mean(), trace()
383   */
384 template<typename Derived>
385 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
386 DenseBase<Derived>::prod() const
387 {
388   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
389     return Scalar(1);
390   return this->redux(Eigen::internal::scalar_product_op<Scalar>());
391 }
392 
393 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
394   *
395   * \c *this can be any matrix, not necessarily square.
396   *
397   * \sa diagonal(), sum()
398   */
399 template<typename Derived>
400 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
401 MatrixBase<Derived>::trace() const
402 {
403   return derived().diagonal().sum();
404 }
405 
406 } // end namespace Eigen
407 
408 #endif // EIGEN_REDUX_H
409