• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
17 #define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
18 
19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
20 
21 namespace Eigen {
22 
23 /** SpatialMaxPooling
24  * \ingroup CXX11_NeuralNetworks_Module
25  *
26  * \brief Applies a max-pooling over a multichannel input image.
27  *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
29  * width, others in col-major, and the reverse of that in row-major).
30  *
31  * The result can be assigned to a tensor of rank equal to the rank of the
32  * input. The dimensions of the result will be channels, height, width, and
33  * others (in col-major, and the reverse of that if the input was row-major).
34  *
35  * The order of the width and height dimensions can be swapped if needed.
36  *
37  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial max pooling is only defined for rank-4 inputs (see the doc
  // comment above for the expected dimension ordering).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  // Wrap the input expression in a TensorRef so its runtime dimensions can
  // be queried without forcing an evaluation of the whole expression.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once the in-patch (dilation) strides are taken
  // into account: a patch of size P with dilation D covers
  // P + (P - 1) * (D - 1) input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  // Row/col positions within the rank-4 tensor depend on the storage order.
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only positions where the full (dilated) patch fits
    // contribute, hence the "- patchEff + 1".
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME padding: output covers every strided input position.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif

  // Padded patch entries are filled with the lowest representable scalar so
  // they can never win the max reduction.
  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::lowest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
138 
139 /** CuboidMaxPooling
140  * \ingroup CXX11_NeuralNetworks_Module
141  *
142  * \brief Applies a max-pooling over a multichannel input volume.
143  *
144  * The input parameter is expected to be a tensor with a rank of 5 (channels,
145  * depth, height, width, others in col-major, and the reverse of that in
146  * row-major).
147  *
148  * The result can be assigned to a tensor of rank equal to the rank of the
149  * input. The dimensions of the result will be channels, depth, height, width,
150  * and others (in col-major, and the reverse of that if the input was
151  * row-major).
152  *
153  * The order of the depth, width and height dimensions can be swapped if
154  * needed.
155  *
156  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
// NOTE(review): the declared return type hard-codes MaxReducer<float>, so
// this overload is only well-formed for float-valued inputs — confirm
// against callers before generalizing.
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid (3-D) max pooling is only defined for rank-5 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  // TensorRef lets us read the runtime dimensions of the input expression
  // without evaluating it.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/row/col positions in the rank-5 tensor depend on storage order;
  // rows sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only positions where the full patch fits contribute.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME padding: output covers every strided input position.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Flatten the patch extraction to rank 3 so a single-axis reduction over
  // dim 1 (the patch elements) computes the per-patch max:
  // col-major: (channels, patch elements, everything else),
  // row-major: the reverse.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  // Padded patch entries are filled with -highest() so they can never win
  // the max reduction.
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
256 
257 /** SpatialAvgPooling
258  * \ingroup CXX11_NeuralNetworks_Module
259  *
260  * \brief Applies an average pooling over a multichannel input image.
261  *
262  * The input parameter is expected to be a tensor with a rank of 4 (channels,
263  * height, width, others in col-major, and the reverse of that in row-major).
264  *
265  * The result can be assigned to a tensor of rank equal to the rank of the
266  * input. The dimensions of the result will be channels, height, width, and
267  * others (in col-major, and the reverse of that if the input was row-major).
268  *
269  * The order of the width and height dimensions can be swapped if needed.
270  *
271  */
272 namespace internal {
273 
// Reducer that computes the mean of the values it sees while skipping the
// padding sentinel (-NumTraits<T>::highest()) that the pooling functions use
// to mark out-of-image patch entries. It is stateful: it counts how many
// real (non-sentinel) values were accumulated so the final division is by
// the true element count, not the patch size.
template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
  // We only support packet access for floats.
  static constexpr bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  static constexpr bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(T(0.0));
  }

  // Accumulates t into *accum unless it equals the padding sentinel, in
  // which case it is ignored and not counted.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  // Turns the accumulated sum into a mean over the values actually reduced.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  // Vectorized reduce: lanes equal to the padding sentinel are masked out of
  // both the running sum and the per-lane element count.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  // Per-lane mean of the vector accumulator.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  // Combines the scalar and vector accumulators into the final mean.
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  int scalarCount_;     // count of non-sentinel values seen by reduce()
  Packet packetCount_;  // per-lane counts of non-sentinel values
};
369 
// Traits for AvgPoolMeanReducer on generic (CPU) devices: packet access is
// enabled only on x86 builds without CUDA/HIP, matching the reducer itself.
template <typename Device>
struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) && \
    !defined(__HIPCC__)
    // We only support packet access for floats.
    PacketAccess = true,
#else
    PacketAccess = false,
#endif
    IsStateful = true,
    // The mean skips sentinel values, so reassociating the reduction can
    // change which values are counted.
    IsExactlyAssociative = false
  };
};
385 
// GPU specialization: the reducer's SIMD path is x86-only, so packet access
// is always disabled on GpuDevice.
template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum {
    Cost = 1,
    PacketAccess = false,
    IsStateful = true,
    IsExactlyAssociative = false
  };
};
395 
396 }  // namespace internal
397 
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  // Spatial average pooling is only defined for rank-4 inputs (see the doc
  // comment above for the expected dimension ordering).
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  // Wrap the input expression in a TensorRef so its runtime dimensions can
  // be queried without forcing an evaluation.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Effective patch extent once in-patch (dilation) strides are applied:
  // P + (P - 1) * (D - 1) input elements.
  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  // Row/col positions within the rank-4 tensor depend on the storage order.
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only positions where the full (dilated) patch fits.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    // SAME padding: output covers every strided input position.
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  // The reducer averages only real values: padded entries are set to the
  // sentinel -highest() below, which AvgPoolMeanReducer skips and does not
  // count in the divisor.
  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif
  return input
      .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
                             in_strideRows, in_strideCols, padding_type,
                             -Eigen::NumTraits<CoeffReturnType>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
499 
500 /** CuboidAvgPooling
501  * \ingroup CXX11_NeuralNetworks_Module
502  *
503  * \brief Applies an average pooling over a multichannel input volume.
504  *
505  * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
507  *
508  * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height,
 * width, and
510  * others (in col-major, and the reverse of that if the input was row-major).
511  *
512  * The order of the depth, width and height dimensions can be swapped if
513  * needed.
514  *
515  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
// NOTE(review): the declared return type hard-codes AvgPoolMeanReducer<float>,
// so this overload is only well-formed for float-valued inputs — confirm
// against callers before generalizing.
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  // Cuboid (3-D) average pooling is only defined for rank-5 inputs.
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  // TensorRef lets us read the runtime dimensions of the input expression
  // without evaluating it.
  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  // Plane/row/col positions in the rank-5 tensor depend on storage order;
  // rows sit in the middle either way.
  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    // VALID padding: only positions where the full patch fits contribute.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    // SAME padding: output covers every strided input position.
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  // Flatten the patch extraction to rank 3 so a single-axis reduction over
  // dim 1 (the patch elements) computes the per-patch mean:
  // col-major: (channels, patch elements, everything else),
  // row-major: the reverse.
  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  // The reducer averages only real values: padded entries are set to the
  // sentinel -highest() below, which AvgPoolMeanReducer skips and does not
  // count in the divisor.
  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
618 
619 }  // end namespace Eigen
620 
621 #endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
622