/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_
#define TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

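// Expression node for mirror padding. The padding mode is selected by the
// offset argument: offset == 0 pads symmetrically (boundary values are
// repeated), offset == 1 reflects (boundary values are not repeated).
// For example, padding the 1-D tensor [a b c d] by (2, 2) yields
//   symmetric (offset == 0): [b a | a b c d | d c]
//   reflect   (offset == 1): [c b | a b c d | c b]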
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 GitHub version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();

    // This should trigger a compilation error if the padding dimensions and
    // the expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

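    // Precompute input and output strides for the index arithmetic below:
    // for ColMajor the first dimension is innermost (stride 1), for RowMajor
    // the last dimension is innermost.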
    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be computed
    // once in the constructor to save work. However, if packet access never
    // happens, doing it there would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, this means there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the entire packet lies in a contiguous, unpadded region.
    // That is, every index in the packet must fall between the left and right
    // padded regions of the effective inner-most dimension.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    if (left <= index && (index + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Otherwise the packet straddles a padded region; fall back to coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed inside a class template that
  // is not itself fully specialized, so a dummy parameter is added to make
  // these specializations partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

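  // Maps an output coordinate k in dimension dim back to the corresponding
  // input coordinate. Worked example for an input dimension of size m = 4
  // with padding (2, 2): output coordinates 0..7 map to input coordinates
  //   1 0 0 1 2 3 3 2  for symmetric (left_offset_ = -1, right_offset_ = -1)
  //   2 1 0 1 2 3 2 1  for reflect   (left_offset_ =  0, right_offset_ = -2)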
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, symmetric padding) or not replicated
// (offset == 1, reflect padding).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

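// Sketch of how a kernel might invoke the functor above (not part of this
// header; `ctx`, `out`, `in`, and `paddings` are illustrative names), assuming
// 4-D float tensors, int32 paddings, and the To32Bit helper for 32-bit-indexed
// views:
//
//   functor::MirrorPad<CPUDevice, float, int32, 4>()(
//       ctx->eigen_device<CPUDevice>(), To32Bit(out->tensor<float, 4>()),
//       To32Bit(in.tensor<float, 4>()), paddings.matrix<int32>(), offset);
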
// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, symmetric padding) or not replicated
// (offset == 1, reflect padding).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central part (the non-padded area) does not yet
    // include the gradients back-propagated through the padded areas. Those
    // gradient components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, the coordinate x(i) is in the range (Python-style)
    // [:paddings(i,0)] or [-paddings(i,1):].

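    // Illustrative 1-D example (reflect mode, offset == 1): with paddings =
    // (2, 2) and an output of size 4, the gradient input g has size 8 and the
    // scratch buffer starts as a copy of g. The left fold adds
    // reverse(g[0:2]) = [g1, g0] into scratch[3:5]; the right fold adds
    // reverse(g[6:8]) = [g7, g6] into scratch[3:5]. The final output is
    // scratch[2:6].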
    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case where the coordinate in dimension i is in the
      // range [:paddings(i,0)]. This portion is added to the range
      // [paddings(i,0) + offset:2 * paddings(i,0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case where the coordinate in dimension i is in the
      // range [-paddings(i,1):]. This portion is added to the range
      // [-2 * paddings(i,1) - offset:-paddings(i,1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0,...,i were zero. Therefore, after
      // the loop terminates, the central part of the scratch buffer contains
      // the folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_