/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_
#define TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 Github version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    BlockAccessV2 = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();
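    // For example (derived from the assignments above): SYMMETRIC mode
    // (offset == 0) gives left_offset_ == -1 and right_offset_ == -1, while
    // REFLECT mode (offset == 1) gives left_offset_ == 0 and
    // right_offset_ == -2. ToInputCoord() below uses these to mirror
    // out-of-range coordinates with or without repeating the boundary value.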

    // This should trigger a compilation error if the padding dimensions and
    // expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

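    // Compute strides for both the input and the padded output. Illustrative
    // example (values not from the original source): a ColMajor input of
    // dimensions [4, 5] with paddings ((1, 1), (2, 2)) yields output
    // dimensions [6, 9], input_strides_ == [1, 4] and output_strides_ ==
    // [1, 6].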
    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be done in the
    // constructor to save computation. However, if packet access never
    // happens, moving it to the constructor would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, this means there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the whole packet maps to a contiguous region of the
    // input. That is, every index in the packet must lie between the left and
    // right padded regions of the effective inner-most dimension.
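    // Illustrative example (values not from the original source): with
    // dimensions_[dim] == 8, padding (2, 2) and output_strides_[dim] == 1,
    // the contiguous span is [2, 6), so a packet of size 4 starting at
    // index_mod == 2 can be loaded directly from the input.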
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    const Index index_mod = index % (dimensions_[dim] * output_strides_[dim]);
    if (left <= index_mod && (index_mod + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // If the region is not contiguous, fall back to coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed within a template class that
  // is not itself fully specialized. Adding a dummy parameter makes the
  // specializations below partial rather than full.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

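  // ToInputCoord maps an output coordinate k in dimension `dim` back onto the
  // input coordinate it mirrors. Worked example (illustrative values only):
  // input size m == 4, padding_[dim] == (2, 2), REFLECT mode (offset == 1,
  // so left_offset_ == 0 and right_offset_ == -2):
  //   output k:          0  1 | 2  3  4  5 | 6  7
  //   k - first:        -2 -1 | 0  1  2  3 | 4  5
  //   input coordinate:  2  1 | 0  1  2  3 | 2  1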
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, SYMMETRIC) or not (offset == 1, REFLECT).
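// For example (illustrative), padding the 1-D tensor [1, 2, 3] by (2, 2)
// produces:
//   SYMMETRIC (offset == 0): [2, 1, 1, 2, 3, 3, 2]
//   REFLECT   (offset == 1): [3, 2, 1, 2, 3, 2, 1]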
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

// The offset argument must be either 0 or 1. It controls whether the boundary
// values were replicated in the forward pass (offset == 0, SYMMETRIC) or not
// (offset == 1, REFLECT).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central part (the non-padded area) does not yet
    // include the gradients back-propagated through the padded areas. Those
    // gradient components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, the coordinate x(i) is in the (Python-style)
    // range [:paddings(i,0)] or [-paddings(i,1):].
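    //
    // Illustrative 1-D example (REFLECT mode, offset == 1; values not from
    // the original source): forward padding of [a, b, c] by (1, 1) produces
    // [b, a, b, c, b], so for an incoming gradient [g0, g1, g2, g3, g4] the
    // folding below yields the output [g1, g0 + g2 + g4, g3].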

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case when the coordinate in dimension i is in the
      // range [:paddings(i,0)]. This portion is added to the range
      // [paddings(i,0) + offset:2 * paddings(i,0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case when the coordinate in dimension i is in the
      // range [-paddings(i,1):]. This portion is added to the range
      // [-2 * paddings(i,1) - offset:-paddings(i,1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0,...,i were zero. Therefore, once the
      // loop terminates, the central part of the scratch buffer contains the
      // folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_