/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_
#define TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

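// Illustrative usage sketch (not part of the original header; names and
// shapes are hypothetical). A mirror-pad expression is built from a tensor
// expression, a per-dimension array of (before, after) padding pairs, and an
// offset, then assigned to an output tensor, which drives the evaluator
// defined below:
//
//   Eigen::Tensor<float, 2> input(4, 4);
//   Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 2> pads;
//   pads[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 1);
//   pads[1] = Eigen::IndexPair<Eigen::DenseIndex>(2, 2);
//   Eigen::Tensor<float, 2> padded(6, 8);
//   // offset == 1 selects reflect mode; offset == 0 selects symmetric mode.
//   padded = Eigen::TensorMirrorPadOp<decltype(pads), const decltype(input)>(
//       input, pads, /*offset=*/1);
//
// The MirrorPad functor near the bottom of this file performs the equivalent
// construction for TensorFlow kernels.
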
// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 Github version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    BlockAccessV2 = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();

    // This should trigger compilation error if padding dimensions and
    // expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be done in the
    // constructor to save computation. However, if packet access never
    // happens, moving it to the constructor would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, this means there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the packet spans a contiguous run of the input, i.e. all
    // of its indices lie between the padded regions in the effective
    // inner-most dimension.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    const Index index_mod = index % (dimensions_[dim] * output_strides_[dim]);
    if (left <= index_mod && (index_mod + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // If the run is not contiguous, fall back to coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

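  // Worked example (illustrative; not from the original source) of the mirror
  // index mapping implemented by ToInputCoord() below. For an input dimension
  // of size m = 5 with padding (2, 2), output coordinates map to input
  // coordinates as follows:
  //
  //   output coordinate:    0 1 | 2 3 4 5 6 | 7 8
  //   input, offset == 1:   2 1 | 0 1 2 3 4 | 3 2   (reflect)
  //   input, offset == 0:   1 0 | 0 1 2 3 4 | 4 3   (symmetric)
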
  // Full template specialization is not allowed within a non-fully specialized
  // template class. Adding a dummy parameter makes the specializations partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0) or not replicated (offset == 1).
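//
// For example (illustrative; not from the original source), padding the 1-D
// input [1 2 3] with two values on the left gives:
//   offset == 0 (SYMMETRIC): [2 1 | 1 2 3]  -- boundary value is repeated
//   offset == 1 (REFLECT):   [3 2 | 1 2 3]  -- boundary value is not repeated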
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0) or not replicated (offset == 1).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central part (non-padded area) does not yet include
    // the gradients back-propagated through the padded areas. Those gradient
    // components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, its coordinate x(i) is in the range
    // (Python-style) [:paddings(i,0)] or [-paddings(i,1):].

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case when the coordinate in dimension i is in the
      // range [:paddings(i,0)]. This portion is added to the range
      // [paddings(i,0) + offset:2 * paddings(i,0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case when the coordinate in dimension i is in the
      // range [-paddings(i,1):]. This portion is added to the range
      // [-2 * paddings(i,1) - offset:-paddings(i,1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0,...,i were zero. Therefore, after
      // the loop terminates, the central part of the scratch buffer contains
      // the folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_