/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_
#define TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

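// Illustrative sketch only (the names `input`, `pads`, and `padded` are
// placeholders, not part of this header): the expression can be constructed
// directly for a rank-1 tensor as
//
//   Eigen::Tensor<float, 1> input(4);
//   Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> pads = {{{2, 2}}};
//   // offset == 1 selects REFLECT-style (non-replicating) mirroring.
//   Eigen::TensorMirrorPadOp<decltype(pads), const decltype(input)> padded(
//       input, pads, /*offset=*/1);
//
// In practice the expression is built through the MirrorPad functor in
// tensorflow::functor below.
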
// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 GitHub version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();

    // This should trigger a compilation error if the padding dimensions and
    // the expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be computed
    // once in the constructor to save work. However, if packet access never
    // happens, moving it to the constructor would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, this means there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the entire packet lies in the non-padded interior of the
    // effective inner-most dimension, i.e. all of its indices fall between the
    // left and right padded regions.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    if (left <= index && (index + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Otherwise the packet straddles a padded region, so fall back to loading
    // element by element with coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed within a non-fully specialized
  // template class. Adding a dummy parameter makes the specializations
  // partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

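  // ToInputCoord maps an output coordinate k in dimension dim to the mirrored
  // input coordinate. For example, for an input dimension of size m == 4 with
  // padding (2, 2):
  //   REFLECT   (offset == 1: left_offset_ == 0,  right_offset_ == -2):
  //     output coords 0..7 map to input coords 2 1 0 1 2 3 2 1,
  //     i.e. [a b c d] is padded to [c b a b c d c b].
  //   SYMMETRIC (offset == 0: left_offset_ == -1, right_offset_ == -1):
  //     output coords 0..7 map to input coords 1 0 0 1 2 3 3 2,
  //     i.e. [a b c d] is padded to [b a a b c d d c].
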
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// offset argument must be either 0 or 1. This controls whether the boundary
// values are replicated (offset == 0) or not replicated (offset == 1).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

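// For example, MirrorPad applied to a 1-D tensor [1, 2, 3] with paddings
// (2, 2) produces
//   [2, 1, 1, 2, 3, 3, 2] with offset == 0 (SYMMETRIC), and
//   [3, 2, 1, 2, 3, 2, 1] with offset == 1 (REFLECT).
// A minimal invocation sketch (the names `device`, `out`, `in`, and `pads`
// below are placeholders, not defined in this header):
//
//   MirrorPad<Eigen::ThreadPoolDevice, float, int32, 1>()(
//       device, out, in, pads, /*offset=*/1);
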
// offset argument must be either 0 or 1. This controls whether the boundary
// values are replicated (offset == 0) or not replicated (offset == 1).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central (non-padded) part does not yet include the
    // gradients back-propagated through the padded areas. Those gradient
    // components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, its coordinate x(i) is in the range
    // (Python-style) [:paddings(i,0)] or [-paddings(i,1):].

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case where the coordinate in dimension i is in the
      // range [:paddings(i,0)]. This portion is added to the range
      // [paddings(i,0) + offset:2 * paddings(i,0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case where the coordinate in dimension i is in the
      // range [-paddings(i,1):]. This portion is added to the range
      // [-2 * paddings(i,1) - offset:-paddings(i,1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0,...,i were zero. Therefore, after
      // the loop terminates, the central part of the scratch buffer contains
      // the folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_