/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/nn_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/pad_op.h"

#include <memory>
#include <string>
#include <utility>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

template <typename Device, typename T, typename Tpadding>
class PadOp : public OpKernel {
 public:
  explicit PadOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& in0 = context->input(0);
    const Tensor& in1 = context->input(1);
    const int dims = in0.dims();
    static const int kMinDims = 0;
    static const int kMaxDims = 8;
    OP_REQUIRES(context, kMinDims <= dims && dims <= kMaxDims,
                errors::Unimplemented("inputs rank not in [", kMinDims, ",",
                                      kMaxDims, "]: ", dims));
    OP_REQUIRES(
        context,
        TensorShapeUtils::IsMatrix(in1.shape()) && in1.dim_size(1) == 2,
        errors::InvalidArgument("paddings must be a matrix with 2 columns: ",
                                in1.shape().DebugString()));
    OP_REQUIRES(
        context, dims == in1.dim_size(0),
        errors::InvalidArgument(
            "The first dimension of paddings must be the rank of inputs: ",
            in1.shape().DebugString(), " ", in0.shape().DebugString()));

    T pad_value = T();
    if (context->num_inputs() == 3) {
      const Tensor& constant_values = context->input(2);
      OP_REQUIRES(
          context, TensorShapeUtils::IsScalar(constant_values.shape()),
          errors::InvalidArgument("constant_values must be a scalar. Found: ",
                                  constant_values.shape().DebugString()));
      pad_value = context->input(2).scalar<T>()();
    }

    // Compute the shape of the output tensor, and allocate it.
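    // For example, an input of shape [2, 3] with paddings [[1, 1], [0, 2]]
    // produces an output of shape [1 + 2 + 1, 0 + 3 + 2] = [4, 5].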
    TensorShape output_shape;
    typename TTypes<Tpadding>::ConstMatrix paddings = in1.matrix<Tpadding>();
    for (int d = 0; d < dims; ++d) {
      const Tpadding before_d =
          paddings(d, 0);                       // Pad before existing elements.
      const Tpadding after_d = paddings(d, 1);  // Pad after existing elements.
      OP_REQUIRES(context, before_d >= 0 && after_d >= 0,
                  errors::InvalidArgument("Paddings must be non-negative: ",
                                          before_d, " ", after_d));
      const int64 size_d = in0.dim_size(d);
      output_shape.AddDim(before_d + size_d + after_d);
    }

    // If there is no padding to be done, forward the input to output.
    if (output_shape.num_elements() == in0.NumElements()) {
      // When num_elements == 0, shape may have changed.
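      // For instance, an input of shape [0, 2] with paddings [[0, 0], [1, 1]]
      // has zero elements before and after padding, yet the output shape is
      // [0, 4]; CopyFrom below forwards the buffer under the new shape.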
      Tensor out;
      CHECK(out.CopyFrom(in0, output_shape));
      context->set_output(0, out);
      return;
    }

    TensorShape collapsed_input_shape;
    TensorShape collapsed_output_shape;
    Tensor collapsed_paddings;
    if (dims > 1 && CollapseAdjacentNonPaddedDimensions(
                        in0.shape(), in1, output_shape, &collapsed_input_shape,
                        &collapsed_paddings, &collapsed_output_shape)) {
      Tensor collapsed_input;
      CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape));
      Tensor collapsed_output;
      AllocatorAttributes alloc_attrs;
      alloc_attrs.set_on_host(context->input_memory_type(0) == HOST_MEMORY);
      OP_REQUIRES_OK(context,
                     context->allocate_temp(collapsed_input.dtype(),
                                            collapsed_output_shape,
                                            &collapsed_output, alloc_attrs));
      const Tensor& collapsed_paddings_ref = collapsed_paddings;
      typename TTypes<Tpadding>::ConstMatrix collapsed_paddings_matrix =
          collapsed_paddings_ref.matrix<Tpadding>();

      OperateWithVariableRank(context, collapsed_input_shape.dims(),
                              collapsed_input, collapsed_paddings_matrix,
                              pad_value, &collapsed_output);

      Tensor output;
      CHECK(output.CopyFrom(collapsed_output, output_shape));
      context->set_output(0, output);
    } else {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      OperateWithVariableRank(context, dims, in0, paddings, pad_value, output);
    }
  }

 private:
  // Collapses adjacent dimensions that are not padded to one dimension for
  // speed. Returns true if any two dimensions are collapsed. For example,
  //
  //   Pad(input_shape=[8, 28, 28, 3],
  //       paddings=[[0, 0], [0, 0], [0, 0], [0, 1]])
  // is equivalent to
  //   Pad(input_shape=[6272, 3],
  //       paddings=[[0, 0], [0, 1]])
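  // Likewise, a padded dimension is never merged into its neighbors:
  //   Pad(input_shape=[2, 3, 4, 5],
  //       paddings=[[0, 0], [0, 0], [1, 0], [0, 0]])
  // is equivalent to
  //   Pad(input_shape=[6, 4, 5],
  //       paddings=[[0, 0], [1, 0], [0, 0]])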
  //
  // input_shape: the original input shape.
  // paddings_as_tensor: the original paddings.
  // output_shape: the original output shape.
  // collapsed_input_shape: the input shape after collapsing.
  // collapsed_paddings_as_tensor: the paddings after collapsing.
  // collapsed_output_shape: the output shape after collapsing.
  static bool CollapseAdjacentNonPaddedDimensions(
      const TensorShape& input_shape, const Tensor& paddings_as_tensor,
      const TensorShape& output_shape, TensorShape* collapsed_input_shape,
      Tensor* collapsed_paddings_as_tensor,
      TensorShape* collapsed_output_shape) {
    bool collapsed = false;
    typename TTypes<Tpadding>::ConstMatrix paddings =
        paddings_as_tensor.matrix<Tpadding>();
    std::vector<std::pair<int, int>> collapsed_paddings;
    int i = 0;
    while (i < paddings.dimension(0)) {
      if (paddings(i, 0) != 0 || paddings(i, 1) != 0) {
        // If padded, copy the original dimension over.
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         input_shape.dim_size(i));
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          output_shape.dim_size(i));
        collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)});
        ++i;
      } else {
        // If not padded, find the next dimension that is padded and collapse
        // all dimensions in between to one dimension.
        int64 collapsed_input_dim_size = input_shape.dim_size(i);
        int64 collapsed_output_dim_size = output_shape.dim_size(i);
        ++i;
        while (i < paddings.dimension(0) && paddings(i, 0) == 0 &&
               paddings(i, 1) == 0) {
          collapsed = true;
          collapsed_input_dim_size *= input_shape.dim_size(i);
          collapsed_output_dim_size *= output_shape.dim_size(i);
          ++i;
        }
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         collapsed_input_dim_size);
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          collapsed_output_dim_size);
        collapsed_paddings.push_back({0, 0});
      }
    }

    // Copy collapsed_paddings to collapsed_paddings_as_tensor.
    *collapsed_paddings_as_tensor =
        Tensor(paddings_as_tensor.dtype(),
               TensorShape({static_cast<int64>(collapsed_paddings.size()), 2}));
    auto collapsed_paddings_as_matrix =
        collapsed_paddings_as_tensor->matrix<Tpadding>();
    for (size_t i = 0; i < collapsed_paddings.size(); ++i) {
      collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first;
      collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second;
    }
    return collapsed;
  }

  void OperateWithVariableRank(OpKernelContext* context, int fixed_dims,
                               const Tensor& input,
                               typename TTypes<Tpadding>::ConstMatrix paddings,
                               T pad_value, Tensor* output) {
    // Invoke the dims-specific implementation.
    switch (fixed_dims) {
      case 0:
        Operate<0>(context, input.tensor<T, 0>(), paddings, pad_value, output);
        break;
      case 1:
        // TODO(irving): Once Pad doesn't need a scalar special case,
        // change flat to tensor. That is, once !allow_legacy_scalars().
        Operate<1>(context, input.flat<T>(), paddings, pad_value, output);
        break;
      case 2:
        Operate<2>(context, input.tensor<T, 2>(), paddings, pad_value, output);
        break;
      case 3:
        Operate<3>(context, input.tensor<T, 3>(), paddings, pad_value, output);
        break;
      case 4:
        Operate<4>(context, input.tensor<T, 4>(), paddings, pad_value, output);
        break;
      case 5:
        Operate<5>(context, input.tensor<T, 5>(), paddings, pad_value, output);
        break;
      case 6:
        Operate<6>(context, input.tensor<T, 6>(), paddings, pad_value, output);
        break;
      default:
        OP_REQUIRES(context, false,
                    errors::InvalidArgument("Only ranks up to 6 supported: ",
                                            input.shape().DebugString()));
    }
  }

  template <int Dims>
  void Operate(OpKernelContext* context,
               typename TTypes<T, Dims>::ConstTensor input,
               typename TTypes<Tpadding>::ConstMatrix paddings, T pad_value,
               Tensor* output) {
    CHECK_EQ(Dims, paddings.dimension(0));
    CHECK_EQ(2, paddings.dimension(1));
    Eigen::array<Eigen::IndexPair<Tpadding>, Dims> paddings_array;
    for (int i = 0; i < Dims; ++i) {
      paddings_array[i] = {paddings(i, 0), paddings(i, 1)};
    }
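    // The functor below (defined in pad_op.h) evaluates Eigen's pad()
    // expression on the chosen device, roughly
    //   output.device(d) = input.pad(paddings_array, pad_value);
    // so each newly created element is filled with pad_value.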
    functor::Pad<Device, T, Tpadding, Dims> functor;
    functor(context->eigen_device<Device>(), output->tensor<T, Dims>(), input,
            paddings_array, pad_value);
  }
};

#define REGISTER_KERNEL(type)                                     \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int64>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int64>);

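// The TF_CALL_* macros below expand REGISTER_KERNEL once per element type, so
// each POD type and tstring gets Pad and PadV2 CPU kernels for both int32 and
// int64 paddings.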
TF_CALL_POD_TYPES(REGISTER_KERNEL);
TF_CALL_tstring(REGISTER_KERNEL);
#undef REGISTER_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T, Dims)                                         \
  template <>                                                             \
  void Pad<GPUDevice, T, int32, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int32>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int32, Dims>;                  \
  template <>                                                             \
  void Pad<GPUDevice, T, int64, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int64>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int64, Dims>;

#define DECLARE_GPU_SPECS(T) \
  DECLARE_GPU_SPEC(T, 0);    \
  DECLARE_GPU_SPEC(T, 1);    \
  DECLARE_GPU_SPEC(T, 2);    \
  DECLARE_GPU_SPEC(T, 3);    \
  DECLARE_GPU_SPEC(T, 4);    \
  DECLARE_GPU_SPEC(T, 5);    \
  DECLARE_GPU_SPEC(T, 6);

TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPECS);
TF_CALL_int8(DECLARE_GPU_SPECS);
TF_CALL_uint8(DECLARE_GPU_SPECS);
}  // namespace functor

// Registration of the GPU implementations.
#define REGISTER_GPU_KERNEL(T)                                    \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int32>);            \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int64>);            \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int32>)             \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int64>)

TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNEL);
TF_CALL_int8(REGISTER_GPU_KERNEL);
TF_CALL_uint8(REGISTER_GPU_KERNEL);

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Pad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Pad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int64>);
REGISTER_KERNEL_BUILDER(Name("PadV2")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("constant_values")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int32>);
REGISTER_KERNEL_BUILDER(Name("PadV2")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("constant_values")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int64>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

}  // end namespace tensorflow