/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/pad_op.h"

#include <memory>
#include <string>
#include <utility>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

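// PadOp implements the Pad and PadV2 kernels: output dimension d has size
// paddings(d, 0) + input.dim_size(d) + paddings(d, 1), and the newly created
// elements are filled with pad_value (T() for Pad, the constant_values input
// for PadV2).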
template <typename Device, typename T, typename Tpadding>
class PadOp : public OpKernel {
 public:
  explicit PadOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& in0 = context->input(0);
    const Tensor& in1 = context->input(1);
    const int dims = in0.dims();
    static const int kMinDims = 0;
    static const int kMaxDims = 8;
    OP_REQUIRES(context, kMinDims <= dims && dims <= kMaxDims,
                errors::Unimplemented("inputs rank not in [", kMinDims, ",",
                                      kMaxDims, "]: ", dims));
    OP_REQUIRES(
        context,
        TensorShapeUtils::IsMatrix(in1.shape()) && in1.dim_size(1) == 2,
        errors::InvalidArgument("paddings must be a matrix with 2 columns: ",
                                in1.shape().DebugString()));
    OP_REQUIRES(
        context, dims == in1.dim_size(0),
        errors::InvalidArgument(
            "The first dimension of paddings must be the rank of inputs: ",
            in1.shape().DebugString(), " ", in0.shape().DebugString()));

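    // Pad has two inputs and fills with a default-constructed T; PadV2 has a
    // third scalar input, constant_values, that supplies the fill value.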
    T pad_value = T();
    if (context->num_inputs() == 3) {
      const Tensor& constant_values = context->input(2);
      OP_REQUIRES(
          context, TensorShapeUtils::IsScalar(constant_values.shape()),
          errors::InvalidArgument("constant_values must be a scalar. Found: ",
                                  constant_values.shape().DebugString()));
      pad_value = context->input(2).scalar<T>()();
    }

    // Compute the shape of the output tensor, and allocate it.
    TensorShape output_shape;
    typename TTypes<Tpadding>::ConstMatrix paddings = in1.matrix<Tpadding>();
    for (int d = 0; d < dims; ++d) {
      const Tpadding before_d =
          paddings(d, 0);                       // Pad before existing elements.
      const Tpadding after_d = paddings(d, 1);  // Pad after existing elements.
      OP_REQUIRES(context, before_d >= 0 && after_d >= 0,
                  errors::InvalidArgument("Paddings must be non-negative: ",
                                          before_d, " ", after_d));
      const int64 size_d = in0.dim_size(d);
      output_shape.AddDim(before_d + size_d + after_d);
    }

    // If there is no padding to be done, forward the input to output.
    if (output_shape.num_elements() == in0.NumElements()) {
      // When num_elements == 0, shape may have changed.
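      // CopyFrom shares the input's underlying buffer and only rewrites the
      // shape metadata, so this fast path performs no data copy.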
      Tensor out;
      CHECK(out.CopyFrom(in0, output_shape));
      context->set_output(0, out);
      return;
    }

    TensorShape collapsed_input_shape;
    TensorShape collapsed_output_shape;
    Tensor collapsed_paddings;
    if (dims > 1 && CollapseAdjacentNonPaddedDimensions(
                        in0.shape(), in1, output_shape, &collapsed_input_shape,
                        &collapsed_paddings, &collapsed_output_shape)) {
      Tensor collapsed_input;
      CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape));
      Tensor collapsed_output;
      AllocatorAttributes alloc_attrs;
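      // Allocate the temporary on the host iff the input itself lives in
      // host memory (see the int32-on-GPU registrations below, which pin all
      // operands to host memory).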
      alloc_attrs.set_on_host(context->input_memory_type(0) == HOST_MEMORY);
      OP_REQUIRES_OK(context,
                     context->allocate_temp(collapsed_input.dtype(),
                                            collapsed_output_shape,
                                            &collapsed_output, alloc_attrs));
      const Tensor& collapsed_paddings_ref = collapsed_paddings;
      typename TTypes<Tpadding>::ConstMatrix collapsed_paddings_matrix =
          collapsed_paddings_ref.matrix<Tpadding>();

      OperateWithVariableRank(context, collapsed_input_shape.dims(),
                              collapsed_input, collapsed_paddings_matrix,
                              pad_value, &collapsed_output);

      Tensor output;
      CHECK(output.CopyFrom(collapsed_output, output_shape));
      context->set_output(0, output);
    } else {
      Tensor* output = nullptr;
      OP_REQUIRES_OK(context,
                     context->allocate_output(0, output_shape, &output));
      OperateWithVariableRank(context, dims, in0, paddings, pad_value, output);
    }
  }

 private:
  // Collapses adjacent dimensions that are not padded to one dimension for
  // speed. Returns true if any two dimensions are collapsed. For example,
  //
  //   Pad(input_shape=[8, 28, 28, 3],
  //       paddings=[[0, 0], [0, 0], [0, 0], [0, 1]])
  // is equivalent to
  //   Pad(input_shape=[6272, 3],
  //       paddings=[[0, 0], [0, 1]])
  //
  // input_shape: the original input shape.
  // paddings_as_tensor: the original paddings.
  // output_shape: the original output shape.
  // collapsed_input_shape: the input shape after collapsing.
  // collapsed_paddings_as_tensor: the paddings after collapsing.
  // collapsed_output_shape: the output shape after collapsing.
  static bool CollapseAdjacentNonPaddedDimensions(
      const TensorShape& input_shape, const Tensor& paddings_as_tensor,
      const TensorShape& output_shape, TensorShape* collapsed_input_shape,
      Tensor* collapsed_paddings_as_tensor,
      TensorShape* collapsed_output_shape) {
    bool collapsed = false;
    typename TTypes<Tpadding>::ConstMatrix paddings =
        paddings_as_tensor.matrix<Tpadding>();
    std::vector<std::pair<int, int>> collapsed_paddings;
    int i = 0;
    while (i < paddings.dimension(0)) {
      if (paddings(i, 0) != 0 || paddings(i, 1) != 0) {
        // If padded, copy the original dimension over.
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         input_shape.dim_size(i));
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          output_shape.dim_size(i));
        collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)});
        ++i;
      } else {
        // If not padded, find the next dimension that is padded and collapse
        // all dimensions in between to one dimension.
        int64 collapsed_input_dim_size = input_shape.dim_size(i);
        int64 collapsed_output_dim_size = output_shape.dim_size(i);
        ++i;
        while (i < paddings.dimension(0) && paddings(i, 0) == 0 &&
               paddings(i, 1) == 0) {
          collapsed = true;
          collapsed_input_dim_size *= input_shape.dim_size(i);
          collapsed_output_dim_size *= output_shape.dim_size(i);
          ++i;
        }
        collapsed_input_shape->InsertDim(collapsed_input_shape->dims(),
                                         collapsed_input_dim_size);
        collapsed_output_shape->InsertDim(collapsed_output_shape->dims(),
                                          collapsed_output_dim_size);
        collapsed_paddings.push_back({0, 0});
      }
    }

    // Copy collapsed_paddings to collapsed_paddings_as_tensor.
    *collapsed_paddings_as_tensor =
        Tensor(paddings_as_tensor.dtype(),
               TensorShape({static_cast<int64>(collapsed_paddings.size()), 2}));
    auto collapsed_paddings_as_matrix =
        collapsed_paddings_as_tensor->matrix<Tpadding>();
    for (size_t i = 0; i < collapsed_paddings.size(); ++i) {
      collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first;
      collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second;
    }
    return collapsed;
  }

  void OperateWithVariableRank(OpKernelContext* context, int fixed_dims,
                               const Tensor& input,
                               typename TTypes<Tpadding>::ConstMatrix paddings,
                               T pad_value, Tensor* output) {
    // Invoke the dims-specific implementation.
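    // Eigen's pad expression needs the tensor rank as a compile-time
    // constant, so the runtime rank is dispatched to a rank-templated
    // Operate<Dims>.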
    switch (fixed_dims) {
      case 0:
        Operate<0>(context, input.tensor<T, 0>(), paddings, pad_value, output);
        break;
      case 1:
        // TODO(irving): Once Pad doesn't need a scalar special case,
        // change flat to tensor.  That is, once !allow_legacy_scalars().
        Operate<1>(context, input.flat<T>(), paddings, pad_value, output);
        break;
      case 2:
        Operate<2>(context, input.tensor<T, 2>(), paddings, pad_value, output);
        break;
      case 3:
        Operate<3>(context, input.tensor<T, 3>(), paddings, pad_value, output);
        break;
      case 4:
        Operate<4>(context, input.tensor<T, 4>(), paddings, pad_value, output);
        break;
      case 5:
        Operate<5>(context, input.tensor<T, 5>(), paddings, pad_value, output);
        break;
      case 6:
        Operate<6>(context, input.tensor<T, 6>(), paddings, pad_value, output);
        break;
      default:
        OP_REQUIRES(context, false,
                    errors::InvalidArgument("Only ranks up to 6 supported: ",
                                            input.shape().DebugString()));
    }
  }

  template <int Dims>
  void Operate(OpKernelContext* context,
               typename TTypes<T, Dims>::ConstTensor input,
               typename TTypes<Tpadding>::ConstMatrix paddings, T pad_value,
               Tensor* output) {
    CHECK_EQ(Dims, paddings.dimension(0));
    CHECK_EQ(2, paddings.dimension(1));
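    // Repack the [Dims, 2] paddings matrix into the array-of-IndexPair form
    // expected by the Pad functor.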
    Eigen::array<Eigen::IndexPair<Tpadding>, Dims> paddings_array;
    for (int i = 0; i < Dims; ++i) {
      paddings_array[i] = {paddings(i, 0), paddings(i, 1)};
    }
    functor::Pad<Device, T, Tpadding, Dims> functor;
    functor(context->eigen_device<Device>(), output->tensor<T, Dims>(), input,
            paddings_array, pad_value);
  }
};

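// CPU kernel registrations for Pad and PadV2. The paddings input (and
// constant_values for PadV2) is pinned to host memory because Compute reads
// its contents to determine the output shape.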
#define REGISTER_KERNEL(type)                                     \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<CPUDevice, type, int64>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_CPU)                 \
                              .TypeConstraint<type>("T")          \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<CPUDevice, type, int64>);

TF_CALL_POD_TYPES(REGISTER_KERNEL);
TF_CALL_tstring(REGISTER_KERNEL);
#undef REGISTER_KERNEL

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
// Forward declarations of the functor specializations for GPU.
namespace functor {
#define DECLARE_GPU_SPEC(T, Dims)                                         \
  template <>                                                             \
  void Pad<GPUDevice, T, int32, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int32>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int32, Dims>;                  \
  template <>                                                             \
  void Pad<GPUDevice, T, int64, Dims>::operator()(                        \
      const GPUDevice& d, typename TTypes<T, Dims>::Tensor output,        \
      typename TTypes<T, Dims>::ConstTensor input,                        \
      Eigen::array<Eigen::IndexPair<int64>, Dims> paddings, T pad_value); \
  extern template struct Pad<GPUDevice, T, int64, Dims>;

#define DECLARE_GPU_SPECS(T) \
  DECLARE_GPU_SPEC(T, 0);    \
  DECLARE_GPU_SPEC(T, 1);    \
  DECLARE_GPU_SPEC(T, 2);    \
  DECLARE_GPU_SPEC(T, 3);    \
  DECLARE_GPU_SPEC(T, 4);    \
  DECLARE_GPU_SPEC(T, 5);    \
  DECLARE_GPU_SPEC(T, 6);

TF_CALL_GPU_ALL_TYPES(DECLARE_GPU_SPECS);
TF_CALL_int8(DECLARE_GPU_SPECS);
TF_CALL_uint8(DECLARE_GPU_SPECS);
}  // namespace functor

// Registration of the GPU implementations.
#define REGISTER_GPU_KERNEL(T)                                    \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int32>);            \
  REGISTER_KERNEL_BUILDER(Name("Pad")                             \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings"),            \
                          PadOp<GPUDevice, T, int64>);            \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int32>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int32>);            \
  REGISTER_KERNEL_BUILDER(Name("PadV2")                           \
                              .Device(DEVICE_GPU)                 \
                              .TypeConstraint<T>("T")             \
                              .TypeConstraint<int64>("Tpaddings") \
                              .HostMemory("paddings")             \
                              .HostMemory("constant_values"),     \
                          PadOp<GPUDevice, T, int64>);

TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNEL);
TF_CALL_int8(REGISTER_GPU_KERNEL);
TF_CALL_uint8(REGISTER_GPU_KERNEL);

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Pad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int32>);
REGISTER_KERNEL_BUILDER(Name("Pad")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int64>);
REGISTER_KERNEL_BUILDER(Name("PadV2")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("constant_values")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int32>);
REGISTER_KERNEL_BUILDER(Name("PadV2")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int64>("Tpaddings")
                            .HostMemory("input")
                            .HostMemory("paddings")
                            .HostMemory("constant_values")
                            .HostMemory("output"),
                        PadOp<CPUDevice, int32, int64>);
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

}  // end namespace tensorflow