/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#include <memory>
#include <string>
#include <utility>

#include "tensorflow/core/kernels/spacetobatch_functor.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

namespace {

template <typename Device, typename T>
Status SpaceToBatchOpCompute(OpKernelContext* context,
                             const Tensor& orig_input_tensor,
                             const Tensor& orig_block_shape,
                             const Tensor& orig_paddings) {
  const int input_dims = orig_input_tensor.dims();
  if (!TensorShapeUtils::IsVector(orig_block_shape.shape())) {
    return errors::InvalidArgument("block_shape rank should be 1 instead of ",
                                   orig_block_shape.dims());
  }

  const int block_dims = orig_block_shape.dim_size(0);
  if (orig_input_tensor.dims() < 1 + block_dims) {
    return errors::InvalidArgument("input rank should be >= ", 1 + block_dims,
                                   " instead of ", orig_input_tensor.dims());
  }

  if (!(TensorShapeUtils::IsMatrix(orig_paddings.shape()) &&
        block_dims == orig_paddings.dim_size(0) &&
        2 == orig_paddings.dim_size(1))) {
    return errors::InvalidArgument("paddings should have shape [", block_dims,
                                   ", 2] instead of ",
                                   orig_paddings.shape().DebugString());
  }

  // To avoid out-of-bounds access in the case that the block_shape and/or
  // paddings tensors are concurrently modified, we must copy the values.
  gtl::InlinedVector<int64, 4> block_shape;
  gtl::InlinedVector<int64, 8> paddings;
  internal::spacetobatch::SubtleMustCopyFlat(orig_block_shape, &block_shape);
  internal::spacetobatch::SubtleMustCopyFlat(orig_paddings, &paddings);

  // Determine the length of the prefix of block dims that can be combined
  // into the batch dimension due to having no padding and block_shape=1.
  int removed_prefix_block_dims = 0;
  for (; removed_prefix_block_dims < block_dims; ++removed_prefix_block_dims) {
    const int dim = removed_prefix_block_dims;
    if (paddings[2 * dim] != 0 || paddings[2 * dim + 1] != 0 ||
        block_shape[dim] != 1) {
      break;
    }
  }

  // Determine the length of the suffix of block dims that can be combined
  // into the depth dimension due to having no padding and block_shape=1.
  int removed_suffix_block_dims = 0;
  for (; removed_suffix_block_dims < block_dims - removed_prefix_block_dims;
       ++removed_suffix_block_dims) {
    const int dim = block_dims - 1 - removed_suffix_block_dims;
    if (paddings[dim * 2] != 0 || paddings[dim * 2 + 1] != 0 ||
        block_shape[dim] != 1) {
      break;
    }
  }
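
  // For example (illustrative): with block_shape = [1, 2, 2] and
  // paddings = [[0, 0], [2, 2], [2, 2]], the first block dimension is a
  // no-op (block size 1, zero padding), so removed_prefix_block_dims == 1,
  // removed_suffix_block_dims == 0, and two block dimensions remain for the
  // functor to handle.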

  // Compute the product of the block_shape values.
  int64 block_shape_product = 1;
  for (int block_dim = 0; block_dim < block_dims; ++block_dim) {
    block_shape_product *= block_shape[block_dim];
  }
  if (block_shape_product <= 0) {
    return errors::InvalidArgument(
        "Product of block sizes must be positive, got ", block_shape_product);
  }

  const int internal_block_dims =
      block_dims - removed_prefix_block_dims - removed_suffix_block_dims;
  if (internal_block_dims > kMaxSpaceToBatchBlockDims) {
    return errors::InvalidArgument(
        "Number of non-combined block dimensions, ", internal_block_dims,
        ", must not exceed ", kMaxSpaceToBatchBlockDims);
  }

  if (internal_block_dims == 0) {
    context->set_output(0, orig_input_tensor);
    return Status::OK();
  }

  // For the purpose of computing the result, the input will be treated as
  // having this shape, of rank 2 + internal_block_dims.
  TensorShape internal_input_shape;

  // For the purpose of computing the result, the output will be treated as
  // having this shape, of rank 2 + internal_block_dims.
  TensorShape internal_output_shape;

  // The actual output shape exposed to callers.
  TensorShape external_output_shape;

  external_output_shape.AddDim(orig_input_tensor.dim_size(0) *
                               block_shape_product);

  int64 input_batch_size = orig_input_tensor.dim_size(0);
  for (int block_dim = 0; block_dim < removed_prefix_block_dims; ++block_dim) {
    const int64 size = orig_input_tensor.dim_size(block_dim + 1);
    input_batch_size *= size;
    external_output_shape.AddDim(size);
  }
  internal_input_shape.AddDim(input_batch_size);
  internal_output_shape.AddDim(input_batch_size * block_shape_product);

  for (int block_dim = removed_prefix_block_dims;
       block_dim < block_dims - removed_suffix_block_dims; ++block_dim) {
    const int64 pad_start = paddings[2 * block_dim],
                pad_end = paddings[2 * block_dim + 1];
    if (pad_start < 0 || pad_end < 0) {
      return errors::InvalidArgument("Paddings must be non-negative");
    }
    const int64 input_size = orig_input_tensor.dim_size(block_dim + 1);
    const int64 block_shape_value = block_shape[block_dim];
    const int64 padded_size = input_size + pad_start + pad_end;
    if (padded_size % block_shape_value != 0) {
      return errors::InvalidArgument("padded_shape[", block_dim,
                                     "]=", padded_size,
                                     " is not divisible by block_shape[",
                                     block_dim, "]=", block_shape_value);
    }
    internal_input_shape.AddDim(input_size);
    const int64 output_size = padded_size / block_shape_value;
    internal_output_shape.AddDim(output_size);
    external_output_shape.AddDim(output_size);
  }

  int64 depth = 1;
  for (int dim = block_dims - removed_suffix_block_dims + 1; dim < input_dims;
       ++dim) {
    const int64 size = orig_input_tensor.dim_size(dim);
    external_output_shape.AddDim(size);
    depth *= size;
  }
  internal_input_shape.AddDim(depth);
  internal_output_shape.AddDim(depth);
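
  // For example (illustrative): a [2, 5, 5, 3] input with block_shape =
  // [2, 2] and paddings = [[1, 0], [1, 0]] has padded spatial sizes of 6,
  // so external_output_shape is [2 * 4, 3, 3, 3] = [8, 3, 3, 3]; here the
  // internal output shape coincides with it because no block dimensions
  // were folded into the batch or depth dimensions.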

  // Allocate output tensor.
  Tensor* output_tensor = nullptr;
  TF_RETURN_IF_ERROR(
      context->allocate_output(0, external_output_shape, &output_tensor));

  const int64* internal_paddings = &paddings[2 * removed_prefix_block_dims];
  const int64* internal_block_shape = &block_shape[removed_prefix_block_dims];

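  // Dispatch to a functor specialized for the number of remaining
  // (non-combined) block dimensions; the case macro below is expanded once
  // per supported value of NUM_BLOCK_DIMS, up to kMaxSpaceToBatchBlockDims.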
  switch (internal_block_dims) {
#define TF_SPACETOBATCH_BLOCK_DIMS_CASE(NUM_BLOCK_DIMS)                   \
  case NUM_BLOCK_DIMS: {                                                  \
    TF_RETURN_IF_ERROR(                                                   \
        functor::SpaceToBatchFunctor<Device, T, NUM_BLOCK_DIMS, false>()( \
            context->eigen_device<Device>(),                              \
            orig_input_tensor.shaped<T, NUM_BLOCK_DIMS + 2>(              \
                internal_input_shape.dim_sizes()),                        \
            internal_block_shape, internal_paddings,                      \
            output_tensor->shaped<T, NUM_BLOCK_DIMS + 2>(                 \
                internal_output_shape.dim_sizes())));                     \
  } break;                                                                \
    /**/
    TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(TF_SPACETOBATCH_BLOCK_DIMS_CASE)
#undef TF_SPACETOBATCH_BLOCK_DIMS_CASE
  }
  return Status::OK();
}

}  // namespace

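// Handles the N-D form of the op, where block_shape and paddings arrive as
// runtime input tensors and are forwarded directly to SpaceToBatchOpCompute.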
template <typename Device, typename T>
class SpaceToBatchNDOp : public OpKernel {
 public:
  explicit SpaceToBatchNDOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& orig_input_tensor = context->input(0);
    const Tensor& orig_block_shape = context->input(1);
    const Tensor& orig_paddings = context->input(2);
    OP_REQUIRES_OK(context, SpaceToBatchOpCompute<Device, T>(
                                context, orig_input_tensor, orig_block_shape,
                                orig_paddings));
  }
};

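// Handles the legacy 4-D form of the op, where the block size is a scalar
// attribute: an equivalent [block_size, block_size] block_shape tensor is
// built once at construction time and reused on every Compute call.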
template <typename Device, typename T>
class SpaceToBatchOp : public OpKernel {
 public:
  explicit SpaceToBatchOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("block_size", &block_size_));
    OP_REQUIRES(
        context, block_size_ > 1,
        errors::InvalidArgument("Block size should be > 1: ", block_size_));
    // We don't use context->allocate_persistent because the allocation must
    // happen on the CPU regardless of Device.
    block_shape_ = Tensor(tensorflow::DT_INT64, TensorShape({2}));
    auto block_shape_vec = block_shape_.vec<int64>();
    block_shape_vec(0) = block_size_;
    block_shape_vec(1) = block_size_;
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& in0 = context->input(0);
    const Tensor& in1 = context->input(1);
    const int dims = in0.dims();

    static const int kRequiredDims = 4;
    OP_REQUIRES(context, kRequiredDims == dims,
                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
                                        " instead of: ", dims));
    OP_REQUIRES_OK(context, SpaceToBatchOpCompute<Device, T>(
                                context, in0, block_shape_, in1));
  }

 private:
  int block_size_;
  Tensor block_shape_;
};

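// The block_shape and paddings inputs are pinned to host memory because
// SpaceToBatchOpCompute reads their values on the CPU while validating them
// and computing the output shape.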
#define REGISTER(T)                                        \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatchND")           \
                              .Device(DEVICE_CPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("block_shape")   \
                              .HostMemory("paddings"),     \
                          SpaceToBatchNDOp<CPUDevice, T>); \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatch")             \
                              .Device(DEVICE_CPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("paddings"),     \
                          SpaceToBatchOp<CPUDevice, T>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER);
#undef REGISTER

#if GOOGLE_CUDA
#define REGISTER(T)                                        \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatchND")           \
                              .Device(DEVICE_GPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("block_shape")   \
                              .HostMemory("paddings"),     \
                          SpaceToBatchNDOp<GPUDevice, T>); \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatch")             \
                              .Device(DEVICE_GPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("paddings"),     \
                          SpaceToBatchOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
#endif  // GOOGLE_CUDA

}  // end namespace tensorflow