/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#include <memory>
#include <string>
#include <utility>

#include "tensorflow/core/kernels/spacetobatch_functor.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

namespace {
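// Shared implementation for the SpaceToBatch and SpaceToBatchND kernels:
// validates block_shape and paddings, folds block dimensions that are no-ops
// (block size 1 and zero padding) into the batch or depth dimension, and then
// dispatches to SpaceToBatchFunctor on the collapsed shapes. Conceptually,
// each block of the spatial dimensions is moved into the batch dimension;
// e.g. an input of shape [1, 4, 4, 1] with block_shape = [2, 2] and zero
// paddings yields an output of shape [4, 2, 2, 1].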
44 template <typename Device, typename T>
SpaceToBatchOpCompute(OpKernelContext * context,const Tensor & orig_input_tensor,const Tensor & orig_block_shape,const Tensor & orig_paddings)45 Status SpaceToBatchOpCompute(OpKernelContext* context,
46 const Tensor& orig_input_tensor,
47 const Tensor& orig_block_shape,
48 const Tensor& orig_paddings) {
49 const int input_dims = orig_input_tensor.dims();
50 if (!TensorShapeUtils::IsVector(orig_block_shape.shape())) {
51 return errors::InvalidArgument("block_shape rank should be 1 instead of ",
52 orig_block_shape.dims());
53 }
54
55 const int block_dims = orig_block_shape.dim_size(0);
56 if (orig_input_tensor.dims() < 1 + block_dims) {
57 return errors::InvalidArgument("input rank should be >= ", 1 + block_dims,
58 " instead of ", orig_input_tensor.dims());
59 }
60
61 if (!(TensorShapeUtils::IsMatrix(orig_paddings.shape()) &&
62 block_dims == orig_paddings.dim_size(0) &&
63 2 == orig_paddings.dim_size(1))) {
64 return errors::InvalidArgument("paddings should have shape [", block_dims,
65 ", 2] instead of ",
66 orig_paddings.shape().DebugString());
67 }
68
69 // To avoid out-of-bounds access in the case that the block_shape and/or
70 // paddings tensors are concurrently modified, we must copy the values.
71 gtl::InlinedVector<int64, 4> block_shape;
72 gtl::InlinedVector<int64, 8> paddings;
73 internal::spacetobatch::SubtleMustCopyFlat(orig_block_shape, &block_shape);
74 internal::spacetobatch::SubtleMustCopyFlat(orig_paddings, &paddings);
75
76 // Determine the length of the prefix of block dims that can be combined
77 // into the batch dimension due to having no padding and block_shape=1.
78 int removed_prefix_block_dims = 0;
79 for (; removed_prefix_block_dims < block_dims; ++removed_prefix_block_dims) {
80 const int dim = removed_prefix_block_dims;
81 if (paddings[2 * dim] != 0 || paddings[2 * dim + 1] != 0 ||
82 block_shape[dim] != 1) {
83 break;
84 }
85 }
86
87 // Determine the length of the suffix of block dims that can be combined
88 // into the depth dimension due to having no padding and block_shape=1.
89 int removed_suffix_block_dims = 0;
90 for (; removed_suffix_block_dims < block_dims - removed_prefix_block_dims;
91 ++removed_suffix_block_dims) {
92 const int dim = block_dims - 1 - removed_suffix_block_dims;
93 if (paddings[dim * 2] != 0 || paddings[dim * 2 + 1] != 0 ||
94 block_shape[dim] != 1) {
95 break;
96 }
97 }
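
  // Illustrative example: with block_shape = [1, 2, 1] and zero paddings on
  // block dims 0 and 2, the first block dim folds into the batch dimension
  // and the last folds into depth, leaving internal_block_dims == 1.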

  // Compute the product of the block_shape values.
  int64 block_shape_product = 1;
  for (int block_dim = 0; block_dim < block_dims; ++block_dim) {
    block_shape_product *= block_shape[block_dim];
  }
  if (block_shape_product <= 0) {
    return errors::InvalidArgument(
        "Product of block sizes must be positive, got ", block_shape_product);
  }

  const int internal_block_dims =
      block_dims - removed_prefix_block_dims - removed_suffix_block_dims;
  if (internal_block_dims > kMaxSpaceToBatchBlockDims) {
    return errors::InvalidArgument(
        "Number of non-combined block dimensions is ", internal_block_dims,
        " but must not exceed ", kMaxSpaceToBatchBlockDims);
  }

  if (internal_block_dims == 0) {
    context->set_output(0, orig_input_tensor);
    return Status::OK();
  }

  // For the purpose of computing the result, the input will be treated as
  // having this shape, of rank 2 + internal_block_dims.
  TensorShape internal_input_shape;

  // For the purpose of computing the result, the output will be treated as
  // having this shape, of rank 2 + internal_block_dims.
  TensorShape internal_output_shape;

  // The actual output shape exposed to callers.
  TensorShape external_output_shape;
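  // Illustrative example: for a 4-D input [B, H, W, C] with block_shape =
  // [1, 2] and zero paddings, the shapes below become
  //   internal_input_shape  = [B * H, W, C]
  //   internal_output_shape = [2 * B * H, W / 2, C]
  //   external_output_shape = [2 * B, H, W / 2, C]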

  external_output_shape.AddDim(orig_input_tensor.dim_size(0) *
                               block_shape_product);

  int64 input_batch_size = orig_input_tensor.dim_size(0);
  for (int block_dim = 0; block_dim < removed_prefix_block_dims; ++block_dim) {
    const int64 size = orig_input_tensor.dim_size(block_dim + 1);
    input_batch_size *= size;
    external_output_shape.AddDim(size);
  }
  internal_input_shape.AddDim(input_batch_size);
  internal_output_shape.AddDim(input_batch_size * block_shape_product);

  for (int block_dim = removed_prefix_block_dims;
       block_dim < block_dims - removed_suffix_block_dims; ++block_dim) {
    const int64 pad_start = paddings[2 * block_dim],
                pad_end = paddings[2 * block_dim + 1];
    if (pad_start < 0 || pad_end < 0) {
      return errors::InvalidArgument("Paddings must be non-negative");
    }
    const int64 input_size = orig_input_tensor.dim_size(block_dim + 1);
    const int64 block_shape_value = block_shape[block_dim];
    const int64 padded_size = input_size + pad_start + pad_end;
    if (padded_size % block_shape_value != 0) {
      return errors::InvalidArgument("padded_shape[", block_dim,
                                     "]=", padded_size,
                                     " is not divisible by block_shape[",
                                     block_dim, "]=", block_shape_value);
    }
    internal_input_shape.AddDim(input_size);
    const int64 output_size = padded_size / block_shape_value;
    internal_output_shape.AddDim(output_size);
    external_output_shape.AddDim(output_size);
  }

  int64 depth = 1;
  for (int dim = block_dims - removed_suffix_block_dims + 1; dim < input_dims;
       ++dim) {
    const int64 size = orig_input_tensor.dim_size(dim);
    external_output_shape.AddDim(size);
    depth *= size;
  }
  internal_input_shape.AddDim(depth);
  internal_output_shape.AddDim(depth);

  // Allocate output tensor.
  Tensor* output_tensor = nullptr;
  TF_RETURN_IF_ERROR(
      context->allocate_output(0, external_output_shape, &output_tensor));

  const int64* internal_paddings = &paddings[2 * removed_prefix_block_dims];
  const int64* internal_block_shape = &block_shape[removed_prefix_block_dims];

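  // Dispatch on the number of non-combined block dimensions so the functor is
  // instantiated with a compile-time NUM_BLOCK_DIMS.
  // TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS (defined alongside
  // kMaxSpaceToBatchBlockDims in spacetobatch_functor.h) expands the case
  // macro below once per supported value.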
  switch (internal_block_dims) {
#define TF_SPACETOBATCH_BLOCK_DIMS_CASE(NUM_BLOCK_DIMS)                   \
  case NUM_BLOCK_DIMS: {                                                  \
    TF_RETURN_IF_ERROR(                                                   \
        functor::SpaceToBatchFunctor<Device, T, NUM_BLOCK_DIMS, false>()( \
            context->eigen_device<Device>(),                              \
            orig_input_tensor.shaped<T, NUM_BLOCK_DIMS + 2>(              \
                internal_input_shape.dim_sizes()),                        \
            internal_block_shape, internal_paddings,                      \
            output_tensor->shaped<T, NUM_BLOCK_DIMS + 2>(                 \
                internal_output_shape.dim_sizes())));                     \
  } break;                                                                \
  /**/
    TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(TF_SPACETOBATCH_BLOCK_DIMS_CASE)
#undef TF_SPACETOBATCH_BLOCK_DIMS_CASE
  }
  return Status::OK();
}

}  // namespace

template <typename Device, typename T>
class SpaceToBatchNDOp : public OpKernel {
 public:
  explicit SpaceToBatchNDOp(OpKernelConstruction* context)
      : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& orig_input_tensor = context->input(0);
    const Tensor& orig_block_shape = context->input(1);
    const Tensor& orig_paddings = context->input(2);
    OP_REQUIRES_OK(context, SpaceToBatchOpCompute<Device, T>(
                                context, orig_input_tensor, orig_block_shape,
                                orig_paddings));
  }
};
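
// Illustrative usage of the SpaceToBatchND kernel through the op's public
// Python wrapper (assuming the standard tf.space_to_batch_nd binding):
//   # x has shape [1, 2, 2, 1]; the result has shape [4, 1, 1, 1].
//   y = tf.space_to_batch_nd(x, block_shape=[2, 2],
//                            paddings=[[0, 0], [0, 0]])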

template <typename Device, typename T>
class SpaceToBatchOp : public OpKernel {
 public:
  explicit SpaceToBatchOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("block_size", &block_size_));
    OP_REQUIRES(
        context, block_size_ > 1,
        errors::InvalidArgument("Block size should be > 1: ", block_size_));
    // We don't use context->allocate_persistent because the allocation must
    // happen on the CPU regardless of Device.
    block_shape_ = Tensor(tensorflow::DT_INT64, TensorShape({2}));
    auto block_shape_vec = block_shape_.vec<int64>();
    block_shape_vec(0) = block_size_;
    block_shape_vec(1) = block_size_;
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& in0 = context->input(0);
    const Tensor& in1 = context->input(1);
    const int dims = in0.dims();

    static const int kRequiredDims = 4;
    OP_REQUIRES(context, kRequiredDims == dims,
                errors::InvalidArgument("Input rank should be: ", kRequiredDims,
                                        " instead of: ", dims));
    OP_REQUIRES_OK(context, SpaceToBatchOpCompute<Device, T>(
                                context, in0, block_shape_, in1));
  }

 private:
  int block_size_;
  Tensor block_shape_;
};
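
// Illustrative usage of the legacy 2-D SpaceToBatch kernel (assuming the
// usual tf.space_to_batch wrapper, which takes a scalar block_size):
//   y = tf.space_to_batch(x, paddings=[[0, 0], [0, 0]], block_size=2)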

#define REGISTER(T)                                        \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatchND")           \
                              .Device(DEVICE_CPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("block_shape")   \
                              .HostMemory("paddings"),     \
                          SpaceToBatchNDOp<CPUDevice, T>); \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatch")             \
                              .Device(DEVICE_CPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("paddings"),     \
                          SpaceToBatchOp<CPUDevice, T>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER);
#undef REGISTER

#if GOOGLE_CUDA
#define REGISTER(T)                                        \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatchND")           \
                              .Device(DEVICE_GPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("block_shape")   \
                              .HostMemory("paddings"),     \
                          SpaceToBatchNDOp<GPUDevice, T>); \
  REGISTER_KERNEL_BUILDER(Name("SpaceToBatch")             \
                              .Device(DEVICE_GPU)          \
                              .TypeConstraint<T>("T")      \
                              .HostMemory("paddings"),     \
                          SpaceToBatchOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES(REGISTER);
#undef REGISTER
#endif  // GOOGLE_CUDA

}  // end namespace tensorflow