/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/image_ops.cc
#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/resize_bilinear_op.h"

#include <memory>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/image_resizer_state.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

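// The `align_corners` and `half_pixel_centers` attrs select the coordinate
// transform mapping output pixels back into the input. Roughly (see
// image_resizer_state.h for the authoritative definitions):
//   align_corners:      scale = (in - 1) / (out - 1), corner pixels aligned.
//   half_pixel_centers: in = (out + 0.5) * scale - 0.5 (pixel centers).
//   default:            in = out * scale.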
template <typename Device, typename T>
class ResizeBilinearOp : public OpKernel {
 public:
  explicit ResizeBilinearOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
    OP_REQUIRES_OK(
        context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    ImageResizerState st(align_corners_, half_pixel_centers_);
    st.ValidateAndCreateOutput(context, input);

    if (!context->status().ok()) return;

    // Return if the output is empty.
    if (st.output->NumElements() == 0) return;

    typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
    TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();

    functor::ResizeBilinear<Device, T>()(
        context->eigen_device<Device>(), image_data, st.height_scale,
        st.width_scale, half_pixel_centers_, output_data);
  }

 private:
  bool align_corners_;
  bool half_pixel_centers_;
};

namespace {
// Compute the interpolation indices only once.
struct CachedInterpolation {
  int64 lower;  // Lower source index used in the interpolation
  int64 upper;  // Upper source index used in the interpolation
  // 1-D linear interpolation scale (see:
  // https://en.wikipedia.org/wiki/Bilinear_interpolation)
  float lerp;
};

template <typename Scaler>
inline void compute_interpolation_weights(const Scaler scaler,
                                          const int64 out_size,
                                          const int64 in_size,
                                          const float scale,
                                          CachedInterpolation* interpolation) {
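  // interpolation[] has out_size + 1 entries (see the callers below); the
  // extra entry is zeroed here so the whole table is initialized.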
  interpolation[out_size].lower = 0;
  interpolation[out_size].upper = 0;
  for (int64 i = out_size - 1; i >= 0; --i) {
    const float in = scaler(i, scale);
    const float in_f = std::floor(in);
    interpolation[i].lower =
        std::max(static_cast<int64>(in_f), static_cast<int64>(0));
    interpolation[i].upper =
        std::min(static_cast<int64>(std::ceil(in)), in_size - 1);
    interpolation[i].lerp = in - in_f;
  }
}
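
// For intuition: with the LegacyScaler used below (in = i * scale) and
// in_size = 2, out_size = 4, scale = 0.5, this produces
//   i = 0: lower = 0, upper = 0, lerp = 0.0
//   i = 1: lower = 0, upper = 1, lerp = 0.5
//   i = 2: lower = 1, upper = 1, lerp = 0.0
//   i = 3: lower = 1, upper = 1, lerp = 0.5  (upper clamped to in_size - 1)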

/**
 * Computes the bilinear interpolation from the appropriate 4 float points
 * and the linear interpolation weights.
 */
inline float compute_lerp(const float top_left, const float top_right,
                          const float bottom_left, const float bottom_right,
                          const float x_lerp, const float y_lerp) {
  const float top = top_left + (top_right - top_left) * x_lerp;
  const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
  return top + (bottom - top) * y_lerp;
}
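
// For example, compute_lerp(0, 1, 2, 3, 0.5f, 0.5f) gives top = 0.5,
// bottom = 2.5, and returns 1.5, the value at the center of the 2x2 patch.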

template <typename T>
void resize_image(
    typename TTypes<T, 4>::ConstTensor images, const int batch_size,
    const int64 in_height, const int64 in_width, const int64 out_height,
    const int64 out_width, const int channels,
    const std::vector<CachedInterpolation>& xs,
    const std::vector<CachedInterpolation>& ys,
    typename TTypes<float, 4>::Tensor output) TF_ATTRIBUTE_NOINLINE;
template <typename T>
void resize_image(typename TTypes<T, 4>::ConstTensor images,
                  const int batch_size, const int64 in_height,
                  const int64 in_width, const int64 out_height,
                  const int64 out_width, const int channels,
                  const std::vector<CachedInterpolation>& xs_vec,
                  const std::vector<CachedInterpolation>& ys,
                  typename TTypes<float, 4>::Tensor output) {
  const int64 in_row_size = in_width * channels;
  const int64 in_batch_num_values = in_height * in_row_size;
  const int64 out_row_size = out_width * channels;

  const T* input_b_ptr = images.data();
  const CachedInterpolation* xs = xs_vec.data();

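  // Fast path for the common three-channel case (e.g. RGB): the channel loop
  // is unrolled by hand so each output pixel is computed with straight-line
  // loads and lerps.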
  if (channels == 3) {
    float* output_y_ptr = output.data();
    for (int b = 0; b < batch_size; ++b) {
      for (int64 y = 0; y < out_height; ++y) {
        const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
        const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
        const float ys_lerp = ys[y].lerp;
        for (int64 x = 0; x < out_width; ++x) {
          const int64 xs_lower = xs[x].lower;
          const int64 xs_upper = xs[x].upper;
          const float xs_lerp = xs[x].lerp;

          // Read channel 0.
          const float top_left0(ys_input_lower_ptr[xs_lower + 0]);
          const float top_right0(ys_input_lower_ptr[xs_upper + 0]);
          const float bottom_left0(ys_input_upper_ptr[xs_lower + 0]);
          const float bottom_right0(ys_input_upper_ptr[xs_upper + 0]);

          // Read channel 1.
          const float top_left1(ys_input_lower_ptr[xs_lower + 1]);
          const float top_right1(ys_input_lower_ptr[xs_upper + 1]);
          const float bottom_left1(ys_input_upper_ptr[xs_lower + 1]);
          const float bottom_right1(ys_input_upper_ptr[xs_upper + 1]);

          // Read channel 2.
          const float top_left2(ys_input_lower_ptr[xs_lower + 2]);
          const float top_right2(ys_input_lower_ptr[xs_upper + 2]);
          const float bottom_left2(ys_input_upper_ptr[xs_lower + 2]);
          const float bottom_right2(ys_input_upper_ptr[xs_upper + 2]);

          // Compute output.
          output_y_ptr[x * channels + 0] =
              compute_lerp(top_left0, top_right0, bottom_left0, bottom_right0,
                           xs_lerp, ys_lerp);
          output_y_ptr[x * channels + 1] =
              compute_lerp(top_left1, top_right1, bottom_left1, bottom_right1,
                           xs_lerp, ys_lerp);
          output_y_ptr[x * channels + 2] =
              compute_lerp(top_left2, top_right2, bottom_left2, bottom_right2,
                           xs_lerp, ys_lerp);
        }
        output_y_ptr += out_row_size;
      }
      input_b_ptr += in_batch_num_values;
    }
  } else {
    float* output_y_ptr = output.data();
    for (int b = 0; b < batch_size; ++b) {
      for (int64 y = 0; y < out_height; ++y) {
        const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
        const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
        const float ys_lerp = ys[y].lerp;
        for (int64 x = 0; x < out_width; ++x) {
          auto xs_lower = xs[x].lower;
          auto xs_upper = xs[x].upper;
          auto xs_lerp = xs[x].lerp;
          for (int c = 0; c < channels; ++c) {
            const float top_left(ys_input_lower_ptr[xs_lower + c]);
            const float top_right(ys_input_lower_ptr[xs_upper + c]);
            const float bottom_left(ys_input_upper_ptr[xs_lower + c]);
            const float bottom_right(ys_input_upper_ptr[xs_upper + c]);
            output_y_ptr[x * channels + c] =
                compute_lerp(top_left, top_right, bottom_left, bottom_right,
                             xs_lerp, ys_lerp);
          }
        }
        output_y_ptr += out_row_size;
      }
      input_b_ptr += in_batch_num_values;
    }
  }
}

}  // namespace

// Partial specialization of ResizeBilinear functor for a CPUDevice.
namespace functor {
template <typename T>
struct ResizeBilinear<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
                  const float height_scale, const float width_scale,
                  bool half_pixel_centers,
                  typename TTypes<float, 4>::Tensor output) {
    const int batch_size = images.dimension(0);
    const int64 in_height = images.dimension(1);
    const int64 in_width = images.dimension(2);
    const int channels = images.dimension(3);

    const int64 out_height = output.dimension(1);
    const int64 out_width = output.dimension(2);

    // Handle no-op resizes efficiently.
    if (out_height == in_height && out_width == in_width) {
      output = images.template cast<float>();
      return;
    }
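
    // Note that even the no-op path casts to float: the op always produces
    // float output regardless of the input type T.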

    std::vector<CachedInterpolation> ys(out_height + 1);
    std::vector<CachedInterpolation> xs(out_width + 1);

    if (half_pixel_centers) {
      compute_interpolation_weights(HalfPixelScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(HalfPixelScaler(), out_width, in_width,
                                    width_scale, xs.data());

    } else {
      // Compute the cached interpolation weights on the x and y dimensions.
      compute_interpolation_weights(LegacyScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(LegacyScaler(), out_width, in_width,
                                    width_scale, xs.data());
    }
    // Scale x interpolation weights to avoid a multiplication during
    // iteration.
    for (int i = 0; i < xs.size(); ++i) {
      xs[i].lower *= channels;
      xs[i].upper *= channels;
    }
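
    // After this loop, xs[i].lower and xs[i].upper are element offsets into a
    // packed row: e.g. with channels == 3, source column 5 becomes offset 15.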

    resize_image<T>(images, batch_size, in_height, in_width, out_height,
                    out_width, channels, xs, ys, output);
  }
};
}  // namespace functor

template <typename Device, typename T>
class ResizeBilinearOpGrad : public OpKernel {
 public:
  explicit ResizeBilinearOpGrad(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
    OP_REQUIRES_OK(
        context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
  }

  void Compute(OpKernelContext* context) override {
    // Validate input.
    // First argument is gradient with respect to resized image.
    const Tensor& input = context->input(0);
    const Tensor& original_image = context->input(1);

    ImageResizerGradientState st(align_corners_, half_pixel_centers_);
    st.ValidateAndCreateOutput(context, input, original_image);

    if (!context->status().ok()) return;

    TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
    typename TTypes<T, 4>::Tensor output_grad(st.output->tensor<T, 4>());

    functor::ResizeBilinearGrad<Device, T>()(
        context->eigen_device<Device>(), input_grad, st.height_scale,
        st.width_scale, half_pixel_centers_, output_grad);
  }

 private:
  bool align_corners_;
  bool half_pixel_centers_;
};

// Partial specialization of ResizeBilinearGrad functor for a CPUDevice.
namespace functor {

template <typename T>
struct ResizeBilinearGrad<CPUDevice, T> {
  template <typename Scaler>
  void ResizeGradCore(const Scaler& scaler,
                      typename TTypes<float, 4>::ConstTensor input_grad,
                      const float height_scale, const float width_scale,
                      typename TTypes<T, 4>::Tensor output_grad) {
    const Eigen::Index batch = output_grad.dimension(0);
    const Eigen::Index original_height = output_grad.dimension(1);
    const Eigen::Index original_width = output_grad.dimension(2);
    const Eigen::Index channels = output_grad.dimension(3);

    const Eigen::Index resized_height = input_grad.dimension(1);
    const Eigen::Index resized_width = input_grad.dimension(2);

    output_grad.setZero();

    // Each resized pixel was computed as a weighted average of four input
    // pixels. Here we find the pixels that contributed to each output pixel
    // and add the corresponding coefficient to the gradient.
    // resized(b, y, x, c) = top_left * (1 - y) * (1 - x)
    //                     + top_right * (1 - y) * x
    //                     + bottom_left * y * (1 - x)
    //                     + bottom_right * y * x
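    // For example, with y_lerp = 0.25 and x_lerp = 0.5, a unit of incoming
    // gradient is split 0.375 / 0.375 / 0.125 / 0.125 across the top-left,
    // top-right, bottom-left and bottom-right source pixels.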
    for (Eigen::Index b = 0; b < batch; ++b) {
      for (Eigen::Index y = 0; y < resized_height; ++y) {
        const float in_y = scaler(y, height_scale);
        const Eigen::Index top_y_index =
            std::max(static_cast<Eigen::Index>(floorf(in_y)),
                     static_cast<Eigen::Index>(0));
        const Eigen::Index bottom_y_index = std::min(
            static_cast<Eigen::Index>(ceilf(in_y)), original_height - 1);
        const float y_lerp = in_y - floorf(in_y);
        const float inverse_y_lerp = (1.0f - y_lerp);
        for (Eigen::Index x = 0; x < resized_width; ++x) {
          const float in_x = scaler(x, width_scale);
          const Eigen::Index left_x_index =
              std::max(static_cast<Eigen::Index>(floorf(in_x)),
                       static_cast<Eigen::Index>(0));
          const Eigen::Index right_x_index = std::min(
              static_cast<Eigen::Index>(ceilf(in_x)), original_width - 1);
          const float x_lerp = in_x - floorf(in_x);
          const float inverse_x_lerp = (1.0f - x_lerp);
          for (Eigen::Index c = 0; c < channels; ++c) {
            output_grad(b, top_y_index, left_x_index, c) +=
                T(input_grad(b, y, x, c) * inverse_y_lerp * inverse_x_lerp);
            output_grad(b, top_y_index, right_x_index, c) +=
                T(input_grad(b, y, x, c) * inverse_y_lerp * x_lerp);
            output_grad(b, bottom_y_index, left_x_index, c) +=
                T(input_grad(b, y, x, c) * y_lerp * inverse_x_lerp);
            output_grad(b, bottom_y_index, right_x_index, c) +=
                T(input_grad(b, y, x, c) * y_lerp * x_lerp);
          }
        }
      }
    }
  }
  void operator()(const CPUDevice& d,
                  typename TTypes<float, 4>::ConstTensor input_grad,
                  const float height_scale, const float width_scale,
                  const bool half_pixel_centers,
                  typename TTypes<T, 4>::Tensor output_grad) {
    if (half_pixel_centers) {
      return ResizeGradCore(HalfPixelScaler(), input_grad, height_scale,
                            width_scale, output_grad);
    } else {
      return ResizeGradCore(LegacyScaler(), input_grad, height_scale,
                            width_scale, output_grad);
    }
  }
};

}  // namespace functor

#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_CPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<CPUDevice, T>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL

#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<CPUDevice, T>);

TF_CALL_half(REGISTER_GRAD_KERNEL);
TF_CALL_float(REGISTER_GRAD_KERNEL);
TF_CALL_double(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#if GOOGLE_CUDA

#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_GPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_KERNEL);

#undef REGISTER_KERNEL

#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#endif  // GOOGLE_CUDA

}  // namespace tensorflow