/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/image_ops.cc
#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/resize_bilinear_op.h"

#include <memory>
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/image_resizer_state.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;

template <typename Device, typename T>
class ResizeBilinearOp : public OpKernel {
 public:
  explicit ResizeBilinearOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
    OP_REQUIRES_OK(
        context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    ImageResizerState st(align_corners_, half_pixel_centers_);
    st.ValidateAndCreateOutput(context, input);

    if (!context->status().ok()) return;

    // Return if the output is empty.
    if (st.output->NumElements() == 0) return;

    typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
    TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();

    functor::ResizeBilinear<Device, T>()(
        context->eigen_device<Device>(), image_data, st.height_scale,
        st.width_scale, half_pixel_centers_, output_data);
  }

 private:
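  // align_corners_: if true, the corner pixel centers of the input and output
  // images are aligned, so corner values are preserved exactly.
  // half_pixel_centers_: if true, sampling coordinates are taken at half-pixel
  // centers rather than at pixel origins.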
  bool align_corners_;
  bool half_pixel_centers_;
};

namespace {
// Compute the interpolation indices only once.
struct CachedInterpolation {
  int64 lower;  // Lower source index used in the interpolation
  int64 upper;  // Upper source index used in the interpolation
  // 1-D linear interpolation scale (see:
  // https://en.wikipedia.org/wiki/Bilinear_interpolation)
  float lerp;
};

template <typename Scaler>
inline void compute_interpolation_weights(const Scaler scaler,
                                          const int64 out_size,
                                          const int64 in_size,
                                          const float scale,
                                          CachedInterpolation* interpolation) {
  interpolation[out_size].lower = 0;
  interpolation[out_size].upper = 0;
  for (int64 i = out_size - 1; i >= 0; --i) {
    const float in = scaler(i, scale);
    const float in_f = std::floor(in);
    interpolation[i].lower =
        std::max(static_cast<int64>(in_f), static_cast<int64>(0));
    interpolation[i].upper =
        std::min(static_cast<int64>(std::ceil(in)), in_size - 1);
    interpolation[i].lerp = in - in_f;
  }
}
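// Illustrative example for compute_interpolation_weights, assuming the
// LegacyScaler used below maps output index i to i * scale: with
// out_size = 4, in_size = 3 and scale = 0.75 the cached entries are
//   i = 0: lower = 0, upper = 0, lerp = 0.00
//   i = 1: lower = 0, upper = 1, lerp = 0.75
//   i = 2: lower = 1, upper = 2, lerp = 0.50
//   i = 3: lower = 2, upper = 2, lerp = 0.25  (upper clamped to in_size - 1)
// The extra entry written at index out_size is zero-filled padding.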

/**
 * Computes the bilinear interpolation from the appropriate 4 float points
 * and the linear interpolation weights.
 */
inline float compute_lerp(const float top_left, const float top_right,
                          const float bottom_left, const float bottom_right,
                          const float x_lerp, const float y_lerp) {
  const float top = top_left + (top_right - top_left) * x_lerp;
  const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
  return top + (bottom - top) * y_lerp;
}
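// Numeric example: compute_lerp(1.0f, 3.0f, 5.0f, 7.0f, /*x_lerp=*/0.5f,
// /*y_lerp=*/0.25f) gives top = 2.0f, bottom = 6.0f and returns 3.0f.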

template <typename T>
void resize_image(
    typename TTypes<T, 4>::ConstTensor images, const int batch_size,
    const int64 in_height, const int64 in_width, const int64 out_height,
    const int64 out_width, const int channels,
    const std::vector<CachedInterpolation>& xs,
    const std::vector<CachedInterpolation>& ys,
    typename TTypes<float, 4>::Tensor output) TF_ATTRIBUTE_NOINLINE;
template <typename T>
void resize_image(typename TTypes<T, 4>::ConstTensor images,
                  const int batch_size, const int64 in_height,
                  const int64 in_width, const int64 out_height,
                  const int64 out_width, const int channels,
                  const std::vector<CachedInterpolation>& xs_vec,
                  const std::vector<CachedInterpolation>& ys,
                  typename TTypes<float, 4>::Tensor output) {
  const int64 in_row_size = in_width * channels;
  const int64 in_batch_num_values = in_height * in_row_size;
  const int64 out_row_size = out_width * channels;

  const T* input_b_ptr = images.data();
  const CachedInterpolation* xs = xs_vec.data();

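  // Fast path for the common 3-channel (e.g. RGB) case: the channel loop is
  // unrolled by hand so all three channels are read and interpolated per pixel.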
  if (channels == 3) {
    float* output_y_ptr = output.data();
    for (int b = 0; b < batch_size; ++b) {
      for (int64 y = 0; y < out_height; ++y) {
        const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
        const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
        const float ys_lerp = ys[y].lerp;
        for (int64 x = 0; x < out_width; ++x) {
          const int64 xs_lower = xs[x].lower;
          const int64 xs_upper = xs[x].upper;
          const float xs_lerp = xs[x].lerp;

          // Read channel 0.
          const float top_left0(ys_input_lower_ptr[xs_lower + 0]);
          const float top_right0(ys_input_lower_ptr[xs_upper + 0]);
          const float bottom_left0(ys_input_upper_ptr[xs_lower + 0]);
          const float bottom_right0(ys_input_upper_ptr[xs_upper + 0]);

          // Read channel 1.
          const float top_left1(ys_input_lower_ptr[xs_lower + 1]);
          const float top_right1(ys_input_lower_ptr[xs_upper + 1]);
          const float bottom_left1(ys_input_upper_ptr[xs_lower + 1]);
          const float bottom_right1(ys_input_upper_ptr[xs_upper + 1]);

          // Read channel 2.
          const float top_left2(ys_input_lower_ptr[xs_lower + 2]);
          const float top_right2(ys_input_lower_ptr[xs_upper + 2]);
          const float bottom_left2(ys_input_upper_ptr[xs_lower + 2]);
          const float bottom_right2(ys_input_upper_ptr[xs_upper + 2]);

          // Compute output.
          output_y_ptr[x * channels + 0] =
              compute_lerp(top_left0, top_right0, bottom_left0, bottom_right0,
                           xs_lerp, ys_lerp);
          output_y_ptr[x * channels + 1] =
              compute_lerp(top_left1, top_right1, bottom_left1, bottom_right1,
                           xs_lerp, ys_lerp);
          output_y_ptr[x * channels + 2] =
              compute_lerp(top_left2, top_right2, bottom_left2, bottom_right2,
                           xs_lerp, ys_lerp);
        }
        output_y_ptr += out_row_size;
      }
      input_b_ptr += in_batch_num_values;
    }
  } else {
    float* output_y_ptr = output.data();
    for (int b = 0; b < batch_size; ++b) {
      for (int64 y = 0; y < out_height; ++y) {
        const T* ys_input_lower_ptr = input_b_ptr + ys[y].lower * in_row_size;
        const T* ys_input_upper_ptr = input_b_ptr + ys[y].upper * in_row_size;
        const float ys_lerp = ys[y].lerp;
        for (int64 x = 0; x < out_width; ++x) {
          auto xs_lower = xs[x].lower;
          auto xs_upper = xs[x].upper;
          auto xs_lerp = xs[x].lerp;
          for (int c = 0; c < channels; ++c) {
            const float top_left(ys_input_lower_ptr[xs_lower + c]);
            const float top_right(ys_input_lower_ptr[xs_upper + c]);
            const float bottom_left(ys_input_upper_ptr[xs_lower + c]);
            const float bottom_right(ys_input_upper_ptr[xs_upper + c]);
            output_y_ptr[x * channels + c] =
                compute_lerp(top_left, top_right, bottom_left, bottom_right,
                             xs_lerp, ys_lerp);
          }
        }
        output_y_ptr += out_row_size;
      }
      input_b_ptr += in_batch_num_values;
    }
  }
}

}  // namespace

// Partial specialization of ResizeBilinear functor for a CPUDevice.
namespace functor {
template <typename T>
struct ResizeBilinear<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images,
                  const float height_scale, const float width_scale,
                  bool half_pixel_centers,
                  typename TTypes<float, 4>::Tensor output) {
    const int batch_size = images.dimension(0);
    const int64 in_height = images.dimension(1);
    const int64 in_width = images.dimension(2);
    const int channels = images.dimension(3);

    const int64 out_height = output.dimension(1);
    const int64 out_width = output.dimension(2);

    // Handle no-op resizes efficiently.
    if (out_height == in_height && out_width == in_width) {
      output = images.template cast<float>();
      return;
    }

    std::vector<CachedInterpolation> ys(out_height + 1);
    std::vector<CachedInterpolation> xs(out_width + 1);

    if (half_pixel_centers) {
      compute_interpolation_weights(HalfPixelScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(HalfPixelScaler(), out_width, in_width,
                                    width_scale, xs.data());

    } else {
      // Compute the cached interpolation weights on the x and y dimensions.
      compute_interpolation_weights(LegacyScaler(), out_height, in_height,
                                    height_scale, ys.data());
      compute_interpolation_weights(LegacyScaler(), out_width, in_width,
                                    width_scale, xs.data());
    }
    // Scale x interpolation weights to avoid a multiplication during iteration.
    for (int i = 0; i < xs.size(); ++i) {
      xs[i].lower *= channels;
      xs[i].upper *= channels;
    }

    resize_image<T>(images, batch_size, in_height, in_width, out_height,
                    out_width, channels, xs, ys, output);
  }
};
}  // namespace functor

template <typename Device, typename T>
class ResizeBilinearOpGrad : public OpKernel {
 public:
  explicit ResizeBilinearOpGrad(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
    OP_REQUIRES_OK(
        context, context->GetAttr("half_pixel_centers", &half_pixel_centers_));
  }

  void Compute(OpKernelContext* context) override {
    // Validate input.
    // First argument is gradient with respect to resized image.
    const Tensor& input = context->input(0);
    const Tensor& original_image = context->input(1);

    ImageResizerGradientState st(align_corners_, half_pixel_centers_);
    st.ValidateAndCreateOutput(context, input, original_image);

    if (!context->status().ok()) return;

    TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
    typename TTypes<T, 4>::Tensor output_grad(st.output->tensor<T, 4>());

    functor::ResizeBilinearGrad<Device, T>()(
        context->eigen_device<Device>(), input_grad, st.height_scale,
        st.width_scale, half_pixel_centers_, output_grad);
  }

 private:
  bool align_corners_;
  bool half_pixel_centers_;
};

// Partial specialization of ResizeBilinearGrad functor for a CPUDevice.
namespace functor {

template <typename T>
struct ResizeBilinearGrad<CPUDevice, T> {
  template <typename Scaler>
  void ResizeGradCore(const Scaler& scaler,
                      typename TTypes<float, 4>::ConstTensor input_grad,
                      const float height_scale, const float width_scale,
                      typename TTypes<T, 4>::Tensor output_grad) {
    const Eigen::Index batch = output_grad.dimension(0);
    const Eigen::Index original_height = output_grad.dimension(1);
    const Eigen::Index original_width = output_grad.dimension(2);
    const Eigen::Index channels = output_grad.dimension(3);

    const Eigen::Index resized_height = input_grad.dimension(1);
    const Eigen::Index resized_width = input_grad.dimension(2);

    output_grad.setZero();

    // Each resized pixel was computed as a weighted average of four input
    // pixels. Here we find the pixels that contributed to each output pixel
    // and add the corresponding coefficient to the gradient.
    // resized(b, y, x, c) = top_left * (1 - y) * (1 - x)
    //                       +  top_right * (1 - y) * x
    //                       +  bottom_left * y * (1 - x)
    //                       +  bottom_right * y * x
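    // Hence each incoming gradient value is scattered back to those same four
    // source pixels, weighted by (1 - y_lerp) or y_lerp and (1 - x_lerp) or
    // x_lerp respectively.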
    for (Eigen::Index b = 0; b < batch; ++b) {
      for (Eigen::Index y = 0; y < resized_height; ++y) {
        const float in_y = scaler(y, height_scale);
        const Eigen::Index top_y_index =
            std::max(static_cast<Eigen::Index>(floorf(in_y)),
                     static_cast<Eigen::Index>(0));
        const Eigen::Index bottom_y_index = std::min(
            static_cast<Eigen::Index>(ceilf(in_y)), original_height - 1);
        const float y_lerp = in_y - floorf(in_y);
        const float inverse_y_lerp = (1.0f - y_lerp);
        for (Eigen::Index x = 0; x < resized_width; ++x) {
          const float in_x = scaler(x, width_scale);
          const Eigen::Index left_x_index =
              std::max(static_cast<Eigen::Index>(floorf(in_x)),
                       static_cast<Eigen::Index>(0));
          const Eigen::Index right_x_index = std::min(
              static_cast<Eigen::Index>(ceilf(in_x)), original_width - 1);
          const float x_lerp = in_x - floorf(in_x);
          const float inverse_x_lerp = (1.0f - x_lerp);
          for (Eigen::Index c = 0; c < channels; ++c) {
            output_grad(b, top_y_index, left_x_index, c) +=
                T(input_grad(b, y, x, c) * inverse_y_lerp * inverse_x_lerp);
            output_grad(b, top_y_index, right_x_index, c) +=
                T(input_grad(b, y, x, c) * inverse_y_lerp * x_lerp);
            output_grad(b, bottom_y_index, left_x_index, c) +=
                T(input_grad(b, y, x, c) * y_lerp * inverse_x_lerp);
            output_grad(b, bottom_y_index, right_x_index, c) +=
                T(input_grad(b, y, x, c) * y_lerp * x_lerp);
          }
        }
      }
    }
  }
  void operator()(const CPUDevice& d,
                  typename TTypes<float, 4>::ConstTensor input_grad,
                  const float height_scale, const float width_scale,
                  const bool half_pixel_centers,
                  typename TTypes<T, 4>::Tensor output_grad) {
    if (half_pixel_centers) {
      return ResizeGradCore(HalfPixelScaler(), input_grad, height_scale,
                            width_scale, output_grad);
    } else {
      return ResizeGradCore(LegacyScaler(), input_grad, height_scale,
                            width_scale, output_grad);
    }
  }
};

}  // namespace functor

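// Register CPU kernels. The "size" input stays in host memory because the
// kernel reads its values directly when determining the output shape.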
#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_CPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<CPUDevice, T>);

TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);

#undef REGISTER_KERNEL

#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<CPUDevice, T>);

TF_CALL_half(REGISTER_GRAD_KERNEL);
TF_CALL_float(REGISTER_GRAD_KERNEL);
TF_CALL_double(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#if GOOGLE_CUDA

#define REGISTER_KERNEL(T)                            \
  REGISTER_KERNEL_BUILDER(Name("ResizeBilinear")      \
                              .Device(DEVICE_GPU)     \
                              .TypeConstraint<T>("T") \
                              .HostMemory("size"),    \
                          ResizeBilinearOp<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_KERNEL);

#undef REGISTER_KERNEL

#define REGISTER_GRAD_KERNEL(T)                                             \
  REGISTER_KERNEL_BUILDER(                                                  \
      Name("ResizeBilinearGrad").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
      ResizeBilinearOpGrad<GPUDevice, T>);

TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_GRAD_KERNEL);

#undef REGISTER_GRAD_KERNEL

#endif  // GOOGLE_CUDA

}  // namespace tensorflow