1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // This is a helper struct to package up the input and output
17 // parameters of an image resizer (the height, widths, etc.). To
18 // reduce code duplication and ensure consistency across the different
19 // resizers, it performs the input validation.
20
21 #ifndef TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
22 #define TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
23
24 #define EIGEN_USE_THREADS
25 #include <math.h>
26
27 #include <algorithm>
28 #include <array>
29
30 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
31 #include "tensorflow/core/framework/bounds_check.h"
32 #include "tensorflow/core/framework/op_kernel.h"
33 #include "tensorflow/core/framework/register_types.h"
34 #include "tensorflow/core/framework/tensor.h"
35 #include "tensorflow/core/framework/tensor_shape.h"
36 #include "tensorflow/core/framework/types.h"
37
38 namespace tensorflow {
39
40 // CalculateResizeScale determines the float scaling factor.
// Returns the ratio used to map output coordinates back to input coordinates.
//
// When `align_corners` is set and the output has more than one element, the
// ratio is (in_size - 1) / (out_size - 1); otherwise it is
// in_size / out_size. The division is carried out in float.
inline float CalculateResizeScale(int64_t in_size, int64_t out_size,
                                  bool align_corners) {
  const bool use_corner_alignment = align_corners && out_size > 1;
  if (use_corner_alignment) {
    const float last_out_index = static_cast<float>(out_size - 1);
    return (in_size - 1) / last_out_index;
  }
  return in_size / static_cast<float>(out_size);
}
47
// Half pixel scaler scales assuming that the pixel centers are at 0.5, i.e. the
// floating point coordinates of the top-left pixel are (0.5, 0.5).
struct HalfPixelScaler {
  HalfPixelScaler() {}

  // Maps output index `x` to a source coordinate using `scale`, treating the
  // index as the pixel center (x + 0.5). The trailing 0.5 subtraction converts
  // the result back to the old coordinate system that the existing bilinear
  // sampling code etc. assumes.
  inline float operator()(const int x, const float scale) const {
    const float pixel_center = static_cast<float>(x) + 0.5f;
    return pixel_center * scale - 0.5f;
  }
};
58
// Older, incorrect scaling method that causes all resizes to have a slight
// translation, leading to inconsistent results. For example, a flip followed
// by a resize gives different results than a resize followed by a flip.
struct LegacyScaler {
  LegacyScaler() {}

  // Maps output index `x` to a source coordinate by plain multiplication with
  // `scale`; no half-pixel offset is applied.
  inline float operator()(const int x, const float scale) const {
    const float index_as_float = static_cast<float>(x);
    return index_as_float * scale;
  }
};
68
69 struct ImageResizerState {
ImageResizerStateImageResizerState70 explicit ImageResizerState(bool align_corners, bool half_pixel_centers)
71 : align_corners_(align_corners),
72 half_pixel_centers_(half_pixel_centers) {}
73
74 // ValidateAndCalculateOutputSize checks the bounds on the input tensors
75 // and requested size, sets up some of the resizing state such as the
76 // height_scale and width_scale, and calculates the output size.
77 // If any of these operations fails, it sets an error status in
78 // the context, which the caller must check.
ValidateAndCalculateOutputSizeImageResizerState79 void ValidateAndCalculateOutputSize(OpKernelContext* context) {
80 OP_REQUIRES(
81 context,
82 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
83 errors::InvalidArgument("If half_pixel_centers is True, "
84 "align_corners must be False."));
85
86 const TensorShape& input_shape = context->input(0).shape();
87 OP_REQUIRES(context, input_shape.dims() == 4,
88 errors::InvalidArgument("input must be 4-dimensional",
89 input_shape.DebugString()));
90 batch_size = input_shape.dim_size(0);
91 channels = input_shape.dim_size(3);
92 OP_REQUIRES(
93 context, channels > 0,
94 errors::InvalidArgument("image must have at least one channel"));
95
96 // Verify and assign `in_height` and `in_width`.
97 OP_REQUIRES(
98 context, input_shape.dim_size(1) > 0 && input_shape.dim_size(2) > 0,
99 errors::InvalidArgument("input image must be of non-zero size"));
100 OP_REQUIRES(
101 context,
102 FastBoundsCheck(input_shape.dim_size(1),
103 std::numeric_limits<int32>::max()) &&
104 FastBoundsCheck(input_shape.dim_size(2),
105 std::numeric_limits<int32>::max()),
106 errors::InvalidArgument("input sizes must be between 0 and max int32"));
107 in_height = static_cast<int32>(input_shape.dim_size(1));
108 in_width = static_cast<int32>(input_shape.dim_size(2));
109
110 // Verify the output tensor's shape.
111 const Tensor& shape_t = context->input(1);
112 OP_REQUIRES(context, shape_t.dims() == 1,
113 errors::InvalidArgument("shape_t must be 1-dimensional",
114 shape_t.shape().DebugString()));
115 OP_REQUIRES(context, shape_t.NumElements() == 2,
116 errors::InvalidArgument("shape_t must have two elements",
117 shape_t.shape().DebugString()));
118
119 // Verify and assign `out_height` and `out_width`.
120 auto Svec = shape_t.vec<int32>();
121 out_height = internal::SubtleMustCopy(Svec(0));
122 out_width = internal::SubtleMustCopy(Svec(1));
123 OP_REQUIRES(context, out_height > 0 && out_width > 0,
124 errors::InvalidArgument("output dimensions must be positive"));
125
126 height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
127 width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
128
129 // Guard against overflows
130 OP_REQUIRES(context,
131 ceilf((out_height - 1) * height_scale) <=
132 static_cast<float>(std::numeric_limits<int64>::max()),
133 errors::InvalidArgument(
134 "input image height scale would cause an overflow"));
135 OP_REQUIRES(
136 context,
137 ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
138 errors::InvalidArgument(
139 "input image width scale would cause an overflow"));
140 }
141
142 // Calculates all the required variables, and allocates the output.
ValidateAndCreateOutputImageResizerState143 void ValidateAndCreateOutput(OpKernelContext* context) {
144 ValidateAndCalculateOutputSize(context);
145 if (!context->status().ok()) return;
146 OP_REQUIRES_OK(
147 context,
148 context->allocate_output(
149 0, TensorShape({batch_size, out_height, out_width, channels}),
150 &output));
151 }
152
153 int64 batch_size;
154 int64 out_height;
155 int64 out_width;
156 int64 in_height;
157 int64 in_width;
158 int64 channels;
159 float height_scale;
160 float width_scale;
161 Tensor* output = nullptr;
162
163 private:
164 bool align_corners_;
165 bool half_pixel_centers_;
166 };
167
168 struct ImageResizerGradientState {
ImageResizerGradientStateImageResizerGradientState169 explicit ImageResizerGradientState(bool align_corners,
170 bool half_pixel_centers)
171 : align_corners_(align_corners),
172 half_pixel_centers_(half_pixel_centers) {}
173
ValidateAndCreateOutputImageResizerGradientState174 void ValidateAndCreateOutput(OpKernelContext* context) {
175 OP_REQUIRES(
176 context,
177 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
178 errors::InvalidArgument("If half_pixel_centers is True, "
179 "align_corners must be False."));
180
181 const Tensor& input = context->input(0);
182 OP_REQUIRES(context, input.dims() == 4,
183 errors::InvalidArgument("input_grad must be 4-dimensional",
184 input.shape().DebugString()));
185
186 // Resizers always produce float images, so input gradient must
187 // always be a float.
188 OP_REQUIRES(context, input.dtype() == DT_FLOAT,
189 errors::InvalidArgument("input_grad must be of type float",
190 DataTypeString(input.dtype())));
191
192 batch_size = input.dim_size(0);
193 channels = input.dim_size(3);
194
195 resized_height = input.dim_size(1);
196 resized_width = input.dim_size(2);
197
198 // The following check is also carried out for the forward op. It is added
199 // here to prevent a divide-by-zero exception when either height_scale or
200 // width_scale is being calculated.
201 OP_REQUIRES(context, resized_height > 0 && resized_width > 0,
202 errors::InvalidArgument("resized dimensions must be positive"));
203
204 const TensorShape& output_shape = context->input(1).shape();
205 OP_REQUIRES(context, output_shape.dims() == 4,
206 errors::InvalidArgument("original_image must be 4-dimensional",
207 output_shape.DebugString()));
208 original_height = output_shape.dim_size(1);
209 original_width = output_shape.dim_size(2);
210
211 // The following check is also carried out for the forward op. It is added
212 // here to prevent either height_scale or width_scale from being set to
213 // zero, which would cause a divide-by-zero exception in the deterministic
214 // back-prop path.
215 OP_REQUIRES(
216 context, original_height > 0 && original_width > 0,
217 errors::InvalidArgument("original dimensions must be positive"));
218
219 OP_REQUIRES(
220 context,
221 FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
222 FastBoundsCheck(original_width, std::numeric_limits<int32>::max()),
223 errors::InvalidArgument(
224 "original sizes must be between 0 and max int32"));
225
226 height_scale =
227 CalculateResizeScale(original_height, resized_height, align_corners_);
228 width_scale =
229 CalculateResizeScale(original_width, resized_width, align_corners_);
230
231 OP_REQUIRES_OK(context, context->allocate_output(
232 0,
233 TensorShape({batch_size, original_height,
234 original_width, channels}),
235 &output));
236 }
237
238 int64 batch_size;
239 int64 channels;
240 int64 resized_height;
241 int64 resized_width;
242 int64 original_height;
243 int64 original_width;
244 float height_scale;
245 float width_scale;
246 Tensor* output = nullptr;
247
248 private:
249 bool align_corners_;
250 bool half_pixel_centers_;
251 };
252
253 } // namespace tensorflow
254
255 #endif // TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
256