1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // This is a helper struct to package up the input and output
17 // parameters of an image resizer (the height, widths, etc.). To
18 // reduce code duplication and ensure consistency across the different
19 // resizers, it performs the input validation.
20
21 #ifndef TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
22 #define TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
23
24 #define EIGEN_USE_THREADS
25 #include <math.h>
26
27 #include <algorithm>
28 #include <array>
29
30 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
31 #include "tensorflow/core/framework/bounds_check.h"
32 #include "tensorflow/core/framework/op_kernel.h"
33 #include "tensorflow/core/framework/register_types.h"
34 #include "tensorflow/core/framework/tensor.h"
35 #include "tensorflow/core/framework/tensor_shape.h"
36 #include "tensorflow/core/framework/types.h"
37
38 namespace tensorflow {
39
40 // CalculateResizeScale determines the float scaling factor.
CalculateResizeScale(int64 in_size,int64 out_size,bool align_corners)41 inline float CalculateResizeScale(int64 in_size, int64 out_size,
42 bool align_corners) {
43 return (align_corners && out_size > 1)
44 ? (in_size - 1) / static_cast<float>(out_size - 1)
45 : in_size / static_cast<float>(out_size);
46 }
47
// Coordinate mapper for the half-pixel-centers convention: pixel centers are
// taken to lie at 0.5 offsets, so the top-left pixel has floating point
// coordinates (0.5, 0.5).
struct HalfPixelScaler {
  HalfPixelScaler() {}

  // Returns (x + 0.5) * scale - 0.5.
  // The final 0.5 is subtracted because the existing bilinear sampling code
  // etc. assumes pixels are in the old coordinate system.
  inline float operator()(const int x, const float scale) const {
    const float shifted = static_cast<float>(x) + 0.5f;
    return shifted * scale - 0.5f;
  }
};
58
// Older, incorrect scaling method. It causes all resizes to have a slight
// translation, leading to inconsistent results: for example, a flip followed
// by a resize gives different results than a resize followed by a flip.
struct LegacyScaler {
  LegacyScaler() {}

  // Returns x * scale, with no pixel-center offset applied.
  inline float operator()(const int x, const float scale) const {
    const float index = static_cast<float>(x);
    return index * scale;
  }
};
68
69 struct ImageResizerState {
ImageResizerStateImageResizerState70 explicit ImageResizerState(bool align_corners, bool half_pixel_centers)
71 : align_corners_(align_corners),
72 half_pixel_centers_(half_pixel_centers) {}
73
74 // ValidateAndCalculateOutputSize checks the bounds on the input tensors
75 // and requested size, sets up some of the resizing state such as the
76 // height_scale and width_scale, and calculates the output size.
77 // If any of these operations fails, it sets an error status in
78 // the context, which the caller must check.
ValidateAndCalculateOutputSizeImageResizerState79 void ValidateAndCalculateOutputSize(OpKernelContext* context,
80 const Tensor& input) {
81 OP_REQUIRES(
82 context,
83 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
84 errors::InvalidArgument("If half_pixel_centers is True, "
85 "align_corners must be False."));
86 OP_REQUIRES(context, input.dims() == 4,
87 errors::InvalidArgument("input must be 4-dimensional",
88 input.shape().DebugString()));
89 const Tensor& shape_t = context->input(1);
90 OP_REQUIRES(context, shape_t.dims() == 1,
91 errors::InvalidArgument("shape_t must be 1-dimensional",
92 shape_t.shape().DebugString()));
93 OP_REQUIRES(context, shape_t.NumElements() == 2,
94 errors::InvalidArgument("shape_t must have two elements",
95 shape_t.shape().DebugString()));
96 auto Svec = shape_t.vec<int32>();
97 batch_size = input.dim_size(0);
98 out_height = internal::SubtleMustCopy(Svec(0));
99 out_width = internal::SubtleMustCopy(Svec(1));
100 OP_REQUIRES(
101 context,
102 FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) &&
103 FastBoundsCheck(input.dim_size(2),
104 std::numeric_limits<int32>::max()),
105 errors::InvalidArgument("input sizes must be between 0 and max int32"));
106
107 in_height = static_cast<int32>(input.dim_size(1));
108 in_width = static_cast<int32>(input.dim_size(2));
109 channels = input.dim_size(3);
110 OP_REQUIRES(context, out_height > 0 && out_width > 0,
111 errors::InvalidArgument("output dimensions must be positive"));
112 OP_REQUIRES(
113 context, channels > 0,
114 errors::InvalidArgument("image must have at least one channel"));
115 OP_REQUIRES(
116 context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
117 errors::InvalidArgument("input image must be of non-zero size"));
118 height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
119 width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
120
121 // Guard against overflows
122 OP_REQUIRES(context,
123 ceilf((out_height - 1) * height_scale) <=
124 static_cast<float>(std::numeric_limits<int64>::max()),
125 errors::InvalidArgument(
126 "input image height scale would cause an overflow"));
127 OP_REQUIRES(
128 context,
129 ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX),
130 errors::InvalidArgument(
131 "input image width scale would cause an overflow"));
132 }
133
134 // Calculates all the required variables, and allocates the output.
ValidateAndCreateOutputImageResizerState135 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
136 ValidateAndCalculateOutputSize(context, input);
137 if (!context->status().ok()) return;
138 OP_REQUIRES_OK(context, context->allocate_output(
139 0,
140 TensorShape({input.dim_size(0), out_height,
141 out_width, input.dim_size(3)}),
142 &output));
143 }
144
145 int64 batch_size;
146 int64 out_height;
147 int64 out_width;
148 int64 in_height;
149 int64 in_width;
150 int64 channels;
151 float height_scale;
152 float width_scale;
153 Tensor* output = nullptr;
154
155 private:
156 bool align_corners_;
157 bool half_pixel_centers_;
158 };
159
160 struct ImageResizerGradientState {
ImageResizerGradientStateImageResizerGradientState161 explicit ImageResizerGradientState(bool align_corners,
162 bool half_pixel_centers)
163 : align_corners_(align_corners),
164 half_pixel_centers_(half_pixel_centers) {}
165
ValidateAndCreateOutputImageResizerGradientState166 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input,
167 const Tensor& original_image) {
168 OP_REQUIRES(
169 context,
170 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
171 errors::InvalidArgument("If half_pixel_centers is True, "
172 "align_corners must be False."));
173
174 OP_REQUIRES(context, input.dims() == 4,
175 errors::InvalidArgument("input_grad must be 4-dimensional",
176 input.shape().DebugString()));
177 // Resizers always produce float images, so input gradient must
178 // always be a float.
179 OP_REQUIRES(context, input.dtype() == DT_FLOAT,
180 errors::InvalidArgument("input_grad must be of type float",
181 DataTypeString(input.dtype())));
182
183 OP_REQUIRES(context, original_image.dims() == 4,
184 errors::InvalidArgument("original_image must be 4-dimensional",
185 original_image.shape().DebugString()));
186
187 // Allocate output and initialize to zeros.
188 batch_size = input.dim_size(0);
189 channels = input.dim_size(3);
190 resized_height = input.dim_size(1);
191 resized_width = input.dim_size(2);
192 original_height = original_image.dim_size(1);
193 original_width = original_image.dim_size(2);
194
195 // The following check is also carried out for the forward op. It is added
196 // here to prevent a divide-by-zero exception when either height_scale or
197 // width_scale is being calculated.
198 OP_REQUIRES(context, resized_height > 0 && resized_width > 0,
199 errors::InvalidArgument("resized dimensions must be positive"));
200
201 // The following check is also carried out for the forward op. It is added
202 // here to prevent either height_scale or width_scale from being set to
203 // zero, which would cause a divide-by-zero exception in the deterministic
204 // back-prop path.
205 OP_REQUIRES(
206 context, original_height > 0 && original_width > 0,
207 errors::InvalidArgument("original dimensions must be positive"));
208
209 OP_REQUIRES(
210 context,
211 FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
212 FastBoundsCheck(original_width, std::numeric_limits<int32>::max()),
213 errors::InvalidArgument(
214 "original sizes must be between 0 and max int32"));
215
216 height_scale =
217 CalculateResizeScale(original_height, resized_height, align_corners_);
218 width_scale =
219 CalculateResizeScale(original_width, resized_width, align_corners_);
220 output = nullptr;
221 OP_REQUIRES_OK(context, context->allocate_output(
222 0,
223 TensorShape({batch_size, original_height,
224 original_width, channels}),
225 &output));
226 }
227
228 int64 batch_size;
229 int64 channels;
230 int64 resized_height;
231 int64 resized_width;
232 int64 original_height;
233 int64 original_width;
234 float height_scale;
235 float width_scale;
236 Tensor* output;
237
238 private:
239 bool align_corners_;
240 bool half_pixel_centers_;
241 };
242
243 } // namespace tensorflow
244
245 #endif // TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_
246