/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"

namespace tensorflow {

class ResizeBicubicOpTest : public OpsTestBase {
 protected:
  ResizeBicubicOpTest() {
    TF_EXPECT_OK(NodeDefBuilder("resize_bicubic_op", "ResizeBicubic")
                     .Input(FakeInput(DT_FLOAT))
                     .Input(FakeInput(DT_INT32))
                     .Attr("align_corners", false)
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
  }
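  // Replaces any existing inputs with a single 4-D float image tensor of the
  // given shape, filled with random values, and returns a pointer to it.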
  const Tensor* SetRandomImageInput(const TensorShape& shape) {
    inputs_.clear();

    CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions.";
    bool is_ref = IsRefType(input_types_[inputs_.size()]);
    Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
                               DataTypeToEnum<float>::v(), shape);
    input->flat<float>().setRandom();
    tensors_.push_back(input);
    if (is_ref) {
      CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
               DataTypeToEnum<float>::v());
      inputs_.push_back({&lock_for_refs_, input});
    } else {
      CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v());
      inputs_.push_back({nullptr, input});
    }
    return input;
  }

 private:
  static const int64 kTableSize = (1 << 10);

  const float* InitCoeffsTable() {
    // Allocate and initialize the coefficients table using the bicubic
    // convolution algorithm:
    // https://en.wikipedia.org/wiki/Bicubic_interpolation
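    // The kernel is the Keys cubic convolution kernel with A = -0.75:
    //   W(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1    for |x| <= 1
    //   W(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A      for 1 < |x| < 2
    // Entry i of the table caches W(x) and W(x + 1) for x = i / kTableSize,
    // i.e. the weights of an inner tap and the matching outer tap.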
    float* coeffs_tab = new float[(kTableSize + 1) * 2];
    static const double A = -0.75;
    for (int i = 0; i <= kTableSize; ++i) {
      float x = i * 1.0 / kTableSize;
      coeffs_tab[i * 2] = ((A + 2) * x - (A + 3)) * x * x + 1;
      x += 1.0;
      coeffs_tab[i * 2 + 1] = ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
    }
    return coeffs_tab;
  }

  const float* GetCoeffsTable() {
    // Static so that we initialize it on first use
    static const float* coeffs_tab = InitCoeffsTable();
    return coeffs_tab;
  }

  // Used in the baseline implementation
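  // Clamps val to the valid index range [0, limit - 1].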
  inline int64 Bound(int64 val, int64 limit) {
    return std::min(limit - 1ll, std::max(int64{0}, val));
  }

  // Used in the baseline implementation
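  // Maps output coordinate out_loc to input coordinate scale * out_loc, then
  // returns the four clamped source indices around it together with the
  // kernel weights looked up from the table at the fractional offset delta.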
  inline void GetWeightsAndIndices(float scale, int64 out_loc, int64 limit,
                                   std::array<float, 4>* weights,
                                   std::array<int64, 4>* indices) {
    const int64 in_loc = scale * out_loc;
    const float delta = scale * out_loc - in_loc;
    const int64 offset = lrintf(delta * kTableSize);
    const float* coeffs_tab = GetCoeffsTable();
    *weights = {{coeffs_tab[offset * 2 + 1], coeffs_tab[offset * 2],
                 coeffs_tab[(kTableSize - offset) * 2],
                 coeffs_tab[(kTableSize - offset) * 2 + 1]}};
    *indices = {{Bound(in_loc - 1, limit), Bound(in_loc, limit),
                 Bound(in_loc + 1, limit), Bound(in_loc + 2, limit)}};
  }

  // Used in the baseline implementation
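  // Computes a 1-D cubic interpolation as the dot product of four sample
  // values with their kernel weights.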
  inline float Interpolate1D(const std::array<float, 4>& weights,
                             const std::array<float, 4>& values) {
    return values[0] * weights[0] + values[1] * weights[1] +
           values[2] * weights[2] + values[3] * weights[3];
  }

  // This is the straightforward, unoptimized implementation of bicubic
  // resize. We use it to confirm that the optimized version produces
  // near-identical results (see the tolerance note in RunRandomTest).
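  // The interpolation is separable: for each output pixel, a 4x4 input patch
  // is reduced along x first (producing coeff), then along y.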
  void ResizeBicubicBaseline(TTypes<float, 4>::ConstTensor images,
                             TTypes<float, 4>::Tensor output) {
    const int batch_size = images.dimension(0);
    const int64 in_height = images.dimension(1);
    const int64 in_width = images.dimension(2);
    const int channels = images.dimension(3);

    ASSERT_EQ(batch_size, output.dimension(0));
    ASSERT_EQ(channels, output.dimension(3));

    const int64 out_height = output.dimension(1);
    const int64 out_width = output.dimension(2);

    const float height_scale = in_height / static_cast<float>(out_height);
    const float width_scale = in_width / static_cast<float>(out_width);

    std::array<float, 4> coeff = {{0.0, 0.0, 0.0, 0.0}};
    for (int64 b = 0; b < batch_size; ++b) {
      for (int64 y = 0; y < out_height; ++y) {
        std::array<float, 4> y_weights;
        std::array<int64, 4> y_indices;
        GetWeightsAndIndices(height_scale, y, in_height, &y_weights,
                             &y_indices);
        for (int64 x = 0; x < out_width; ++x) {
          std::array<float, 4> x_weights;
          std::array<int64, 4> x_indices;
          GetWeightsAndIndices(width_scale, x, in_width, &x_weights,
                               &x_indices);
          for (int64 c = 0; c < channels; ++c) {
            // Use a 4x4 patch to compute the interpolated output value at
            // (b, y, x, c).
            for (int64 i = 0; i < 4; ++i) {
              const std::array<float, 4> values = {
                  {static_cast<float>(images(b, y_indices[i], x_indices[0], c)),
                   static_cast<float>(images(b, y_indices[i], x_indices[1], c)),
                   static_cast<float>(images(b, y_indices[i], x_indices[2], c)),
                   static_cast<float>(
                       images(b, y_indices[i], x_indices[3], c))}};
              coeff[i] = Interpolate1D(x_weights, values);
            }
            output(b, y, x, c) = Interpolate1D(y_weights, coeff);
          }
        }
      }
    }
  }
 protected:
  void RunRandomTest(const int batch_size, const int64 in_height,
                     const int64 in_width, const int target_height,
                     const int target_width, int channels) {
    LOG(INFO) << "Running random test " << in_height << "x" << in_width << "x"
              << channels << " to " << target_height << "x" << target_width
              << "x" << channels;
    const Tensor* input = SetRandomImageInput(
        TensorShape({batch_size, in_height, in_width, channels}));
    AddInputFromArray<int32>(TensorShape({2}), {target_height, target_width});

    TF_ASSERT_OK(RunOpKernel());

    std::unique_ptr<Tensor> expected(new Tensor(
        device_->GetAllocator(AllocatorAttributes()),
        DataTypeToEnum<float>::v(),
        TensorShape({batch_size, target_height, target_width, channels})));

    ResizeBicubicBaseline(input->tensor<float, 4>(),
                          expected->tensor<float, 4>());
    // Note: the baseline implementation reduces first in the x direction,
    // then in the y direction, while the optimized version reduces first in
    // the y direction, then in the x direction. As a result, there may be
    // slight floating-point differences, so we only require the two results
    // to match to within 0.00001.
    test::ExpectTensorNear<float>(*expected, *GetOutput(0), 0.00001);
  }
  void RunManyRandomTests(int channels) {
    for (int batch_size : {1, 2, 5}) {
      for (int in_w : {2, 4, 7, 20, 165}) {
        for (int in_h : {1, 3, 5, 8, 100, 233}) {
          for (int target_height : {1, 2, 3, 50, 113}) {
            for (int target_width : {target_height, target_height / 2 + 1}) {
              RunRandomTest(batch_size, in_h, in_w, target_height,
                            target_width, channels);
            }
          }
        }
      }
    }
  }
};

TEST_F(ResizeBicubicOpTest, TestBicubic2x2To1x1) {
  // Input:
  //  1, 2
  //  3, 4
  AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4});
  AddInputFromArray<int32>(TensorShape({2}), {1, 1});
  TF_ASSERT_OK(RunOpKernel());

  // When scaling down, we have to arbitrarily pick a pixel from the
  // original input. In this case, we choose the top-left-most pixel.
  Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1, 1}));
  test::FillValues<float>(&expected, {1.0});
  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
}

TEST_F(ResizeBicubicOpTest, TestBicubic2x2To0x0) {
  AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4});
  AddInputFromArray<int32>(TensorShape({2}), {0, 0});

  Status s = RunOpKernel();
  EXPECT_TRUE(str_util::StrContains(
      s.ToString(), "Invalid argument: output dimensions must be positive"))
      << s;
}

TEST_F(ResizeBicubicOpTest, TestBicubicRandom141x186) {
  RunRandomTest(2, 141, 186, 299, 299, 1 /* channels */);
  RunRandomTest(2, 141, 186, 299, 299, 3 /* channels */);
}

TEST_F(ResizeBicubicOpTest, TestBicubicRandom183x229) {
  RunRandomTest(2, 183, 229, 299, 299, 1 /* channels */);
  RunRandomTest(2, 183, 229, 299, 299, 3 /* channels */);
}

TEST_F(ResizeBicubicOpTest, TestBicubicRandom749x603) {
  RunRandomTest(2, 749, 603, 299, 299, 1 /* channels */);
  RunRandomTest(2, 749, 603, 299, 299, 3 /* channels */);
}

TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes1Channel) {
  RunManyRandomTests(1);
}

TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes3Channels) {
  RunManyRandomTests(3);
}

TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes4Channels) {
  RunManyRandomTests(4);
}

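// Builds a graph that feeds a random image tensor and a constant output
// shape into a single ResizeBicubic node, for use in the benchmarks below.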
static Graph* ResizeBicubic(int batch_size, int size, int channels,
                            float scale_y = 0.3, float scale_x = 0.7) {
  Graph* g = new Graph(OpRegistry::Global());
  Tensor input(DT_FLOAT, TensorShape({batch_size, size, size, channels}));
  input.flat<float>().setRandom();
  Tensor shape(DT_INT32, TensorShape({2}));
  auto shape_t = shape.flat<int32>();
  shape_t(0) = scale_y * size;
  shape_t(1) = scale_x * size;
  test::graph::Binary(g, "ResizeBicubic", test::graph::Constant(g, input),
                      test::graph::Constant(g, shape));
  return g;
}

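// Benchmarks the shrinking case: the default scales (0.3, 0.7) make the
// output smaller than the input.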
#define BM_ResizeBicubicDev(BATCH, SIZE, CHANNELS)                            \
  static void BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS(int iters) {   \
    testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \
                            CHANNELS);                                        \
    test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS)).Run(iters);  \
  }                                                                           \
  BENCHMARK(BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS);

BM_ResizeBicubicDev(8, 32, 3);
BM_ResizeBicubicDev(8, 128, 3);
BM_ResizeBicubicDev(8, 512, 3);
BM_ResizeBicubicDev(8, 1024, 3);
BM_ResizeBicubicDev(16, 32, 3);
BM_ResizeBicubicDev(16, 128, 3);
BM_ResizeBicubicDev(16, 512, 3);
BM_ResizeBicubicDev(16, 1024, 3);
BM_ResizeBicubicDev(32, 32, 3);
BM_ResizeBicubicDev(32, 128, 3);
BM_ResizeBicubicDev(32, 512, 3);
BM_ResizeBicubicDev(32, 1024, 3);

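// Benchmarks the upscaling case: both dimensions are expanded 8x, so the
// items-processed count includes the 8 * 8 output expansion factor.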
#define BM_ResizeBicubicExpand(BATCH, SIZE, CHANNELS)                         \
  static void BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS(         \
      int iters) {                                                            \
    testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \
                            CHANNELS * 8 * 8);                                \
    test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS, 8, 8))        \
        .Run(iters);                                                          \
  }                                                                           \
  BENCHMARK(BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS);

BM_ResizeBicubicExpand(12, 48, 1);
BM_ResizeBicubicExpand(12, 48, 3);
BM_ResizeBicubicExpand(12, 48, 40);

}  // end namespace tensorflow