1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <cmath>
17
18 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
19 #include "tensorflow/core/framework/fake_input.h"
20 #include "tensorflow/core/framework/node_def_builder.h"
21 #include "tensorflow/core/framework/tensor.h"
22 #include "tensorflow/core/kernels/ops_testutil.h"
23 #include "tensorflow/core/lib/core/status_test_util.h"
24 #include "tensorflow/core/platform/test.h"
25 #include "tensorflow/core/platform/test_benchmark.h"
26
27 namespace tensorflow {
28
29 class ResizeAreaOpTest : public OpsTestBase {
30 protected:
ResizeAreaOpTest()31 ResizeAreaOpTest() {
32 TF_EXPECT_OK(NodeDefBuilder("resize_area_op", "ResizeArea")
33 .Input(FakeInput(DT_FLOAT))
34 .Input(FakeInput(DT_INT32))
35 .Attr("align_corners", false)
36 .Finalize(node_def()));
37 TF_EXPECT_OK(InitOp());
38 }
39
SetRandomImageInput(const TensorShape & shape)40 const Tensor* SetRandomImageInput(const TensorShape& shape) {
41 inputs_.clear();
42
43 CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions.";
44 bool is_ref = IsRefType(input_types_[inputs_.size()]);
45 Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
46 DataTypeToEnum<float>::v(), shape);
47 input->flat<float>().setRandom();
48 tensors_.push_back(input);
49 if (is_ref) {
50 CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
51 DataTypeToEnum<float>::v());
52 inputs_.push_back({&lock_for_refs_, input});
53 } else {
54 CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v());
55 inputs_.push_back({nullptr, input});
56 }
57 return input;
58 }
59
60 private:
61 // This is the unoptimized implementation of ResizeArea.
62 // We use this to confirm that the optimized version is exactly identical.
ResizeAreaBaseline(TTypes<float,4>::ConstTensor input_data,TTypes<float,4>::Tensor output_data)63 void ResizeAreaBaseline(TTypes<float, 4>::ConstTensor input_data,
64 TTypes<float, 4>::Tensor output_data) {
65 const int batch_size = input_data.dimension(0);
66 const int64 in_height = input_data.dimension(1);
67 const int64 in_width = input_data.dimension(2);
68 const int channels = input_data.dimension(3);
69
70 ASSERT_EQ(batch_size, output_data.dimension(0));
71 ASSERT_EQ(channels, output_data.dimension(3));
72
73 const int64 out_height = output_data.dimension(1);
74 const int64 out_width = output_data.dimension(2);
75
76 const float height_scale = in_height / static_cast<float>(out_height);
77 const float width_scale = in_width / static_cast<float>(out_width);
78
79 // A temporary tensor for computing the sum.
80 Tensor sum_tensor(DT_FLOAT, TensorShape({channels}));
81 typename TTypes<float, 1>::Tensor sum_data = sum_tensor.vec<float>();
82
83 // When using this algorithm for downsizing, the target pixel value is the
84 // weighted average of all the source pixels. The weight is determined by
85 // the contribution percentage of the source pixel.
86 //
87 // Let "scale" be "target_image_size/source_image_size". If 1/n of the
88 // source pixel contributes to the target pixel, then the weight is (1/n *
89 // scale); if the complete source pixel contributes to the target pixel,
90 // then the weight is scale.
91 //
92 // To visualize the implementation, use one dimension as an example:
93 // Resize in[4] to out[3].
94 // scale = 3/4 = 0.75
95 // out[0]: in[0] and 1/3 of in[1]
96 // out[1]: 2/3 of in[1] and 2/3 of in[2]
97 // out[2]: 1/3 of in[2] and in[1]
98 // Hence, the output pixel values are:
99 // out[0] = (in[0] * 1.0 + in[1] * 1/3) * scale
100 // out[1] = (in[1] * 2/3 + in[2] * 2/3 * scale
101 // out[2] = (in[3] * 1/3 + in[3] * 1.0) * scale
102 float scale = 1.0 / (height_scale * width_scale);
103 for (int64 b = 0; b < batch_size; ++b) {
104 for (int64 y = 0; y < out_height; ++y) {
105 const float in_y = y * height_scale;
106 const float in_y1 = (y + 1) * height_scale;
107 // The start and end height indices of all the cells that could
108 // contribute to the target cell.
109 int64 y_start = std::floor(in_y);
110 int64 y_end = std::ceil(in_y1);
111
112 for (int64 x = 0; x < out_width; ++x) {
113 const float in_x = x * width_scale;
114 const float in_x1 = (x + 1) * width_scale;
115 // The start and end width indices of all the cells that could
116 // contribute to the target cell.
117 int64 x_start = std::floor(in_x);
118 int64 x_end = std::ceil(in_x1);
119
120 sum_data.setConstant(0.0);
121 for (int64 i = y_start; i < y_end; ++i) {
122 float scale_y = i < in_y
123 ? (i + 1 > in_y1 ? height_scale : i + 1 - in_y)
124 : (i + 1 > in_y1 ? in_y1 - i : 1.0);
125 for (int64 j = x_start; j < x_end; ++j) {
126 float scale_x = j < in_x
127 ? (j + 1 > in_x1 ? width_scale : j + 1 - in_x)
128 : (j + 1 > in_x1 ? in_x1 - j : 1.0);
129 for (int64 c = 0; c < channels; ++c) {
130 #define BOUND(val, limit) \
131 std::min(((limit)-int64{1}), (std::max(int64{0}, (val))))
132 sum_data(c) +=
133 static_cast<float>(input_data(b, BOUND(i, in_height),
134 BOUND(j, in_width), c)) *
135 scale_y * scale_x * scale;
136 #undef BOUND
137 }
138 }
139 }
140 for (int64 c = 0; c < channels; ++c) {
141 output_data(b, y, x, c) = sum_data(c);
142 }
143 }
144 }
145 }
146 }
147
148 protected:
RunRandomTest(int in_height,int in_width,int target_height,int target_width,int channels)149 void RunRandomTest(int in_height, int in_width, int target_height,
150 int target_width, int channels) {
151 const Tensor* input =
152 SetRandomImageInput(TensorShape({1, in_height, in_width, channels}));
153 AddInputFromArray<int32>(TensorShape({2}), {target_height, target_width});
154
155 TF_ASSERT_OK(RunOpKernel());
156 std::unique_ptr<Tensor> expected(
157 new Tensor(device_->GetAllocator(AllocatorAttributes()),
158 DataTypeToEnum<float>::v(),
159 TensorShape({1, target_height, target_width, channels})));
160 ResizeAreaBaseline(input->tensor<float, 4>(), expected->tensor<float, 4>());
161 test::ExpectTensorNear<float>(*expected, *GetOutput(0), 0.00001);
162 }
163
RunManyRandomTests(int channels)164 void RunManyRandomTests(int channels) {
165 for (int in_w : {2, 4, 7, 20, 165}) {
166 for (int in_h : {1, 3, 5, 8, 100, 233}) {
167 for (int target_height : {1, 2, 3, 50, 113}) {
168 for (int target_width : {target_height, target_height / 2 + 1}) {
169 RunRandomTest(in_h, in_w, target_height, target_width, channels);
170 }
171 }
172 }
173 }
174 }
175 };
176
// Resizes a random 141x186 (height x width) image to 299x299.
TEST_F(ResizeAreaOpTest, TestAreaRandom141x186) {
  const int kChannels = 3;
  RunRandomTest(/*in_height=*/141, /*in_width=*/186, /*target_height=*/299,
                /*target_width=*/299, kChannels);
}
180
// Resizes a random 183x229 (height x width) image to 299x299.
TEST_F(ResizeAreaOpTest, TestAreaRandom183x229) {
  const int kChannels = 3;
  RunRandomTest(/*in_height=*/183, /*in_width=*/229, /*target_height=*/299,
                /*target_width=*/299, kChannels);
}
184
// Resizes a random 749x603 (height x width) image to 299x299.
TEST_F(ResizeAreaOpTest, TestAreaRandom749x603) {
  const int kChannels = 3;
  RunRandomTest(/*in_height=*/749, /*in_width=*/603, /*target_height=*/299,
                /*target_width=*/299, kChannels);
}
188
// Runs the size sweep with single-channel images.
TEST_F(ResizeAreaOpTest, TestAreaRandomDataSeveralInputsSizes1Channel) {
  const int kChannels = 1;
  RunManyRandomTests(kChannels);
}
192
// Runs the size sweep with three-channel images.
TEST_F(ResizeAreaOpTest, TestAreaRandomDataSeveralInputsSizes3Channels) {
  const int kChannels = 3;
  RunManyRandomTests(kChannels);
}
196
// Runs the size sweep with four-channel images.
TEST_F(ResizeAreaOpTest, TestAreaRandomDataSeveralInputsSizes4Channels) {
  const int kChannels = 4;
  RunManyRandomTests(kChannels);
}
200
201 } // namespace tensorflow
202