1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/framework/allocator.h"
17 #include "tensorflow/core/framework/fake_input.h"
18 #include "tensorflow/core/framework/node_def_builder.h"
19 #include "tensorflow/core/framework/op_kernel.h"
20 #include "tensorflow/core/framework/tensor.h"
21 #include "tensorflow/core/framework/tensor_testutil.h"
22 #include "tensorflow/core/framework/types.h"
23 #include "tensorflow/core/framework/types.pb.h"
24 #include "tensorflow/core/kernels/image/sampling_kernels.h"
25 #include "tensorflow/core/kernels/ops_testutil.h"
26 #include "tensorflow/core/kernels/ops_util.h"
27 #include "tensorflow/core/lib/core/status_test_util.h"
28 #include "tensorflow/core/lib/random/random.h"
29 #include "tensorflow/core/lib/random/simple_philox.h"
30 #include "tensorflow/core/lib/strings/str_util.h"
31 #include "tensorflow/core/platform/test.h"
32 #include "tensorflow/core/public/session.h"
33 #include "tensorflow/core/util/ptr_util.h"
34
35 namespace tensorflow {
36 using Eigen::Vector2f;
37
// Type-erased view of a sampling kernel, so that kernels of different
// concrete types can be used through one interface.
class DynamicKernel {
 public:
  virtual ~DynamicKernel() = default;

  // Weight of the kernel at offset `x`.
  virtual float Value(float x) const = 0;

  // Radius reported by the kernel.
  virtual float Radius() const = 0;
};
44
45 // Wraps a sampling kernel in a common interface.
46 template <typename KernelType>
47 class TypedDynamicKernel : public DynamicKernel {
48 public:
TypedDynamicKernel(const KernelType & kernel)49 explicit TypedDynamicKernel(const KernelType& kernel) : kernel_(kernel) {}
Value(const float x) const50 float Value(const float x) const override { return kernel_(x); }
Radius() const51 float Radius() const override { return kernel_.Radius(); }
52 const KernelType kernel_;
53 };
54
55 template <typename KernelType>
CreateKernel(const KernelType & kernel)56 std::unique_ptr<const DynamicKernel> CreateKernel(const KernelType& kernel) {
57 return MakeUnique<TypedDynamicKernel<KernelType>>(kernel);
58 }
59
Create(functor::SamplingKernelType kernel_type)60 std::unique_ptr<const DynamicKernel> Create(
61 functor::SamplingKernelType kernel_type) {
62 switch (kernel_type) {
63 case functor::Lanczos1Kernel:
64 return CreateKernel(functor::CreateLanczos1Kernel());
65 case functor::Lanczos3Kernel:
66 return CreateKernel(functor::CreateLanczos3Kernel());
67 case functor::Lanczos5Kernel:
68 return CreateKernel(functor::CreateLanczos5Kernel());
69 case functor::GaussianKernel:
70 return CreateKernel(functor::CreateGaussianKernel());
71 case functor::BoxKernel:
72 return CreateKernel(functor::CreateBoxKernel());
73 case functor::TriangleKernel:
74 return CreateKernel(functor::CreateTriangleKernel());
75 case functor::KeysCubicKernel:
76 return CreateKernel(functor::CreateKeysCubicKernel());
77 case functor::MitchellCubicKernel:
78 return CreateKernel(functor::CreateMitchellCubicKernel());
79 default:
80 LOG(FATAL) << "Unknown kernel type.";
81 return nullptr;
82 }
83 }
84
// Returns `value` limited to the closed interval [low, high].
//
// The result is returned by value, not by const reference: the arguments are
// frequently temporaries (for example `static_cast<int64>(0)` or
// `in_height - 1`), and a reference into one of them would dangle once the
// calling full-expression ends.
//
// The upper bound is applied last, so if `low > high` the result is `high`.
template <typename T>
inline T Clamp(const T& low, const T& high, const T& value) {
  return std::min(high, std::max(low, value));
}
89
90 // Samples from the image at the passed batch at pixel location sample_f with a
91 // kernel scaled by scale.
Sample(const DynamicKernel & kernel,const bool antialias,TTypes<float,4>::Tensor images,const int batch,const Vector2f & scale,const Vector2f & sample_f,float * dest)92 void Sample(const DynamicKernel& kernel, const bool antialias,
93 TTypes<float, 4>::Tensor images, const int batch,
94 const Vector2f& scale, const Vector2f& sample_f, float* dest) {
95 const Vector2f kernel_scale(antialias ? std::max(scale.x(), 1.0f) : 1.0,
96 antialias ? std::max(scale.y(), 1.0f) : 1.0);
97
98 const int64 in_height = images.dimension(1);
99 const int64 in_width = images.dimension(2);
100 const int channels = images.dimension(3);
101 const int64 y_span_start = Clamp(
102 static_cast<int64>(0), in_height - 1,
103 static_cast<int64>(
104 std::ceil(sample_f.y() - kernel.Radius() * kernel_scale.y() - 0.5f)));
105 const int64 y_span_end =
106 Clamp(static_cast<int64>(0), in_height - 1,
107 static_cast<int64>(std::floor(
108 sample_f.y() + kernel.Radius() * kernel_scale.y() - 0.5f))) +
109 1;
110 const int64 x_span_start = Clamp(
111 static_cast<int64>(0), in_width - 1,
112 static_cast<int64>(
113 std::ceil(sample_f.x() - kernel.Radius() * kernel_scale.x() - 0.5f)));
114
115 const int64 x_span_end =
116 Clamp(static_cast<int64>(0), in_width - 1,
117 static_cast<int64>(std::floor(
118 sample_f.x() + kernel.Radius() * kernel_scale.x() - 0.5f))) +
119 1;
120
121 std::fill(dest, dest + channels, 0.0f);
122 if (sample_f.x() < 0.0f || sample_f.y() < 0.0f || sample_f.x() > in_width ||
123 sample_f.y() > in_height) {
124 return;
125 }
126 const Vector2f one_over_kernel_scale(1.0f / kernel_scale.x(),
127 1.0f / kernel_scale.y());
128 float total_weight = 0.0f;
129 for (int64 y = y_span_start; y < y_span_end; ++y) {
130 float y_kernel_pos = static_cast<float>(y) + 0.5f - sample_f.y();
131 float y_weight = kernel.Value(y_kernel_pos * one_over_kernel_scale.y());
132 for (int64 x = x_span_start; x < x_span_end; ++x) {
133 float x_kernel_pos = static_cast<float>(x) + 0.5f - sample_f.x();
134 float x_weight = kernel.Value(x_kernel_pos * one_over_kernel_scale.x());
135 float kernel_weight = y_weight * x_weight;
136 total_weight += kernel_weight;
137 for (int c = 0; c < channels; ++c) {
138 dest[c] += static_cast<float>(images(batch, y, x, c)) * kernel_weight;
139 }
140 }
141 }
142 if (std::abs(total_weight) >= 1000.0f * std::numeric_limits<float>::min()) {
143 CHECK_NE(total_weight, 0.0f) << y_span_start << "," << y_span_end << " "
144 << x_span_start << "," << x_span_end;
145 for (int c = 0; c < channels; ++c) {
146 dest[c] /= total_weight;
147 }
148 }
149 }
150
151 // This is the straight forward unoptimized implementation of ScaleAndTranslate
152 // We use this to confirm that the optimized version is almost identical. The
153 // only difference will be small floating point differences, since this version
154 // does not to separable passes in x and y dimensions.
ScaleAndTranslateBaseline(const DynamicKernel & kernel,const bool antialias,TTypes<float,4>::Tensor images,const Vector2f & orig_scale,const Vector2f & orig_translate,TTypes<float,4>::Tensor output)155 void ScaleAndTranslateBaseline(const DynamicKernel& kernel,
156 const bool antialias,
157 TTypes<float, 4>::Tensor images,
158 const Vector2f& orig_scale,
159 const Vector2f& orig_translate,
160 TTypes<float, 4>::Tensor output) {
161 const Vector2f scale(1.0f / orig_scale[0], 1.0f / orig_scale[1]);
162 const Vector2f translate(-orig_translate[0] / orig_scale[0],
163 -orig_translate[1] / orig_scale[1]);
164
165 const int batch = images.dimension(0);
166 const int channels = images.dimension(3);
167
168 ASSERT_EQ(batch, output.dimension(0));
169 ASSERT_EQ(channels, output.dimension(3));
170
171 const int64 out_height = output.dimension(1);
172 const int64 out_width = output.dimension(2);
173 const int64 in_height = images.dimension(1);
174 const int64 in_width = images.dimension(2);
175
176 for (int b = 0; b < batch; ++b) {
177 for (int64 y = 0; y < out_height; ++y) {
178 const float out_y_f = static_cast<float>(y) + 0.5;
179 const float in_y_f = out_y_f * scale.y() + translate.y();
180 for (int64 x = 0; x < out_width; ++x) {
181 const float out_x_f = static_cast<float>(x) + 0.5;
182 const float in_x_f = out_x_f * scale.x() + translate.x();
183 if (in_x_f < 0.0f || in_y_f < 0.0f || in_x_f > in_width ||
184 in_y_f > in_height) {
185 std::fill(&output(b, y, x, 0), &output(b, y, x + 1, 0), 0.0f);
186 } else {
187 Sample(kernel, antialias, images, b, scale, Vector2f(in_x_f, in_y_f),
188 &output(b, y, x, 0));
189 }
190 }
191 }
192 }
193 }
194
195 class ScaleAndTranslateOpTest : public OpsTestBase {
196 protected:
CreateOp(const string & kernel_type_str,const bool antialias)197 void CreateOp(const string& kernel_type_str, const bool antialias) {
198 TF_EXPECT_OK(NodeDefBuilder("scale_and_translate_op", "ScaleAndTranslate")
199 .Input(FakeInput(DT_FLOAT))
200 .Input(FakeInput(DT_INT32))
201 .Input(FakeInput(DT_FLOAT))
202 .Input(FakeInput(DT_FLOAT))
203 .Attr("kernel_type", kernel_type_str)
204 .Attr("antialias", antialias)
205 .Finalize(node_def()));
206 TF_EXPECT_OK(InitOp());
207 kernel_type_ = functor::SamplingKernelTypeFromString(kernel_type_str);
208 antialias_ = antialias;
209 }
210
SetCheckerboardImageInput(int batch_size,int num_row_squares,int num_col_squares,int square_size,int num_channels)211 void SetCheckerboardImageInput(int batch_size, int num_row_squares,
212 int num_col_squares, int square_size,
213 int num_channels) {
214 inputs_.clear();
215 std::vector<float> data;
216 const int64 row_size = num_col_squares * square_size * num_channels;
217 const int64 image_size = num_row_squares * square_size * row_size;
218 data.resize(batch_size * image_size);
219 random::PhiloxRandom philox(42);
220 random::SimplePhilox rnd(&philox);
221 std::vector<float> col(num_channels);
222 for (int b = 0; b < batch_size; ++b) {
223 for (int y = 0; y < num_row_squares; ++y) {
224 for (int x = 0; x < num_col_squares; ++x) {
225 for (int n = 0; n < num_channels; ++n) {
226 col[n] = rnd.RandFloat();
227 }
228 for (int r = y * square_size; r < (y + 1) * square_size; ++r) {
229 auto it = data.begin() + b * image_size + r * row_size +
230 x * square_size * num_channels;
231 for (int n = 0; n < square_size; ++n) {
232 for (int chan = 0; chan < num_channels; ++chan, ++it) {
233 *it = col[chan] * 255.0;
234 }
235 }
236 }
237 }
238 }
239 }
240 AddInputFromArray<float>(
241 TensorShape({batch_size, num_row_squares * square_size,
242 num_col_squares * square_size, num_channels}),
243 data);
244 }
245
RunTest(int output_image_height,int output_image_width,const Vector2f & scale,const Vector2f & translate)246 void RunTest(int output_image_height, int output_image_width,
247 const Vector2f& scale, const Vector2f& translate) {
248 AddInputFromArray<int32>(TensorShape({2}),
249 {output_image_height, output_image_width});
250 AddInputFromArray<float>(TensorShape({2}), {scale[1], scale[0]});
251 AddInputFromArray<float>(TensorShape({2}), {translate[1], translate[0]});
252 Status s = RunOpKernel();
253 const int batch_size = GetOutput(0)->dim_size(0);
254 const int channels = GetOutput(0)->dim_size(3);
255 Tensor expected(allocator(), DT_FLOAT,
256 TensorShape({batch_size, output_image_height,
257 output_image_width, channels}));
258
259 std::unique_ptr<const DynamicKernel> kernel = Create(kernel_type_);
260 ScaleAndTranslateBaseline(*kernel, antialias_,
261 mutable_input(0)->tensor<float, 4>(), scale,
262 translate, expected.tensor<float, 4>());
263 constexpr double kAbs = 1e-2f;
264 test::ExpectTensorNear<float>(expected, *GetOutput(0), kAbs);
265 }
266
267 functor::SamplingKernelType kernel_type_;
268 bool antialias_;
269 };
270
// Unit scale and zero translation: the output has the same size as the input.
TEST_F(ScaleAndTranslateOpTest, IdentityTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 16;
  constexpr int kSquareCols = 13;
  constexpr int kSquarePixels = 12;
  constexpr int kChannels = 3;
  constexpr int kInputHeight = kSquareRows * kSquarePixels;
  constexpr int kInputWidth = kSquareCols * kSquarePixels;

  CreateOp("lanczos3", /*antialias=*/true);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  const Vector2f unit_scale(1.0f, 1.0f);
  const Vector2f no_shift(0.0f, 0.0f);
  RunTest(kInputHeight, kInputWidth, unit_scale, no_shift);
}
286
// Doubles the image in both dimensions with no translation.
TEST_F(ScaleAndTranslateOpTest, UpsampleTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 16;
  constexpr int kSquareCols = 13;
  constexpr int kSquarePixels = 12;
  constexpr int kChannels = 3;
  constexpr int kInputHeight = kSquareRows * kSquarePixels;
  constexpr int kInputWidth = kSquareCols * kSquarePixels;

  CreateOp("lanczos3", /*antialias=*/true);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  const Vector2f double_scale(2.0f, 2.0f);
  const Vector2f no_shift(0.0f, 0.0f);
  RunTest(kInputHeight * 2, kInputWidth * 2, double_scale, no_shift);
}
302
// Halves the image in both dimensions with no translation.
TEST_F(ScaleAndTranslateOpTest, DownsampleTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 16;
  constexpr int kSquareCols = 13;
  constexpr int kSquarePixels = 12;
  constexpr int kChannels = 3;
  constexpr int kInputHeight = kSquareRows * kSquarePixels;
  constexpr int kInputWidth = kSquareCols * kSquarePixels;

  CreateOp("lanczos3", /*antialias=*/true);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  const Vector2f half_scale(0.5f, 0.5f);
  const Vector2f no_shift(0.0f, 0.0f);
  RunTest(kInputHeight / 2, kInputWidth / 2, half_scale, no_shift);
}
318
// Shrinks the whole input image onto a single output pixel, with
// antialiasing enabled.
TEST_F(ScaleAndTranslateOpTest, AntiAliasedDownsampleToASinglePixelTest) {
  CreateOp("lanczos3", true);
  constexpr int64 kBatchSize = 2;
  constexpr int64 kNumRowSquares = 16;
  constexpr int64 kNumColSquares = 13;
  constexpr int64 kSquareSize = 12;
  constexpr int64 kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 1;
  constexpr int kOutputImageWidth = 1;
  // The scale vector is (x, y). To map the full image onto one pixel, x must
  // be 1 / input width (columns) and y must be 1 / input height (rows).
  const Vector2f kScale(1.0f / (kNumColSquares * kSquareSize),
                        1.0f / (kNumRowSquares * kSquareSize));
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
335
// Shrinks the whole input image onto a single output pixel, with
// antialiasing disabled.
TEST_F(ScaleAndTranslateOpTest, NonAntiAliasedDownsampleToASinglePixelTest) {
  CreateOp("lanczos3", false);
  constexpr int64 kBatchSize = 2;
  constexpr int64 kNumRowSquares = 16;
  constexpr int64 kNumColSquares = 13;
  constexpr int64 kSquareSize = 12;
  constexpr int64 kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 1;
  constexpr int kOutputImageWidth = 1;
  // The scale vector is (x, y). To map the full image onto one pixel, x must
  // be 1 / input width (columns) and y must be 1 / input height (rows).
  const Vector2f kScale(1.0f / (kNumColSquares * kSquareSize),
                        1.0f / (kNumRowSquares * kSquareSize));
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
352
// Enlarges a 1x1 input image to a 10 (high) x 17 (wide) output, with
// antialiasing enabled.
TEST_F(ScaleAndTranslateOpTest, UsampleFromASinglePixelTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 1;
  constexpr int kSquareCols = 1;
  constexpr int kSquarePixels = 1;
  constexpr int kChannels = 3;
  constexpr int kTargetHeight = 10;
  constexpr int kTargetWidth = 17;

  CreateOp("lanczos3", /*antialias=*/true);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  // (x, y) scale: 17 across, 10 down.
  const Vector2f enlarge(17.0f, 10.0f);
  const Vector2f no_shift(0.0f, 0.0f);
  RunTest(kTargetHeight, kTargetWidth, enlarge, no_shift);
}
368
// Enlarges a 1x1 input image to a 10 (high) x 17 (wide) output with the op's
// antialiasing disabled, and checks it against an antialiased reference.
TEST_F(ScaleAndTranslateOpTest, NonAntialiasedUsampleFromASinglePixelTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 1;
  constexpr int kSquareCols = 1;
  constexpr int kSquarePixels = 1;
  constexpr int kChannels = 3;
  constexpr int kTargetHeight = 10;
  constexpr int kTargetWidth = 17;

  CreateOp("lanczos3", /*antialias=*/false);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  // (x, y) scale: 17 across, 10 down.
  const Vector2f enlarge(17.0f, 10.0f);
  const Vector2f no_shift(0.0f, 0.0f);

  // Antialiasing should make no difference when enlarging, so the reference
  // result is deliberately computed with antialiasing switched on.
  antialias_ = true;
  RunTest(kTargetHeight, kTargetWidth, enlarge, no_shift);
}
387
// Anisotropic scale combined with a non-zero translation, antialiasing on.
TEST_F(ScaleAndTranslateOpTest, AntialiasedScaleAndTranslationTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 11;
  constexpr int kSquareCols = 7;
  constexpr int kSquarePixels = 5;
  constexpr int kChannels = 3;
  constexpr int kTargetHeight = 49;
  constexpr int kTargetWidth = 51;

  CreateOp("lanczos3", /*antialias=*/true);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  const Vector2f mixed_scale(1.25f, 0.6f);
  const Vector2f shift(4.1f, -3.1f);
  RunTest(kTargetHeight, kTargetWidth, mixed_scale, shift);
}
403
// Anisotropic scale combined with a non-zero translation, antialiasing off.
TEST_F(ScaleAndTranslateOpTest, NonAntialiasedScaleAndTranslationTest) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 11;
  constexpr int kSquareCols = 7;
  constexpr int kSquarePixels = 5;
  constexpr int kChannels = 3;
  constexpr int kTargetHeight = 49;
  constexpr int kTargetWidth = 51;

  CreateOp("lanczos3", /*antialias=*/false);
  SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                            kChannels);

  const Vector2f mixed_scale(1.25f, 0.6f);
  const Vector2f shift(4.1f, -3.1f);
  RunTest(kTargetHeight, kTargetWidth, mixed_scale, shift);
}
419
// Runs the same scale-and-translate configuration once per listed kernel
// type, re-creating the op and its inputs each time.
TEST_F(ScaleAndTranslateOpTest, TestKernelTypes) {
  constexpr int kBatch = 2;
  constexpr int kSquareRows = 10;
  constexpr int kSquareCols = 11;
  constexpr int kSquarePixels = 1;
  constexpr int kChannels = 3;
  constexpr int kTargetHeight = 9;
  constexpr int kTargetWidth = 11;
  const Vector2f scale(1.9f, 1.9f);
  const Vector2f shift(0.3f, 2.1f);

  const std::vector<string> kernel_names = {
      "lanczos1", "lanczos3",  "lanczos5",     "box",
      "triangle", "keyscubic", "mitchellcubic"};
  for (const string& name : kernel_names) {
    CreateOp(name, /*antialias=*/true);
    SetCheckerboardImageInput(kBatch, kSquareRows, kSquareCols, kSquarePixels,
                              kChannels);
    RunTest(kTargetHeight, kTargetWidth, scale, shift);
  }
}
440
441 } // namespace tensorflow
442