• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #include "tensorflow/core/framework/allocator.h"
17 #include "tensorflow/core/framework/fake_input.h"
18 #include "tensorflow/core/framework/node_def_builder.h"
19 #include "tensorflow/core/framework/op_kernel.h"
20 #include "tensorflow/core/framework/tensor.h"
21 #include "tensorflow/core/framework/tensor_testutil.h"
22 #include "tensorflow/core/framework/types.h"
23 #include "tensorflow/core/framework/types.pb.h"
24 #include "tensorflow/core/kernels/image/sampling_kernels.h"
25 #include "tensorflow/core/kernels/ops_testutil.h"
26 #include "tensorflow/core/kernels/ops_util.h"
27 #include "tensorflow/core/lib/core/status_test_util.h"
28 #include "tensorflow/core/lib/random/random.h"
29 #include "tensorflow/core/lib/random/simple_philox.h"
30 #include "tensorflow/core/lib/strings/str_util.h"
31 #include "tensorflow/core/platform/test.h"
32 #include "tensorflow/core/public/session.h"
33 #include "tensorflow/core/util/ptr_util.h"
34 
35 namespace tensorflow {
36 using Eigen::Vector2f;
37 
// Runtime-polymorphic interface over a 1-D sampling kernel, so that kernels of
// different static types can be used through a single virtual API.
class DynamicKernel {
 public:
  virtual ~DynamicKernel() = default;
  // Returns the kernel weight at offset `x`.
  virtual float Value(const float x) const = 0;
  // Returns the kernel's radius.
  virtual float Radius() const = 0;
};
44 
45 // Wraps a sampling kernel in a common interface.
46 template <typename KernelType>
47 class TypedDynamicKernel : public DynamicKernel {
48  public:
TypedDynamicKernel(const KernelType & kernel)49   explicit TypedDynamicKernel(const KernelType& kernel) : kernel_(kernel) {}
Value(const float x) const50   float Value(const float x) const override { return kernel_(x); }
Radius() const51   float Radius() const override { return kernel_.Radius(); }
52   const KernelType kernel_;
53 };
54 
55 template <typename KernelType>
CreateKernel(const KernelType & kernel)56 std::unique_ptr<const DynamicKernel> CreateKernel(const KernelType& kernel) {
57   return MakeUnique<TypedDynamicKernel<KernelType>>(kernel);
58 }
59 
Create(functor::SamplingKernelType kernel_type)60 std::unique_ptr<const DynamicKernel> Create(
61     functor::SamplingKernelType kernel_type) {
62   switch (kernel_type) {
63     case functor::Lanczos1Kernel:
64       return CreateKernel(functor::CreateLanczos1Kernel());
65     case functor::Lanczos3Kernel:
66       return CreateKernel(functor::CreateLanczos3Kernel());
67     case functor::Lanczos5Kernel:
68       return CreateKernel(functor::CreateLanczos5Kernel());
69     case functor::GaussianKernel:
70       return CreateKernel(functor::CreateGaussianKernel());
71     case functor::BoxKernel:
72       return CreateKernel(functor::CreateBoxKernel());
73     case functor::TriangleKernel:
74       return CreateKernel(functor::CreateTriangleKernel());
75     case functor::KeysCubicKernel:
76       return CreateKernel(functor::CreateKeysCubicKernel());
77     case functor::MitchellCubicKernel:
78       return CreateKernel(functor::CreateMitchellCubicKernel());
79     default:
80       LOG(FATAL) << "Unknown kernel type.";
81       return nullptr;
82   }
83 }
84 
// Restricts `value` to the closed interval [low, high].
//
// Evaluated as min(high, max(low, value)), so when low > high the result is
// `high`. The result is a reference to one of the arguments: callers that pass
// temporaries must copy it before the end of the full expression.
template <typename T>
inline const T& Clamp(const T& low, const T& high, const T& value) {
  return std::min(high, std::max(low, value));
}
89 
90 // Samples from the image at the passed batch at pixel location sample_f with a
91 // kernel scaled by scale.
Sample(const DynamicKernel & kernel,const bool antialias,TTypes<float,4>::Tensor images,const int batch,const Vector2f & scale,const Vector2f & sample_f,float * dest)92 void Sample(const DynamicKernel& kernel, const bool antialias,
93             TTypes<float, 4>::Tensor images, const int batch,
94             const Vector2f& scale, const Vector2f& sample_f, float* dest) {
95   const Vector2f kernel_scale(antialias ? std::max(scale.x(), 1.0f) : 1.0,
96                               antialias ? std::max(scale.y(), 1.0f) : 1.0);
97 
98   const int64 in_height = images.dimension(1);
99   const int64 in_width = images.dimension(2);
100   const int channels = images.dimension(3);
101   const int64 y_span_start = Clamp(
102       static_cast<int64>(0), in_height - 1,
103       static_cast<int64>(
104           std::ceil(sample_f.y() - kernel.Radius() * kernel_scale.y() - 0.5f)));
105   const int64 y_span_end =
106       Clamp(static_cast<int64>(0), in_height - 1,
107             static_cast<int64>(std::floor(
108                 sample_f.y() + kernel.Radius() * kernel_scale.y() - 0.5f))) +
109       1;
110   const int64 x_span_start = Clamp(
111       static_cast<int64>(0), in_width - 1,
112       static_cast<int64>(
113           std::ceil(sample_f.x() - kernel.Radius() * kernel_scale.x() - 0.5f)));
114 
115   const int64 x_span_end =
116       Clamp(static_cast<int64>(0), in_width - 1,
117             static_cast<int64>(std::floor(
118                 sample_f.x() + kernel.Radius() * kernel_scale.x() - 0.5f))) +
119       1;
120 
121   std::fill(dest, dest + channels, 0.0f);
122   if (sample_f.x() < 0.0f || sample_f.y() < 0.0f || sample_f.x() > in_width ||
123       sample_f.y() > in_height) {
124     return;
125   }
126   const Vector2f one_over_kernel_scale(1.0f / kernel_scale.x(),
127                                        1.0f / kernel_scale.y());
128   float total_weight = 0.0f;
129   for (int64 y = y_span_start; y < y_span_end; ++y) {
130     float y_kernel_pos = static_cast<float>(y) + 0.5f - sample_f.y();
131     float y_weight = kernel.Value(y_kernel_pos * one_over_kernel_scale.y());
132     for (int64 x = x_span_start; x < x_span_end; ++x) {
133       float x_kernel_pos = static_cast<float>(x) + 0.5f - sample_f.x();
134       float x_weight = kernel.Value(x_kernel_pos * one_over_kernel_scale.x());
135       float kernel_weight = y_weight * x_weight;
136       total_weight += kernel_weight;
137       for (int c = 0; c < channels; ++c) {
138         dest[c] += static_cast<float>(images(batch, y, x, c)) * kernel_weight;
139       }
140     }
141   }
142   if (std::abs(total_weight) >= 1000.0f * std::numeric_limits<float>::min()) {
143     CHECK_NE(total_weight, 0.0f) << y_span_start << "," << y_span_end << " "
144                                  << x_span_start << "," << x_span_end;
145     for (int c = 0; c < channels; ++c) {
146       dest[c] /= total_weight;
147     }
148   }
149 }
150 
151 // This is the straight forward unoptimized implementation of ScaleAndTranslate
152 // We use this to confirm that the optimized version is almost identical. The
153 // only difference will be small floating point differences, since this version
154 // does not to separable passes in x and y dimensions.
ScaleAndTranslateBaseline(const DynamicKernel & kernel,const bool antialias,TTypes<float,4>::Tensor images,const Vector2f & orig_scale,const Vector2f & orig_translate,TTypes<float,4>::Tensor output)155 void ScaleAndTranslateBaseline(const DynamicKernel& kernel,
156                                const bool antialias,
157                                TTypes<float, 4>::Tensor images,
158                                const Vector2f& orig_scale,
159                                const Vector2f& orig_translate,
160                                TTypes<float, 4>::Tensor output) {
161   const Vector2f scale(1.0f / orig_scale[0], 1.0f / orig_scale[1]);
162   const Vector2f translate(-orig_translate[0] / orig_scale[0],
163                            -orig_translate[1] / orig_scale[1]);
164 
165   const int batch = images.dimension(0);
166   const int channels = images.dimension(3);
167 
168   ASSERT_EQ(batch, output.dimension(0));
169   ASSERT_EQ(channels, output.dimension(3));
170 
171   const int64 out_height = output.dimension(1);
172   const int64 out_width = output.dimension(2);
173   const int64 in_height = images.dimension(1);
174   const int64 in_width = images.dimension(2);
175 
176   for (int b = 0; b < batch; ++b) {
177     for (int64 y = 0; y < out_height; ++y) {
178       const float out_y_f = static_cast<float>(y) + 0.5;
179       const float in_y_f = out_y_f * scale.y() + translate.y();
180       for (int64 x = 0; x < out_width; ++x) {
181         const float out_x_f = static_cast<float>(x) + 0.5;
182         const float in_x_f = out_x_f * scale.x() + translate.x();
183         if (in_x_f < 0.0f || in_y_f < 0.0f || in_x_f > in_width ||
184             in_y_f > in_height) {
185           std::fill(&output(b, y, x, 0), &output(b, y, x + 1, 0), 0.0f);
186         } else {
187           Sample(kernel, antialias, images, b, scale, Vector2f(in_x_f, in_y_f),
188                  &output(b, y, x, 0));
189         }
190       }
191     }
192   }
193 }
194 
195 class ScaleAndTranslateOpTest : public OpsTestBase {
196  protected:
CreateOp(const string & kernel_type_str,const bool antialias)197   void CreateOp(const string& kernel_type_str, const bool antialias) {
198     TF_EXPECT_OK(NodeDefBuilder("scale_and_translate_op", "ScaleAndTranslate")
199                      .Input(FakeInput(DT_FLOAT))
200                      .Input(FakeInput(DT_INT32))
201                      .Input(FakeInput(DT_FLOAT))
202                      .Input(FakeInput(DT_FLOAT))
203                      .Attr("kernel_type", kernel_type_str)
204                      .Attr("antialias", antialias)
205                      .Finalize(node_def()));
206     TF_EXPECT_OK(InitOp());
207     kernel_type_ = functor::SamplingKernelTypeFromString(kernel_type_str);
208     antialias_ = antialias;
209   }
210 
SetCheckerboardImageInput(int batch_size,int num_row_squares,int num_col_squares,int square_size,int num_channels)211   void SetCheckerboardImageInput(int batch_size, int num_row_squares,
212                                  int num_col_squares, int square_size,
213                                  int num_channels) {
214     inputs_.clear();
215     std::vector<float> data;
216     const int64 row_size = num_col_squares * square_size * num_channels;
217     const int64 image_size = num_row_squares * square_size * row_size;
218     data.resize(batch_size * image_size);
219     random::PhiloxRandom philox(42);
220     random::SimplePhilox rnd(&philox);
221     std::vector<float> col(num_channels);
222     for (int b = 0; b < batch_size; ++b) {
223       for (int y = 0; y < num_row_squares; ++y) {
224         for (int x = 0; x < num_col_squares; ++x) {
225           for (int n = 0; n < num_channels; ++n) {
226             col[n] = rnd.RandFloat();
227           }
228           for (int r = y * square_size; r < (y + 1) * square_size; ++r) {
229             auto it = data.begin() + b * image_size + r * row_size +
230                       x * square_size * num_channels;
231             for (int n = 0; n < square_size; ++n) {
232               for (int chan = 0; chan < num_channels; ++chan, ++it) {
233                 *it = col[chan] * 255.0;
234               }
235             }
236           }
237         }
238       }
239     }
240     AddInputFromArray<float>(
241         TensorShape({batch_size, num_row_squares * square_size,
242                      num_col_squares * square_size, num_channels}),
243         data);
244   }
245 
RunTest(int output_image_height,int output_image_width,const Vector2f & scale,const Vector2f & translate)246   void RunTest(int output_image_height, int output_image_width,
247                const Vector2f& scale, const Vector2f& translate) {
248     AddInputFromArray<int32>(TensorShape({2}),
249                              {output_image_height, output_image_width});
250     AddInputFromArray<float>(TensorShape({2}), {scale[1], scale[0]});
251     AddInputFromArray<float>(TensorShape({2}), {translate[1], translate[0]});
252     Status s = RunOpKernel();
253     const int batch_size = GetOutput(0)->dim_size(0);
254     const int channels = GetOutput(0)->dim_size(3);
255     Tensor expected(allocator(), DT_FLOAT,
256                     TensorShape({batch_size, output_image_height,
257                                  output_image_width, channels}));
258 
259     std::unique_ptr<const DynamicKernel> kernel = Create(kernel_type_);
260     ScaleAndTranslateBaseline(*kernel, antialias_,
261                               mutable_input(0)->tensor<float, 4>(), scale,
262                               translate, expected.tensor<float, 4>());
263     constexpr double kAbs = 1e-2f;
264     test::ExpectTensorNear<float>(expected, *GetOutput(0), kAbs);
265   }
266 
267   functor::SamplingKernelType kernel_type_;
268   bool antialias_;
269 };
270 
// Unit scale and zero translation with an output the same size as the input.
TEST_F(ScaleAndTranslateOpTest, IdentityTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 16;
  constexpr int kNumColSquares = 13;
  constexpr int kSquareSize = 12;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize;
  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize;
  const Vector2f kScale(1.0f, 1.0f);
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
286 
// Scale of 2 on both axes into an output twice the input size.
TEST_F(ScaleAndTranslateOpTest, UpsampleTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 16;
  constexpr int kNumColSquares = 13;
  constexpr int kSquareSize = 12;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize * 2;
  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize * 2;
  const Vector2f kScale(2.0f, 2.0f);
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
302 
// Scale of 0.5 on both axes into an output half the input size.
TEST_F(ScaleAndTranslateOpTest, DownsampleTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 16;
  constexpr int kNumColSquares = 13;
  constexpr int kSquareSize = 12;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize / 2;
  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize / 2;
  const Vector2f kScale(0.5f, 0.5f);
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
318 
// Antialiased downsample of the whole image into a 1x1 output.
TEST_F(ScaleAndTranslateOpTest, AntiAliasedDownsampleToASinglePixelTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 16;
  constexpr int kNumColSquares = 13;
  constexpr int kSquareSize = 12;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 1;
  constexpr int kOutputImageWidth = 1;
  // NOTE(review): kScale is an (x, y) pair, yet its first component is derived
  // from the row count and its second from the column count. This looks
  // transposed; the op and the reference still receive the same values, so the
  // comparison itself is unaffected. Confirm the intent.
  const Vector2f kScale(1.0f / (kNumRowSquares * kSquareSize),
                        1.0f / (kNumColSquares * kSquareSize));
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
335 
// Same 1x1 downsample as the antialiased variant, with antialiasing disabled.
TEST_F(ScaleAndTranslateOpTest, NonAntiAliasedDownsampleToASinglePixelTest) {
  CreateOp("lanczos3", false);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 16;
  constexpr int kNumColSquares = 13;
  constexpr int kSquareSize = 12;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 1;
  constexpr int kOutputImageWidth = 1;
  // NOTE(review): kScale is an (x, y) pair, yet its first component is derived
  // from the row count and its second from the column count. This looks
  // transposed; the op and the reference still receive the same values, so the
  // comparison itself is unaffected. Confirm the intent.
  const Vector2f kScale(1.0f / (kNumRowSquares * kSquareSize),
                        1.0f / (kNumColSquares * kSquareSize));
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
352 
// Antialiased upsample of a 1x1 image into a 10 (high) x 17 (wide) output.
TEST_F(ScaleAndTranslateOpTest, UsampleFromASinglePixelTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 1;
  constexpr int kNumColSquares = 1;
  constexpr int kSquareSize = 1;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 10;
  constexpr int kOutputImageWidth = 17;
  // (x, y) scale: 17 along the width, 10 along the height.
  const Vector2f kScale(17.0f, 10.0f);
  const Vector2f kTranslate(0.0f, 0.0f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
368 
// Upsample of a 1x1 image into a 10 (high) x 17 (wide) output with the op's
// antialias attribute disabled.
TEST_F(ScaleAndTranslateOpTest, NonAntialiasedUsampleFromASinglePixelTest) {
  CreateOp("lanczos3", false);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 1;
  constexpr int kNumColSquares = 1;
  constexpr int kSquareSize = 1;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 10;
  constexpr int kOutputImageWidth = 17;
  const Vector2f kScale(17.0f, 10.0f);
  const Vector2f kTranslate(0.0f, 0.0f);
  // Anti-aliasing shouldn't have any effect here, verify by comparing with the
  // ground truth with anti-aliasing turned on. Only the reference computation
  // sees this override; the op was already created with antialias = false.
  antialias_ = true;
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
387 
// Antialiased run with different x and y scales and a non-zero translation.
TEST_F(ScaleAndTranslateOpTest, AntialiasedScaleAndTranslationTest) {
  CreateOp("lanczos3", true);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 11;
  constexpr int kNumColSquares = 7;
  constexpr int kSquareSize = 5;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 49;
  constexpr int kOutputImageWidth = 51;
  const Vector2f kScale(1.25f, 0.6f);
  const Vector2f kTranslate(4.1f, -3.1f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
403 
// Non-antialiased run with different x and y scales and a non-zero
// translation.
TEST_F(ScaleAndTranslateOpTest, NonAntialiasedScaleAndTranslationTest) {
  CreateOp("lanczos3", false);
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 11;
  constexpr int kNumColSquares = 7;
  constexpr int kSquareSize = 5;
  constexpr int kNumChannels = 3;
  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                            kSquareSize, kNumChannels);
  constexpr int kOutputImageHeight = 49;
  constexpr int kOutputImageWidth = 51;
  const Vector2f kScale(1.25f, 0.6f);
  const Vector2f kTranslate(4.1f, -3.1f);
  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
}
419 
// Runs the same scale-and-translate case once per listed kernel type,
// recreating the op and its inputs on every iteration.
// NOTE(review): Create() also handles a Gaussian kernel that is not listed
// here - confirm whether the omission is intentional.
TEST_F(ScaleAndTranslateOpTest, TestKernelTypes) {
  const std::vector<string> kKernelTypes = {
      "lanczos1", "lanczos3",  "lanczos5",     "box",
      "triangle", "keyscubic", "mitchellcubic"};
  constexpr int kBatchSize = 2;
  constexpr int kNumRowSquares = 10;
  constexpr int kNumColSquares = 11;
  constexpr int kSquareSize = 1;
  constexpr int kNumChannels = 3;
  constexpr int kOutputImageHeight = 9;
  constexpr int kOutputImageWidth = 11;
  const Vector2f kScale(1.9f, 1.9f);
  const Vector2f kTranslate(0.3f, 2.1f);
  for (const string& kernel_type : kKernelTypes) {
    CreateOp(kernel_type, true);
    SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
                              kSquareSize, kNumChannels);
    RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
  }
}
440 
441 }  // namespace tensorflow
442