• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/math.h>
#include <xnnpack/params.h>
23 
24 
25 class IBilinearMicrokernelTester {
26  public:
pixels(uint32_t pixels)27   inline IBilinearMicrokernelTester& pixels(uint32_t pixels) {
28     assert(pixels >= 1);
29     this->pixels_ = pixels;
30     return *this;
31   }
32 
pixels()33   inline uint32_t pixels() const {
34     return this->pixels_;
35   }
36 
channels(uint32_t channels)37   inline IBilinearMicrokernelTester& channels(uint32_t channels) {
38     assert(channels >= 1);
39     this->channels_ = channels;
40     return *this;
41   }
42 
channels()43   inline uint32_t channels() const {
44     return this->channels_;
45   }
46 
input_offset(uint32_t input_offset)47   inline IBilinearMicrokernelTester& input_offset(uint32_t input_offset) {
48     this->input_offset_ = input_offset;
49     return *this;
50   }
51 
input_offset()52   inline uint32_t input_offset() const {
53     return this->input_offset_;
54   }
55 
output_stride(uint32_t output_stride)56   inline IBilinearMicrokernelTester& output_stride(uint32_t output_stride) {
57     assert(output_stride != 0);
58     this->output_stride_ = output_stride;
59     return *this;
60   }
61 
output_stride()62   inline uint32_t output_stride() const {
63     if (this->output_stride_ == 0) {
64       return channels();
65     } else {
66       assert(this->output_stride_ >= channels());
67       return this->output_stride_;
68     }
69   }
70 
iterations(size_t iterations)71   inline IBilinearMicrokernelTester& iterations(size_t iterations) {
72     this->iterations_ = iterations;
73     return *this;
74   }
75 
iterations()76   inline size_t iterations() const {
77     return this->iterations_;
78   }
79 
input_stride(uint32_t input_stride)80   inline IBilinearMicrokernelTester& input_stride(uint32_t input_stride) {
81     assert(input_stride != 0);
82     this->input_stride_ = input_stride;
83     return *this;
84   }
85 
input_stride()86   inline uint32_t input_stride() const {
87     if (this->input_stride_ == 0) {
88       return 4 * pixels();
89     } else {
90       assert(this->input_stride_ >= 4 * pixels());
91       return this->input_stride_;
92     }
93   }
94 
Test(xnn_f32_ibilinear_ukernel_function ibilinear)95   void Test(xnn_f32_ibilinear_ukernel_function ibilinear) const {
96     std::random_device random_device;
97     auto rng = std::mt19937(random_device());
98     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
99 
100     std::vector<const float*> indirection(pixels() * 4);
101     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
102     std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
103     std::vector<float> output((pixels() - 1) * output_stride() + channels());
104     std::vector<float> output_ref(pixels() * channels());
105 
106     for (size_t iteration = 0; iteration < iterations(); iteration++) {
107       std::generate(input.begin(), input.end(), std::ref(f32rng));
108       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
109       std::fill(output.begin(), output.end(), nanf(""));
110 
111       for (size_t i = 0; i < indirection.size(); i++) {
112         indirection[i] = input.data() + i * channels() - input_offset();
113       }
114       std::shuffle(indirection.begin(), indirection.end(), rng);
115 
116       // Compute reference results.
117       for (size_t i = 0; i < pixels(); i++) {
118         for (size_t c = 0; c < channels(); c++) {
119           const float alpha_h = packed_weights[i * 2 + 0];
120           const float alpha_v = packed_weights[i * 2 + 1];
121           output_ref[i * channels() + c] =
122             indirection[i * 4 + 0][c + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
123             indirection[i * 4 + 1][c + input_offset()] * alpha_h * (1.0f - alpha_v) +
124             indirection[i * 4 + 2][c + input_offset()] * (1.0f - alpha_h) * alpha_v +
125             indirection[i * 4 + 3][c + input_offset()] * alpha_h * alpha_v;
126         }
127       }
128 
129       // Call optimized micro-kernel.
130       ibilinear(
131         pixels(), channels() * sizeof(float),
132         indirection.data(), input_offset() * sizeof(float),
133         packed_weights.data(), output.data(),
134         (output_stride() - channels()) * sizeof(float));
135 
136       // Verify results.
137       for (size_t i = 0; i < pixels(); i++) {
138         for (size_t c = 0; c < channels(); c++) {
139           ASSERT_NEAR(
140               output_ref[i * channels() + c],
141               output[i * output_stride() + c],
142               std::abs(output_ref[i * channels() + c]) * 1.0e-4)
143             << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
144         }
145       }
146     }
147   }
148 
Test(xnn_s8_ibilinear_ukernel_function ibilinear)149   void Test(xnn_s8_ibilinear_ukernel_function ibilinear) const {
150     std::random_device random_device;
151     auto rng = std::mt19937(random_device());
152     auto i8rng = std::bind(
153       std::uniform_int_distribution<int16_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
154       std::ref(rng));
155     auto w11rng = std::bind(std::uniform_int_distribution<int16_t>(0, 2047), std::ref(rng));
156 
157     std::vector<const int8_t*> indirection(pixels() * 4);
158     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
159     std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
160     std::vector<int8_t> output((pixels() - 1) * output_stride() + channels());
161     std::vector<int8_t> output_ref(pixels() * channels());
162 
163     for (size_t iteration = 0; iteration < iterations(); iteration++) {
164       std::generate(input.begin(), input.end(), std::ref(i8rng));
165       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
166       std::fill(output.begin(), output.end(), INT8_C(0xFA));
167 
168       for (size_t i = 0; i < indirection.size(); i++) {
169         indirection[i] = input.data() + i * channels() - input_offset();
170       }
171       std::shuffle(indirection.begin(), indirection.end(), rng);
172 
173       // Compute reference results.
174       for (size_t i = 0; i < pixels(); i++) {
175         for (size_t c = 0; c < channels(); c++) {
176           const int32_t alpha_h = packed_weights[i * 2 + 0];
177           const int32_t alpha_v = packed_weights[i * 2 + 1];
178           const int32_t acc = asr_s32(
179             int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
180             int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
181             int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
182             int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v +
183             2097152, 22);
184           ASSERT_GE(acc, std::numeric_limits<int8_t>::min());
185           ASSERT_LE(acc, std::numeric_limits<int8_t>::max());
186           output_ref[i * channels() + c] = (int8_t) acc;
187         }
188       }
189 
190       // Call optimized micro-kernel.
191       ibilinear(
192         pixels(), channels() * sizeof(int8_t),
193         indirection.data(), input_offset() * sizeof(int8_t),
194         packed_weights.data(), output.data(),
195         (output_stride() - channels()) * sizeof(int8_t));
196 
197       // Verify results.
198       for (size_t i = 0; i < pixels(); i++) {
199         for (size_t c = 0; c < channels(); c++) {
200           ASSERT_EQ(int32_t(output_ref[i * channels() + c]), int32_t(output[i * output_stride() + c]))
201             << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
202         }
203       }
204     }
205   }
206 
Test(xnn_u8_ibilinear_ukernel_function ibilinear)207   void Test(xnn_u8_ibilinear_ukernel_function ibilinear) const {
208     std::random_device random_device;
209     auto rng = std::mt19937(random_device());
210     auto u8rng = std::bind(
211       std::uniform_int_distribution<uint16_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
212     auto w11rng = std::bind(std::uniform_int_distribution<uint16_t>(0, 2047), std::ref(rng));
213 
214     std::vector<const uint8_t*> indirection(pixels() * 4);
215     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
216     std::vector<int16_t, AlignedAllocator<int16_t, 64>> packed_weights(pixels() * 2);
217     std::vector<uint8_t> output((pixels() - 1) * output_stride() + channels());
218     std::vector<uint8_t> output_ref(pixels() * channels());
219 
220     for (size_t iteration = 0; iteration < iterations(); iteration++) {
221       std::generate(input.begin(), input.end(), std::ref(u8rng));
222       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(w11rng));
223       std::fill(output.begin(), output.end(), UINT8_C(0xFA));
224 
225       for (size_t i = 0; i < indirection.size(); i++) {
226         indirection[i] = input.data() + i * channels() - input_offset();
227       }
228       std::shuffle(indirection.begin(), indirection.end(), rng);
229 
230       // Compute reference results.
231       for (size_t i = 0; i < pixels(); i++) {
232         for (size_t c = 0; c < channels(); c++) {
233           const uint32_t alpha_h = uint32_t(int32_t(packed_weights[i * 2 + 0]));
234           const uint32_t alpha_v = uint32_t(int32_t(packed_weights[i * 2 + 1]));
235           const uint32_t acc = (2097152 +
236             int32_t(indirection[i * 4 + 0][c + input_offset()]) * (2048 - alpha_h) * (2048 - alpha_v) +
237             int32_t(indirection[i * 4 + 1][c + input_offset()]) * alpha_h * (2048 - alpha_v) +
238             int32_t(indirection[i * 4 + 2][c + input_offset()]) * (2048 - alpha_h) * alpha_v +
239             int32_t(indirection[i * 4 + 3][c + input_offset()]) * alpha_h * alpha_v) >> 22;
240           ASSERT_LE(acc, std::numeric_limits<uint8_t>::max());
241           output_ref[i * channels() + c] = (uint8_t) acc;
242         }
243       }
244 
245       // Call optimized micro-kernel.
246       ibilinear(
247         pixels(), channels() * sizeof(uint8_t),
248         indirection.data(), input_offset() * sizeof(uint8_t),
249         packed_weights.data(), output.data(),
250         (output_stride() - channels()) * sizeof(uint8_t));
251 
252       // Verify results.
253       for (size_t i = 0; i < pixels(); i++) {
254         for (size_t c = 0; c < channels(); c++) {
255           ASSERT_EQ(uint32_t(output_ref[i * channels() + c]), uint32_t(output[i * output_stride() + c]))
256             << "pixel " << i << " / " << pixels() << ", channel " << c << " / " << channels();
257         }
258       }
259     }
260   }
261 
TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear)262   void TestCHW(xnn_f32_ibilinear_chw_ukernel_function ibilinear) const {
263     std::random_device random_device;
264     auto rng = std::mt19937(random_device());
265     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
266 
267     std::vector<const float*> indirection(pixels() * 2);
268     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + (channels() - 1) * input_stride() + 4 * pixels());
269     std::vector<float, AlignedAllocator<float, 64>> packed_weights(pixels() * 2);
270     std::vector<float> output(pixels() * channels());
271     std::vector<float> output_ref(pixels() * channels());
272 
273     for (size_t iteration = 0; iteration < iterations(); iteration++) {
274       std::generate(input.begin(), input.end(), std::ref(f32rng));
275       std::generate(packed_weights.begin(), packed_weights.end(), std::ref(f32rng));
276       std::fill(output.begin(), output.end(), nanf(""));
277 
278       // Indirection will point to the even ("left") pixels of the input.
279       // The kernels will expect "right" pixels to be placed right next to them.
280       for (size_t i = 0; i < indirection.size(); i++) {
281         const float* left_corner = input.data() + 2 * i - input_offset();
282         indirection[i] = left_corner;
283       }
284       std::shuffle(indirection.begin(), indirection.end(), rng);
285 
286       // Compute reference results.
287       for (size_t i = 0; i < pixels(); i++) {
288         for (size_t c = 0; c < channels(); c++) {
289           const float alpha_h = packed_weights[i * 2 + 0];
290           const float alpha_v = packed_weights[i * 2 + 1];
291           // `c * pixels() + i` because the output is NCHW.
292           output_ref[c * pixels() + i] =
293             // `c * indirection.size()` because the input is NCHW.
294             (indirection[i * 2 + 0] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * (1.0f - alpha_v) +
295             (indirection[i * 2 + 0] + 1)[c * input_stride() + input_offset()] * alpha_h * (1.0f - alpha_v) +
296             (indirection[i * 2 + 1] + 0)[c * input_stride() + input_offset()] * (1.0f - alpha_h) * alpha_v +
297             (indirection[i * 2 + 1] + 1)[c * input_stride() + input_offset()] * alpha_h * alpha_v;
298         }
299       }
300 
301       // Call optimized micro-kernel.
302       ibilinear(
303         pixels(), channels(),
304         indirection.data(), input_offset() * sizeof(float),
305         packed_weights.data(), output.data(), input_stride() * sizeof(float));
306 
307       // Verify results.
308       for (size_t c = 0; c < channels(); c++) {
309         for (size_t i = 0; i < pixels(); i++) {
310           ASSERT_NEAR(
311               output_ref[c * pixels() + i],
312               output[c * pixels() + i],
313               std::abs(output_ref[c * pixels() + i]) * 1.0e-4)
314             << "i = " << i << ", channel = " << c;
315         }
316       }
317     }
318   }
319 
320  private:
321   uint32_t channels_{1};
322   uint32_t pixels_{1};
323   uint32_t output_stride_{0};
324   uint32_t input_stride_{0};
325   uint32_t input_offset_{0};
326   size_t iterations_{3};
327 };
328