// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
22 
23 
24 class ArgMaxPoolMicrokernelTester {
25  public:
26   enum class Variant {
27     Native,
28     Scalar,
29   };
30 
output_pixels(size_t output_pixels)31   inline ArgMaxPoolMicrokernelTester& output_pixels(size_t output_pixels) {
32     assert(output_pixels != 0);
33     this->output_pixels_ = output_pixels;
34     return *this;
35   }
36 
output_pixels()37   inline size_t output_pixels() const {
38     return this->output_pixels_;
39   }
40 
step(size_t step)41   inline ArgMaxPoolMicrokernelTester& step(size_t step) {
42     assert(step != 0);
43     this->step_ = step;
44     return *this;
45   }
46 
step()47   inline size_t step() const {
48     return this->step_;
49   }
50 
input_offset(size_t input_offset)51   inline ArgMaxPoolMicrokernelTester& input_offset(size_t input_offset) {
52     assert(input_offset != 0);
53     this->input_offset_ = input_offset;
54     return *this;
55   }
56 
input_offset()57   inline size_t input_offset() const {
58     return this->input_offset_;
59   }
60 
pooling_elements(size_t pooling_elements)61   inline ArgMaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) {
62     assert(pooling_elements != 0);
63     this->pooling_elements_ = pooling_elements;
64     return *this;
65   }
66 
pooling_elements()67   inline size_t pooling_elements() const {
68     return this->pooling_elements_;
69   }
70 
packed_pooling_elements()71   inline size_t packed_pooling_elements() const {
72     if (pooling_elements() <= primary_pooling_tile()) {
73       return primary_pooling_tile();
74     } else {
75       return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile();
76     }
77   }
78 
pooling_tile(size_t primary_tile)79   inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile) {
80     assert(primary_tile != 0);
81     this->primary_pooling_tile_ = primary_tile;
82     this->incremental_pooling_tile_ = 0;
83     return *this;
84   }
85 
pooling_tile(size_t primary_tile,size_t incremental_tile)86   inline ArgMaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) {
87     assert(primary_tile != 0);
88     this->primary_pooling_tile_ = primary_tile;
89     this->incremental_pooling_tile_ = incremental_tile;
90     return *this;
91   }
92 
primary_pooling_tile(size_t primary_pooling_tile)93   inline ArgMaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) {
94     assert(primary_pooling_tile != 0);
95     this->primary_pooling_tile_ = primary_pooling_tile;
96     return *this;
97   }
98 
primary_pooling_tile()99   inline size_t primary_pooling_tile() const {
100     return this->primary_pooling_tile_;
101   }
102 
incremental_pooling_tile(size_t incremental_pooling_tile)103   inline ArgMaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) {
104     assert(incremental_pooling_tile != 0);
105     this->incremental_pooling_tile_ = incremental_pooling_tile;
106     return *this;
107   }
108 
incremental_pooling_tile()109   inline size_t incremental_pooling_tile() const {
110     return this->incremental_pooling_tile_;
111   }
112 
channels(size_t channels)113   inline ArgMaxPoolMicrokernelTester& channels(size_t channels) {
114     assert(channels != 0);
115     this->channels_ = channels;
116     return *this;
117   }
118 
channels()119   inline size_t channels() const {
120     return this->channels_;
121   }
122 
output_stride(size_t output_stride)123   inline ArgMaxPoolMicrokernelTester& output_stride(size_t output_stride) {
124     assert(output_stride != 0);
125     this->output_stride_ = output_stride;
126     return *this;
127   }
128 
output_stride()129   inline size_t output_stride() const {
130     if (this->output_stride_ == 0) {
131       return channels();
132     } else {
133       assert(this->output_stride_ >= channels());
134       return this->output_stride_;
135     }
136   }
137 
iterations(size_t iterations)138   inline ArgMaxPoolMicrokernelTester& iterations(size_t iterations) {
139     this->iterations_ = iterations;
140     return *this;
141   }
142 
iterations()143   inline size_t iterations() const {
144     return this->iterations_;
145   }
146 
147   void Test(xnn_f32_argmaxpool_unipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const {
148     std::random_device random_device;
149     auto rng = std::mt19937(random_device());
150     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
151 
152     std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
153     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
154       ((output_pixels() - 1) * step() + pooling_elements()) * channels());
155     std::vector<float> output((output_pixels() - 1) * output_stride() + channels());
156     std::vector<uint32_t> index(output_pixels() * channels());
157     std::vector<float> output_ref(output_pixels() * channels());
158     std::vector<uint32_t> index_ref(output_pixels() * channels());
159     for (size_t iteration = 0; iteration < iterations(); iteration++) {
160       std::generate(input.begin(), input.end(), std::ref(f32rng));
161       std::fill(output.begin(), output.end(), nanf(""));
162 
163       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
164         indirect_input[i] = input.data() + i * channels() - input_offset();
165       }
166       std::shuffle(indirect_input.begin(),
167         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
168 
169       // Compute reference results, without clamping.
170       for (size_t x = 0; x < output_pixels(); x++) {
171         for (size_t c = 0; c < channels(); c++) {
172           float max_value = indirect_input[x * step()][c + input_offset()];
173           uint32_t max_index = 0;
174           for (size_t p = 0; p < pooling_elements(); p++) {
175             const float value = indirect_input[x * step() + p][c + input_offset()];
176             if (value > max_value) {
177               max_value = value;
178               max_index = p;
179             }
180           }
181           output_ref[x * channels() + c] = max_value;
182           index_ref[x * channels() + c] = max_index;
183         }
184       }
185 
186       // Call optimized micro-kernel.
187       argmaxpool(output_pixels(), pooling_elements(), channels(),
188         indirect_input.data(), input_offset() * sizeof(float), output.data(), index.data(),
189         step() * sizeof(void*),
190         (output_stride() - channels()) * sizeof(float));
191 
192       // Verify results.
193       for (size_t x = 0; x < output_pixels(); x++) {
194         for (size_t c = 0; c < channels(); c++) {
195           ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
196             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
197             << ", pooling elements = " << pooling_elements() << ", step = " << step()
198             << ", input offset = " << input_offset();
199           ASSERT_EQ(
200               indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()],
201               indirect_input[x * step() + index[x * channels() + c]][c + input_offset()])
202             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
203             << ", pooling elements = " << pooling_elements() << ", step = " << step()
204             << ", input offset = " << input_offset();
205           ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c])
206             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
207             << ", pooling elements = " << pooling_elements() << ", step = " << step()
208             << ", input offset = " << input_offset();
209         }
210       }
211     }
212   }
213 
214   void Test(xnn_f32_argmaxpool_multipass_ukernel_function argmaxpool, Variant variant = Variant::Native) const {
215     std::random_device random_device;
216     auto rng = std::mt19937(random_device());
217     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
218 
219     std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
220     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
221       ((output_pixels() - 1) * step() + pooling_elements()) * channels());
222     std::vector<float> output((output_pixels() - 1) * output_stride() + channels());
223     std::vector<uint32_t> index(output_pixels() * channels());
224     std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> index_buffer(
225       channels() + XNN_EXTRA_BYTES / sizeof(uint32_t));
226     std::vector<float, AlignedAllocator<float, 64>> output_buffer(
227       channels() + XNN_EXTRA_BYTES / sizeof(float));
228     std::vector<float> output_ref(output_pixels() * channels());
229     std::vector<uint32_t> index_ref(output_pixels() * channels());
230     for (size_t iteration = 0; iteration < iterations(); iteration++) {
231       std::generate(input.begin(), input.end(), std::ref(f32rng));
232       std::fill(output.begin(), output.end(), nanf(""));
233 
234       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
235         indirect_input[i] = input.data() + i * channels() - input_offset();
236       }
237       std::shuffle(indirect_input.begin(),
238         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
239 
240       // Compute reference results, without clamping.
241       for (size_t x = 0; x < output_pixels(); x++) {
242         for (size_t c = 0; c < channels(); c++) {
243           float max_value = indirect_input[x * step()][c + input_offset()];
244           uint32_t max_index = 0;
245           for (size_t p = 0; p < pooling_elements(); p++) {
246             const float value = indirect_input[x * step() + p][c + input_offset()];
247             if (value > max_value) {
248               max_value = value;
249               max_index = p;
250             }
251           }
252           output_ref[x * channels() + c] = max_value;
253           index_ref[x * channels() + c] = max_index;
254         }
255       }
256 
257       // Call optimized micro-kernel.
258       argmaxpool(output_pixels(), pooling_elements(), channels(),
259         indirect_input.data(), input_offset() * sizeof(float),
260         output_buffer.data(), index_buffer.data(),
261         output.data(), index.data(),
262         (step() - (packed_pooling_elements() - incremental_pooling_tile())) * sizeof(void*),
263         (output_stride() - channels()) * sizeof(float));
264 
265       // Verify results.
266       for (size_t x = 0; x < output_pixels(); x++) {
267         for (size_t c = 0; c < channels(); c++) {
268           ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
269             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
270             << ", pooling elements = " << pooling_elements() << ", step = " << step()
271             << ", input offset = " << input_offset();
272           ASSERT_EQ(
273               indirect_input[x * step() + index_ref[x * channels() + c]][c + input_offset()],
274               indirect_input[x * step() + index[x * channels() + c]][c + input_offset()])
275             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
276             << ", pooling elements = " << pooling_elements() << ", step = " << step()
277             << ", input offset = " << input_offset();
278           ASSERT_EQ(index_ref[x * channels() + c], index[x * channels() + c])
279             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
280             << ", pooling elements = " << pooling_elements() << ", step = " << step()
281             << ", input offset = " << input_offset();
282         }
283       }
284     }
285   }
286 
287  private:
288   size_t output_pixels_{1};
289   size_t pooling_elements_{1};
290   size_t channels_{1};
291   size_t input_offset_{0};
292   size_t step_{1};
293   size_t primary_pooling_tile_{1};
294   size_t incremental_pooling_tile_{1};
295   size_t output_stride_{0};
296   size_t iterations_{3};
297 };
298