• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
24 
25 
26 class MaxPoolMicrokernelTester {
27  public:
28   enum class Variant {
29     Native,
30     Scalar,
31   };
32 
output_pixels(size_t output_pixels)33   inline MaxPoolMicrokernelTester& output_pixels(size_t output_pixels) {
34     assert(output_pixels != 0);
35     this->output_pixels_ = output_pixels;
36     return *this;
37   }
38 
output_pixels()39   inline size_t output_pixels() const {
40     return this->output_pixels_;
41   }
42 
step(size_t step)43   inline MaxPoolMicrokernelTester& step(size_t step) {
44     assert(step != 0);
45     this->step_ = step;
46     return *this;
47   }
48 
step()49   inline size_t step() const {
50     return this->step_;
51   }
52 
input_offset(size_t input_offset)53   inline MaxPoolMicrokernelTester& input_offset(size_t input_offset) {
54     assert(input_offset != 0);
55     this->input_offset_ = input_offset;
56     return *this;
57   }
58 
input_offset()59   inline size_t input_offset() const {
60     return this->input_offset_;
61   }
62 
pooling_elements(size_t pooling_elements)63   inline MaxPoolMicrokernelTester& pooling_elements(size_t pooling_elements) {
64     assert(pooling_elements != 0);
65     this->pooling_elements_ = pooling_elements;
66     return *this;
67   }
68 
pooling_elements()69   inline size_t pooling_elements() const {
70     return this->pooling_elements_;
71   }
72 
packed_pooling_elements()73   inline size_t packed_pooling_elements() const {
74     if (pooling_elements() <= primary_pooling_tile()) {
75       return primary_pooling_tile();
76     } else {
77       return (pooling_elements() - primary_pooling_tile()) % incremental_pooling_tile() == 0 ? pooling_elements() : ((pooling_elements() - primary_pooling_tile()) / incremental_pooling_tile() + 1) * incremental_pooling_tile() + primary_pooling_tile();
78     }
79   }
80 
pooling_tile(size_t primary_tile,size_t incremental_tile)81   inline MaxPoolMicrokernelTester& pooling_tile(size_t primary_tile, size_t incremental_tile) {
82     assert(primary_tile != 0);
83     this->primary_pooling_tile_ = primary_tile;
84     this->incremental_pooling_tile_ = incremental_tile;
85     return *this;
86   }
87 
primary_pooling_tile(size_t primary_pooling_tile)88   inline MaxPoolMicrokernelTester& primary_pooling_tile(size_t primary_pooling_tile) {
89     assert(primary_pooling_tile != 0);
90     this->primary_pooling_tile_ = primary_pooling_tile;
91     return *this;
92   }
93 
primary_pooling_tile()94   inline size_t primary_pooling_tile() const {
95     return this->primary_pooling_tile_;
96   }
97 
incremental_pooling_tile(size_t incremental_pooling_tile)98   inline MaxPoolMicrokernelTester& incremental_pooling_tile(size_t incremental_pooling_tile) {
99     assert(incremental_pooling_tile != 0);
100     this->incremental_pooling_tile_ = incremental_pooling_tile;
101     return *this;
102   }
103 
incremental_pooling_tile()104   inline size_t incremental_pooling_tile() const {
105     return this->incremental_pooling_tile_;
106   }
107 
channels(size_t channels)108   inline MaxPoolMicrokernelTester& channels(size_t channels) {
109     assert(channels != 0);
110     this->channels_ = channels;
111     return *this;
112   }
113 
channels()114   inline size_t channels() const {
115     return this->channels_;
116   }
117 
output_stride(size_t output_stride)118   inline MaxPoolMicrokernelTester& output_stride(size_t output_stride) {
119     assert(output_stride != 0);
120     this->output_stride_ = output_stride;
121     return *this;
122   }
123 
output_stride()124   inline size_t output_stride() const {
125     if (this->output_stride_ == 0) {
126       return channels();
127     } else {
128       assert(this->output_stride_ >= channels());
129       return this->output_stride_;
130     }
131   }
132 
qmin(uint8_t qmin)133   inline MaxPoolMicrokernelTester& qmin(uint8_t qmin) {
134     this->qmin_ = qmin;
135     return *this;
136   }
137 
qmin()138   inline uint8_t qmin() const {
139     return this->qmin_;
140   }
141 
qmax(uint8_t qmax)142   inline MaxPoolMicrokernelTester& qmax(uint8_t qmax) {
143     this->qmax_ = qmax;
144     return *this;
145   }
146 
qmax()147   inline uint8_t qmax() const {
148     return this->qmax_;
149   }
150 
iterations(size_t iterations)151   inline MaxPoolMicrokernelTester& iterations(size_t iterations) {
152     this->iterations_ = iterations;
153     return *this;
154   }
155 
iterations()156   inline size_t iterations() const {
157     return this->iterations_;
158   }
159 
160   void Test(xnn_u8_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const {
161     std::random_device random_device;
162     auto rng = std::mt19937(random_device());
163     auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
164 
165     std::vector<const uint8_t*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
166     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
167       indirect_input.size() * channels());
168     std::vector<uint8_t> output(XNN_EXTRA_BYTES / sizeof(uint8_t) +
169       (output_pixels() - 1) * output_stride() + channels());
170     std::vector<uint8_t> output_ref(output_pixels() * channels());
171     for (size_t iteration = 0; iteration < iterations(); iteration++) {
172       do {
173         std::generate(input.begin(), input.end(), std::ref(u8rng));
174       } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
175       std::fill(output.begin(), output.end(), 0xA5);
176 
177       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
178         indirect_input[i] = input.data() + i * channels() - input_offset();
179       }
180       std::shuffle(indirect_input.begin(),
181         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
182 
183       // Prepare output parameters.
184       xnn_u8_output_params output_params = { };
185       switch (variant) {
186         case Variant::Native:
187           output_params = xnn_init_u8_output_params(qmin(), qmax());
188           break;
189         case Variant::Scalar:
190           output_params = xnn_init_scalar_u8_output_params(qmin(), qmax());
191           break;
192       }
193 
194       // Compute reference results.
195       for (size_t x = 0; x < output_pixels(); x++) {
196         for (size_t c = 0; c < channels(); c++) {
197           uint8_t max_value = 0;
198           for (size_t p = 0; p < pooling_elements(); p++) {
199             max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
200           }
201           max_value = std::min(max_value, qmax());
202           max_value = std::max(max_value, qmin());
203           output_ref[x * channels() + c] = max_value;
204         }
205       }
206 
207       // Call optimized micro-kernel.
208       maxpool(output_pixels(), pooling_elements(), channels(),
209         indirect_input.data(), input_offset() * sizeof(uint8_t), output.data(),
210         (step() - packed_pooling_elements()) * sizeof(void*),
211         (output_stride() - channels()) * sizeof(uint8_t),
212         &output_params);
213 
214       // Verify results.
215       for (size_t x = 0; x < output_pixels(); x++) {
216         for (size_t c = 0; c < channels(); c++) {
217           ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin()))
218             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
219             << ", pooling elements = " << pooling_elements() << ", step = " << step()
220             << ", input offset = " << input_offset();
221           ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax()))
222             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
223             << ", pooling elements = " << pooling_elements() << ", step = " << step()
224             << ", input offset = " << input_offset();
225           ASSERT_EQ(uint32_t(output_ref[x * channels() + c]), uint32_t(output[x * output_stride() + c]))
226             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
227             << ", pooling elements = " << pooling_elements() << ", step = " << step()
228             << ", input offset = " << input_offset();
229         }
230       }
231     }
232   }
233 
234   void Test(xnn_f32_maxpool_ukernel_function maxpool, Variant variant = Variant::Native) const {
235     std::random_device random_device;
236     auto rng = std::mt19937(random_device());
237     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
238 
239     std::vector<const float*> indirect_input((output_pixels() - 1) * step() + packed_pooling_elements());
240     std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
241       ((output_pixels() - 1) * step() + pooling_elements()) * channels());
242     std::vector<float> output(XNN_EXTRA_BYTES / sizeof(float) +
243       (output_pixels() - 1) * output_stride() + channels());
244     std::vector<float> output_ref(output_pixels() * channels());
245     for (size_t iteration = 0; iteration < iterations(); iteration++) {
246       std::generate(input.begin(), input.end(), std::ref(f32rng));
247       std::fill(output.begin(), output.end(), nanf(""));
248 
249       for (size_t i = 0; i < (output_pixels() - 1) * step() + pooling_elements(); i++) {
250         indirect_input[i] = input.data() + i * channels() - input_offset();
251       }
252       std::shuffle(indirect_input.begin(),
253         indirect_input.begin() + (output_pixels() - 1) * step() + pooling_elements(), rng);
254 
255       // Compute reference results, without clamping.
256       for (size_t x = 0; x < output_pixels(); x++) {
257         for (size_t c = 0; c < channels(); c++) {
258           float max_value = -std::numeric_limits<float>::infinity();
259           for (size_t p = 0; p < pooling_elements(); p++) {
260             max_value = std::max(max_value, indirect_input[x * step() + p][c + input_offset()]);
261           }
262           output_ref[x * channels() + c] = max_value;
263         }
264       }
265 
266       // Compute clamping parameters.
267       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
268       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
269       const float accumulated_range = accumulated_max - accumulated_min;
270       const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
271       const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
272 
273 
274       // Prepare output parameters.
275       xnn_f32_output_params output_params = { };
276       switch (variant) {
277         case Variant::Native:
278           output_params = xnn_init_f32_output_params(output_min, output_max);
279           break;
280         case Variant::Scalar:
281           output_params = xnn_init_scalar_f32_output_params(output_min, output_max);
282           break;
283       }
284 
285       // Clamp reference results.
286       for (float& output_value : output_ref) {
287         output_value = std::max(std::min(output_value, output_max), output_min);
288       }
289 
290       // Call optimized micro-kernel.
291       maxpool(output_pixels(), pooling_elements(), channels(),
292         indirect_input.data(), input_offset() * sizeof(float), output.data(),
293         (step() - packed_pooling_elements()) * sizeof(void*),
294         (output_stride() - channels()) * sizeof(float),
295         &output_params);
296 
297       // Verify results.
298       for (size_t x = 0; x < output_pixels(); x++) {
299         for (size_t c = 0; c < channels(); c++) {
300           ASSERT_GE(output[x * output_stride() + c], output_min)
301             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
302             << ", pooling elements = " << pooling_elements() << ", step = " << step()
303             << ", input offset = " << input_offset();
304           ASSERT_LE(output[x * output_stride() + c], output_max)
305             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
306             << ", pooling elements = " << pooling_elements() << ", step = " << step()
307             << ", input offset = " << input_offset();
308           ASSERT_EQ(output_ref[x * channels() + c], output[x * output_stride() + c])
309             << "at pixel " << x << " / " << output_pixels() << ", channel " << c << " / " << channels()
310             << ", pooling elements = " << pooling_elements() << ", step = " << step()
311             << ", input offset = " << input_offset();
312         }
313       }
314     }
315   }
316 
317  private:
318   size_t output_pixels_{1};
319   size_t pooling_elements_{1};
320   size_t channels_{1};
321   size_t input_offset_{0};
322   size_t step_{1};
323   size_t primary_pooling_tile_{1};
324   size_t incremental_pooling_tile_{1};
325   size_t output_stride_{0};
326   uint8_t qmin_{0};
327   uint8_t qmax_{255};
328   size_t iterations_{3};
329 };
330