// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/pack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>
#include <xnnpack/requantization.h>

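// Harness for testing depthwise convolution (DWCONV) micro-kernels: it generates randomized input,
// kernel, and bias data, packs the weights with the corresponding xnn_pack_*_dwconv_ghw_w routine,
// builds an indirection buffer of input-row pointers, and checks the micro-kernel's output against
// a scalar reference computation.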
class DWConvMicrokernelTester {
 public:
  enum class Variant {
    Native,
    Scalar,
  };

  inline DWConvMicrokernelTester& width(uint32_t width) {
    assert(width >= 1);
    this->width_ = width;
    return *this;
  }

  inline uint32_t width() const {
    return this->width_;
  }

  inline DWConvMicrokernelTester& step(uint32_t step) {
    assert(step >= 1);
    this->step_ = step;
    return *this;
  }

  inline uint32_t step() const {
    return this->step_;
  }

  inline DWConvMicrokernelTester& channels(uint32_t channels) {
    assert(channels >= 1);
    this->channels_ = channels;
    return *this;
  }

  inline uint32_t channels() const {
    return this->channels_;
  }

  inline DWConvMicrokernelTester& cr(uint32_t cr) {
    assert(cr != 0);
    assert((cr & (cr - 1)) == 0);
    this->cr_ = cr;
    return *this;
  }

  inline uint32_t cr() const {
    return this->cr_;
  }

  inline DWConvMicrokernelTester& kr(uint32_t kr) {
    assert(kr != 0);
    this->kr_ = kr;
    return *this;
  }

  inline uint32_t kr() const {
    return this->kr_;
  }

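  // Channel count rounded up to a whole number of cr()-wide channel groups; this is the channel
  // dimension that the packed weights buffer is laid out for.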
  inline uint32_t packed_channels() const {
    return (channels() / cr() + !!(channels() % cr())) * cr();
  }

  inline DWConvMicrokernelTester& output_stride(uint32_t output_stride) {
    assert(output_stride != 0);
    this->output_stride_ = output_stride;
    return *this;
  }

  inline uint32_t output_stride() const {
    if (this->output_stride_ == 0) {
      return channels();
    } else {
      assert(this->output_stride_ >= channels());
      return this->output_stride_;
    }
  }

  inline DWConvMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
    this->input_zero_point_ = input_zero_point;
    return *this;
  }

  inline uint8_t input_zero_point() const {
    return this->input_zero_point_;
  }

  inline DWConvMicrokernelTester& kernel_zero_point(uint8_t kernel_zero_point) {
    this->kernel_zero_point_ = kernel_zero_point;
    return *this;
  }

  inline uint8_t kernel_zero_point() const {
    return this->kernel_zero_point_;
  }

  inline DWConvMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline DWConvMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline DWConvMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(xnn_q8_dwconv_up_ukernel_function dwconv, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

    std::vector<const uint8_t*> indirection((width() - 1) * step() + kr());
    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
    std::vector<uint8_t> kernel(channels() * kr());
    std::vector<int32_t> bias(channels());
    std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> packed_weights((kr() + sizeof(int32_t) / sizeof(uint8_t)) * packed_channels());
    std::vector<uint8_t> output((width() - 1) * output_stride() + channels());
    std::vector<int32_t> accumulators(width() * channels());
    std::vector<uint8_t> output_ref(width() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      do {
        std::generate(input.begin(), input.end(), std::ref(u8rng));
      } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
      do {
        std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      } while (kernel.size() > 1 && *std::max_element(kernel.cbegin(), kernel.cend()) == *std::min_element(kernel.cbegin(), kernel.cend()));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      std::fill(packed_weights.begin(), packed_weights.end(), 0);
      xnn_pack_q8_dwconv_ghw_w(
        kr(), 1, channels(), cr(),
        input_zero_point(), kernel_zero_point(),
        kernel.data(), bias.data(), packed_weights.data());
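      // Set up indirection pointers: each entry points at one channels()-wide row of the input
      // buffer, and shuffling the entries verifies that the micro-kernel does not assume the rows
      // it reads are contiguous in memory.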
      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results, without renormalization.
      for (size_t x = 0; x < width(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          int32_t acc = bias[c];
          for (size_t k = 0; k < kr(); k++) {
            acc +=
              (int32_t(indirection[x * step() + k][c]) - int32_t(input_zero_point())) *
              (int32_t(kernel[c * kr() + k]) - int32_t(kernel_zero_point()));
          }
          accumulators[x * channels() + c] = acc;
        }
      }

      // Compute renormalization parameters.
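      // The output scale maps the observed accumulator range onto at most 256 quantized steps, and
      // the output zero point centers that range within [0, 255], so the requantized reference
      // results remain representable as uint8_t.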
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
      const uint32_t accumulated_range = uint32_t(accumulated_max) - uint32_t(accumulated_min);
      const double output_scale = accumulated_range >= 256 ? double(accumulated_range) / 255.0 : 1.00001;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Prepare convolution parameters.
      const float requantization_scale = 1.0f / float(output_scale);
      union xnn_q8_gemm_params quantization_params = { };
      switch (variant) {
        case Variant::Native:
          quantization_params = xnn_init_q8_gemm_params(
            input_zero_point(), kernel_zero_point(),
            requantization_scale, output_zero_point, qmin(), qmax());
          break;
        case Variant::Scalar:
          quantization_params = xnn_init_scalar_q8_gemm_params(
            input_zero_point(), kernel_zero_point(),
            requantization_scale, output_zero_point, qmin(), qmax());
          break;
      }
      const union xnn_q31_requantization_params scalar_requantization_params =
        xnn_init_scalar_requantization_params(
          requantization_scale, output_zero_point, qmin(), qmax());

      // Renormalize reference results.
      for (size_t x = 0; x < width(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          output_ref[x * channels() + c] = xnn_q31_requantize(accumulators[x * channels() + c], scalar_requantization_params);
        }
      }

      // Call optimized micro-kernel.
      dwconv(
        channels(), width(),
        indirection.data(), packed_weights.data(), output.data(),
        step() * sizeof(void*),
        (output_stride() - channels()) * sizeof(uint8_t),
        &quantization_params);

      // Verify results.
      for (size_t x = 0; x < width(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin()))
            << "x = " << x << ", channel = " << c;
          ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax()))
            << "x = " << x << ", channel = " << c;
          ASSERT_EQ(uint32_t(output[x * output_stride() + c]), uint32_t(output_ref[x * channels() + c]))
            << "x = " << x << ", channel = " << c << ", accumulator = " << accumulators[x * channels() + c];
        }
      }
    }
  }

  void Test(xnn_f32_dwconv_up_ukernel_function dwconv, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);

    std::vector<const float*> indirection((width() - 1) * step() + kr());
    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
    std::vector<float> kernel(channels() * kr());
    std::vector<float> bias(channels());
    std::vector<float, AlignedAllocator<float, 64>> packed_weights((kr() + 1) * packed_channels());
    std::vector<float> output((width() - 1) * output_stride() + channels());
    std::vector<float> output_ref(width() * channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output_ref.begin(), output_ref.end(), nanf(""));
      std::fill(output.begin(), output.end(), nanf(""));

      std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
      xnn_pack_f32_dwconv_ghw_w(
        kr(), 1, channels(), cr(),
        kernel.data(), bias.data(), packed_weights.data());
      for (size_t i = 0; i < indirection.size(); i++) {
        indirection[i] = input.data() + i * channels();
      }
      std::shuffle(indirection.begin(), indirection.end(), rng);

      // Compute reference results, without clamping.
      for (size_t x = 0; x < width(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          float acc = bias[c];
          for (size_t k = 0; k < kr(); k++) {
            acc += indirection[x * step() + k][c] * kernel[c * kr() + k];
          }
          output_ref[x * channels() + c] = acc;
        }
      }

      // Compute clamping parameters.
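      // qmin() and qmax() are interpreted as fractions of the observed output range: the bounds cut
      // off qmin()/255 of the range from below and (255 - qmax())/255 of the range from above.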
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
      const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

      // Prepare output parameters.
      xnn_f32_output_params output_params = { };
      switch (variant) {
        case Variant::Native:
          output_params = xnn_init_f32_output_params(output_min, output_max);
          break;
        case Variant::Scalar:
          output_params = xnn_init_scalar_f32_output_params(output_min, output_max);
          break;
      }

      // Clamp reference results.
      for (float& output_val : output_ref) {
        output_val = std::max(std::min(output_val, output_max), output_min);
      }

      // Call optimized micro-kernel.
      dwconv(
        channels(), width(),
        indirection.data(), packed_weights.data(), output.data(),
        step() * sizeof(void*),
        (output_stride() - channels()) * sizeof(float),
        &output_params);

      // Verify results.
      for (size_t x = 0; x < width(); x++) {
        for (size_t c = 0; c < channels(); c++) {
          ASSERT_GE(output[x * output_stride() + c], output_min)
            << "x = " << x << ", channel = " << c;
          ASSERT_LE(output[x * output_stride() + c], output_max)
            << "x = " << x << ", channel = " << c;
          ASSERT_NEAR(
              output_ref[x * channels() + c],
              output[x * output_stride() + c],
              std::abs(output_ref[x * channels() + c]) * 1.0e-5)
            << "x = " << x << ", channel = " << c;
        }
      }
    }
  }

 private:
  uint32_t channels_{1};
  uint32_t cr_{1};
  uint32_t kr_{1};
  uint32_t width_{1};
  uint32_t step_{1};
  uint32_t output_stride_{0};
  uint8_t input_zero_point_{127};
  uint8_t kernel_zero_point_{127};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{3};
};
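
// Usage sketch (illustrative, not part of the original header): test files typically configure the
// tester with the micro-kernel's channel tile (cr) and kernel tile (kr), then call Test() with the
// kernel under test. The test name and kernel symbol below are assumptions for illustration;
// substitute any xnn_q8_dwconv_up_ukernel_function or xnn_f32_dwconv_up_ukernel_function available
// in the build.
//
//   TEST(Q8_DWCONV_UP8x9__NEON, c_eq_8) {
//     DWConvMicrokernelTester()
//       .cr(8)
//       .kr(9)
//       .channels(8)
//       .width(1)
//       .Test(xnn_q8_dwconv_ukernel_up8x9__neon);
//   }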