• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #pragma once
7 
8 #include <gtest/gtest.h>
9 
10 #include <algorithm>
11 #include <cassert>
12 #include <cmath>
13 #include <cstddef>
14 #include <cstdlib>
15 #include <functional>
16 #include <limits>
17 #include <random>
18 #include <vector>
19 
20 #include <fp16.h>
21 
22 #include <xnnpack.h>
23 #include <xnnpack/params.h>
24 #include <xnnpack/params-init.h>
25 
26 
27 class VCvtMicrokernelTester {
28  public:
batch_size(size_t batch_size)29   inline VCvtMicrokernelTester& batch_size(size_t batch_size) {
30     assert(batch_size != 0);
31     this->batch_size_ = batch_size;
32     return *this;
33   }
34 
batch_size()35   inline size_t batch_size() const {
36     return this->batch_size_;
37   }
38 
scale(float scale)39   inline VCvtMicrokernelTester& scale(float scale) {
40     assert(scale > 0.0f);
41     assert(std::isnormal(scale));
42     this->scale_ = scale;
43     return *this;
44   }
45 
scale()46   inline float scale() const {
47     return this->scale_;
48   }
49 
zero_point(int16_t zero_point)50   inline VCvtMicrokernelTester& zero_point(int16_t zero_point) {
51     this->zero_point_ = zero_point;
52     return *this;
53   }
54 
zero_point()55   inline int16_t zero_point() const {
56     return this->zero_point_;
57   }
58 
qmin(int16_t qmin)59   inline VCvtMicrokernelTester& qmin(int16_t qmin) {
60     this->qmin_ = qmin;
61     return *this;
62   }
63 
qmin()64   inline int16_t qmin() const {
65     return this->qmin_;
66   }
67 
qmax(int16_t qmax)68   inline VCvtMicrokernelTester& qmax(int16_t qmax) {
69     this->qmax_ = qmax;
70     return *this;
71   }
72 
qmax()73   inline int16_t qmax() const {
74     return this->qmax_;
75   }
76 
iterations(size_t iterations)77   inline VCvtMicrokernelTester& iterations(size_t iterations) {
78     this->iterations_ = iterations;
79     return *this;
80   }
81 
iterations()82   inline size_t iterations() const {
83     return this->iterations_;
84   }
85 
86   void Test(xnn_f16_f32_vcvt_ukernel_function vcvt, xnn_init_f16_f32_cvt_params_fn init_params = nullptr) const {
87     std::random_device random_device;
88     auto rng = std::mt19937(random_device());
89     auto distribution = std::uniform_real_distribution<float>(-100.0f, 100.0f);
90     auto f32rng = std::bind(distribution, std::ref(rng));
91     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
92 
93     std::vector<uint16_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
94     std::vector<float> output(batch_size());
95     for (size_t iteration = 0; iteration < iterations(); iteration++) {
96       std::generate(input.begin(), input.end(), std::ref(f16rng));
97       std::fill(output.begin(), output.end(), nanf(""));
98 
99       union xnn_f16_f32_cvt_params params;
100       if (init_params) {
101         init_params(&params);
102       }
103 
104       // Call optimized micro-kernel.
105       vcvt(batch_size() * sizeof(uint16_t), input.data(), output.data(), &params);
106 
107       // Verify results.
108       for (size_t i = 0; i < batch_size(); i++) {
109         ASSERT_EQ(fp32_to_bits(output[i]), fp32_to_bits(fp16_ieee_to_fp32_value(input[i])))
110           << "at " << i << " / " << batch_size()
111           << ", x[" << i << "] = 0x" << std::hex << std::setw(4) << std::setfill('0') << input[i];
112       }
113     }
114   }
115 
116   void Test(xnn_f32_f16_vcvt_ukernel_function vcvt, xnn_init_f32_f16_cvt_params_fn init_params = nullptr) const {
117     std::random_device random_device;
118     auto rng = std::mt19937(random_device());
119     auto distribution = std::uniform_real_distribution<float>(-100.0f, 100.0f);
120     auto f32rng = std::bind(distribution, std::ref(rng));
121 
122     std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
123     std::vector<uint16_t> output(batch_size());
124     for (size_t iteration = 0; iteration < iterations(); iteration++) {
125       std::generate(input.begin(), input.end(), std::ref(f32rng));
126       std::fill(output.begin(), output.end(), UINT16_C(0x7E));
127 
128       union xnn_f32_f16_cvt_params params;
129       if (init_params) {
130         init_params(&params);
131       }
132 
133       // Call optimized micro-kernel.
134       vcvt(batch_size() * sizeof(float), input.data(), output.data(), &params);
135 
136       // Verify results.
137       for (size_t i = 0; i < batch_size(); i++) {
138         ASSERT_EQ(output[i], fp16_ieee_from_fp32_value(input[i]))
139           << "at " << i << " / " << batch_size()
140           << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i])
141           << " (" << input[i] << ")";
142       }
143     }
144   }
145 
Test(xnn_f32_qs8_vcvt_ukernel_function vcvt,xnn_init_f32_qs8_cvt_params_fn init_params)146   void Test(xnn_f32_qs8_vcvt_ukernel_function vcvt, xnn_init_f32_qs8_cvt_params_fn init_params) const {
147     ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min());
148     ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max());
149     ASSERT_LT(qmin(), qmax());
150 
151     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
152     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
153 
154     std::random_device random_device;
155     auto rng = std::mt19937(random_device());
156     auto distribution = std::uniform_real_distribution<float>(-1.0f, 1.0f);
157     auto f32rng = std::bind(distribution, std::ref(rng));
158 
159     std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
160     std::vector<int8_t> output(batch_size());
161     std::vector<int8_t> output_ref(batch_size());
162     for (size_t iteration = 0; iteration < iterations(); iteration++) {
163       std::generate(input.begin(), input.end(), std::ref(f32rng));
164       std::fill(output.begin(), output.end(), INT8_C(0xA5));
165 
166       union xnn_f32_qs8_cvt_params params;
167       if (init_params) {
168         init_params(&params, scale(), zero_point(), qmin(), qmax());
169       }
170 
171       // Call optimized micro-kernel.
172       vcvt(batch_size() * sizeof(float), input.data(), output.data(), &params);
173 
174       // Compute reference results
175       for (size_t i = 0; i < batch_size(); i++) {
176         float scaled_input = input[i] * scale();
177         scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point()));
178         scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point()));
179         output_ref[i] = int8_t(std::lrintf(scaled_input) + long(zero_point()));
180       }
181 
182       // Verify results.
183       for (size_t i = 0; i < batch_size(); i++) {
184         ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i]))
185           << "at " << i << " / " << batch_size()
186           << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i])
187           << " (" << input[i] << ")";
188       }
189     }
190   }
191 
Test(xnn_f32_qu8_vcvt_ukernel_function vcvt,xnn_init_f32_qu8_cvt_params_fn init_params)192   void Test(xnn_f32_qu8_vcvt_ukernel_function vcvt, xnn_init_f32_qu8_cvt_params_fn init_params) const {
193     ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min());
194     ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max());
195     ASSERT_LT(qmin(), qmax());
196 
197     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
198     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
199 
200     std::random_device random_device;
201     auto rng = std::mt19937(random_device());
202     auto distribution = std::uniform_real_distribution<float>(-1.0f, 1.0f);
203     auto f32rng = std::bind(distribution, std::ref(rng));
204 
205     std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
206     std::vector<uint8_t> output(batch_size());
207     std::vector<uint8_t> output_ref(batch_size());
208     for (size_t iteration = 0; iteration < iterations(); iteration++) {
209       std::generate(input.begin(), input.end(), std::ref(f32rng));
210       std::fill(output.begin(), output.end(), UINT8_C(0xA5));
211 
212       union xnn_f32_qu8_cvt_params params;
213       init_params(&params, scale(), zero_point(), qmin(), qmax());
214 
215       // Call optimized micro-kernel.
216       vcvt(batch_size() * sizeof(float), input.data(), output.data(), &params);
217 
218       // Compute reference results
219       for (size_t i = 0; i < batch_size(); i++) {
220         float scaled_input = input[i] * scale();
221         scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point()));
222         scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point()));
223         output_ref[i] = uint8_t(std::lrintf(scaled_input) + long(zero_point()));
224       }
225 
226       // Verify results.
227       for (size_t i = 0; i < batch_size(); i++) {
228         ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i]))
229           << "at " << i << " / " << batch_size()
230           << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i])
231           << " (" << input[i] << ")";
232       }
233     }
234   }
235 
Test(xnn_qs8_f32_vcvt_ukernel_function vcvt,xnn_init_qs8_f32_cvt_params_fn init_params)236   void Test(xnn_qs8_f32_vcvt_ukernel_function vcvt, xnn_init_qs8_f32_cvt_params_fn init_params) const {
237     ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min());
238     ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max());
239 
240     std::random_device random_device;
241     auto rng = std::mt19937(random_device());
242     auto distribution =
243       std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max());
244     auto i8rng = std::bind(distribution, std::ref(rng));
245 
246     std::vector<int8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
247     std::vector<float> output(batch_size());
248     std::vector<float> output_ref(batch_size());
249     for (size_t iteration = 0; iteration < iterations(); iteration++) {
250       std::generate(input.begin(), input.end(), std::ref(i8rng));
251       std::fill(output.begin(), output.end(), std::nanf(""));
252 
253       union xnn_qs8_f32_cvt_params params;
254       init_params(&params, scale(), zero_point());
255 
256       // Call optimized micro-kernel.
257       vcvt(batch_size() * sizeof(int8_t), input.data(), output.data(), &params);
258 
259       // Compute reference results
260       for (size_t i = 0; i < batch_size(); i++) {
261         output_ref[i] = float(int16_t(input[i]) - zero_point()) * scale();
262       }
263 
264       // Verify results.
265       for (size_t i = 0; i < batch_size(); i++) {
266         ASSERT_EQ(output[i], output_ref[i])
267           << "at " << i << " / " << batch_size()
268           << ", x[" << i << "] = " << int32_t(input[i]);
269       }
270     }
271   }
272 
Test(xnn_qu8_f32_vcvt_ukernel_function vcvt,xnn_init_qu8_f32_cvt_params_fn init_params)273   void Test(xnn_qu8_f32_vcvt_ukernel_function vcvt, xnn_init_qu8_f32_cvt_params_fn init_params) const {
274     ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min());
275     ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max());
276 
277     std::random_device random_device;
278     auto rng = std::mt19937(random_device());
279     auto distribution =
280       std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max());
281     auto u8rng = std::bind(distribution, std::ref(rng));
282 
283     std::vector<uint8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
284     std::vector<float> output(batch_size());
285     std::vector<float> output_ref(batch_size());
286     for (size_t iteration = 0; iteration < iterations(); iteration++) {
287       std::generate(input.begin(), input.end(), std::ref(u8rng));
288       std::fill(output.begin(), output.end(), std::nanf(""));
289 
290       union xnn_qu8_f32_cvt_params params;
291       init_params(&params, scale(), zero_point());
292 
293       // Call optimized micro-kernel.
294       vcvt(batch_size() * sizeof(uint8_t), input.data(), output.data(), &params);
295 
296       // Compute reference results
297       for (size_t i = 0; i < batch_size(); i++) {
298         output_ref[i] = float(int16_t(input[i]) - zero_point()) * scale();
299       }
300 
301       // Verify results.
302       for (size_t i = 0; i < batch_size(); i++) {
303         ASSERT_EQ(output[i], output_ref[i])
304           << "at " << i << " / " << batch_size()
305           << ", x[" << i << "] = " << int32_t(input[i]);
306       }
307     }
308   }
309 
310  private:
311   float scale_ = 1.75f;
312   int16_t zero_point_ = 1;
313   int16_t qmin_ = std::numeric_limits<int16_t>::min();
314   int16_t qmax_ = std::numeric_limits<int16_t>::max();
315   size_t batch_size_ = 1;
316   size_t iterations_ = 15;
317 };
318