// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>

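// Helper for unit-testing XNNPACK vector unary microkernels (f32, f16, s8, and u8
// element-wise operations such as Abs, Clamp, ELU, HardSwish, LeakyReLU, Negate,
// ReLU, Round, Sigmoid, Square, and Square Root). Each Test() overload fills a
// random input (optionally aliasing input and output for in-place testing),
// computes a scalar reference result, invokes the microkernel, and compares the
// outputs either exactly or within a relative tolerance.
//
// Typical usage from a test (the kernel name below is illustrative, not a
// guaranteed symbol):
//
//   VUnaryMicrokernelTester()
//     .batch_size(4)
//     .inplace(true)
//     .Test(xnn_f32_vrelu_ukernel__scalar_x4, VUnaryMicrokernelTester::OpType::ReLU);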
class VUnaryMicrokernelTester {
 public:
  enum class OpType {
    ReLU,
    RoundToNearestEven,
    RoundTowardsZero,
    RoundUp,
    RoundDown,
  };

  enum class Variant {
    Native,
    Scalar,
  };

  inline VUnaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VUnaryMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline VUnaryMicrokernelTester& slope(float slope) {
    this->slope_ = slope;
    return *this;
  }

  inline float slope() const {
    return this->slope_;
  }

  inline VUnaryMicrokernelTester& prescale(float prescale) {
    this->prescale_ = prescale;
    return *this;
  }

  inline float prescale() const {
    return this->prescale_;
  }

  inline VUnaryMicrokernelTester& alpha(float alpha) {
    this->alpha_ = alpha;
    return *this;
  }

  inline float alpha() const {
    return this->alpha_;
  }

  inline VUnaryMicrokernelTester& beta(float beta) {
    this->beta_ = beta;
    return *this;
  }

  inline float beta() const {
    return this->beta_;
  }

  inline VUnaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VUnaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VUnaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

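  // Tests a generic f32 unary microkernel against a reference computed from op_type.
  // Only OpType::ReLU is handled by this overload; any other op type fails the test.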
  void Test(xnn_f32_vunary_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::ReLU:
            y_ref[i] = std::max(x_data[i], 0.0f);
            break;
          default:
            GTEST_FAIL() << "Unexpected operation type";
            return;
        }
      }

      // Call optimized micro-kernel.
      vunary(batch_size() * sizeof(float), x_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

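  // Tests an f32 Abs microkernel; results must match std::abs exactly.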
  void Test(xnn_f32_vabs_ukernel_function vabs, xnn_init_f32_abs_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::abs(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_abs_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vabs(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

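  // Tests an f32 Clamp microkernel with [qmin, qmax] interpreted as float bounds.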
  void Test(xnn_f32_vclamp_ukernel_function vclamp, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f32_minmax_params params;
      init_params(&params, float(qmin()), float(qmax()));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

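  // Tests an f32 ELU microkernel with the configured prescale, alpha, and beta,
  // comparing against a double-precision reference within a relative tolerance.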
  void Test(xnn_f32_velu_ukernel_function velu, xnn_init_f32_elu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 20.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? alpha() * std::expm1(double(x_data[i]) * prescale()) : double(x_data[i]) * beta();
      }

      // Prepare parameters.
      union xnn_f32_elu_params params;
      init_params(&params, prescale(), alpha(), beta());

      // Call optimized micro-kernel.
      velu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

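  // Tests an f32 HardSwish microkernel: y = x * min(max(x + 3, 0), 6) / 6.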
  void Test(xnn_f32_vhswish_ukernel_function vhswish, xnn_init_f32_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = (x_data[i] / 6.0f) * std::max(std::min(x_data[i] + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f32_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

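  // Tests an f32 LeakyReLU microkernel with the configured slope; negative inputs
  // (including -0.0f, via std::signbit) are scaled by the slope.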
  void Test(xnn_f32_vlrelu_ukernel_function vlrelu, xnn_init_f32_lrelu_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-125.0f, 125.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::signbit(x_data[i]) ? x_data[i] * slope() : x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_lrelu_params params;
      init_params(&params, slope());

      // Call optimized micro-kernel.
      vlrelu(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

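  // Tests an f32 Negate microkernel; results must match exactly.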
  void Test(xnn_f32_vneg_ukernel_function vneg, xnn_init_f32_neg_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = -x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_neg_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vneg(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

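  // Tests an f32 rounding microkernel; op_type selects the rounding mode
  // (to nearest-even, towards zero, up, or down) and results must match exactly.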
  void Test(xnn_f32_vround_ukernel_function vrnd, OpType op_type, xnn_init_f32_rnd_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-5.0f, 5.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::RoundToNearestEven:
            y_ref[i] = std::nearbyint(double(x_data[i]));
            break;
          case OpType::RoundTowardsZero:
            y_ref[i] = std::trunc(double(x_data[i]));
            break;
          case OpType::RoundUp:
            y_ref[i] = std::ceil(double(x_data[i]));
            break;
          case OpType::RoundDown:
            y_ref[i] = std::floor(double(x_data[i]));
            break;
          default:
            GTEST_FAIL() << "Unexpected operation type";
            return;
        }
      }

      // Prepare parameters.
      union xnn_f32_rnd_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vrnd(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

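  // Tests an f32 Sigmoid microkernel against a double-precision reference
  // within a relative tolerance.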
  void Test(xnn_f32_vsigmoid_ukernel_function vsigmoid, xnn_init_f32_sigmoid_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto distribution = std::uniform_real_distribution<float>(-125.0f, 125.0f);
    auto f32rng = std::bind(distribution, std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<double> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const double e = std::exp(double(x_data[i]));
        y_ref[i] = e / (1.0 + e);
      }

      // Prepare parameters.
      union xnn_f32_sigmoid_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vsigmoid(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::max(5.0e-6, std::abs(y_ref[i]) * 1.0e-5))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

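  // Tests an f32 Square microkernel (y = x * x); results must match exactly.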
  void Test(xnn_f32_vsqr_ukernel_function vsqr, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = x_data[i] * x_data[i];
      }

      // Prepare parameters.
      union xnn_f32_default_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqr(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

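  // Tests an f32 Square Root microkernel on non-negative inputs; results must match exactly.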
  void Test(xnn_f32_vsqrt_ukernel_function vsqrt, xnn_init_f32_sqrt_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 10.0f), std::ref(rng));

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::generate(x.begin(), x.end(), std::ref(f32rng));
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::sqrt(x_data[i]);
      }

      // Prepare parameters.
      union xnn_f32_sqrt_params params;
      if (init_params != nullptr) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vsqrt(batch_size() * sizeof(float), x_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(y[i], y_ref[i])
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << x[i];
      }
    }
  }

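  // Convenience overloads that forward type-specific kernel pointers to the
  // generic OpType-based Test() above via a function-pointer cast.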
  inline void Test(xnn_f32_vabs_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_velu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vneg_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

  inline void Test(xnn_f32_vrelu_ukernel_function vunary, OpType op_type, Variant variant = Variant::Native) const {
    Test(xnn_f32_vunary_ukernel_function(vunary), op_type, variant);
  }

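  // Tests an f16 Clamp microkernel; inputs and outputs are IEEE half-precision
  // values stored as uint16_t and converted with the fp16 helpers.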
  void Test(xnn_f16_vclamp_ukernel_function vclamp, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]), float(qmax())), float(qmin()));
      }

      // Prepare parameters.
      union xnn_f16_minmax_params params;
      init_params(&params, fp16_ieee_from_fp32_value(float(qmin())), fp16_ieee_from_fp32_value(float(qmax())));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

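  // Tests an f16 HardSwish microkernel against a single-precision reference.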
  void Test(xnn_f16_vhswish_ukernel_function vhswish, xnn_init_f16_hswish_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-4.0f, 4.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        const float x_value = fp16_ieee_to_fp32_value(x_data[i]);
        y_ref[i] = (x_value / 6.0f) * std::max(std::min(x_value + 3.0f, 6.0f), 0.0f);
      }

      // Prepare parameters.
      union xnn_f16_hswish_params params;
      init_params(&params);

      // Call optimized micro-kernel.
      vhswish(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << fp16_ieee_to_fp32_value(x[i]);
      }
    }
  }

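  // Tests a signed 8-bit Clamp microkernel; [qmin, qmax] are shifted by -0x80
  // into the int8_t range.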
  void Test(xnn_s8_vclamp_ukernel_function vclamp, xnn_init_s8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t));
    std::vector<int8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(int8_t) : 0));
    std::vector<int8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(i8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), INT8_C(0xA5));
      }
      const int8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], int8_t(qmin() - 0x80)), int8_t(qmax() - 0x80));
      }

      // Prepare parameters.
      union xnn_s8_minmax_params params;
      init_params(&params, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(int8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(int32_t(y_ref[i]), int32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << int32_t(x[i]);
      }
    }
  }

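  // Tests an unsigned 8-bit Clamp microkernel with [qmin, qmax] bounds.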
  void Test(xnn_u8_vclamp_ukernel_function vclamp, xnn_init_u8_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(
      std::uniform_int_distribution<int32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

    std::vector<uint8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(u8rng));
      if (inplace()) {
        std::copy(x.cbegin(), x.cend(), y.begin());
      } else {
        std::fill(y.begin(), y.end(), UINT8_C(0xA5));
      }
      const uint8_t* x_data = inplace() ? y.data() : x.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::min(std::max(x_data[i], qmin()), qmax());
      }

      // Prepare parameters.
      union xnn_u8_minmax_params params;
      init_params(&params, qmin(), qmax());

      // Call optimized micro-kernel.
      vclamp(batch_size() * sizeof(uint8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at " << i << " / " << batch_size() << ", x[" << i << "] = " << uint32_t(x[i]);
      }
    }
  }

 private:
  size_t batch_size_ = 1;
  bool inplace_ = false;
  float slope_ = 0.5f;
  float prescale_ = 1.0f;
  float alpha_ = 1.0f;
  float beta_ = 1.0f;
  uint8_t qmin_ = 0;
  uint8_t qmax_ = 255;
  size_t iterations_ = 15;
};