// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


26 class ClampMicrokernelTester {
27  public:
28   enum class Variant {
29     Native,
30     Scalar,
31   };
32 
n(size_t n)33   inline ClampMicrokernelTester& n(size_t n) {
34     assert(n != 0);
35     this->n_ = n;
36     return *this;
37   }
38 
n()39   inline size_t n() const {
40     return this->n_;
41   }
42 
inplace(bool inplace)43   inline ClampMicrokernelTester& inplace(bool inplace) {
44     this->inplace_ = inplace;
45     return *this;
46   }
47 
inplace()48   inline bool inplace() const {
49     return this->inplace_;
50   }
51 
qmin(uint8_t qmin)52   inline ClampMicrokernelTester& qmin(uint8_t qmin) {
53     this->qmin_ = qmin;
54     return *this;
55   }
56 
qmin()57   inline uint8_t qmin() const {
58     return this->qmin_;
59   }
60 
qmax(uint8_t qmax)61   inline ClampMicrokernelTester& qmax(uint8_t qmax) {
62     this->qmax_ = qmax;
63     return *this;
64   }
65 
qmax()66   inline uint8_t qmax() const {
67     return this->qmax_;
68   }
69 
iterations(size_t iterations)70   inline ClampMicrokernelTester& iterations(size_t iterations) {
71     this->iterations_ = iterations;
72     return *this;
73   }
74 
iterations()75   inline size_t iterations() const {
76     return this->iterations_;
77   }
78 
79   void Test(xnn_u8_clamp_ukernel_function clamp, Variant variant = Variant::Native) const {
80     std::random_device random_device;
81     auto rng = std::mt19937(random_device());
82     auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
83 
84     std::vector<uint8_t> x(n() + XNN_EXTRA_BYTES / sizeof(uint8_t));
85     std::vector<uint8_t> y(n() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
86     std::vector<uint8_t> y_ref(n());
87     for (size_t iteration = 0; iteration < iterations(); iteration++) {
88       std::generate(x.begin(), x.end(), std::ref(u8rng));
89       if (inplace()) {
90         std::generate(y.begin(), y.end(), std::ref(u8rng));
91       } else {
92         std::fill(y.begin(), y.end(), 0xA5);
93       }
94       const uint8_t* x_data = inplace() ? y.data() : x.data();
95 
96       // Prepare clamping parameters.
97       union xnn_u8_output_params output_params = { };
98       switch (variant) {
99         case Variant::Native:
100           output_params = xnn_init_u8_output_params(qmin(), qmax());
101           break;
102         case Variant::Scalar:
103           output_params = xnn_init_scalar_u8_output_params(qmin(), qmax());
104           break;
105       }
106 
107       // Compute reference results.
108       for (size_t i = 0; i < n(); i++) {
109         y_ref[i] = std::max(std::min(x_data[i], qmax()), qmin());
110       }
111 
112       // Call optimized micro-kernel.
113       clamp(n() * sizeof(uint8_t), x_data, y.data(), &output_params);
114 
115       // Verify results.
116       for (size_t i = 0; i < n(); i++) {
117         ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
118           << "at position " << i << ", n = " << n();
119         ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
120           << "at position " << i << ", n = " << n();
121         ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
122           << "at position " << i << ", n = " << n()
123           << ", qmin = " << uint32_t(qmin()) << ", qmax = " << uint32_t(qmax());
124       }
125     }
126   }
127 
128   void Test(xnn_f32_clamp_ukernel_function clamp, Variant variant = Variant::Native) const {
129     std::random_device random_device;
130     auto rng = std::mt19937(random_device());
131     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), rng);
132 
133     std::vector<float> x(n() + XNN_EXTRA_BYTES / sizeof(float));
134     std::vector<float> y(n() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
135     std::vector<float> y_ref(n());
136     for (size_t iteration = 0; iteration < iterations(); iteration++) {
137       std::generate(x.begin(), x.end(), std::ref(f32rng));
138       if (inplace()) {
139         std::generate(y.begin(), y.end(), std::ref(f32rng));
140       } else {
141         std::fill(y.begin(), y.end(), std::nanf(""));
142       }
143       const float* x_data = inplace() ? y.data() : x.data();
144 
145       // Prepare output parameters.
146       xnn_f32_output_params output_params = { };
147       switch (variant) {
148         case Variant::Native:
149           output_params = xnn_init_f32_output_params(float(qmin()), float(qmax()));
150           break;
151         case Variant::Scalar:
152           output_params = xnn_init_scalar_f32_output_params(float(qmin()), float(qmax()));
153           break;
154       }
155 
156       // Compute reference results.
157       for (size_t i = 0; i < n(); i++) {
158         y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
159       }
160 
161       // Call optimized micro-kernel.
162       clamp(n() * sizeof(float), x_data, y.data(), &output_params);
163 
164       // Verify results.
165       for (size_t i = 0; i < n(); i++) {
166         ASSERT_LE(y[i], float(qmax()))
167           << "at position " << i << ", n = " << n();
168         ASSERT_GE(y[i], float(qmin()))
169           << "at position " << i << ", n = " << n();
170         ASSERT_EQ(y_ref[i], y[i])
171           << "at position " << i << ", n = " << n()
172           << ", qmin = " << uint32_t(qmin()) << ", qmax = " << uint32_t(qmax());
173       }
174     }
175   }
176 
177  private:
178   size_t n_{1};
179   bool inplace_{false};
180   uint8_t qmin_{5};
181   uint8_t qmax_{250};
182   size_t iterations_{15};
183 };
184