// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


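// Tester for clamp (min/max saturation) microkernels. Parameters are set via
// the chainable setters below; each Test() overload generates random inputs,
// computes a scalar reference result, invokes the microkernel under test, and
// verifies the output element by element.
//
// A minimal usage sketch (the kernel symbol here is illustrative, not defined
// in this file):
//
//   ClampMicrokernelTester()
//     .batch_size(128)
//     .qmin(10)
//     .qmax(240)
//     .Test(xnn_u8_clamp_ukernel__scalar, ClampMicrokernelTester::Variant::Scalar);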
class ClampMicrokernelTester {
 public:
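  // Selects which params-initialization path Test() uses: Native picks the
  // default xnn_init_*_minmax_params initializer, Scalar forces the portable
  // scalar variant.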
  enum class Variant {
    Native,
    Scalar,
  };

  inline ClampMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ClampMicrokernelTester& inplace(bool inplace) {
    this->inplace_ = inplace;
    return *this;
  }

  inline bool inplace() const {
    return this->inplace_;
  }

  inline ClampMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ClampMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ClampMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

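  // Tests a u8 clamp microkernel: fills the input with random bytes, clamps
  // each element to [qmin, qmax] with scalar reference code, then checks that
  // the microkernel produced identical results.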
  void Test(xnn_u8_clamp_ukernel_function clamp, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint8_t) : 0));
    std::vector<uint8_t> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(u8rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(u8rng));
      } else {
        // Canary value: any output byte still equal to 0xA5 after the kernel
        // runs was never written.
        std::fill(y.begin(), y.end(), 0xA5);
      }
      const uint8_t* x_data = inplace() ? y.data() : x.data();

      // Prepare parameters.
      union xnn_u8_minmax_params params = { };
      switch (variant) {
        case Variant::Native:
          params = xnn_init_u8_minmax_params(qmin(), qmax());
          break;
        case Variant::Scalar:
          params = xnn_init_scalar_u8_minmax_params(qmin(), qmax());
          break;
      }

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], qmax()), qmin());
      }

      // Call optimized micro-kernel.
      clamp(batch_size() * sizeof(uint8_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(uint32_t(y[i]), uint32_t(qmax()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_GE(uint32_t(y[i]), uint32_t(qmin()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_EQ(uint32_t(y_ref[i]), uint32_t(y[i]))
          << "at position " << i << ", batch_size = " << batch_size()
          << ", qmin = " << uint32_t(qmin()) << ", qmax = " << uint32_t(qmax());
      }
    }
  }

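  // Tests an f16 clamp microkernel. Inputs are random floats in [0, 255]
  // converted to IEEE half precision; the reference is computed in fp32 from
  // the fp32 values of those half-precision inputs, and since clamping yields
  // either the (f16-representable) input or an integer bound, exact equality
  // is expected.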
  void Test(xnn_f16_clamp_ukernel_function clamp) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> x(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f16rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* x_data = inplace() ? y.data() : x.data();

      // Prepare parameters.
      xnn_f16_minmax_params params = xnn_init_f16_minmax_params(
        fp16_ieee_from_fp32_value(float(qmin())),
        fp16_ieee_from_fp32_value(float(qmax())));

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(fp16_ieee_to_fp32_value(x_data[i]), float(qmax())), float(qmin()));
      }

      // Call optimized micro-kernel.
      clamp(batch_size() * sizeof(uint16_t), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(fp16_ieee_to_fp32_value(y[i]), float(qmax()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_GE(fp16_ieee_to_fp32_value(y[i]), float(qmin()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_EQ(y_ref[i], fp16_ieee_to_fp32_value(y[i]))
          << "at position " << i << ", batch_size = " << batch_size()
          << ", qmin = " << float(qmin()) << ", qmax = " << float(qmax());
      }
    }
  }

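  // Tests an f32 clamp microkernel; analogous to the u8 path, including the
  // Native/Scalar params variants, but with float data and NaN as the
  // output-canary fill value.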
  void Test(xnn_f32_clamp_ukernel_function clamp, Variant variant = Variant::Native) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 255.0f), rng);

    std::vector<float> x(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(x.begin(), x.end(), std::ref(f32rng));
      if (inplace()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), std::nanf(""));
      }
      const float* x_data = inplace() ? y.data() : x.data();

      // Prepare parameters.
      xnn_f32_minmax_params params = { };
      switch (variant) {
        case Variant::Native:
          params = xnn_init_f32_minmax_params(float(qmin()), float(qmax()));
          break;
        case Variant::Scalar:
          params = xnn_init_scalar_f32_minmax_params(float(qmin()), float(qmax()));
          break;
      }

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(std::min(x_data[i], float(qmax())), float(qmin()));
      }

      // Call optimized micro-kernel.
      clamp(batch_size() * sizeof(float), x_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_LE(y[i], float(qmax()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_GE(y[i], float(qmin()))
          << "at position " << i << ", batch_size = " << batch_size();
        ASSERT_EQ(y_ref[i], y[i])
          << "at position " << i << ", batch_size = " << batch_size()
          << ", qmin = " << uint32_t(qmin()) << ", qmax = " << uint32_t(qmax());
      }
    }
  }

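  // Defaults: single-element batch, out-of-place operation, a [50, 200]
  // clamping range, and 15 random test iterations.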
 private:
  size_t batch_size_{1};
  bool inplace_{false};
  uint8_t qmin_{50};
  uint8_t qmax_{200};
  size_t iterations_{15};
};