// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>

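// Tester for element-wise binary (vbinary) micro-kernels: it generates random
// inputs, computes a reference result in single precision, invokes the
// micro-kernel under test, and compares the two element by element.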

class VBinaryMicrokernelTester {
 public:
  enum class OpType {
    Add,
    Div,
    Max,
    Min,
    Mul,
    Sub,
    SqrDiff,
  };

  inline VBinaryMicrokernelTester& batch_size(size_t batch_size) {
    assert(batch_size != 0);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline VBinaryMicrokernelTester& inplace_a(bool inplace_a) {
    this->inplace_a_ = inplace_a;
    return *this;
  }

  inline bool inplace_a() const {
    return this->inplace_a_;
  }

  inline VBinaryMicrokernelTester& inplace_b(bool inplace_b) {
    this->inplace_b_ = inplace_b;
    return *this;
  }

  inline bool inplace_b() const {
    return this->inplace_b_;
  }

  inline VBinaryMicrokernelTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline VBinaryMicrokernelTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline VBinaryMicrokernelTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

  void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> b(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f16rng));
      std::generate(b.begin(), b.end(), std::ref(f16rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f16rng));
      } else {
        std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
      }
      const uint16_t* a_data = inplace_a() ? y.data() : a.data();
      const uint16_t* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Div:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b_data[i]));
            break;
          case OpType::Mul:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b_data[i]);
            break;
          case OpType::SqrDiff:
          {
            const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b_data[i]);
            break;
        }
      }

      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity()));
      const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity()));
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f16_minmax_params params;
      init_params(&params,
        fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(uint16_t), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_ukernel_function vbinary, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }

      // Prepare parameters.
      xnn_f32_default_params params;
      if (init_params) {
        init_params(&params);
      }

      // Call optimized micro-kernel.
      vbinary(batch_size() * sizeof(float), a_data, b_data, y.data(), init_params != nullptr ? &params : nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_relu_ukernel_function vbinary_relu, OpType op_type) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max(y_ref[i], 0.0f);
      }

      // Call optimized micro-kernel.
      vbinary_relu(batch_size() * sizeof(float), a_data, b_data, y.data(), nullptr);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_GE(y[i], 0.0f)
          << "at " << i << " / " << batch_size();
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

  void Test(xnn_f32_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);

    std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> b(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
    std::vector<float> y(batch_size() + (inplace_a() || inplace_b() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
    std::vector<float> y_ref(batch_size());
    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(a.begin(), a.end(), std::ref(f32rng));
      std::generate(b.begin(), b.end(), std::ref(f32rng));
      if (inplace_a() || inplace_b()) {
        std::generate(y.begin(), y.end(), std::ref(f32rng));
      } else {
        std::fill(y.begin(), y.end(), nanf(""));
      }
      const float* a_data = inplace_a() ? y.data() : a.data();
      const float* b_data = inplace_b() ? y.data() : b.data();

      // Compute reference results.
      for (size_t i = 0; i < batch_size(); i++) {
        switch (op_type) {
          case OpType::Add:
            y_ref[i] = a_data[i] + b_data[i];
            break;
          case OpType::Div:
            y_ref[i] = a_data[i] / b_data[i];
            break;
          case OpType::Max:
            y_ref[i] = std::max<float>(a_data[i], b_data[i]);
            break;
          case OpType::Min:
            y_ref[i] = std::min<float>(a_data[i], b_data[i]);
            break;
          case OpType::Mul:
            y_ref[i] = a_data[i] * b_data[i];
            break;
          case OpType::SqrDiff:
          {
            const float diff = a_data[i] - b_data[i];
            y_ref[i] = diff * diff;
            break;
          }
          case OpType::Sub:
            y_ref[i] = a_data[i] - b_data[i];
            break;
        }
      }
      const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float y_max = accumulated_range > 0.0f ?
        (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
        +std::numeric_limits<float>::infinity();
      const float y_min = accumulated_range > 0.0f ?
        (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
        -std::numeric_limits<float>::infinity();
      for (size_t i = 0; i < batch_size(); i++) {
        y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
      }

      // Prepare parameters.
      xnn_f32_minmax_params params;
      init_params(&params, y_min, y_max);

      // Call optimized micro-kernel.
      vbinary_minmax(batch_size() * sizeof(float), a_data, b_data, y.data(), &params);

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
          << "at " << i << " / " << batch_size();
      }
    }
  }

 private:
  size_t batch_size_{1};
  bool inplace_a_{false};
  bool inplace_b_{false};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  size_t iterations_{15};
};
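
// Example usage (a sketch, not part of the tester): in a gtest test case,
// construct the tester, configure it via the fluent setters, and pass the
// micro-kernel under test together with the matching OpType. The kernel
// symbol below is hypothetical; real tests reference the specific vbinary
// micro-kernel symbols that XNNPACK declares for the target architecture.
//
//   TEST(F32_VADD__EXAMPLE, batch_div_8_inplace) {
//     for (size_t batch_size = 16; batch_size < 128; batch_size += 8) {
//       VBinaryMicrokernelTester()
//         .batch_size(batch_size)
//         .inplace_a(true)
//         .Test(xnn_f32_vadd_example_ukernel,  // hypothetical kernel symbol
//               VBinaryMicrokernelTester::OpType::Add);
//     }
//   }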