// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/params-init.h>
#include <xnnpack/params.h>


25 class VBinaryCMicrokernelTester {
26  public:
27   enum class OpType {
28     AddC,
29     DivC,
30     RDivC,
31     MaxC,
32     MinC,
33     MulC,
34     SqrDiffC,
35     SubC,
36     RSubC,
37   };
38 
batch_size(size_t batch_size)39   inline VBinaryCMicrokernelTester& batch_size(size_t batch_size) {
40     assert(batch_size != 0);
41     this->batch_size_ = batch_size;
42     return *this;
43   }
44 
batch_size()45   inline size_t batch_size() const {
46     return this->batch_size_;
47   }
48 
inplace(bool inplace)49   inline VBinaryCMicrokernelTester& inplace(bool inplace) {
50     this->inplace_ = inplace;
51     return *this;
52   }
53 
inplace()54   inline bool inplace() const {
55     return this->inplace_;
56   }
57 
qmin(uint8_t qmin)58   inline VBinaryCMicrokernelTester& qmin(uint8_t qmin) {
59     this->qmin_ = qmin;
60     return *this;
61   }
62 
qmin()63   inline uint8_t qmin() const {
64     return this->qmin_;
65   }
66 
qmax(uint8_t qmax)67   inline VBinaryCMicrokernelTester& qmax(uint8_t qmax) {
68     this->qmax_ = qmax;
69     return *this;
70   }
71 
qmax()72   inline uint8_t qmax() const {
73     return this->qmax_;
74   }
75 
iterations(size_t iterations)76   inline VBinaryCMicrokernelTester& iterations(size_t iterations) {
77     this->iterations_ = iterations;
78     return *this;
79   }
80 
iterations()81   inline size_t iterations() const {
82     return this->iterations_;
83   }
84 
Test(xnn_f16_vbinary_ukernel_function vbinaryc,OpType op_type)85   void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const {
86     std::random_device random_device;
87     auto rng = std::mt19937(random_device());
88     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
89     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
90 
91     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
92     const uint16_t b = f16rng();
93     std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
94     std::vector<float> y_ref(batch_size());
95     for (size_t iteration = 0; iteration < iterations(); iteration++) {
96       std::generate(a.begin(), a.end(), std::ref(f16rng));
97       if (inplace()) {
98         std::generate(y.begin(), y.end(), std::ref(f16rng));
99       } else {
100         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
101       }
102       const uint16_t* a_data = inplace() ? y.data() : a.data();
103 
104       // Compute reference results.
105       for (size_t i = 0; i < batch_size(); i++) {
106         switch (op_type) {
107           case OpType::AddC:
108             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
109             break;
110           case OpType::DivC:
111             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
112             break;
113           case OpType::RDivC:
114             y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
115             break;
116           case OpType::MaxC:
117             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
118             break;
119           case OpType::MinC:
120             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
121             break;
122           case OpType::MulC:
123             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
124             break;
125           case OpType::SqrDiffC:
126           {
127             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
128             y_ref[i] = diff * diff;
129             break;
130           }
131           case OpType::SubC:
132             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
133             break;
134           case OpType::RSubC:
135             y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
136             break;
137         }
138       }
139       // Call optimized micro-kernel.
140       vbinaryc(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), nullptr);
141 
142       // Verify results.
143       for (size_t i = 0; i < batch_size(); i++) {
144         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
145           << "at " << i << " / " << batch_size();
146       }
147     }
148   }
149 
Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f16_minmax_params_fn init_params)150   void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f16_minmax_params_fn init_params) const {
151     std::random_device random_device;
152     auto rng = std::mt19937(random_device());
153     auto f32rng = std::bind(std::uniform_real_distribution<float>(1.0e-3f, 1.0f), rng);
154     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
155 
156     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
157     const uint16_t b = f16rng();
158     std::vector<uint16_t> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(uint16_t) : 0));
159     std::vector<float> y_ref(batch_size());
160     for (size_t iteration = 0; iteration < iterations(); iteration++) {
161       std::generate(a.begin(), a.end(), std::ref(f16rng));
162       if (inplace()) {
163         std::generate(y.begin(), y.end(), std::ref(f16rng));
164       } else {
165         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
166       }
167       const uint16_t* a_data = inplace() ? y.data() : a.data();
168 
169       // Compute reference results.
170       for (size_t i = 0; i < batch_size(); i++) {
171         switch (op_type) {
172           case OpType::AddC:
173             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) + fp16_ieee_to_fp32_value(b);
174             break;
175           case OpType::DivC:
176             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) / fp16_ieee_to_fp32_value(b);
177             break;
178           case OpType::RDivC:
179             y_ref[i] = fp16_ieee_to_fp32_value(b) / fp16_ieee_to_fp32_value(a_data[i]);
180             break;
181           case OpType::MaxC:
182             y_ref[i] = std::max<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
183             break;
184           case OpType::MinC:
185             y_ref[i] = std::min<float>(fp16_ieee_to_fp32_value(a_data[i]), fp16_ieee_to_fp32_value(b));
186             break;
187           case OpType::MulC:
188             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) * fp16_ieee_to_fp32_value(b);
189             break;
190           case OpType::SqrDiffC:
191           {
192             const float diff = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
193             y_ref[i] = diff * diff;
194             break;
195           }
196           case OpType::SubC:
197             y_ref[i] = fp16_ieee_to_fp32_value(a_data[i]) - fp16_ieee_to_fp32_value(b);
198             break;
199           case OpType::RSubC:
200             y_ref[i] = fp16_ieee_to_fp32_value(b) - fp16_ieee_to_fp32_value(a_data[i]);
201             break;
202         }
203       }
204       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
205       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
206       const float accumulated_range = accumulated_max - accumulated_min;
207       const float y_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
208         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
209         +std::numeric_limits<float>::infinity()));
210       const float y_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_range > 0.0f ?
211         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
212         -std::numeric_limits<float>::infinity()));
213       for (size_t i = 0; i < batch_size(); i++) {
214         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
215       }
216 
217       // Prepare parameters.
218       xnn_f16_minmax_params params;
219       init_params(&params,
220         fp16_ieee_from_fp32_value(y_min), fp16_ieee_from_fp32_value(y_max));
221 
222       // Call optimized micro-kernel.
223       vbinaryc_minmax(batch_size() * sizeof(uint16_t), a_data, &b, y.data(), &params);
224 
225       // Verify results.
226       for (size_t i = 0; i < batch_size(); i++) {
227         ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-4f, std::abs(y_ref[i]) * 1.0e-2f))
228           << "at " << i << " / " << batch_size();
229       }
230     }
231   }
232 
233   void Test(xnn_f32_vbinary_ukernel_function vbinaryc, OpType op_type, xnn_init_f32_default_params_fn init_params = nullptr) const {
234     std::random_device random_device;
235     auto rng = std::mt19937(random_device());
236     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
237 
238     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
239     const float b = f32rng();
240     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
241     std::vector<float> y_ref(batch_size());
242     for (size_t iteration = 0; iteration < iterations(); iteration++) {
243       std::generate(a.begin(), a.end(), std::ref(f32rng));
244       if (inplace()) {
245         std::generate(y.begin(), y.end(), std::ref(f32rng));
246       } else {
247         std::fill(y.begin(), y.end(), nanf(""));
248       }
249       const float* a_data = inplace() ? y.data() : a.data();
250 
251       // Compute reference results.
252       for (size_t i = 0; i < batch_size(); i++) {
253         switch (op_type) {
254           case OpType::AddC:
255             y_ref[i] = a_data[i] + b;
256             break;
257           case OpType::DivC:
258             y_ref[i] = a_data[i] / b;
259             break;
260           case OpType::RDivC:
261             y_ref[i] = b / a_data[i];
262             break;
263           case OpType::MaxC:
264             y_ref[i] = std::max<float>(a_data[i], b);
265             break;
266           case OpType::MinC:
267             y_ref[i] = std::min<float>(a_data[i], b);
268             break;
269           case OpType::MulC:
270             y_ref[i] = a_data[i] * b;
271             break;
272           case OpType::SqrDiffC:
273           {
274             const float diff = a_data[i] - b;
275             y_ref[i] = diff * diff;
276             break;
277           }
278           case OpType::SubC:
279             y_ref[i] = a_data[i] - b;
280             break;
281           case OpType::RSubC:
282             y_ref[i] = b - a_data[i];
283             break;
284         }
285       }
286 
287       // Prepare parameters.
288       xnn_f32_default_params params;
289       if (init_params) {
290         init_params(&params);
291       }
292 
293       // Call optimized micro-kernel.
294       vbinaryc(batch_size() * sizeof(float), a_data, &b, y.data(), init_params != nullptr ? &params : nullptr);
295 
296       // Verify results.
297       for (size_t i = 0; i < batch_size(); i++) {
298         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
299           << "at " << i << " / " << batch_size();
300       }
301     }
302   }
303 
Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu,OpType op_type)304   void Test(xnn_f32_vbinary_relu_ukernel_function vbinaryc_relu, OpType op_type) const {
305     std::random_device random_device;
306     auto rng = std::mt19937(random_device());
307     auto f32rng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
308 
309     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
310     const float b = f32rng();
311     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
312     std::vector<float> y_ref(batch_size());
313     for (size_t iteration = 0; iteration < iterations(); iteration++) {
314       std::generate(a.begin(), a.end(), std::ref(f32rng));
315       if (inplace()) {
316         std::generate(y.begin(), y.end(), std::ref(f32rng));
317       } else {
318         std::fill(y.begin(), y.end(), nanf(""));
319       }
320       const float* a_data = inplace() ? y.data() : a.data();
321 
322       // Compute reference results.
323       for (size_t i = 0; i < batch_size(); i++) {
324         switch (op_type) {
325           case OpType::AddC:
326             y_ref[i] = a_data[i] + b;
327             break;
328           case OpType::DivC:
329             y_ref[i] = a_data[i] / b;
330             break;
331           case OpType::RDivC:
332             y_ref[i] = b / a_data[i];
333             break;
334           case OpType::MaxC:
335             y_ref[i] = std::max<float>(a_data[i], b);
336             break;
337           case OpType::MinC:
338             y_ref[i] = std::min<float>(a_data[i], b);
339             break;
340           case OpType::MulC:
341             y_ref[i] = a_data[i] * b;
342             break;
343           case OpType::SqrDiffC:
344           {
345             const float diff = a_data[i] - b;
346             y_ref[i] = diff * diff;
347             break;
348           }
349           case OpType::SubC:
350             y_ref[i] = a_data[i] - b;
351             break;
352           case OpType::RSubC:
353             y_ref[i] = b - a_data[i];
354             break;
355         }
356       }
357       for (size_t i = 0; i < batch_size(); i++) {
358         y_ref[i] = std::max(y_ref[i], 0.0f);
359       }
360 
361       // Call optimized micro-kernel.
362       vbinaryc_relu(batch_size() * sizeof(float), a_data, &b, y.data(), nullptr);
363 
364       // Verify results.
365       for (size_t i = 0; i < batch_size(); i++) {
366         ASSERT_GE(y[i], 0.0f)
367           << "at " << i << " / " << batch_size();
368         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
369           << "at " << i << " / " << batch_size();
370       }
371     }
372   }
373 
Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax,OpType op_type,xnn_init_f32_minmax_params_fn init_params)374   void Test(xnn_f32_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type, xnn_init_f32_minmax_params_fn init_params) const {
375     std::random_device random_device;
376     auto rng = std::mt19937(random_device());
377     auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
378 
379     std::vector<float> a(batch_size() + XNN_EXTRA_BYTES / sizeof(float));
380     const float b = f32rng();
381     std::vector<float> y(batch_size() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0));
382     std::vector<float> y_ref(batch_size());
383     for (size_t iteration = 0; iteration < iterations(); iteration++) {
384       std::generate(a.begin(), a.end(), std::ref(f32rng));
385       if (inplace()) {
386         std::generate(y.begin(), y.end(), std::ref(f32rng));
387       } else {
388         std::fill(y.begin(), y.end(), nanf(""));
389       }
390       const float* a_data = inplace() ? y.data() : a.data();
391 
392       // Compute reference results.
393       for (size_t i = 0; i < batch_size(); i++) {
394         switch (op_type) {
395           case OpType::AddC:
396             y_ref[i] = a_data[i] + b;
397             break;
398           case OpType::DivC:
399             y_ref[i] = a_data[i] / b;
400             break;
401           case OpType::RDivC:
402             y_ref[i] = b / a_data[i];
403             break;
404           case OpType::MaxC:
405             y_ref[i] = std::max<float>(a_data[i], b);
406             break;
407           case OpType::MinC:
408             y_ref[i] = std::min<float>(a_data[i], b);
409             break;
410           case OpType::MulC:
411             y_ref[i] = a_data[i] * b;
412             break;
413           case OpType::SqrDiffC:
414           {
415             const float diff = a_data[i] - b;
416             y_ref[i] = diff * diff;
417             break;
418           }
419           case OpType::SubC:
420             y_ref[i] = a_data[i] - b;
421             break;
422           case OpType::RSubC:
423             y_ref[i] = b - a_data[i];
424             break;
425         }
426       }
427       const float accumulated_min = *std::min_element(y_ref.cbegin(), y_ref.cend());
428       const float accumulated_max = *std::max_element(y_ref.cbegin(), y_ref.cend());
429       const float accumulated_range = accumulated_max - accumulated_min;
430       const float y_max = accumulated_range > 0.0f ?
431         (accumulated_max - accumulated_range / 255.0f * float(255 - qmax())) :
432         +std::numeric_limits<float>::infinity();
433       const float y_min = accumulated_range > 0.0f ?
434         (accumulated_min + accumulated_range / 255.0f * float(qmin())) :
435         -std::numeric_limits<float>::infinity();
436       for (size_t i = 0; i < batch_size(); i++) {
437         y_ref[i] = std::max<float>(std::min<float>(y_ref[i], y_max), y_min);
438       }
439 
440       // Prepare parameters.
441       xnn_f32_minmax_params params;
442       init_params(&params, y_min, y_max);
443 
444       // Call optimized micro-kernel.
445       vbinaryc_minmax(batch_size() * sizeof(float), a_data, &b, y.data(), &params);
446 
447       // Verify results.
448       for (size_t i = 0; i < batch_size(); i++) {
449         ASSERT_NEAR(y[i], y_ref[i], std::abs(y_ref[i]) * 1.0e-6f)
450           << "at " << i << " / " << batch_size();
451       }
452     }
453   }
454 
455  private:
456   size_t batch_size_{1};
457   bool inplace_{false};
458   uint8_t qmin_{0};
459   uint8_t qmax_{255};
460   size_t iterations_{15};
461 };
462