// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/microfnptr.h>


25 class PReLUMicrokernelTester {
26  public:
rows(size_t rows)27   inline PReLUMicrokernelTester& rows(size_t rows) {
28     assert(rows != 0);
29     this->rows_ = rows;
30     return *this;
31   }
32 
rows()33   inline size_t rows() const {
34     return this->rows_;
35   }
36 
channels(size_t channels)37   inline PReLUMicrokernelTester& channels(size_t channels) {
38     assert(channels != 0);
39     this->channels_ = channels;
40     return *this;
41   }
42 
channels()43   inline size_t channels() const {
44     return this->channels_;
45   }
46 
input_stride(size_t input_stride)47   inline PReLUMicrokernelTester& input_stride(size_t input_stride) {
48     assert(input_stride != 0);
49     this->input_stride_ = input_stride;
50     return *this;
51   }
52 
input_stride()53   inline size_t input_stride() const {
54     if (this->input_stride_ == 0) {
55       return channels();
56     } else {
57       assert(this->input_stride_ >= channels());
58       return this->input_stride_;
59     }
60   }
61 
output_stride(size_t output_stride)62   inline PReLUMicrokernelTester& output_stride(size_t output_stride) {
63     assert(output_stride != 0);
64     this->output_stride_ = output_stride;
65     return *this;
66   }
67 
output_stride()68   inline size_t output_stride() const {
69     if (this->output_stride_ == 0) {
70       return channels();
71     } else {
72       assert(this->output_stride_ >= channels());
73       return this->output_stride_;
74     }
75   }
76 
inplace(bool inplace)77   inline PReLUMicrokernelTester& inplace(bool inplace) {
78     this->inplace_ = inplace;
79     return *this;
80   }
81 
inplace()82   inline bool inplace() const {
83     return this->inplace_;
84   }
85 
iterations(size_t iterations)86   inline PReLUMicrokernelTester& iterations(size_t iterations) {
87     this->iterations_ = iterations;
88     return *this;
89   }
90 
iterations()91   inline size_t iterations() const {
92     return this->iterations_;
93   }
94 
Test(xnn_f16_prelu_ukernel_function prelu)95   void Test(xnn_f16_prelu_ukernel_function prelu) const {
96     std::random_device random_device;
97     auto rng = std::mt19937(random_device());
98     std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);
99     std::uniform_real_distribution<float> w32dist(0.25f, 0.75f);
100 
101     std::vector<uint16_t> x(channels() + (rows() - 1) * input_stride() + XNN_EXTRA_BYTES / sizeof(uint16_t));
102     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> w(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
103     std::vector<uint16_t> y(channels() + (rows() - 1) * output_stride() + XNN_EXTRA_BYTES / sizeof(uint16_t));
104     std::vector<float> y_ref(channels() * rows());
105     for (size_t iteration = 0; iteration < iterations(); iteration++) {
106       std::generate(x.begin(), x.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
107       std::generate(w.begin(), w.end(), [&]() { return fp16_ieee_from_fp32_value(w32dist(rng)); });
108       if (inplace()) {
109         std::generate(y.begin(), y.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); });
110       } else {
111         std::fill(y.begin(), y.end(), UINT16_C(0x7E00) /* NaN */);
112       }
113       const uint16_t* x_data = inplace() ? y.data() : x.data();
114 
115       // Compute reference results, without clamping.
116       for (size_t n = 0; n < rows(); n++) {
117         for (size_t c = 0; c < channels(); c++) {
118           const float x_value = fp16_ieee_to_fp32_value(x_data[n * input_stride() + c]);
119           y_ref[n * channels() + c] = std::signbit(x_value) ?
120               fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(x_value * fp16_ieee_to_fp32_value(w[c]))) : x_value;
121         }
122       }
123 
124       // Call optimized micro-kernel.
125       prelu(rows(), channels() * sizeof(uint16_t),
126         x_data, input_stride() * sizeof(uint16_t),
127         w.data(),
128         y.data(), output_stride() * sizeof(uint16_t));
129 
130       // Verify results.
131       for (size_t n = 0; n < rows(); n++) {
132         for (size_t c = 0; c < channels(); c++) {
133           ASSERT_EQ(fp16_ieee_to_fp32_value(y[n * output_stride() + c]), y_ref[n * channels() + c])
134             << "at row " << n << " / " << rows()
135             << ", channel " << c << " / " << channels();
136         }
137       }
138     }
139   }
140 
Test(xnn_f32_prelu_ukernel_function prelu)141   void Test(xnn_f32_prelu_ukernel_function prelu) const {
142     std::random_device random_device;
143     auto rng = std::mt19937(random_device());
144     std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f);
145     std::uniform_real_distribution<float> w32dist(0.25f, 0.75f);
146 
147     std::vector<float> x(channels() + (rows() - 1) * input_stride() + XNN_EXTRA_BYTES / sizeof(float));
148     std::vector<float, AlignedAllocator<float, 64>> w(channels() + XNN_EXTRA_BYTES / sizeof(float));
149     std::vector<float> y(channels() + (rows() - 1) * output_stride() + XNN_EXTRA_BYTES / sizeof(float));
150     std::vector<float> y_ref(channels() * rows());
151     for (size_t iteration = 0; iteration < iterations(); iteration++) {
152       std::generate(x.begin(), x.end(), [&]() { return f32dist(rng); });
153       std::generate(w.begin(), w.end(), [&]() { return w32dist(rng); });
154       if (inplace()) {
155         std::generate(y.begin(), y.end(), [&]() { return f32dist(rng); });
156       } else {
157         std::fill(y.begin(), y.end(), nanf(""));
158       }
159       const float* x_data = inplace() ? y.data() : x.data();
160 
161       // Compute reference results, without clamping.
162       for (size_t n = 0; n < rows(); n++) {
163         for (size_t c = 0; c < channels(); c++) {
164           const float x_value = x_data[n * input_stride() + c];
165           y_ref[n * channels() + c] = std::signbit(x_value) ? x_value * w[c] : x_value;
166         }
167       }
168 
169       // Call optimized micro-kernel.
170       prelu(rows(), channels() * sizeof(float),
171         x_data, input_stride() * sizeof(float),
172         w.data(),
173         y.data(), output_stride() * sizeof(float));
174 
175       // Verify results.
176       for (size_t n = 0; n < rows(); n++) {
177         for (size_t c = 0; c < channels(); c++) {
178           ASSERT_EQ(y[n * output_stride() + c], y_ref[n * channels() + c])
179             << "at row " << n << " / " << rows()
180             << ", channel " << c << " / " << channels();
181         }
182       }
183     }
184   }
185 
186  private:
187   size_t rows_{1};
188   size_t channels_{1};
189   size_t input_stride_{0};
190   size_t output_stride_{0};
191   bool inplace_{false};
192   size_t iterations_{15};
193 };
194