• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>
14 
15 #include <gtest/gtest.h>
16 
17 #include <fp16.h>
18 
19 #include <xnnpack/AlignedAllocator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/isa-checks.h>
22 #include <xnnpack/math-stubs.h>
23 
24 
// Number of elements held by the input/output buffers of every test in this
// file; each kernel call converts one such block.
constexpr int kBlockSize{1024};
26 
27 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Sweeps, for every int8 zero point, the non-negative float bit patterns from
// +0.0f up to the bit pattern of (int8 max - zero_point) in blocks of
// kBlockSize, and checks xnn_math_f32_qs8_cvt__neon against
// lrintf(input) + zero_point.
TEST(CVT__NEON, positive_normal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    // Bit pattern of the largest input whose converted value still equals int8 max.
    const uint32_t max_input = fp32_to_bits(static_cast<float>(std::numeric_limits<int8_t>::max() - zero_point));
    // NOTE(review): max_input looks to be a multiple of kBlockSize for every
    // zero point, so this sweep appears to end one bit pattern short of
    // max_input itself - confirm the boundary is covered by positive_saturation.
    for (uint32_t n = 0; n < max_input; n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
      }
      xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Inputs lie in [0, int8 max - zero_point], so the rounded value plus
        // the zero point always fits in int8 and needs no clamping here.
        const long reference_output = std::lrintf(inputs[i]) + long(zero_point);
        ASSERT_EQ(reference_output, long(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[i])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
59 
// Sweeps, for every int8 zero point, the negative float bit patterns from
// -0.0f down to -(zero_point - int8 min) in blocks of kBlockSize, and checks
// xnn_math_f32_qs8_cvt__neon against lrintf(input) + zero_point.
TEST(CVT__NEON, negative_normal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    // Bit pattern (sign bit clear) of the largest magnitude whose converted
    // value still equals int8 min.
    const uint32_t max_input = fp32_to_bits(static_cast<float>(zero_point - std::numeric_limits<int8_t>::min()));
    // NOTE(review): max_input looks to be a multiple of kBlockSize for every
    // zero point, so this sweep appears to end one bit pattern short of
    // max_input itself - confirm the boundary is covered by negative_saturation.
    for (uint32_t n = 0; n < max_input; n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // OR-ing in the sign bit turns each magnitude into a negative input.
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
      }
      xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Inputs lie in [-(zero_point - int8 min), -0], so the rounded value
        // plus the zero point always fits in int8 and needs no clamping here.
        const long reference_output = std::lrintf(inputs[i]) + long(zero_point);
        ASSERT_EQ(reference_output, long(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[i])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
91 
// For every int8 zero point, feeds xnn_math_f32_qs8_cvt__neon the positive
// float bit patterns starting at (int8 max - zero_point) and expects every
// output to equal int8 max.
TEST(CVT__NEON, positive_saturation) {
  TEST_REQUIRES_ARM_NEON;

  constexpr int32_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int32_t kQMax = std::numeric_limits<int8_t>::max();
  // Every input in this test is expected to clamp to the top of the int8 range.
  const int32_t reference_output = kQMax;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = kQMin; zero_point <= kQMax; zero_point++) {
    const uint32_t first_bits = fp32_to_bits(static_cast<float>(kQMax - zero_point));
    // IEEE 754 single-precision bit pattern of +infinity.
    const uint32_t last_bits = UINT32_C(0x7F800000);
    // NOTE(review): first_bits and last_bits both look to be multiples of
    // kBlockSize, so the strict `<` bound appears to stop at the largest
    // finite float and never feeds +infinity itself - confirm this is intended.
    for (uint32_t base = first_bits; base < last_bits; base += kBlockSize) {
      for (uint32_t k = 0; k < kBlockSize; k++) {
        const uint32_t bits = std::min<uint32_t>(base + k, last_bits);
        inputs[k] = fp32_from_bits(bits);
      }
      xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t k = 0; k < kBlockSize; k++) {
        ASSERT_EQ(reference_output, int32_t(outputs[k]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[k])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[k])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
119 
// For every int8 zero point, feeds xnn_math_f32_qs8_cvt__neon the negative
// float bit patterns whose magnitude starts at (zero_point - int8 min) and
// expects every output to equal int8 min.
TEST(CVT__NEON, negative_saturation) {
  TEST_REQUIRES_ARM_NEON;

  constexpr int32_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int32_t kQMax = std::numeric_limits<int8_t>::max();
  // Every input in this test is expected to clamp to the bottom of the int8 range.
  const int32_t reference_output = kQMin;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = kQMin; zero_point <= kQMax; zero_point++) {
    const uint32_t first_bits = fp32_to_bits(static_cast<float>(zero_point - kQMin));
    // IEEE 754 single-precision bit pattern of infinity (sign bit clear).
    const uint32_t last_bits = UINT32_C(0x7F800000);
    // NOTE(review): first_bits and last_bits both look to be multiples of
    // kBlockSize, so the strict `<` bound appears to stop at the largest
    // finite magnitude and never feeds -infinity itself - confirm this is intended.
    for (uint32_t base = first_bits; base < last_bits; base += kBlockSize) {
      for (uint32_t k = 0; k < kBlockSize; k++) {
        const uint32_t magnitude_bits = std::min<uint32_t>(base + k, last_bits);
        // Setting the sign bit makes the input negative.
        inputs[k] = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      }
      xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t k = 0; k < kBlockSize; k++) {
        ASSERT_EQ(reference_output, int32_t(outputs[k]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[k])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[k])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
147 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
148 
149 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Sweeps, for every int8 zero point, the non-negative float bit patterns from
// +0.0f up to the bit pattern of (int8 max - zero_point) in blocks of
// kBlockSize, and checks xnn_math_f32_qs8_cvt__neonv8 against
// lrintf(input) + zero_point.
TEST(CVT__NEONV8, positive_normal) {
  TEST_REQUIRES_ARM_NEON_V8;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    // Bit pattern of the largest input whose converted value still equals int8 max.
    const uint32_t max_input = fp32_to_bits(static_cast<float>(std::numeric_limits<int8_t>::max() - zero_point));
    // NOTE(review): max_input looks to be a multiple of kBlockSize for every
    // zero point, so this sweep appears to end one bit pattern short of
    // max_input itself - confirm the boundary is covered by positive_saturation.
    for (uint32_t n = 0; n < max_input; n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
      }
      xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Inputs lie in [0, int8 max - zero_point], so the rounded value plus
        // the zero point always fits in int8 and needs no clamping here.
        const long reference_output = std::lrintf(inputs[i]) + long(zero_point);
        ASSERT_EQ(reference_output, long(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[i])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
181 
// Sweeps, for every int8 zero point, the negative float bit patterns from
// -0.0f down to -(zero_point - int8 min) in blocks of kBlockSize, and checks
// xnn_math_f32_qs8_cvt__neonv8 against lrintf(input) + zero_point.
TEST(CVT__NEONV8, negative_normal) {
  TEST_REQUIRES_ARM_NEON_V8;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = std::numeric_limits<int8_t>::min();
       zero_point <= std::numeric_limits<int8_t>::max();
       zero_point++)
  {
    // Bit pattern (sign bit clear) of the largest magnitude whose converted
    // value still equals int8 min.
    const uint32_t max_input = fp32_to_bits(static_cast<float>(zero_point - std::numeric_limits<int8_t>::min()));
    // NOTE(review): max_input looks to be a multiple of kBlockSize for every
    // zero point, so this sweep appears to end one bit pattern short of
    // max_input itself - confirm the boundary is covered by negative_saturation.
    for (uint32_t n = 0; n < max_input; n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // OR-ing in the sign bit turns each magnitude into a negative input.
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input));
      }
      xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Inputs lie in [-(zero_point - int8 min), -0], so the rounded value
        // plus the zero point always fits in int8 and needs no clamping here.
        const long reference_output = std::lrintf(inputs[i]) + long(zero_point);
        ASSERT_EQ(reference_output, long(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[i])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
213 
// For every int8 zero point, feeds xnn_math_f32_qs8_cvt__neonv8 the positive
// float bit patterns starting at (int8 max - zero_point) and expects every
// output to equal int8 max.
TEST(CVT__NEONV8, positive_saturation) {
  TEST_REQUIRES_ARM_NEON_V8;

  constexpr int32_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int32_t kQMax = std::numeric_limits<int8_t>::max();
  // Every input in this test is expected to clamp to the top of the int8 range.
  const int32_t reference_output = kQMax;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = kQMin; zero_point <= kQMax; zero_point++) {
    const uint32_t first_bits = fp32_to_bits(static_cast<float>(kQMax - zero_point));
    // IEEE 754 single-precision bit pattern of +infinity.
    const uint32_t last_bits = UINT32_C(0x7F800000);
    // NOTE(review): first_bits and last_bits both look to be multiples of
    // kBlockSize, so the strict `<` bound appears to stop at the largest
    // finite float and never feeds +infinity itself - confirm this is intended.
    for (uint32_t base = first_bits; base < last_bits; base += kBlockSize) {
      for (uint32_t k = 0; k < kBlockSize; k++) {
        const uint32_t bits = std::min<uint32_t>(base + k, last_bits);
        inputs[k] = fp32_from_bits(bits);
      }
      xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t k = 0; k < kBlockSize; k++) {
        ASSERT_EQ(reference_output, int32_t(outputs[k]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[k])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[k])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
241 
// For every int8 zero point, feeds xnn_math_f32_qs8_cvt__neonv8 the negative
// float bit patterns whose magnitude starts at (zero_point - int8 min) and
// expects every output to equal int8 min.
TEST(CVT__NEONV8, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_V8;

  constexpr int32_t kQMin = std::numeric_limits<int8_t>::min();
  constexpr int32_t kQMax = std::numeric_limits<int8_t>::max();
  // Every input in this test is expected to clamp to the bottom of the int8 range.
  const int32_t reference_output = kQMin;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize);
  for (int32_t zero_point = kQMin; zero_point <= kQMax; zero_point++) {
    const uint32_t first_bits = fp32_to_bits(static_cast<float>(zero_point - kQMin));
    // IEEE 754 single-precision bit pattern of infinity (sign bit clear).
    const uint32_t last_bits = UINT32_C(0x7F800000);
    // NOTE(review): first_bits and last_bits both look to be multiples of
    // kBlockSize, so the strict `<` bound appears to stop at the largest
    // finite magnitude and never feeds -infinity itself - confirm this is intended.
    for (uint32_t base = first_bits; base < last_bits; base += kBlockSize) {
      for (uint32_t k = 0; k < kBlockSize; k++) {
        const uint32_t magnitude_bits = std::min<uint32_t>(base + k, last_bits);
        // Setting the sign bit makes the input negative.
        inputs[k] = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
      }
      xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point));
      for (uint32_t k = 0; k < kBlockSize; k++) {
        ASSERT_EQ(reference_output, int32_t(outputs[k]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[k])
          << ", reference = " << std::dec << reference_output
          << ", optimized = " << std::dec << int32_t(outputs[k])
          << ", zero point = " << std::dec << zero_point;
      }
    }
  }
}
269 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
270