1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>
14
15 #include <gtest/gtest.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack/AlignedAllocator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/math-stubs.h>
22
23
// Number of float elements handed to the kernel under test per call; the
// exhaustive sweeps below also advance through bit patterns in steps of this size.
constexpr int kBlockSize = 1024;
25
26 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exhaustively sweeps the bit patterns [0x00000000, 0x4B800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse_addsub to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE_ADDSUB, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x00000000);
  const uint32_t sweep_end = UINT32_C(0x4B800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
44
// Exhaustively sweeps the bit patterns [0x80000000, 0xCB800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse_addsub to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE_ADDSUB, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x80000000);
  const uint32_t sweep_end = UINT32_C(0xCB800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
62
// Exhaustively sweeps the bit patterns [0x4B800000, 0x7F800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse_addsub to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE_ADDSUB, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x4B800000);
  const uint32_t sweep_end = UINT32_C(0x7F800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
80
// Exhaustively sweeps the bit patterns [0xCB800000, 0xFF800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse_addsub to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE_ADDSUB, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0xCB800000);
  const uint32_t sweep_end = UINT32_C(0xFF800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
98
// Runs xnn_math_f32_roundne__sse_addsub on a block filled entirely with
// +infinity and compares the first output element against std::nearbyint
// bit-for-bit. Only element 0 is verified.
TEST(ROUNDNE__SSE_ADDSUB, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = +std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
110
// Runs xnn_math_f32_roundne__sse_addsub on a block filled entirely with
// -infinity and compares the first output element against std::nearbyint
// bit-for-bit. Only element 0 is verified.
TEST(ROUNDNE__SSE_ADDSUB, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = -std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
122
// Exhaustively sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse_addsub to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE_ADDSUB, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
140
// Sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of kBlockSize,
// ORs the sign bit (0x80000000) into each one, and requires
// xnn_math_f32_roundne__sse_addsub to reproduce std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE_ADDSUB, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, each with the sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | (next_bits++));
    });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
158
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 so that 0x7F800000 itself is never fed in, and compares
// xnn_math_f32_roundne__sse_addsub against std::nearbyint with bit 22 masked
// out of both sides (mask 0xFFBFFFFF).
TEST(ROUNDNE__SSE_ADDSUB, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start, clamped from below.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
176
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 and ORing in the sign bit (0x80000000), and compares
// xnn_math_f32_roundne__sse_addsub against std::nearbyint with bit 22 masked
// out of both sides (mask 0xFFBFFFFF).
TEST(ROUNDNE__SSE_ADDSUB, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, clamped from below, sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
194
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 so that 0x7F800000 itself is never fed in, and requires
// xnn_math_f32_roundne__sse_addsub to match std::nearbyint on all 32 bits
// (no mask is applied to the comparison).
TEST(ROUNDNE__SSE_ADDSUB, positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start, clamped from below.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
212
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 and ORing in the sign bit (0x80000000), and requires
// xnn_math_f32_roundne__sse_addsub to match std::nearbyint on all 32 bits
// (no mask is applied to the comparison).
TEST(ROUNDNE__SSE_ADDSUB, negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, clamped from below, sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
230 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
231
232 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exhaustively sweeps the bit patterns [0x00000000, 0x4B800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse2_cvt to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE2_CVT, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x00000000);
  const uint32_t sweep_end = UINT32_C(0x4B800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
250
// Exhaustively sweeps the bit patterns [0x80000000, 0xCB800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse2_cvt to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE2_CVT, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x80000000);
  const uint32_t sweep_end = UINT32_C(0xCB800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
268
// Exhaustively sweeps the bit patterns [0x4B800000, 0x7F800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse2_cvt to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE2_CVT, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x4B800000);
  const uint32_t sweep_end = UINT32_C(0x7F800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
286
// Exhaustively sweeps the bit patterns [0xCB800000, 0xFF800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse2_cvt to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE2_CVT, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0xCB800000);
  const uint32_t sweep_end = UINT32_C(0xFF800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
304
// Runs xnn_math_f32_roundne__sse2_cvt on a block filled entirely with
// +infinity and compares the first output element against std::nearbyint
// bit-for-bit. Only element 0 is verified.
TEST(ROUNDNE__SSE2_CVT, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = +std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
316
// Runs xnn_math_f32_roundne__sse2_cvt on a block filled entirely with
// -infinity and compares the first output element against std::nearbyint
// bit-for-bit. Only element 0 is verified.
TEST(ROUNDNE__SSE2_CVT, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = -std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
328
// Exhaustively sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse2_cvt to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE2_CVT, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
346
// Sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of kBlockSize,
// ORs the sign bit (0x80000000) into each one, and requires
// xnn_math_f32_roundne__sse2_cvt to reproduce std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE2_CVT, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, each with the sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | (next_bits++));
    });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
364
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 so that 0x7F800000 itself is never fed in, and compares
// xnn_math_f32_roundne__sse2_cvt against std::nearbyint with bit 22 masked
// out of both sides (mask 0xFFBFFFFF).
TEST(ROUNDNE__SSE2_CVT, positive_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start, clamped from below.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
382
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 and ORing in the sign bit (0x80000000), and compares
// xnn_math_f32_roundne__sse2_cvt against std::nearbyint with bit 22 masked
// out of both sides (mask 0xFFBFFFFF).
TEST(ROUNDNE__SSE2_CVT, negative_snan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, clamped from below, sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
400
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 so that 0x7F800000 itself is never fed in, and requires
// xnn_math_f32_roundne__sse2_cvt to match std::nearbyint on all 32 bits
// (no mask is applied to the comparison).
// The DISABLED_ name prefix keeps googletest from running this test by default.
TEST(ROUNDNE__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns starting at block_start, clamped from below.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
418
// Sweeps the bit patterns [0x7F800000, 0x7FC00000), clamping each to at least
// 0x7F800001 and ORing in the sign bit (0x80000000), and requires
// xnn_math_f32_roundne__sse2_cvt to match std::nearbyint on all 32 bits
// (no mask is applied to the comparison).
// The DISABLED_ name prefix keeps googletest from running this test by default.
TEST(ROUNDNE__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7F800000);
  const uint32_t sweep_end = UINT32_C(0x7FC00000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, clamped from below, sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
436 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
437
438 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Exhaustively sweeps the bit patterns [0x00000000, 0x4B800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse41 to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE41, positive_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x00000000);
  const uint32_t sweep_end = UINT32_C(0x4B800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
456
// Exhaustively sweeps the bit patterns [0x80000000, 0xCB800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse41 to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE41, negative_normal) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x80000000);
  const uint32_t sweep_end = UINT32_C(0xCB800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
474
// Exhaustively sweeps the bit patterns [0x4B800000, 0x7F800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse41 to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE41, positive_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x4B800000);
  const uint32_t sweep_end = UINT32_C(0x7F800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
492
// Exhaustively sweeps the bit patterns [0xCB800000, 0xFF800000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse41 to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE41, negative_integral) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0xCB800000);
  const uint32_t sweep_end = UINT32_C(0xFF800000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
510
// Runs xnn_math_f32_roundne__sse41 on a block filled entirely with +infinity
// and compares the first output element against std::nearbyint bit-for-bit.
// Only element 0 is verified.
TEST(ROUNDNE__SSE41, positive_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = +std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
522
// Runs xnn_math_f32_roundne__sse41 on a block filled entirely with -infinity
// and compares the first output element against std::nearbyint bit-for-bit.
// Only element 0 is verified.
TEST(ROUNDNE__SSE41, negative_infinity) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  const float fill_value = -std::numeric_limits<float>::infinity();
  // Every input element is initialized to the fill value at construction.
  AlignedFloatVector inputs(kBlockSize, fill_value);
  AlignedFloatVector outputs(kBlockSize);
  xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
534
// Exhaustively sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of
// kBlockSize and requires xnn_math_f32_roundne__sse41 to reproduce
// std::nearbyint bit-for-bit on every element.
TEST(ROUNDNE__SSE41, positive_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive bit patterns block_start, block_start + 1, ... fill the input block.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] { return fp32_from_bits(next_bits++); });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
552
// Sweeps the bit patterns [0x7FC00000, 0x80000000) in chunks of kBlockSize,
// ORs the sign bit (0x80000000) into each one, and requires
// xnn_math_f32_roundne__sse41 to reproduce std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE41, negative_qnan) {
  using AlignedFloatVector = std::vector<float, AlignedAllocator<float, 64>>;
  AlignedFloatVector inputs(kBlockSize);
  AlignedFloatVector outputs(kBlockSize);
  const uint32_t sweep_begin = UINT32_C(0x7FC00000);
  const uint32_t sweep_end = UINT32_C(0x80000000);
  for (uint32_t block_start = sweep_begin; block_start < sweep_end; block_start += kBlockSize) {
    // Consecutive magnitudes starting at block_start, each with the sign bit set.
    uint32_t next_bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&next_bits] {
      return fp32_from_bits(UINT32_C(0x80000000) | (next_bits++));
    });
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i != kBlockSize; ++i) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
570
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 so the +infinity pattern is never used, through the SSE4.1
// kernel. Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__SSE41, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
588
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the SSE4.1 kernel.
// Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__SSE41, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
606
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001, through the SSE4.1 kernel. Unlike the masked positive_snan
// check, all 32 output bits must equal the bits of std::nearbyint.
TEST(ROUNDNE__SSE41, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
624
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the SSE4.1 kernel.
// Unlike the masked negative_snan check, all 32 output bits must equal the
// bits of std::nearbyint.
TEST(ROUNDNE__SSE41, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
642 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
643
644 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Sweeps every bit pattern in [0x00000000, 0x4B800000) (sign bit clear,
// magnitude below 2^24) through the NEON add/sub kernel, one kBlockSize block
// at a time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
662
// Sweeps every bit pattern in [0x80000000, 0xCB800000) (sign bit set,
// magnitude below 2^24) through the NEON add/sub kernel, one kBlockSize block
// at a time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
680
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) (sign bit clear, finite
// magnitude of at least 2^24) through the NEON add/sub kernel, one kBlockSize
// block at a time, and requires each output to match std::nearbyint
// bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
698
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) (sign bit set, finite
// magnitude of at least 2^24) through the NEON add/sub kernel, one kBlockSize
// block at a time, and requires each output to match std::nearbyint
// bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
716
// Runs the NEON add/sub kernel on a block filled with +infinity. Only the
// first output element is compared (bit-for-bit) against std::nearbyint.
TEST(ROUNDNE__NEON_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  // std::hex and std::setfill are sticky; only std::setw must precede each value.
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
728
// Runs the NEON add/sub kernel on a block filled with -infinity. Only the
// first output element is compared (bit-for-bit) against std::nearbyint.
TEST(ROUNDNE__NEON_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  // std::hex and std::setfill are sticky; only std::setw must precede each value.
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
740
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) (sign bit clear, NaN
// with bit 22 set) through the NEON add/sub kernel, one kBlockSize block at a
// time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
758
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) with the sign bit
// (0x80000000) OR-ed in, feeds each kBlockSize block to the NEON add/sub
// kernel, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEON_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | next_bits++);
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
776
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 so the +infinity pattern is never used, through the NEON add/sub
// kernel. Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__NEON_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
794
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the NEON add/sub kernel.
// Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__NEON_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
812
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001, through the NEON add/sub kernel. Unlike the masked
// positive_snan check, all 32 output bits must equal the bits of
// std::nearbyint.
TEST(ROUNDNE__NEON_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
830
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the NEON add/sub kernel.
// Unlike the masked negative_snan check, all 32 output bits must equal the
// bits of std::nearbyint.
TEST(ROUNDNE__NEON_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
848 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Sweeps every bit pattern in [0x00000000, 0x4B800000) (sign bit clear,
// magnitude below 2^24) through the NEONv8 kernel, one kBlockSize block at a
// time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
868
// Sweeps every bit pattern in [0x80000000, 0xCB800000) (sign bit set,
// magnitude below 2^24) through the NEONv8 kernel, one kBlockSize block at a
// time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
886
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) (sign bit clear, finite
// magnitude of at least 2^24) through the NEONv8 kernel, one kBlockSize block
// at a time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
904
// Sweeps every bit pattern in [0xCB800000, 0xFF800000) (sign bit set, finite
// magnitude of at least 2^24) through the NEONv8 kernel, one kBlockSize block
// at a time, and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
922
// Runs the NEONv8 kernel on a block filled with +infinity. Only the first
// output element is compared (bit-for-bit) against std::nearbyint.
TEST(ROUNDNE__NEONV8, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, +std::numeric_limits<float>::infinity());
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  // std::hex and std::setfill are sticky; only std::setw must precede each value.
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
934
// Runs the NEONv8 kernel on a block filled with -infinity. Only the first
// output element is compared (bit-for-bit) against std::nearbyint.
TEST(ROUNDNE__NEONV8, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize, -std::numeric_limits<float>::infinity());
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
  // std::hex and std::setfill are sticky; only std::setw must precede each value.
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << std::hex << std::setfill('0')
    << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::setw(8) << reference_output
    << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[0]);
}
946
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) (sign bit clear, NaN
// with bit 22 set) through the NEONv8 kernel, one kBlockSize block at a time,
// and requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
964
// Sweeps every bit pattern in [0x7FC00000, 0x80000000) with the sign bit
// (0x80000000) OR-ed in, feeds each kBlockSize block to the NEONv8 kernel, and
// requires each output to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__NEONV8, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | next_bits++);
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
982
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 so the +infinity pattern is never used, through the NEONv8
// kernel. Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__NEONV8, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1000
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the NEONv8 kernel.
// Outputs are compared against std::nearbyint with bit 22 masked out
// (mask 0xFFBFFFFF) on both sides.
TEST(ROUNDNE__NEONV8, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1018
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001, through the NEONv8 kernel. Unlike the masked positive_snan
// check, all 32 output bits must equal the bits of std::nearbyint.
TEST(ROUNDNE__NEONV8, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1036
// Sweeps bit patterns in [0x7F800000, 0x7FC00000), clamped from below to
// 0x7F800001 and with the sign bit OR-ed in, through the NEONv8 kernel.
// Unlike the masked negative_snan check, all 32 output bits must equal the
// bits of std::nearbyint.
TEST(ROUNDNE__NEONV8, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(next_bits++, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1054 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
1056 #if XNN_ARCH_WASMSIMD
// Sweeps every bit pattern in [0x00000000, 0x4B800000) (sign bit clear,
// magnitude below 2^24) through the WAsm SIMD add/sub kernel, one kBlockSize
// block at a time, and requires each output to match std::nearbyint
// bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1074
// Sweeps every bit pattern in [0x80000000, 0xCB800000) (sign bit set,
// magnitude below 2^24) through the WAsm SIMD add/sub kernel, one kBlockSize
// block at a time, and requires each output to match std::nearbyint
// bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1092
// Sweeps every bit pattern in [0x4B800000, 0x7F800000) (sign bit clear, finite
// magnitude of at least 2^24) through the WAsm SIMD add/sub kernel, one
// kBlockSize block at a time, and requires each output to match
// std::nearbyint bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const size_t batch_bytes = inputs.size() * sizeof(float);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    uint32_t next_bits = block_start;
    for (float& input : inputs) {
      input = fp32_from_bits(next_bits++);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(batch_bytes, inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
      // std::hex and std::setfill are sticky; only std::setw must precede each value.
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << std::hex << std::setfill('0')
        << "input = 0x" << std::setw(8) << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::setw(8) << reference_output
        << ", optimized = 0x" << std::setw(8) << fp32_to_bits(outputs[i]);
    }
  }
}
1110
// Sweeps the bit patterns [0xCB800000, 0xFF800000) -- finite negative values with
// magnitude 2**24 and above, which are already integers -- in kBlockSize-sized
// batches and requires the wasmsimd_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_integral) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0xCB800000);
  constexpr uint32_t kEndBits = UINT32_C(0xFF800000);  // exclusive upper bound (-infinity)
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1128
// Runs the wasmsimd_addsub kernel over a whole block of +infinity and compares the
// first output element with std::nearbyint, bit-for-bit. Only element 0 is checked.
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_infinity) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  const float positive_infinity = +std::numeric_limits<float>::infinity();
  std::fill(samples.begin(), samples.end(), positive_infinity);
  xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

  const uint32_t sample_bits = fp32_to_bits(samples[0]);
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1140
// Runs the wasmsimd_addsub kernel over a whole block of -infinity and compares the
// first output element with std::nearbyint, bit-for-bit. Only element 0 is checked.
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_infinity) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  const float negative_infinity = -std::numeric_limits<float>::infinity();
  std::fill(samples.begin(), samples.end(), negative_infinity);
  xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

  const uint32_t sample_bits = fp32_to_bits(samples[0]);
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1152
// Sweeps the bit patterns [0x7FC00000, 0x80000000) -- sign bit clear, exponent all
// ones, mantissa bit 22 set -- in kBlockSize-sized batches and requires the
// wasmsimd_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7FC00000);
  constexpr uint32_t kEndBits = UINT32_C(0x80000000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1170
// Walks the bit patterns [0x7FC00000, 0x80000000) with the sign bit OR-ed in --
// exponent all ones, mantissa bit 22 set, sign set -- in kBlockSize-sized batches
// and requires the wasmsimd_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7FC00000);
  constexpr uint32_t kEndBits = UINT32_C(0x80000000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Iterate over the positive patterns and set the sign bit on each one.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      const uint32_t magnitude_bits = base + lane;
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1188
// Walks the bit patterns [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 so
// that +infinity is never used -- sign clear, exponent all ones, mantissa bit 22
// clear, mantissa non-zero. The wasmsimd_addsub kernel must agree with std::nearbyint
// on every bit except bit 22, which is masked out of both sides of the comparison
// (presumably so the quiet/signaling flag of the resulting NaN is not checked here).
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (+infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(nan_bits);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1206
// Walks the bit patterns [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 so
// that infinity is never used, with the sign bit OR-ed in -- sign set, exponent all
// ones, mantissa bit 22 clear, mantissa non-zero. The wasmsimd_addsub kernel must
// agree with std::nearbyint on every bit except bit 22, which is masked out of both
// sides of the comparison (presumably so the quiet/signaling flag of the resulting
// NaN is not checked here).
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | nan_bits);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1224
// Same inputs as the positive_snan sweep (bit patterns [0x7F800000, 0x7FC00000)
// clamped to at least 0x7F800001), but here the wasmsimd_addsub kernel output must
// equal std::nearbyint on all 32 bits, with nothing masked out.
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (+infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(nan_bits);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1242
// Same inputs as the negative_snan sweep (bit patterns [0x7F800000, 0x7FC00000)
// clamped to at least 0x7F800001, with the sign bit OR-ed in), but here the
// wasmsimd_addsub kernel output must equal std::nearbyint on all 32 bits, with
// nothing masked out.
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | nan_bits);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1260 #endif // XNN_ARCH_WASMSIMD
1261
// Sweeps the bit patterns [0x00000000, 0x4B800000) -- i.e. +0.0f and positive
// values below 2**24 -- in kBlockSize-sized batches and requires the scalar_addsub
// kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, positive_normal) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x00000000);
  constexpr uint32_t kEndBits = UINT32_C(0x4B800000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1279
// Sweeps the sign-bit-set bit patterns [0x80000000, 0xCB800000) -- i.e. -0.0f and
// negative values with magnitude below 2**24 -- in kBlockSize-sized batches and
// requires the scalar_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, negative_normal) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x80000000);
  constexpr uint32_t kEndBits = UINT32_C(0xCB800000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1297
// Sweeps the bit patterns [0x4B800000, 0x7F800000) -- finite positive values of
// 2**24 and above, which are already integers -- in kBlockSize-sized batches and
// requires the scalar_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, positive_integral) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x4B800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7F800000);  // exclusive upper bound (+infinity)
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1315
// Sweeps the bit patterns [0xCB800000, 0xFF800000) -- finite negative values with
// magnitude 2**24 and above, which are already integers -- in kBlockSize-sized
// batches and requires the scalar_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, negative_integral) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0xCB800000);
  constexpr uint32_t kEndBits = UINT32_C(0xFF800000);  // exclusive upper bound (-infinity)
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1333
// Runs the scalar_addsub kernel over a whole block of +infinity and compares the
// first output element with std::nearbyint, bit-for-bit. Only element 0 is checked.
TEST(ROUNDNE__SCALAR_ADDSUB, positive_infinity) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  const float positive_infinity = +std::numeric_limits<float>::infinity();
  std::fill(samples.begin(), samples.end(), positive_infinity);
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

  const uint32_t sample_bits = fp32_to_bits(samples[0]);
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1345
// Runs the scalar_addsub kernel over a whole block of -infinity and compares the
// first output element with std::nearbyint, bit-for-bit. Only element 0 is checked.
TEST(ROUNDNE__SCALAR_ADDSUB, negative_infinity) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  const float negative_infinity = -std::numeric_limits<float>::infinity();
  std::fill(samples.begin(), samples.end(), negative_infinity);
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

  const uint32_t sample_bits = fp32_to_bits(samples[0]);
  const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}
1357
// Sweeps the bit patterns [0x7FC00000, 0x80000000) -- sign bit clear, exponent all
// ones, mantissa bit 22 set -- in kBlockSize-sized batches and requires the
// scalar_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, positive_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7FC00000);
  constexpr uint32_t kEndBits = UINT32_C(0x80000000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Fill the batch with consecutive bit patterns starting at `base`.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      samples[lane] = fp32_from_bits(base + lane);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1375
// Walks the bit patterns [0x7FC00000, 0x80000000) with the sign bit OR-ed in --
// exponent all ones, mantissa bit 22 set, sign set -- in kBlockSize-sized batches
// and requires the scalar_addsub kernel to match std::nearbyint bit-for-bit.
TEST(ROUNDNE__SCALAR_ADDSUB, negative_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7FC00000);
  constexpr uint32_t kEndBits = UINT32_C(0x80000000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    // Iterate over the positive patterns and set the sign bit on each one.
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      const uint32_t magnitude_bits = base + lane;
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | magnitude_bits);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1393
// Walks the bit patterns [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 so
// that +infinity is never used -- sign clear, exponent all ones, mantissa bit 22
// clear, mantissa non-zero. The scalar_addsub kernel must agree with std::nearbyint
// on every bit except bit 22, which is masked out of both sides of the comparison
// (presumably so the quiet/signaling flag of the resulting NaN is not checked here).
TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (+infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(nan_bits);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1411
// Walks the bit patterns [0x7F800000, 0x7FC00000), clamped to at least 0x7F800001 so
// that infinity is never used, with the sign bit OR-ed in -- sign set, exponent all
// ones, mantissa bit 22 clear, mantissa non-zero. The scalar_addsub kernel must agree
// with std::nearbyint on every bit except bit 22, which is masked out of both sides
// of the comparison (presumably so the quiet/signaling flag of the resulting NaN is
// not checked here).
TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | nan_bits);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1429
// Same inputs as the positive_snan sweep (bit patterns [0x7F800000, 0x7FC00000)
// clamped to at least 0x7F800001), but here the scalar_addsub kernel output must
// equal std::nearbyint on all 32 bits, with nothing masked out.
TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan_to_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (+infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(nan_bits);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1447
// Same inputs as the negative_snan sweep (bit patterns [0x7F800000, 0x7FC00000)
// clamped to at least 0x7F800001, with the sign bit OR-ed in), but here the
// scalar_addsub kernel output must equal std::nearbyint on all 32 bits, with
// nothing masked out.
TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan_to_qnan) {
  using FloatBuffer = std::vector<float, AlignedAllocator<float, 64>>;
  FloatBuffer samples(kBlockSize);
  FloatBuffer outputs(kBlockSize);

  constexpr uint32_t kFirstBits = UINT32_C(0x7F800000);
  constexpr uint32_t kEndBits = UINT32_C(0x7FC00000);  // exclusive upper bound
  for (uint32_t base = kFirstBits; base < kEndBits; base += kBlockSize) {
    for (uint32_t lane = 0; lane < kBlockSize; ++lane) {
      // The clamp replaces 0x7F800000 (infinity) with the smallest NaN pattern.
      const uint32_t nan_bits = std::max<uint32_t>(base + lane, UINT32_C(0x7F800001));
      samples[lane] = fp32_from_bits(UINT32_C(0x80000000) | nan_bits);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), samples.data(), outputs.data());

    for (uint32_t i = 0; i < kBlockSize; ++i) {
      const uint32_t sample_bits = fp32_to_bits(samples[i]);
      const uint32_t reference_output = fp32_to_bits(std::nearbyint(samples[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << sample_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
1465