// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;

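// Each test below sweeps a contiguous range of IEEE-754 binary32 bit patterns
// in blocks of kBlockSize values, runs the vectorized round-towards-zero
// kernel on the block, and compares every result bit-for-bit against
// std::trunc. The *_snan tests mask out the quiet bit (0x00400000) before
// comparing, so a kernel may quieten signaling NaNs without failing.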
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDZ__SSE2_CVT, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDZ__SSE41, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE41, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE41, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDZ__NEON_CVT,positive_normal)851 TEST(ROUNDZ__NEON_CVT, positive_normal) {
852 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
853 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
854 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
855 for (uint32_t i = 0; i < kBlockSize; i++) {
856 inputs[i] = fp32_from_bits(n + i);
857 }
858 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
859 for (uint32_t i = 0; i < kBlockSize; i++) {
860 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
861 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
862 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
863 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
864 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
865 }
866 }
867 }
868
TEST(ROUNDZ__NEON_CVT,negative_normal)869 TEST(ROUNDZ__NEON_CVT, negative_normal) {
870 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
873 for (uint32_t i = 0; i < kBlockSize; i++) {
874 inputs[i] = fp32_from_bits(n + i);
875 }
876 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
879 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
880 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
881 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
882 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
883 }
884 }
885 }
886
TEST(ROUNDZ__NEON_CVT,positive_integral)887 TEST(ROUNDZ__NEON_CVT, positive_integral) {
888 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
891 for (uint32_t i = 0; i < kBlockSize; i++) {
892 inputs[i] = fp32_from_bits(n + i);
893 }
894 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895 for (uint32_t i = 0; i < kBlockSize; i++) {
896 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
897 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
898 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
899 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
900 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
901 }
902 }
903 }
904
TEST(ROUNDZ__NEON_CVT,negative_integral)905 TEST(ROUNDZ__NEON_CVT, negative_integral) {
906 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
907 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
908 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
909 for (uint32_t i = 0; i < kBlockSize; i++) {
910 inputs[i] = fp32_from_bits(n + i);
911 }
912 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913 for (uint32_t i = 0; i < kBlockSize; i++) {
914 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
915 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
916 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
917 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
918 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
919 }
920 }
921 }
922
TEST(ROUNDZ__NEON_CVT,positive_infinity)923 TEST(ROUNDZ__NEON_CVT, positive_infinity) {
924 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
925 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
926 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
927 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
928 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
929 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
930 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
931 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
932 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
933 }
934
TEST(ROUNDZ__NEON_CVT,negative_infinity)935 TEST(ROUNDZ__NEON_CVT, negative_infinity) {
936 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
938 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
939 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
941 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
945 }
946
TEST(ROUNDZ__NEON_CVT,positive_qnan)947 TEST(ROUNDZ__NEON_CVT, positive_qnan) {
948 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
949 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
950 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
951 for (uint32_t i = 0; i < kBlockSize; i++) {
952 inputs[i] = fp32_from_bits(n + i);
953 }
954 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
955 for (uint32_t i = 0; i < kBlockSize; i++) {
956 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
957 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
958 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
959 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
960 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
961 }
962 }
963 }
964
TEST(ROUNDZ__NEON_CVT,negative_qnan)965 TEST(ROUNDZ__NEON_CVT, negative_qnan) {
966 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
967 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
968 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
969 for (uint32_t i = 0; i < kBlockSize; i++) {
970 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
971 }
972 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
973 for (uint32_t i = 0; i < kBlockSize; i++) {
974 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
975 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
976 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
977 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
978 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
979 }
980 }
981 }
982
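// Signaling NaNs occupy 0x7F800001-0x7FBFFFFF; the std::max guard below skips the infinity
// encoding at 0x7F800000. The 0xFFBFFFFF mask clears bit 22 (the quiet bit), so these checks
// pass whether or not the kernel quiets the NaN, as long as the sign and payload are preserved.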
983 TEST(ROUNDZ__NEON_CVT, positive_snan) {
984 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
985 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
986 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
987 for (uint32_t i = 0; i < kBlockSize; i++) {
988 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
989 }
990 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
991 for (uint32_t i = 0; i < kBlockSize; i++) {
992 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
993 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
994 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
995 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
996 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
997 }
998 }
999 }
1000
1001 TEST(ROUNDZ__NEON_CVT, negative_snan) {
1002 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1003 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1004 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1005 for (uint32_t i = 0; i < kBlockSize; i++) {
1006 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1007 }
1008 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1009 for (uint32_t i = 0; i < kBlockSize; i++) {
1010 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1011 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1012 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1013 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1014 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1015 }
1016 }
1017 }
1018
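// The *_snan_to_qnan cases drop that mask and demand a bit-exact match with std::trunc, which
// on typical hosts returns the signaling NaN with its quiet bit set. They are disabled for the
// NEON_CVT variant, presumably because this kernel is not guaranteed to quiet signaling NaNs.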
1019 TEST(ROUNDZ__NEON_CVT, DISABLED_positive_snan_to_qnan) {
1020 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1021 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1022 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1023 for (uint32_t i = 0; i < kBlockSize; i++) {
1024 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1025 }
1026 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1027 for (uint32_t i = 0; i < kBlockSize; i++) {
1028 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1029 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1030 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1031 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1032 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1033 }
1034 }
1035 }
1036
1037 TEST(ROUNDZ__NEON_CVT, DISABLED_negative_snan_to_qnan) {
1038 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1041 for (uint32_t i = 0; i < kBlockSize; i++) {
1042 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1043 }
1044 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045 for (uint32_t i = 0; i < kBlockSize; i++) {
1046 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1047 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1048 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1051 }
1052 }
1053 }
1054 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
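// The ROUNDZ__NEONV8 tests cover xnn_math_f32_roundz__neonv8, which on ARMv8 presumably lowers
// to the dedicated round-toward-zero instruction (FRINTZ, i.e. vrndq_f32). The sweeps mirror the
// other variants: normals below 2^24, already-integral values, infinities, and NaNs.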
1056 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
1057 TEST(ROUNDZ__NEONV8, positive_normal) {
1058 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1059 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1060 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1061 for (uint32_t i = 0; i < kBlockSize; i++) {
1062 inputs[i] = fp32_from_bits(n + i);
1063 }
1064 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1065 for (uint32_t i = 0; i < kBlockSize; i++) {
1066 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1067 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1068 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1069 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1070 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1071 }
1072 }
1073 }
1074
1075 TEST(ROUNDZ__NEONV8, negative_normal) {
1076 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1077 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1078 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1079 for (uint32_t i = 0; i < kBlockSize; i++) {
1080 inputs[i] = fp32_from_bits(n + i);
1081 }
1082 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1083 for (uint32_t i = 0; i < kBlockSize; i++) {
1084 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1085 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1086 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1087 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1088 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1089 }
1090 }
1091 }
1092
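// 0x4B800000 encodes 2^24; from that magnitude upward every finite float is already an integer,
// so truncation must return the input unchanged. The integral sweeps run from 2^24 (and -2^24)
// up to the largest finite values just below the infinity encodings.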
1093 TEST(ROUNDZ__NEONV8, positive_integral) {
1094 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1095 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1096 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1097 for (uint32_t i = 0; i < kBlockSize; i++) {
1098 inputs[i] = fp32_from_bits(n + i);
1099 }
1100 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1101 for (uint32_t i = 0; i < kBlockSize; i++) {
1102 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1103 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1104 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1105 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1106 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1107 }
1108 }
1109 }
1110
1111 TEST(ROUNDZ__NEONV8, negative_integral) {
1112 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1113 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1114 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1115 for (uint32_t i = 0; i < kBlockSize; i++) {
1116 inputs[i] = fp32_from_bits(n + i);
1117 }
1118 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1119 for (uint32_t i = 0; i < kBlockSize; i++) {
1120 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1121 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1122 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1123 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1124 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1125 }
1126 }
1127 }
1128
1129 TEST(ROUNDZ__NEONV8, positive_infinity) {
1130 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1131 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1132 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1133 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1134 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1135 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1136 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1137 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1138 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1139 }
1140
1141 TEST(ROUNDZ__NEONV8, negative_infinity) {
1142 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1143 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1144 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1145 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1146 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1147 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1148 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1149 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1150 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1151 }
1152
1153 TEST(ROUNDZ__NEONV8, positive_qnan) {
1154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1156 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1157 for (uint32_t i = 0; i < kBlockSize; i++) {
1158 inputs[i] = fp32_from_bits(n + i);
1159 }
1160 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1161 for (uint32_t i = 0; i < kBlockSize; i++) {
1162 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1163 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1164 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1165 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1166 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1167 }
1168 }
1169 }
1170
1171 TEST(ROUNDZ__NEONV8, negative_qnan) {
1172 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1173 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1174 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1175 for (uint32_t i = 0; i < kBlockSize; i++) {
1176 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1177 }
1178 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179 for (uint32_t i = 0; i < kBlockSize; i++) {
1180 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1181 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1182 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1183 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1184 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1185 }
1186 }
1187 }
1188
1189 TEST(ROUNDZ__NEONV8, positive_snan) {
1190 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1191 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1192 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1193 for (uint32_t i = 0; i < kBlockSize; i++) {
1194 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1195 }
1196 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1197 for (uint32_t i = 0; i < kBlockSize; i++) {
1198 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1199 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1200 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1201 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1202 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1203 }
1204 }
1205 }
1206
1207 TEST(ROUNDZ__NEONV8, negative_snan) {
1208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1210 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1211 for (uint32_t i = 0; i < kBlockSize; i++) {
1212 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1213 }
1214 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1215 for (uint32_t i = 0; i < kBlockSize; i++) {
1216 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1217 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1218 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1219 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1220 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1221 }
1222 }
1223 }
1224
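// Unlike the NEON_CVT variant above (and the WASMSIMD_CVT variant below), the snan_to_qnan
// cases are enabled here: the NEONV8 kernel is expected to match std::trunc bit-for-bit even
// for signaling-NaN inputs.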
1225 TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
1226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1228 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1229 for (uint32_t i = 0; i < kBlockSize; i++) {
1230 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1231 }
1232 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1233 for (uint32_t i = 0; i < kBlockSize; i++) {
1234 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1235 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1236 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1237 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1238 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1239 }
1240 }
1241 }
1242
1243 TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
1244 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1245 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1246 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1247 for (uint32_t i = 0; i < kBlockSize; i++) {
1248 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1249 }
1250 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1251 for (uint32_t i = 0; i < kBlockSize; i++) {
1252 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1253 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1254 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1255 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1256 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1257 }
1258 }
1259 }
1260 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1261
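// ROUNDZ__WASMSIMD_ADDSUB exercises xnn_math_f32_roundz__wasmsimd_addsub. Judging by the name,
// this variant presumably uses the magic-number trick: for |x| < 2^23, (|x| + 0x1.0p23f) - 0x1.0p23f
// snaps |x| to an integer in the current rounding mode (e.g. 1.7f becomes 2.0f under
// round-to-nearest), and the kernel must then correct the result toward zero and restore the sign.
// The tests only rely on the observable contract: bit-exact agreement with std::trunc.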
1262 #if XNN_ARCH_WASMSIMD
1263 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_normal) {
1264 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1265 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1266 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1267 for (uint32_t i = 0; i < kBlockSize; i++) {
1268 inputs[i] = fp32_from_bits(n + i);
1269 }
1270 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1273 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1274 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1275 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1276 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1277 }
1278 }
1279 }
1280
1281 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_normal) {
1282 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1283 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1284 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1285 for (uint32_t i = 0; i < kBlockSize; i++) {
1286 inputs[i] = fp32_from_bits(n + i);
1287 }
1288 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1289 for (uint32_t i = 0; i < kBlockSize; i++) {
1290 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1291 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1292 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1293 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1294 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1295 }
1296 }
1297 }
1298
1299 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_integral) {
1300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1301 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1302 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1303 for (uint32_t i = 0; i < kBlockSize; i++) {
1304 inputs[i] = fp32_from_bits(n + i);
1305 }
1306 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1307 for (uint32_t i = 0; i < kBlockSize; i++) {
1308 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1309 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1311 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1312 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1313 }
1314 }
1315 }
1316
1317 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_integral) {
1318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1320 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1321 for (uint32_t i = 0; i < kBlockSize; i++) {
1322 inputs[i] = fp32_from_bits(n + i);
1323 }
1324 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1325 for (uint32_t i = 0; i < kBlockSize; i++) {
1326 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1327 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1328 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1329 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1330 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1331 }
1332 }
1333 }
1334
1335 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_infinity) {
1336 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1337 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1338 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1339 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1340 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1341 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1343 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1344 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1345 }
1346
1347 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_infinity) {
1348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1350 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1351 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1352 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1353 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1354 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1355 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1356 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1357 }
1358
1359 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_qnan) {
1360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1363 for (uint32_t i = 0; i < kBlockSize; i++) {
1364 inputs[i] = fp32_from_bits(n + i);
1365 }
1366 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1367 for (uint32_t i = 0; i < kBlockSize; i++) {
1368 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1369 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1371 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1372 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1373 }
1374 }
1375 }
1376
1377 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_qnan) {
1378 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1379 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1380 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1381 for (uint32_t i = 0; i < kBlockSize; i++) {
1382 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1383 }
1384 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1385 for (uint32_t i = 0; i < kBlockSize; i++) {
1386 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1387 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1388 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1389 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1390 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1391 }
1392 }
1393 }
1394
1395 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan) {
1396 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1397 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1398 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1399 for (uint32_t i = 0; i < kBlockSize; i++) {
1400 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1401 }
1402 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1403 for (uint32_t i = 0; i < kBlockSize; i++) {
1404 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1405 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1406 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1407 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1408 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1409 }
1410 }
1411 }
1412
1413 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan) {
1414 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1417 for (uint32_t i = 0; i < kBlockSize; i++) {
1418 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1419 }
1420 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421 for (uint32_t i = 0; i < kBlockSize; i++) {
1422 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1423 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1424 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1427 }
1428 }
1429 }
1430
1431 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1432 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1435 for (uint32_t i = 0; i < kBlockSize; i++) {
1436 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1437 }
1438 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439 for (uint32_t i = 0; i < kBlockSize; i++) {
1440 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1441 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1442 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1445 }
1446 }
1447 }
1448
1449 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1450 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1453 for (uint32_t i = 0; i < kBlockSize; i++) {
1454 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1455 }
1456 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1457 for (uint32_t i = 0; i < kBlockSize; i++) {
1458 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1459 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1460 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1461 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1462 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1463 }
1464 }
1465 }
1466 #endif // XNN_ARCH_WASMSIMD
1467
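// ROUNDZ__WASMSIMD_CVT exercises xnn_math_f32_roundz__wasmsimd_cvt, presumably an
// int32-convert-and-back implementation. The sweeps mirror the ADDSUB variant above; its
// snan_to_qnan cases further down are disabled.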
1468 #if XNN_ARCH_WASMSIMD
1469 TEST(ROUNDZ__WASMSIMD_CVT, positive_normal) {
1470 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1471 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1472 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1473 for (uint32_t i = 0; i < kBlockSize; i++) {
1474 inputs[i] = fp32_from_bits(n + i);
1475 }
1476 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1477 for (uint32_t i = 0; i < kBlockSize; i++) {
1478 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1479 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1480 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1481 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1482 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1483 }
1484 }
1485 }
1486
1487 TEST(ROUNDZ__WASMSIMD_CVT, negative_normal) {
1488 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1489 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1490 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1491 for (uint32_t i = 0; i < kBlockSize; i++) {
1492 inputs[i] = fp32_from_bits(n + i);
1493 }
1494 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1495 for (uint32_t i = 0; i < kBlockSize; i++) {
1496 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1497 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1498 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1499 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1500 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1501 }
1502 }
1503 }
1504
1505 TEST(ROUNDZ__WASMSIMD_CVT, positive_integral) {
1506 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1507 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1508 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1509 for (uint32_t i = 0; i < kBlockSize; i++) {
1510 inputs[i] = fp32_from_bits(n + i);
1511 }
1512 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1513 for (uint32_t i = 0; i < kBlockSize; i++) {
1514 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1515 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1516 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1517 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1518 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1519 }
1520 }
1521 }
1522
1523 TEST(ROUNDZ__WASMSIMD_CVT, negative_integral) {
1524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1526 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1527 for (uint32_t i = 0; i < kBlockSize; i++) {
1528 inputs[i] = fp32_from_bits(n + i);
1529 }
1530 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1531 for (uint32_t i = 0; i < kBlockSize; i++) {
1532 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1533 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1534 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1535 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1536 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1537 }
1538 }
1539 }
1540
1541 TEST(ROUNDZ__WASMSIMD_CVT, positive_infinity) {
1542 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1543 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1544 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1545 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1546 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1547 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1548 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1549 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1550 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1551 }
1552
1553 TEST(ROUNDZ__WASMSIMD_CVT, negative_infinity) {
1554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1556 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1557 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1558 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1559 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1560 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1561 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1562 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1563 }
1564
1565 TEST(ROUNDZ__WASMSIMD_CVT, positive_qnan) {
1566 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1567 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1568 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1569 for (uint32_t i = 0; i < kBlockSize; i++) {
1570 inputs[i] = fp32_from_bits(n + i);
1571 }
1572 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1573 for (uint32_t i = 0; i < kBlockSize; i++) {
1574 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1575 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1576 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1577 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1578 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1579 }
1580 }
1581 }
1582
1583 TEST(ROUNDZ__WASMSIMD_CVT, negative_qnan) {
1584 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1585 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1586 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1587 for (uint32_t i = 0; i < kBlockSize; i++) {
1588 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1589 }
1590 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1591 for (uint32_t i = 0; i < kBlockSize; i++) {
1592 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1593 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1594 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1595 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1596 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1597 }
1598 }
1599 }
1600
1601 TEST(ROUNDZ__WASMSIMD_CVT, positive_snan) {
1602 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1603 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1604 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1605 for (uint32_t i = 0; i < kBlockSize; i++) {
1606 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1607 }
1608 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1609 for (uint32_t i = 0; i < kBlockSize; i++) {
1610 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1611 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1612 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1613 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1614 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1615 }
1616 }
1617 }
1618
1619 TEST(ROUNDZ__WASMSIMD_CVT, negative_snan) {
1620 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1621 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1622 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1623 for (uint32_t i = 0; i < kBlockSize; i++) {
1624 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1625 }
1626 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1627 for (uint32_t i = 0; i < kBlockSize; i++) {
1628 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1629 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1630 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1631 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1632 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1633 }
1634 }
1635 }
1636
1637 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_positive_snan_to_qnan) {
1638 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1639 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1640 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1641 for (uint32_t i = 0; i < kBlockSize; i++) {
1642 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1643 }
1644 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1645 for (uint32_t i = 0; i < kBlockSize; i++) {
1646 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1647 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1648 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1649 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1650 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1651 }
1652 }
1653 }
1654
1655 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_negative_snan_to_qnan) {
1656 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1657 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1658 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1659 for (uint32_t i = 0; i < kBlockSize; i++) {
1660 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1661 }
1662 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1663 for (uint32_t i = 0; i < kBlockSize; i++) {
1664 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1665 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1666 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1667 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1668 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1669 }
1670 }
1671 }
1672 #endif // XNN_ARCH_WASMSIMD
1673
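// The scalar variants are not guarded by any architecture #if, so they run everywhere.
// ROUNDZ__SCALAR_ADDSUB covers xnn_math_f32_roundz__scalar_addsub, presumably the scalar form
// of the add/subtract rounding trick described above.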
1674 TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
1675 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1676 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1677 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1678 for (uint32_t i = 0; i < kBlockSize; i++) {
1679 inputs[i] = fp32_from_bits(n + i);
1680 }
1681 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1682 for (uint32_t i = 0; i < kBlockSize; i++) {
1683 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1684 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1685 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1686 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1687 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1688 }
1689 }
1690 }
1691
1692 TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
1693 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1694 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1695 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1696 for (uint32_t i = 0; i < kBlockSize; i++) {
1697 inputs[i] = fp32_from_bits(n + i);
1698 }
1699 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1700 for (uint32_t i = 0; i < kBlockSize; i++) {
1701 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1702 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1703 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1704 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1705 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1706 }
1707 }
1708 }
1709
1710 TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
1711 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1712 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1713 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1714 for (uint32_t i = 0; i < kBlockSize; i++) {
1715 inputs[i] = fp32_from_bits(n + i);
1716 }
1717 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1718 for (uint32_t i = 0; i < kBlockSize; i++) {
1719 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1720 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1721 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1722 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1723 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1724 }
1725 }
1726 }
1727
1728 TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
1729 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1730 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1731 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1732 for (uint32_t i = 0; i < kBlockSize; i++) {
1733 inputs[i] = fp32_from_bits(n + i);
1734 }
1735 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1736 for (uint32_t i = 0; i < kBlockSize; i++) {
1737 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1738 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1739 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1740 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1741 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1742 }
1743 }
1744 }
1745
1746 TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
1747 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1748 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1749 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1750 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1751 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1752 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1753 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1754 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1755 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1756 }
1757
1758 TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
1759 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1760 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1761 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1762 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1763 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1764 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1765 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1766 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1767 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1768 }
1769
1770 TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
1771 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1772 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1773 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1774 for (uint32_t i = 0; i < kBlockSize; i++) {
1775 inputs[i] = fp32_from_bits(n + i);
1776 }
1777 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1778 for (uint32_t i = 0; i < kBlockSize; i++) {
1779 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1780 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1781 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1782 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1783 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1784 }
1785 }
1786 }
1787
1788 TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
1789 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1790 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1791 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1792 for (uint32_t i = 0; i < kBlockSize; i++) {
1793 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1794 }
1795 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1796 for (uint32_t i = 0; i < kBlockSize; i++) {
1797 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1798 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1799 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1800 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1801 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1802 }
1803 }
1804 }
1805
1806 TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
1807 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1808 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1809 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1810 for (uint32_t i = 0; i < kBlockSize; i++) {
1811 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1812 }
1813 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1814 for (uint32_t i = 0; i < kBlockSize; i++) {
1815 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1816 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1817 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1818 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1819 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1820 }
1821 }
1822 }
1823
1824 TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
1825 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1826 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1827 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1828 for (uint32_t i = 0; i < kBlockSize; i++) {
1829 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1830 }
1831 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1832 for (uint32_t i = 0; i < kBlockSize; i++) {
1833 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1834 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1835 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1836 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1837 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1838 }
1839 }
1840 }
1841
1842 TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
1843 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1844 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1845 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1846 for (uint32_t i = 0; i < kBlockSize; i++) {
1847 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1848 }
1849 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1850 for (uint32_t i = 0; i < kBlockSize; i++) {
1851 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1852 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1853 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1854 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1855 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1856 }
1857 }
1858 }
1859
1860 TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
1861 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1862 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1863 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1864 for (uint32_t i = 0; i < kBlockSize; i++) {
1865 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1866 }
1867 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1868 for (uint32_t i = 0; i < kBlockSize; i++) {
1869 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1870 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1871 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1872 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1873 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1874 }
1875 }
1876 }
1877
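// ROUNDZ__SCALAR_CVT covers xnn_math_f32_roundz__scalar_cvt, presumably truncating by a cast to
// a signed integer and back for magnitudes where that conversion is exact. The expected results
// are unchanged: bit-exact agreement with std::trunc across the same input sweeps.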
1878 TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
1879 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1880 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1881 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1882 for (uint32_t i = 0; i < kBlockSize; i++) {
1883 inputs[i] = fp32_from_bits(n + i);
1884 }
1885 xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1886 for (uint32_t i = 0; i < kBlockSize; i++) {
1887 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1888 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1889 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1890 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1891 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1892 }
1893 }
1894 }
1895
1896 TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
1897 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1898 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1899 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1900 for (uint32_t i = 0; i < kBlockSize; i++) {
1901 inputs[i] = fp32_from_bits(n + i);
1902 }
1903 xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1904 for (uint32_t i = 0; i < kBlockSize; i++) {
1905 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1906 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1907 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1908 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1909 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1910 }
1911 }
1912 }
1913
1914 TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
1915 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1916 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1917 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1918 for (uint32_t i = 0; i < kBlockSize; i++) {
1919 inputs[i] = fp32_from_bits(n + i);
1920 }
1921 xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1922 for (uint32_t i = 0; i < kBlockSize; i++) {
1923 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1924 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1925 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1926 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1927 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1928 }
1929 }
1930 }
1931
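// Negative values of -2^24 and below (bit patterns 0xCB800000-0xFF7FFFFF): already
// integral, so truncation must return them unchanged.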
TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

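// +infinity must pass through truncation unchanged.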
TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

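// -infinity must pass through truncation unchanged.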
TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

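// Positive quiet NaNs (bit patterns 0x7FC00000-0x7FFFFFFF): output must match
// std::trunc bit-for-bit.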
TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

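// Negative quiet NaNs (bit patterns 0xFFC00000-0xFFFFFFFF): output must match
// std::trunc bit-for-bit.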
TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

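// Positive signaling NaNs (bit patterns 0x7F800001-0x7FBFFFFF): any NaN result is
// acceptable, so the quiet bit (0x00400000) is masked out before comparing against std::trunc.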
TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

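// Negative signaling NaNs (bit patterns 0xFF800001-0xFFBFFFFF): the quiet bit is masked
// out before comparing, so the kernel may return either a signaling or a quiet NaN.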
TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

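// Disabled: unlike the masked check above, this asserts that positive signaling NaNs are
// quieted exactly as std::trunc quiets them, a stronger guarantee the CVT variant does not
// currently make (hence the DISABLED_ prefix).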
TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

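// Disabled: same as above, but for signaling NaNs with the sign bit set.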
TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}
