// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>
23 
24 
// Number of float elements processed per call to the kernel under test.
constexpr int kBlockSize = 1024;
26 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Round-up (towards +inf) of +0.0f must match std::ceil bit-exactly.
  TEST(ROUNDU__SSE_ADDSUB, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    // Fill with a float literal: filling with UINT32_C(0x00000000) relied on
    // integer-to-float conversion rather than bit reinterpretation.
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Round-up of -0.0f must preserve the sign of zero, matching std::ceil.
  TEST(ROUNDU__SSE_ADDSUB, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    // Bug fix: the old fill value UINT32_C(0x80000000) was converted from
    // integer to float, producing 2147483648.0f — not negative zero — so this
    // test never actually exercised -0.0f.
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Exhaustively check every positive subnormal input against std::ceil.
  TEST(ROUNDU__SSE_ADDSUB, positive_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Skip +0.0 (bit pattern 0) by clamping to the smallest subnormal.
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Exhaustively check every negative subnormal input against std::ceil.
  TEST(ROUNDU__SSE_ADDSUB, negative_subnormal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Skip -0.0 by clamping to the smallest negative subnormal.
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Positive normal, non-integral range: [2**-126, 2**24).
  TEST(ROUNDU__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Negative normal, non-integral range: (-2**24, -2**-126].
  TEST(ROUNDU__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Positive values >= 2**24 are already integral; rounding must be identity.
  TEST(ROUNDU__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Negative values <= -2**24 are already integral; rounding must be identity.
  TEST(ROUNDU__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // +inf must pass through unchanged.
  TEST(ROUNDU__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // -inf must pass through unchanged.
  TEST(ROUNDU__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Positive quiet NaNs must be propagated bit-exactly.
  TEST(ROUNDU__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(n + i);
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Negative quiet NaNs must be propagated bit-exactly.
  TEST(ROUNDU__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Positive signaling NaNs: compare everything except the quiet bit
  // (0x00400000), which the kernel may set when the sNaN is quieted.
  TEST(ROUNDU__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        // Skip +inf (bit pattern 0x7F800000) by clamping to the first sNaN.
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Negative signaling NaNs: compare everything except the quiet bit.
  TEST(ROUNDU__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Positive sNaN inputs must produce the same qNaN as std::ceil, bit-exactly.
  TEST(ROUNDU__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Negative sNaN inputs must produce the same qNaN as std::ceil, bit-exactly.
  TEST(ROUNDU__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
292 
293 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDU__SSE2_CVT,positive_zero)294   TEST(ROUNDU__SSE2_CVT, positive_zero) {
295     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
296     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
297     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
298     xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
299     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
300     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
301       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
302       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
303       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
304   }
305 
TEST(ROUNDU__SSE2_CVT,negative_zero)306   TEST(ROUNDU__SSE2_CVT, negative_zero) {
307     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
308     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
309     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
310     xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
311     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
312     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
313       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
314       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
315       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
316   }
317 
TEST(ROUNDU__SSE2_CVT,positive_subnormal)318   TEST(ROUNDU__SSE2_CVT, positive_subnormal) {
319     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
320     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
321     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
322       for (uint32_t i = 0; i < kBlockSize; i++) {
323         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
324       }
325       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
326       for (uint32_t i = 0; i < kBlockSize; i++) {
327         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
328         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
329           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
330           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
331           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
332       }
333     }
334   }
335 
TEST(ROUNDU__SSE2_CVT,negative_subnormal)336   TEST(ROUNDU__SSE2_CVT, negative_subnormal) {
337     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
338     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
339     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
340       for (uint32_t i = 0; i < kBlockSize; i++) {
341         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
342       }
343       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
344       for (uint32_t i = 0; i < kBlockSize; i++) {
345         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
346         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
347           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
348           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
349           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
350       }
351     }
352   }
353 
TEST(ROUNDU__SSE2_CVT,positive_normal)354   TEST(ROUNDU__SSE2_CVT, positive_normal) {
355     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
356     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
357     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
358       for (uint32_t i = 0; i < kBlockSize; i++) {
359         inputs[i] = uint32_as_float(n + i);
360       }
361       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
362       for (uint32_t i = 0; i < kBlockSize; i++) {
363         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
364         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
365           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
366           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
367           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
368       }
369     }
370   }
371 
TEST(ROUNDU__SSE2_CVT,negative_normal)372   TEST(ROUNDU__SSE2_CVT, negative_normal) {
373     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
374     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
375     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
376       for (uint32_t i = 0; i < kBlockSize; i++) {
377         inputs[i] = uint32_as_float(n + i);
378       }
379       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
380       for (uint32_t i = 0; i < kBlockSize; i++) {
381         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
382         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
383           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
384           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
385           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
386       }
387     }
388   }
389 
TEST(ROUNDU__SSE2_CVT,positive_integral)390   TEST(ROUNDU__SSE2_CVT, positive_integral) {
391     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
392     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
393     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
394       for (uint32_t i = 0; i < kBlockSize; i++) {
395         inputs[i] = uint32_as_float(n + i);
396       }
397       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
398       for (uint32_t i = 0; i < kBlockSize; i++) {
399         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
400         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
401           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
402           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
403           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
404       }
405     }
406   }
407 
TEST(ROUNDU__SSE2_CVT,negative_integral)408   TEST(ROUNDU__SSE2_CVT, negative_integral) {
409     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
410     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
411     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
412       for (uint32_t i = 0; i < kBlockSize; i++) {
413         inputs[i] = uint32_as_float(n + i);
414       }
415       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
416       for (uint32_t i = 0; i < kBlockSize; i++) {
417         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
418         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
419           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
420           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
421           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
422       }
423     }
424   }
425 
TEST(ROUNDU__SSE2_CVT,positive_infinity)426   TEST(ROUNDU__SSE2_CVT, positive_infinity) {
427     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
428     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
429     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
430     xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
431     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
432     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
433       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
434       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
435       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
436   }
437 
TEST(ROUNDU__SSE2_CVT,negative_infinity)438   TEST(ROUNDU__SSE2_CVT, negative_infinity) {
439     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
440     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
441     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
442     xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
443     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
444     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
445       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
446       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
447       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
448   }
449 
TEST(ROUNDU__SSE2_CVT,positive_qnan)450   TEST(ROUNDU__SSE2_CVT, positive_qnan) {
451     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
452     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
453     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
454       for (uint32_t i = 0; i < kBlockSize; i++) {
455         inputs[i] = uint32_as_float(n + i);
456       }
457       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
458       for (uint32_t i = 0; i < kBlockSize; i++) {
459         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
460         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
461           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
462           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
463           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
464       }
465     }
466   }
467 
TEST(ROUNDU__SSE2_CVT,negative_qnan)468   TEST(ROUNDU__SSE2_CVT, negative_qnan) {
469     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
470     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
471     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
472       for (uint32_t i = 0; i < kBlockSize; i++) {
473         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
474       }
475       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
476       for (uint32_t i = 0; i < kBlockSize; i++) {
477         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
478         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
479           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
480           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
481           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
482       }
483     }
484   }
485 
TEST(ROUNDU__SSE2_CVT,positive_snan)486   TEST(ROUNDU__SSE2_CVT, positive_snan) {
487     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
488     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
489     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
490       for (uint32_t i = 0; i < kBlockSize; i++) {
491         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
492       }
493       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
494       for (uint32_t i = 0; i < kBlockSize; i++) {
495         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
496         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
497           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
498           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
499           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
500       }
501     }
502   }
503 
TEST(ROUNDU__SSE2_CVT,negative_snan)504   TEST(ROUNDU__SSE2_CVT, negative_snan) {
505     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
506     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
507     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
508       for (uint32_t i = 0; i < kBlockSize; i++) {
509         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
510       }
511       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
512       for (uint32_t i = 0; i < kBlockSize; i++) {
513         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
514         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
515           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
516           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
517           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
518       }
519     }
520   }
521 
TEST(ROUNDU__SSE2_CVT,positive_snan_to_qnan)522   TEST(ROUNDU__SSE2_CVT, positive_snan_to_qnan) {
523     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
524     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
525     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
526       for (uint32_t i = 0; i < kBlockSize; i++) {
527         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
528       }
529       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
530       for (uint32_t i = 0; i < kBlockSize; i++) {
531         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
532         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
533           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
534           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
535           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
536       }
537     }
538   }
539 
TEST(ROUNDU__SSE2_CVT,negative_snan_to_qnan)540   TEST(ROUNDU__SSE2_CVT, negative_snan_to_qnan) {
541     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
542     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
543     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
544       for (uint32_t i = 0; i < kBlockSize; i++) {
545         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
546       }
547       xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
548       for (uint32_t i = 0; i < kBlockSize; i++) {
549         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
550         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
551           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
552           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
553           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
554       }
555     }
556   }
557 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
558 
559 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDU__SSE41,positive_zero)560   TEST(ROUNDU__SSE41, positive_zero) {
561     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
562     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
563     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
564     xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
565     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
566     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
567       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
568       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
569       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
570   }
571 
TEST(ROUNDU__SSE41,negative_zero)572   TEST(ROUNDU__SSE41, negative_zero) {
573     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
574     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
575     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
576     xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
577     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
578     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
579       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
580       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
581       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
582   }
583 
TEST(ROUNDU__SSE41,positive_subnormal)584   TEST(ROUNDU__SSE41, positive_subnormal) {
585     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
586     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
587     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
588       for (uint32_t i = 0; i < kBlockSize; i++) {
589         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
590       }
591       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
592       for (uint32_t i = 0; i < kBlockSize; i++) {
593         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
594         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
595           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
596           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
597           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
598       }
599     }
600   }
601 
TEST(ROUNDU__SSE41,negative_subnormal)602   TEST(ROUNDU__SSE41, negative_subnormal) {
603     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
604     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
605     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
606       for (uint32_t i = 0; i < kBlockSize; i++) {
607         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
608       }
609       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
610       for (uint32_t i = 0; i < kBlockSize; i++) {
611         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
612         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
613           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
614           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
615           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
616       }
617     }
618   }
619 
TEST(ROUNDU__SSE41,positive_normal)620   TEST(ROUNDU__SSE41, positive_normal) {
621     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
622     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
623     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
624       for (uint32_t i = 0; i < kBlockSize; i++) {
625         inputs[i] = uint32_as_float(n + i);
626       }
627       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
628       for (uint32_t i = 0; i < kBlockSize; i++) {
629         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
630         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
631           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
632           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
633           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
634       }
635     }
636   }
637 
TEST(ROUNDU__SSE41,negative_normal)638   TEST(ROUNDU__SSE41, negative_normal) {
639     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
640     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
641     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
642       for (uint32_t i = 0; i < kBlockSize; i++) {
643         inputs[i] = uint32_as_float(n + i);
644       }
645       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
646       for (uint32_t i = 0; i < kBlockSize; i++) {
647         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
648         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
649           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
650           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
651           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
652       }
653     }
654   }
655 
TEST(ROUNDU__SSE41,positive_integral)656   TEST(ROUNDU__SSE41, positive_integral) {
657     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
658     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
659     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
660       for (uint32_t i = 0; i < kBlockSize; i++) {
661         inputs[i] = uint32_as_float(n + i);
662       }
663       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
664       for (uint32_t i = 0; i < kBlockSize; i++) {
665         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
666         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
667           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
668           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
669           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
670       }
671     }
672   }
673 
TEST(ROUNDU__SSE41,negative_integral)674   TEST(ROUNDU__SSE41, negative_integral) {
675     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
676     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
677     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
678       for (uint32_t i = 0; i < kBlockSize; i++) {
679         inputs[i] = uint32_as_float(n + i);
680       }
681       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
682       for (uint32_t i = 0; i < kBlockSize; i++) {
683         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
684         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
685           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
686           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
687           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
688       }
689     }
690   }
691 
TEST(ROUNDU__SSE41,positive_infinity)692   TEST(ROUNDU__SSE41, positive_infinity) {
693     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
694     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
695     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
696     xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
697     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
698     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
699       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
700       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
701       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
702   }
703 
TEST(ROUNDU__SSE41,negative_infinity)704   TEST(ROUNDU__SSE41, negative_infinity) {
705     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
706     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
707     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
708     xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
709     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
710     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
711       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
712       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
713       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
714   }
715 
TEST(ROUNDU__SSE41,positive_qnan)716   TEST(ROUNDU__SSE41, positive_qnan) {
717     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
718     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
719     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
720       for (uint32_t i = 0; i < kBlockSize; i++) {
721         inputs[i] = uint32_as_float(n + i);
722       }
723       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
724       for (uint32_t i = 0; i < kBlockSize; i++) {
725         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
726         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
727           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
728           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
729           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
730       }
731     }
732   }
733 
TEST(ROUNDU__SSE41,negative_qnan)734   TEST(ROUNDU__SSE41, negative_qnan) {
735     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
736     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
737     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
738       for (uint32_t i = 0; i < kBlockSize; i++) {
739         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
740       }
741       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
742       for (uint32_t i = 0; i < kBlockSize; i++) {
743         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
744         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
745           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
746           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
747           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
748       }
749     }
750   }
751 
TEST(ROUNDU__SSE41,positive_snan)752   TEST(ROUNDU__SSE41, positive_snan) {
753     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
754     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
755     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
756       for (uint32_t i = 0; i < kBlockSize; i++) {
757         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
758       }
759       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
760       for (uint32_t i = 0; i < kBlockSize; i++) {
761         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
762         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
763           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
764           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
765           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
766       }
767     }
768   }
769 
TEST(ROUNDU__SSE41,negative_snan)770   TEST(ROUNDU__SSE41, negative_snan) {
771     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
772     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
773     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
774       for (uint32_t i = 0; i < kBlockSize; i++) {
775         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
776       }
777       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
778       for (uint32_t i = 0; i < kBlockSize; i++) {
779         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
780         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
781           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
782           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
783           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
784       }
785     }
786   }
787 
TEST(ROUNDU__SSE41,positive_snan_to_qnan)788   TEST(ROUNDU__SSE41, positive_snan_to_qnan) {
789     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
790     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
791     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
792       for (uint32_t i = 0; i < kBlockSize; i++) {
793         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
794       }
795       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
796       for (uint32_t i = 0; i < kBlockSize; i++) {
797         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
798         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
799           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
800           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
801           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
802       }
803     }
804   }
805 
TEST(ROUNDU__SSE41,negative_snan_to_qnan)806   TEST(ROUNDU__SSE41, negative_snan_to_qnan) {
807     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
808     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
809     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
810       for (uint32_t i = 0; i < kBlockSize; i++) {
811         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
812       }
813       xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
814       for (uint32_t i = 0; i < kBlockSize; i++) {
815         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
816         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
817           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
818           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
819           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
820       }
821     }
822   }
823 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
824 
825 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEON_ADDSUB,positive_zero)826   TEST(ROUNDU__NEON_ADDSUB, positive_zero) {
827     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
828     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
829     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
830     xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
831     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
832     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
833       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
834       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
835       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
836   }
837 
TEST(ROUNDU__NEON_ADDSUB,negative_zero)838   TEST(ROUNDU__NEON_ADDSUB, negative_zero) {
839     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
840     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
841     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
842     xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
843     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
844     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
845       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
846       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
847       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
848   }
849 
TEST(ROUNDU__NEON_ADDSUB,positive_subnormal)850   TEST(ROUNDU__NEON_ADDSUB, positive_subnormal) {
851     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
852     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
853     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
854       for (uint32_t i = 0; i < kBlockSize; i++) {
855         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
856       }
857       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
858       for (uint32_t i = 0; i < kBlockSize; i++) {
859         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
860         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
861           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
862           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
863           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
864       }
865     }
866   }
867 
TEST(ROUNDU__NEON_ADDSUB,negative_subnormal)868   TEST(ROUNDU__NEON_ADDSUB, negative_subnormal) {
869     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
870     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
871     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
872       for (uint32_t i = 0; i < kBlockSize; i++) {
873         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
874       }
875       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
876       for (uint32_t i = 0; i < kBlockSize; i++) {
877         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
878         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
879           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
880           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
881           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
882       }
883     }
884   }
885 
TEST(ROUNDU__NEON_ADDSUB,positive_normal)886   TEST(ROUNDU__NEON_ADDSUB, positive_normal) {
887     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
888     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
889     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
890       for (uint32_t i = 0; i < kBlockSize; i++) {
891         inputs[i] = uint32_as_float(n + i);
892       }
893       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
894       for (uint32_t i = 0; i < kBlockSize; i++) {
895         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
896         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
897           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
898           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
899           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
900       }
901     }
902   }
903 
TEST(ROUNDU__NEON_ADDSUB,negative_normal)904   TEST(ROUNDU__NEON_ADDSUB, negative_normal) {
905     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
906     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
907     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
908       for (uint32_t i = 0; i < kBlockSize; i++) {
909         inputs[i] = uint32_as_float(n + i);
910       }
911       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
912       for (uint32_t i = 0; i < kBlockSize; i++) {
913         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
914         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
915           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
916           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
917           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
918       }
919     }
920   }
921 
TEST(ROUNDU__NEON_ADDSUB,positive_integral)922   TEST(ROUNDU__NEON_ADDSUB, positive_integral) {
923     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
924     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
925     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
926       for (uint32_t i = 0; i < kBlockSize; i++) {
927         inputs[i] = uint32_as_float(n + i);
928       }
929       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
930       for (uint32_t i = 0; i < kBlockSize; i++) {
931         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
932         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
933           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
934           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
935           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
936       }
937     }
938   }
939 
TEST(ROUNDU__NEON_ADDSUB,negative_integral)940   TEST(ROUNDU__NEON_ADDSUB, negative_integral) {
941     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
942     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
943     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
944       for (uint32_t i = 0; i < kBlockSize; i++) {
945         inputs[i] = uint32_as_float(n + i);
946       }
947       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
948       for (uint32_t i = 0; i < kBlockSize; i++) {
949         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
950         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
951           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
952           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
953           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
954       }
955     }
956   }
957 
TEST(ROUNDU__NEON_ADDSUB,positive_infinity)958   TEST(ROUNDU__NEON_ADDSUB, positive_infinity) {
959     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
960     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
961     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
962     xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
963     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
964     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
965       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
966       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
967       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
968   }
969 
TEST(ROUNDU__NEON_ADDSUB,negative_infinity)970   TEST(ROUNDU__NEON_ADDSUB, negative_infinity) {
971     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
972     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
973     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
974     xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
975     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
976     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
977       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
978       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
979       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
980   }
981 
TEST(ROUNDU__NEON_ADDSUB,positive_qnan)982   TEST(ROUNDU__NEON_ADDSUB, positive_qnan) {
983     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
984     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
985     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
986       for (uint32_t i = 0; i < kBlockSize; i++) {
987         inputs[i] = uint32_as_float(n + i);
988       }
989       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
990       for (uint32_t i = 0; i < kBlockSize; i++) {
991         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
992         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
993           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
994           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
995           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
996       }
997     }
998   }
999 
TEST(ROUNDU__NEON_ADDSUB,negative_qnan)1000   TEST(ROUNDU__NEON_ADDSUB, negative_qnan) {
1001     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1002     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1003     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1004       for (uint32_t i = 0; i < kBlockSize; i++) {
1005         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
1006       }
1007       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1008       for (uint32_t i = 0; i < kBlockSize; i++) {
1009         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1010         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1011           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1012           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1013           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1014       }
1015     }
1016   }
1017 
TEST(ROUNDU__NEON_ADDSUB,positive_snan)1018   TEST(ROUNDU__NEON_ADDSUB, positive_snan) {
1019     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1020     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1021     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1022       for (uint32_t i = 0; i < kBlockSize; i++) {
1023         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1024       }
1025       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1026       for (uint32_t i = 0; i < kBlockSize; i++) {
1027         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1028         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1029           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1030           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1031           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1032       }
1033     }
1034   }
1035 
TEST(ROUNDU__NEON_ADDSUB,negative_snan)1036   TEST(ROUNDU__NEON_ADDSUB, negative_snan) {
1037     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1038     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1039     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1040       for (uint32_t i = 0; i < kBlockSize; i++) {
1041         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1042       }
1043       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1044       for (uint32_t i = 0; i < kBlockSize; i++) {
1045         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1046         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1047           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1048           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1049           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1050       }
1051     }
1052   }
1053 
TEST(ROUNDU__NEON_ADDSUB,positive_snan_to_qnan)1054   TEST(ROUNDU__NEON_ADDSUB, positive_snan_to_qnan) {
1055     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1056     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1057     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1058       for (uint32_t i = 0; i < kBlockSize; i++) {
1059         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1060       }
1061       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1062       for (uint32_t i = 0; i < kBlockSize; i++) {
1063         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1064         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1065           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1066           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1067           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1068       }
1069     }
1070   }
1071 
TEST(ROUNDU__NEON_ADDSUB,negative_snan_to_qnan)1072   TEST(ROUNDU__NEON_ADDSUB, negative_snan_to_qnan) {
1073     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1074     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1075     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1076       for (uint32_t i = 0; i < kBlockSize; i++) {
1077         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1078       }
1079       xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1080       for (uint32_t i = 0; i < kBlockSize; i++) {
1081         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1082         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1083           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1084           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1085           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1086       }
1087     }
1088   }
1089 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1090 
1091 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEON_CVT,positive_zero)1092   TEST(ROUNDU__NEON_CVT, positive_zero) {
1093     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1094     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1095     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
1096     xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1097     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1098     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1099       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1100       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1101       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1102   }
1103 
TEST(ROUNDU__NEON_CVT,negative_zero)1104   TEST(ROUNDU__NEON_CVT, negative_zero) {
1105     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1106     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1107     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
1108     xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1109     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1110     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1111       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1112       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1113       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1114   }
1115 
TEST(ROUNDU__NEON_CVT,positive_subnormal)1116   TEST(ROUNDU__NEON_CVT, positive_subnormal) {
1117     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1118     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1119     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
1120       for (uint32_t i = 0; i < kBlockSize; i++) {
1121         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
1122       }
1123       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1124       for (uint32_t i = 0; i < kBlockSize; i++) {
1125         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1126         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1127           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1128           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1129           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1130       }
1131     }
1132   }
1133 
TEST(ROUNDU__NEON_CVT,negative_subnormal)1134   TEST(ROUNDU__NEON_CVT, negative_subnormal) {
1135     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1136     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1137     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
1138       for (uint32_t i = 0; i < kBlockSize; i++) {
1139         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
1140       }
1141       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1142       for (uint32_t i = 0; i < kBlockSize; i++) {
1143         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1144         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1145           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1146           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1147           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1148       }
1149     }
1150   }
1151 
TEST(ROUNDU__NEON_CVT,positive_normal)1152   TEST(ROUNDU__NEON_CVT, positive_normal) {
1153     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1154     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1155     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1156       for (uint32_t i = 0; i < kBlockSize; i++) {
1157         inputs[i] = uint32_as_float(n + i);
1158       }
1159       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1160       for (uint32_t i = 0; i < kBlockSize; i++) {
1161         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1162         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1163           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1164           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1165           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1166       }
1167     }
1168   }
1169 
TEST(ROUNDU__NEON_CVT,negative_normal)1170   TEST(ROUNDU__NEON_CVT, negative_normal) {
1171     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1172     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1173     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1174       for (uint32_t i = 0; i < kBlockSize; i++) {
1175         inputs[i] = uint32_as_float(n + i);
1176       }
1177       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1178       for (uint32_t i = 0; i < kBlockSize; i++) {
1179         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1180         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1181           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1182           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1183           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1184       }
1185     }
1186   }
1187 
TEST(ROUNDU__NEON_CVT,positive_integral)1188   TEST(ROUNDU__NEON_CVT, positive_integral) {
1189     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1190     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1191     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1192       for (uint32_t i = 0; i < kBlockSize; i++) {
1193         inputs[i] = uint32_as_float(n + i);
1194       }
1195       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1196       for (uint32_t i = 0; i < kBlockSize; i++) {
1197         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1198         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1199           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1200           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1201           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1202       }
1203     }
1204   }
1205 
TEST(ROUNDU__NEON_CVT,negative_integral)1206   TEST(ROUNDU__NEON_CVT, negative_integral) {
1207     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1208     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1209     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1210       for (uint32_t i = 0; i < kBlockSize; i++) {
1211         inputs[i] = uint32_as_float(n + i);
1212       }
1213       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1214       for (uint32_t i = 0; i < kBlockSize; i++) {
1215         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1216         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1217           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1218           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1219           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1220       }
1221     }
1222   }
1223 
TEST(ROUNDU__NEON_CVT,positive_infinity)1224   TEST(ROUNDU__NEON_CVT, positive_infinity) {
1225     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1226     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1227     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1228     xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1229     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1230     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1231       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1232       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1233       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1234   }
1235 
TEST(ROUNDU__NEON_CVT,negative_infinity)1236   TEST(ROUNDU__NEON_CVT, negative_infinity) {
1237     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1238     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1239     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1240     xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1241     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1242     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1243       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1244       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1245       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1246   }
1247 
TEST(ROUNDU__NEON_CVT,positive_qnan)1248   TEST(ROUNDU__NEON_CVT, positive_qnan) {
1249     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1250     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1251     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1252       for (uint32_t i = 0; i < kBlockSize; i++) {
1253         inputs[i] = uint32_as_float(n + i);
1254       }
1255       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1256       for (uint32_t i = 0; i < kBlockSize; i++) {
1257         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1258         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1259           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1260           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1261           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1262       }
1263     }
1264   }
1265 
TEST(ROUNDU__NEON_CVT,negative_qnan)1266   TEST(ROUNDU__NEON_CVT, negative_qnan) {
1267     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1268     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1269     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1270       for (uint32_t i = 0; i < kBlockSize; i++) {
1271         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
1272       }
1273       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1274       for (uint32_t i = 0; i < kBlockSize; i++) {
1275         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1276         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1277           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1278           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1279           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1280       }
1281     }
1282   }
1283 
TEST(ROUNDU__NEON_CVT,positive_snan)1284   TEST(ROUNDU__NEON_CVT, positive_snan) {
1285     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1286     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1287     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1288       for (uint32_t i = 0; i < kBlockSize; i++) {
1289         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1290       }
1291       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1292       for (uint32_t i = 0; i < kBlockSize; i++) {
1293         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1294         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1295           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1296           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1297           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1298       }
1299     }
1300   }
1301 
TEST(ROUNDU__NEON_CVT,negative_snan)1302   TEST(ROUNDU__NEON_CVT, negative_snan) {
1303     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1304     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1305     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1306       for (uint32_t i = 0; i < kBlockSize; i++) {
1307         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1308       }
1309       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1310       for (uint32_t i = 0; i < kBlockSize; i++) {
1311         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1312         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1313           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1314           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1315           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1316       }
1317     }
1318   }
1319 
TEST(ROUNDU__NEON_CVT,positive_snan_to_qnan)1320   TEST(ROUNDU__NEON_CVT, positive_snan_to_qnan) {
1321     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1322     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1323     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1324       for (uint32_t i = 0; i < kBlockSize; i++) {
1325         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1326       }
1327       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1328       for (uint32_t i = 0; i < kBlockSize; i++) {
1329         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1330         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1331           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1332           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1333           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1334       }
1335     }
1336   }
1337 
TEST(ROUNDU__NEON_CVT,negative_snan_to_qnan)1338   TEST(ROUNDU__NEON_CVT, negative_snan_to_qnan) {
1339     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1340     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1341     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1342       for (uint32_t i = 0; i < kBlockSize; i++) {
1343         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1344       }
1345       xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1346       for (uint32_t i = 0; i < kBlockSize; i++) {
1347         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1348         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1349           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1350           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1351           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1352       }
1353     }
1354   }
1355 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1356 
1357 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEONV8,positive_zero)1358   TEST(ROUNDU__NEONV8, positive_zero) {
1359     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1360     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1361     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
1362     xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1363     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1364     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1365       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1366       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1367       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1368   }
1369 
TEST(ROUNDU__NEONV8,negative_zero)1370   TEST(ROUNDU__NEONV8, negative_zero) {
1371     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1372     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1373     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
1374     xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1375     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1376     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1377       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1378       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1379       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1380   }
1381 
TEST(ROUNDU__NEONV8,positive_subnormal)1382   TEST(ROUNDU__NEONV8, positive_subnormal) {
1383     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1384     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1385     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
1386       for (uint32_t i = 0; i < kBlockSize; i++) {
1387         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
1388       }
1389       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1390       for (uint32_t i = 0; i < kBlockSize; i++) {
1391         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1392         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1393           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1394           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1395           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1396       }
1397     }
1398   }
1399 
TEST(ROUNDU__NEONV8,negative_subnormal)1400   TEST(ROUNDU__NEONV8, negative_subnormal) {
1401     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1402     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1403     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
1404       for (uint32_t i = 0; i < kBlockSize; i++) {
1405         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
1406       }
1407       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1408       for (uint32_t i = 0; i < kBlockSize; i++) {
1409         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1410         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1411           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1412           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1413           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1414       }
1415     }
1416   }
1417 
TEST(ROUNDU__NEONV8,positive_normal)1418   TEST(ROUNDU__NEONV8, positive_normal) {
1419     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1420     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1421     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1422       for (uint32_t i = 0; i < kBlockSize; i++) {
1423         inputs[i] = uint32_as_float(n + i);
1424       }
1425       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1426       for (uint32_t i = 0; i < kBlockSize; i++) {
1427         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1428         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1429           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1430           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1431           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1432       }
1433     }
1434   }
1435 
TEST(ROUNDU__NEONV8,negative_normal)1436   TEST(ROUNDU__NEONV8, negative_normal) {
1437     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1438     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1439     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1440       for (uint32_t i = 0; i < kBlockSize; i++) {
1441         inputs[i] = uint32_as_float(n + i);
1442       }
1443       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1444       for (uint32_t i = 0; i < kBlockSize; i++) {
1445         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1446         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1447           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1448           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1449           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1450       }
1451     }
1452   }
1453 
TEST(ROUNDU__NEONV8,positive_integral)1454   TEST(ROUNDU__NEONV8, positive_integral) {
1455     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1456     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1457     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1458       for (uint32_t i = 0; i < kBlockSize; i++) {
1459         inputs[i] = uint32_as_float(n + i);
1460       }
1461       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1462       for (uint32_t i = 0; i < kBlockSize; i++) {
1463         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1464         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1465           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1466           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1467           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1468       }
1469     }
1470   }
1471 
TEST(ROUNDU__NEONV8,negative_integral)1472   TEST(ROUNDU__NEONV8, negative_integral) {
1473     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1474     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1475     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1476       for (uint32_t i = 0; i < kBlockSize; i++) {
1477         inputs[i] = uint32_as_float(n + i);
1478       }
1479       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1480       for (uint32_t i = 0; i < kBlockSize; i++) {
1481         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1482         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1483           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1484           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1485           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1486       }
1487     }
1488   }
1489 
TEST(ROUNDU__NEONV8,positive_infinity)1490   TEST(ROUNDU__NEONV8, positive_infinity) {
1491     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1492     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1493     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1494     xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1495     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1496     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1497       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1498       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1499       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1500   }
1501 
TEST(ROUNDU__NEONV8,negative_infinity)1502   TEST(ROUNDU__NEONV8, negative_infinity) {
1503     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1504     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1505     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1506     xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1507     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1508     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1509       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1510       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1511       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1512   }
1513 
TEST(ROUNDU__NEONV8,positive_qnan)1514   TEST(ROUNDU__NEONV8, positive_qnan) {
1515     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1516     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1517     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1518       for (uint32_t i = 0; i < kBlockSize; i++) {
1519         inputs[i] = uint32_as_float(n + i);
1520       }
1521       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1522       for (uint32_t i = 0; i < kBlockSize; i++) {
1523         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1524         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1525           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1526           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1527           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1528       }
1529     }
1530   }
1531 
TEST(ROUNDU__NEONV8,negative_qnan)1532   TEST(ROUNDU__NEONV8, negative_qnan) {
1533     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1534     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1535     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1536       for (uint32_t i = 0; i < kBlockSize; i++) {
1537         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
1538       }
1539       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1540       for (uint32_t i = 0; i < kBlockSize; i++) {
1541         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1542         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1543           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1544           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1545           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1546       }
1547     }
1548   }
1549 
TEST(ROUNDU__NEONV8,positive_snan)1550   TEST(ROUNDU__NEONV8, positive_snan) {
1551     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1552     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1553     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1554       for (uint32_t i = 0; i < kBlockSize; i++) {
1555         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1556       }
1557       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1558       for (uint32_t i = 0; i < kBlockSize; i++) {
1559         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1560         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1561           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1562           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1563           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1564       }
1565     }
1566   }
1567 
TEST(ROUNDU__NEONV8,negative_snan)1568   TEST(ROUNDU__NEONV8, negative_snan) {
1569     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1570     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1571     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1572       for (uint32_t i = 0; i < kBlockSize; i++) {
1573         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1574       }
1575       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1576       for (uint32_t i = 0; i < kBlockSize; i++) {
1577         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1578         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1579           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1580           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1581           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1582       }
1583     }
1584   }
1585 
TEST(ROUNDU__NEONV8,positive_snan_to_qnan)1586   TEST(ROUNDU__NEONV8, positive_snan_to_qnan) {
1587     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1588     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1589     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1590       for (uint32_t i = 0; i < kBlockSize; i++) {
1591         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1592       }
1593       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1594       for (uint32_t i = 0; i < kBlockSize; i++) {
1595         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1596         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1597           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1598           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1599           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1600       }
1601     }
1602   }
1603 
TEST(ROUNDU__NEONV8,negative_snan_to_qnan)1604   TEST(ROUNDU__NEONV8, negative_snan_to_qnan) {
1605     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1606     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1607     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1608       for (uint32_t i = 0; i < kBlockSize; i++) {
1609         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1610       }
1611       xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1612       for (uint32_t i = 0; i < kBlockSize; i++) {
1613         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1614         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1615           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1616           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1617           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1618       }
1619     }
1620   }
1621 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1622 
1623 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_zero)1624   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_zero) {
1625     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1626     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1627     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
1628     xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1629     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1630     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1631       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1632       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1633       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1634   }
1635 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_zero)1636   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_zero) {
1637     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1638     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1639     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
1640     xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1641     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1642     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1643       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1644       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1645       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1646   }
1647 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_subnormal)1648   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_subnormal) {
1649     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1650     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1651     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
1652       for (uint32_t i = 0; i < kBlockSize; i++) {
1653         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
1654       }
1655       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1656       for (uint32_t i = 0; i < kBlockSize; i++) {
1657         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1658         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1659           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1660           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1661           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1662       }
1663     }
1664   }
1665 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_subnormal)1666   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_subnormal) {
1667     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1668     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1669     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
1670       for (uint32_t i = 0; i < kBlockSize; i++) {
1671         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
1672       }
1673       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1674       for (uint32_t i = 0; i < kBlockSize; i++) {
1675         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1676         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1677           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1678           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1679           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1680       }
1681     }
1682   }
1683 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_normal)1684   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_normal) {
1685     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1686     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1687     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1688       for (uint32_t i = 0; i < kBlockSize; i++) {
1689         inputs[i] = uint32_as_float(n + i);
1690       }
1691       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1692       for (uint32_t i = 0; i < kBlockSize; i++) {
1693         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1694         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1695           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1696           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1697           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1698       }
1699     }
1700   }
1701 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_normal)1702   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_normal) {
1703     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1704     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1705     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1706       for (uint32_t i = 0; i < kBlockSize; i++) {
1707         inputs[i] = uint32_as_float(n + i);
1708       }
1709       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1710       for (uint32_t i = 0; i < kBlockSize; i++) {
1711         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1712         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1713           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1714           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1715           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1716       }
1717     }
1718   }
1719 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_integral)1720   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_integral) {
1721     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1722     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1723     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1724       for (uint32_t i = 0; i < kBlockSize; i++) {
1725         inputs[i] = uint32_as_float(n + i);
1726       }
1727       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1728       for (uint32_t i = 0; i < kBlockSize; i++) {
1729         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1730         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1731           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1732           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1733           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1734       }
1735     }
1736   }
1737 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_integral)1738   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_integral) {
1739     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1740     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1741     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1742       for (uint32_t i = 0; i < kBlockSize; i++) {
1743         inputs[i] = uint32_as_float(n + i);
1744       }
1745       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1746       for (uint32_t i = 0; i < kBlockSize; i++) {
1747         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1748         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1749           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1750           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1751           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1752       }
1753     }
1754   }
1755 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_infinity)1756   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_infinity) {
1757     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1758     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1759     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1760     xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1761     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1762     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1763       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1764       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1765       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1766   }
1767 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_infinity)1768   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_infinity) {
1769     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1770     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1771     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1772     xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1773     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1774     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1775       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1776       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1777       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1778   }
1779 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_qnan)1780   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_qnan) {
1781     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1782     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1783     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1784       for (uint32_t i = 0; i < kBlockSize; i++) {
1785         inputs[i] = uint32_as_float(n + i);
1786       }
1787       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1788       for (uint32_t i = 0; i < kBlockSize; i++) {
1789         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1790         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1791           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1792           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1793           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1794       }
1795     }
1796   }
1797 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_qnan)1798   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_qnan) {
1799     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1800     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1801     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1802       for (uint32_t i = 0; i < kBlockSize; i++) {
1803         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
1804       }
1805       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1806       for (uint32_t i = 0; i < kBlockSize; i++) {
1807         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1808         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1809           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1810           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1811           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1812       }
1813     }
1814   }
1815 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_snan)1816   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_snan) {
1817     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1818     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1819     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1820       for (uint32_t i = 0; i < kBlockSize; i++) {
1821         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1822       }
1823       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1824       for (uint32_t i = 0; i < kBlockSize; i++) {
1825         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1826         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1827           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1828           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1829           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1830       }
1831     }
1832   }
1833 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_snan)1834   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_snan) {
1835     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1836     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1837     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1838       for (uint32_t i = 0; i < kBlockSize; i++) {
1839         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1840       }
1841       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1842       for (uint32_t i = 0; i < kBlockSize; i++) {
1843         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1844         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
1845           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1846           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1847           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1848       }
1849     }
1850   }
1851 
TEST(ROUNDU__WASMSIMD_ADDSUB,positive_snan_to_qnan)1852   TEST(ROUNDU__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1853     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1854     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1855     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1856       for (uint32_t i = 0; i < kBlockSize; i++) {
1857         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1858       }
1859       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1860       for (uint32_t i = 0; i < kBlockSize; i++) {
1861         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1862         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1863           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1864           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1865           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1866       }
1867     }
1868   }
1869 
TEST(ROUNDU__WASMSIMD_ADDSUB,negative_snan_to_qnan)1870   TEST(ROUNDU__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1871     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1872     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1873     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1874       for (uint32_t i = 0; i < kBlockSize; i++) {
1875         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1876       }
1877       xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1878       for (uint32_t i = 0; i < kBlockSize; i++) {
1879         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1880         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1881           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1882           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1883           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1884       }
1885     }
1886   }
1887 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1888 
1889 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDU__WASMSIMD_CVT,positive_zero)1890   TEST(ROUNDU__WASMSIMD_CVT, positive_zero) {
1891     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1892     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1893     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
1894     xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1895     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1896     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1897       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1898       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1899       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1900   }
1901 
TEST(ROUNDU__WASMSIMD_CVT,negative_zero)1902   TEST(ROUNDU__WASMSIMD_CVT, negative_zero) {
1903     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1904     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1905     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
1906     xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1907     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
1908     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1909       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
1910       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1911       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1912   }
1913 
TEST(ROUNDU__WASMSIMD_CVT,positive_subnormal)1914   TEST(ROUNDU__WASMSIMD_CVT, positive_subnormal) {
1915     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1916     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1917     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
1918       for (uint32_t i = 0; i < kBlockSize; i++) {
1919         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
1920       }
1921       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1922       for (uint32_t i = 0; i < kBlockSize; i++) {
1923         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1924         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1925           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1926           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1927           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1928       }
1929     }
1930   }
1931 
TEST(ROUNDU__WASMSIMD_CVT,negative_subnormal)1932   TEST(ROUNDU__WASMSIMD_CVT, negative_subnormal) {
1933     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1934     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1935     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
1936       for (uint32_t i = 0; i < kBlockSize; i++) {
1937         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
1938       }
1939       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1940       for (uint32_t i = 0; i < kBlockSize; i++) {
1941         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1942         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1943           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1944           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1945           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1946       }
1947     }
1948   }
1949 
TEST(ROUNDU__WASMSIMD_CVT,positive_normal)1950   TEST(ROUNDU__WASMSIMD_CVT, positive_normal) {
1951     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1952     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1953     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1954       for (uint32_t i = 0; i < kBlockSize; i++) {
1955         inputs[i] = uint32_as_float(n + i);
1956       }
1957       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1958       for (uint32_t i = 0; i < kBlockSize; i++) {
1959         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1960         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1961           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1962           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1963           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1964       }
1965     }
1966   }
1967 
TEST(ROUNDU__WASMSIMD_CVT,negative_normal)1968   TEST(ROUNDU__WASMSIMD_CVT, negative_normal) {
1969     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1970     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1971     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1972       for (uint32_t i = 0; i < kBlockSize; i++) {
1973         inputs[i] = uint32_as_float(n + i);
1974       }
1975       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1976       for (uint32_t i = 0; i < kBlockSize; i++) {
1977         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1978         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1979           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1980           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1981           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1982       }
1983     }
1984   }
1985 
TEST(ROUNDU__WASMSIMD_CVT,positive_integral)1986   TEST(ROUNDU__WASMSIMD_CVT, positive_integral) {
1987     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1988     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1989     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1990       for (uint32_t i = 0; i < kBlockSize; i++) {
1991         inputs[i] = uint32_as_float(n + i);
1992       }
1993       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1994       for (uint32_t i = 0; i < kBlockSize; i++) {
1995         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
1996         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1997           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1998           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1999           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2000       }
2001     }
2002   }
2003 
TEST(ROUNDU__WASMSIMD_CVT,negative_integral)2004   TEST(ROUNDU__WASMSIMD_CVT, negative_integral) {
2005     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2006     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2007     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
2008       for (uint32_t i = 0; i < kBlockSize; i++) {
2009         inputs[i] = uint32_as_float(n + i);
2010       }
2011       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2012       for (uint32_t i = 0; i < kBlockSize; i++) {
2013         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2014         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2015           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2016           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2017           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2018       }
2019     }
2020   }
2021 
TEST(ROUNDU__WASMSIMD_CVT,positive_infinity)2022   TEST(ROUNDU__WASMSIMD_CVT, positive_infinity) {
2023     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2024     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2025     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
2026     xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2027     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2028     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2029       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2030       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2031       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2032   }
2033 
TEST(ROUNDU__WASMSIMD_CVT,negative_infinity)2034   TEST(ROUNDU__WASMSIMD_CVT, negative_infinity) {
2035     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2036     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2037     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
2038     xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2039     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2040     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2041       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2042       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2043       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2044   }
2045 
TEST(ROUNDU__WASMSIMD_CVT,positive_qnan)2046   TEST(ROUNDU__WASMSIMD_CVT, positive_qnan) {
2047     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2048     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2049     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2050       for (uint32_t i = 0; i < kBlockSize; i++) {
2051         inputs[i] = uint32_as_float(n + i);
2052       }
2053       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2054       for (uint32_t i = 0; i < kBlockSize; i++) {
2055         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2056         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2057           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2058           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2059           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2060       }
2061     }
2062   }
2063 
TEST(ROUNDU__WASMSIMD_CVT,negative_qnan)2064   TEST(ROUNDU__WASMSIMD_CVT, negative_qnan) {
2065     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2066     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2067     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2068       for (uint32_t i = 0; i < kBlockSize; i++) {
2069         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
2070       }
2071       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2072       for (uint32_t i = 0; i < kBlockSize; i++) {
2073         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2074         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2075           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2076           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2077           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2078       }
2079     }
2080   }
2081 
TEST(ROUNDU__WASMSIMD_CVT,positive_snan)2082   TEST(ROUNDU__WASMSIMD_CVT, positive_snan) {
2083     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2084     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2085     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2086       for (uint32_t i = 0; i < kBlockSize; i++) {
2087         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2088       }
2089       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2090       for (uint32_t i = 0; i < kBlockSize; i++) {
2091         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2092         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
2093           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2094           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2095           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2096       }
2097     }
2098   }
2099 
TEST(ROUNDU__WASMSIMD_CVT,negative_snan)2100   TEST(ROUNDU__WASMSIMD_CVT, negative_snan) {
2101     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2102     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2103     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2104       for (uint32_t i = 0; i < kBlockSize; i++) {
2105         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2106       }
2107       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2108       for (uint32_t i = 0; i < kBlockSize; i++) {
2109         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2110         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
2111           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2112           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2113           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2114       }
2115     }
2116   }
2117 
TEST(ROUNDU__WASMSIMD_CVT,positive_snan_to_qnan)2118   TEST(ROUNDU__WASMSIMD_CVT, positive_snan_to_qnan) {
2119     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2120     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2121     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2122       for (uint32_t i = 0; i < kBlockSize; i++) {
2123         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2124       }
2125       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2126       for (uint32_t i = 0; i < kBlockSize; i++) {
2127         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2128         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2129           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2130           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2131           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2132       }
2133     }
2134   }
2135 
TEST(ROUNDU__WASMSIMD_CVT,negative_snan_to_qnan)2136   TEST(ROUNDU__WASMSIMD_CVT, negative_snan_to_qnan) {
2137     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2138     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2139     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2140       for (uint32_t i = 0; i < kBlockSize; i++) {
2141         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2142       }
2143       xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2144       for (uint32_t i = 0; i < kBlockSize; i++) {
2145         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2146         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2147           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2148           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2149           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2150       }
2151     }
2152   }
2153 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2154 
2155 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDU__WASMSIMD_NATIVE,positive_zero)2156   TEST(ROUNDU__WASMSIMD_NATIVE, positive_zero) {
2157     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2158     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2159     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x00000000));
2160     xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2161     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2162     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2163       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2164       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2165       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2166   }
2167 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_zero)2168   TEST(ROUNDU__WASMSIMD_NATIVE, negative_zero) {
2169     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2170     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2171     std::fill(inputs.begin(), inputs.end(), UINT32_C(0x80000000));
2172     xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2173     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2174     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2175       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2176       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2177       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2178   }
2179 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_subnormal)2180   TEST(ROUNDU__WASMSIMD_NATIVE, positive_subnormal) {
2181     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2182     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2183     for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x00800000); n += kBlockSize) {
2184       for (uint32_t i = 0; i < kBlockSize; i++) {
2185         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x00000001)));
2186       }
2187       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2188       for (uint32_t i = 0; i < kBlockSize; i++) {
2189         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2190         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2191           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2192           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2193           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2194       }
2195     }
2196   }
2197 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_subnormal)2198   TEST(ROUNDU__WASMSIMD_NATIVE, negative_subnormal) {
2199     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2200     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2201     for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0x80800000); n += kBlockSize) {
2202       for (uint32_t i = 0; i < kBlockSize; i++) {
2203         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x80000001)));
2204       }
2205       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2206       for (uint32_t i = 0; i < kBlockSize; i++) {
2207         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2208         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2209           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2210           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2211           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2212       }
2213     }
2214   }
2215 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_normal)2216   TEST(ROUNDU__WASMSIMD_NATIVE, positive_normal) {
2217     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2218     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2219     for (uint32_t n = UINT32_C(0x00800000); n < UINT32_C(0x4B800000); n += kBlockSize) {
2220       for (uint32_t i = 0; i < kBlockSize; i++) {
2221         inputs[i] = uint32_as_float(n + i);
2222       }
2223       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2224       for (uint32_t i = 0; i < kBlockSize; i++) {
2225         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2226         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2227           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2228           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2229           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2230       }
2231     }
2232   }
2233 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_normal)2234   TEST(ROUNDU__WASMSIMD_NATIVE, negative_normal) {
2235     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2236     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2237     for (uint32_t n = UINT32_C(0x80800000); n < UINT32_C(0xCB800000); n += kBlockSize) {
2238       for (uint32_t i = 0; i < kBlockSize; i++) {
2239         inputs[i] = uint32_as_float(n + i);
2240       }
2241       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2242       for (uint32_t i = 0; i < kBlockSize; i++) {
2243         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2244         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2245           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2246           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2247           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2248       }
2249     }
2250   }
2251 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_integral)2252   TEST(ROUNDU__WASMSIMD_NATIVE, positive_integral) {
2253     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2254     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2255     for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
2256       for (uint32_t i = 0; i < kBlockSize; i++) {
2257         inputs[i] = uint32_as_float(n + i);
2258       }
2259       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2260       for (uint32_t i = 0; i < kBlockSize; i++) {
2261         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2262         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2263           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2264           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2265           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2266       }
2267     }
2268   }
2269 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_integral)2270   TEST(ROUNDU__WASMSIMD_NATIVE, negative_integral) {
2271     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2272     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2273     for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
2274       for (uint32_t i = 0; i < kBlockSize; i++) {
2275         inputs[i] = uint32_as_float(n + i);
2276       }
2277       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2278       for (uint32_t i = 0; i < kBlockSize; i++) {
2279         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2280         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2281           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2282           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2283           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2284       }
2285     }
2286   }
2287 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_infinity)2288   TEST(ROUNDU__WASMSIMD_NATIVE, positive_infinity) {
2289     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2290     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2291     std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
2292     xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2293     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2294     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2295       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2296       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2297       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2298   }
2299 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_infinity)2300   TEST(ROUNDU__WASMSIMD_NATIVE, negative_infinity) {
2301     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2302     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2303     std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
2304     xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2305     const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
2306     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
2307       << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
2308       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2309       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
2310   }
2311 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_qnan)2312   TEST(ROUNDU__WASMSIMD_NATIVE, positive_qnan) {
2313     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2314     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2315     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2316       for (uint32_t i = 0; i < kBlockSize; i++) {
2317         inputs[i] = uint32_as_float(n + i);
2318       }
2319       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2320       for (uint32_t i = 0; i < kBlockSize; i++) {
2321         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2322         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2323           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2324           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2325           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2326       }
2327     }
2328   }
2329 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_qnan)2330   TEST(ROUNDU__WASMSIMD_NATIVE, negative_qnan) {
2331     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2332     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2333     for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
2334       for (uint32_t i = 0; i < kBlockSize; i++) {
2335         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
2336       }
2337       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2338       for (uint32_t i = 0; i < kBlockSize; i++) {
2339         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2340         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2341           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2342           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2343           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2344       }
2345     }
2346   }
2347 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_snan)2348   TEST(ROUNDU__WASMSIMD_NATIVE, positive_snan) {
2349     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2350     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2351     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2352       for (uint32_t i = 0; i < kBlockSize; i++) {
2353         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2354       }
2355       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2356       for (uint32_t i = 0; i < kBlockSize; i++) {
2357         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2358         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
2359           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2360           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2361           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2362       }
2363     }
2364   }
2365 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_snan)2366   TEST(ROUNDU__WASMSIMD_NATIVE, negative_snan) {
2367     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2368     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2369     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2370       for (uint32_t i = 0; i < kBlockSize; i++) {
2371         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2372       }
2373       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2374       for (uint32_t i = 0; i < kBlockSize; i++) {
2375         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2376         ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
2377           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2378           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2379           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2380       }
2381     }
2382   }
2383 
TEST(ROUNDU__WASMSIMD_NATIVE,positive_snan_to_qnan)2384   TEST(ROUNDU__WASMSIMD_NATIVE, positive_snan_to_qnan) {
2385     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2386     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2387     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2388       for (uint32_t i = 0; i < kBlockSize; i++) {
2389         inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2390       }
2391       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2392       for (uint32_t i = 0; i < kBlockSize; i++) {
2393         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2394         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2395           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2396           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2397           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2398       }
2399     }
2400   }
2401 
TEST(ROUNDU__WASMSIMD_NATIVE,negative_snan_to_qnan)2402   TEST(ROUNDU__WASMSIMD_NATIVE, negative_snan_to_qnan) {
2403     std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
2404     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
2405     for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
2406       for (uint32_t i = 0; i < kBlockSize; i++) {
2407         inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
2408       }
2409       xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
2410       for (uint32_t i = 0; i < kBlockSize; i++) {
2411         const uint32_t reference_output = float_as_uint32(std::ceil(inputs[i]));
2412         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
2413           << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
2414           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
2415           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
2416       }
2417     }
2418   }
2419 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2420 
TEST(ROUNDU__SCALAR_ADDSUB, positive_zero) {
  // Round-up of +0.0f must return +0.0f, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: previously filled with the integer literal UINT32_C(0x00000000), which
  // is value-converted to float rather than bit-reinterpreted. For zero the
  // result happened to coincide; use the float literal to express the intent.
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2432 
TEST(ROUNDU__SCALAR_ADDSUB, negative_zero) {
  // Round-up of -0.0f must return -0.0f (sign preserved), bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: the previous code filled with the integer literal
  // UINT32_C(0x80000000), which std::fill value-converts to the float
  // 2147483648.0f — NOT the bit pattern of negative zero. The test therefore
  // never exercised -0.0f. Use the -0.0f float literal directly.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2444 
TEST(ROUNDU__SCALAR_ADDSUB, positive_subnormal) {
  // Check the kernel against std::ceil over the whole positive subnormal
  // range, processed in batches of kBlockSize values.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to the smallest positive subnormal so slot 0 never holds +0.0f.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x00000001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2462 
TEST(ROUNDU__SCALAR_ADDSUB, negative_subnormal) {
  // Check the kernel against std::ceil over the whole negative subnormal
  // range, processed in batches of kBlockSize values.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to the smallest-magnitude negative subnormal so slot 0 never holds -0.0f.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x80000001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2480 
TEST(ROUNDU__SCALAR_ADDSUB, positive_normal) {
  // Exhaustive sweep of positive normal values below 2**24 (where rounding is
  // non-trivial), comparing the kernel bit-for-bit with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2498 
TEST(ROUNDU__SCALAR_ADDSUB, negative_normal) {
  // Exhaustive sweep of negative normal values above -2**24, comparing the
  // kernel bit-for-bit with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2516 
TEST(ROUNDU__SCALAR_ADDSUB, positive_integral) {
  // Values >= 2**24 are already integral; the kernel must pass them through
  // bit-exactly, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2534 
TEST(ROUNDU__SCALAR_ADDSUB, negative_integral) {
  // Values <= -2**24 are already integral; the kernel must pass them through
  // bit-exactly, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2552 
TEST(ROUNDU__SCALAR_ADDSUB, positive_infinity) {
  // +inf must propagate through the kernel unchanged, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  std::fill(x.begin(), x.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
  const uint32_t expected = float_as_uint32(std::ceil(x[0]));
  ASSERT_EQ(expected, float_as_uint32(y[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[0]);
}
2564 
TEST(ROUNDU__SCALAR_ADDSUB, negative_infinity) {
  // -inf must propagate through the kernel unchanged, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  std::fill(x.begin(), x.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
  const uint32_t expected = float_as_uint32(std::ceil(x[0]));
  ASSERT_EQ(expected, float_as_uint32(y[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[0]);
}
2576 
TEST(ROUNDU__SCALAR_ADDSUB, positive_qnan) {
  // Every positive quiet NaN must come out of the kernel bit-identical to
  // what std::ceil produces for the same input.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2594 
TEST(ROUNDU__SCALAR_ADDSUB, negative_qnan) {
  // Every negative quiet NaN (sign bit OR'ed onto the positive payload) must
  // come out of the kernel bit-identical to std::ceil's result.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(UINT32_C(0x80000000) | (base + offset));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2612 
TEST(ROUNDU__SCALAR_ADDSUB, positive_snan) {
  // Positive signaling NaNs: compare kernel and std::ceil with the quiet bit
  // (0x00400000) masked out, since the two may quiet the sNaN differently.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not +inf.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(y[offset]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2630 
TEST(ROUNDU__SCALAR_ADDSUB, negative_snan) {
  // Negative signaling NaNs: compare kernel and std::ceil with the quiet bit
  // (0x00400000) masked out, since the two may quiet the sNaN differently.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not infinity.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(UINT32_C(0x80000000) | bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(y[offset]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2648 
TEST(ROUNDU__SCALAR_ADDSUB, positive_snan_to_qnan) {
  // Positive signaling NaNs must be quieted exactly the way std::ceil quiets
  // them: full bit-exact comparison (quiet bit included).
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not +inf.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2666 
TEST(ROUNDU__SCALAR_ADDSUB, negative_snan_to_qnan) {
  // Negative signaling NaNs must be quieted exactly the way std::ceil quiets
  // them: full bit-exact comparison (quiet bit included).
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not infinity.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(UINT32_C(0x80000000) | bits);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2684 
TEST(ROUNDU__SCALAR_CVT, positive_zero) {
  // Round-up of +0.0f must return +0.0f, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: previously filled with the integer literal UINT32_C(0x00000000), which
  // is value-converted to float rather than bit-reinterpreted. For zero the
  // result happened to coincide; use the float literal to express the intent.
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2696 
TEST(ROUNDU__SCALAR_CVT, negative_zero) {
  // Round-up of -0.0f must return -0.0f (sign preserved), bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: the previous code filled with the integer literal
  // UINT32_C(0x80000000), which std::fill value-converts to the float
  // 2147483648.0f — NOT the bit pattern of negative zero. The test therefore
  // never exercised -0.0f. Use the -0.0f float literal directly.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2708 
TEST(ROUNDU__SCALAR_CVT, positive_subnormal) {
  // Check the cvt-based kernel against std::ceil over the whole positive
  // subnormal range, processed in batches of kBlockSize values.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to the smallest positive subnormal so slot 0 never holds +0.0f.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x00000001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2726 
TEST(ROUNDU__SCALAR_CVT, negative_subnormal) {
  // Check the cvt-based kernel against std::ceil over the whole negative
  // subnormal range, processed in batches of kBlockSize values.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to the smallest-magnitude negative subnormal so slot 0 never holds -0.0f.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x80000001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2744 
TEST(ROUNDU__SCALAR_CVT, positive_normal) {
  // Exhaustive sweep of positive normal values below 2**24 (where rounding is
  // non-trivial), comparing the cvt-based kernel bit-for-bit with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2762 
TEST(ROUNDU__SCALAR_CVT, negative_normal) {
  // Exhaustive sweep of negative normal values above -2**24, comparing the
  // cvt-based kernel bit-for-bit with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2780 
TEST(ROUNDU__SCALAR_CVT, positive_integral) {
  // Values >= 2**24 are already integral; the cvt-based kernel must pass them
  // through bit-exactly, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2798 
TEST(ROUNDU__SCALAR_CVT, negative_integral) {
  // Values <= -2**24 are already integral; the cvt-based kernel must pass them
  // through bit-exactly, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2816 
TEST(ROUNDU__SCALAR_CVT, positive_infinity) {
  // +inf must propagate through the cvt-based kernel unchanged, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  std::fill(x.begin(), x.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
  const uint32_t expected = float_as_uint32(std::ceil(x[0]));
  ASSERT_EQ(expected, float_as_uint32(y[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[0]);
}
2828 
TEST(ROUNDU__SCALAR_CVT, negative_infinity) {
  // -inf must propagate through the cvt-based kernel unchanged, bit-exact vs std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  std::fill(x.begin(), x.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
  const uint32_t expected = float_as_uint32(std::ceil(x[0]));
  ASSERT_EQ(expected, float_as_uint32(y[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[0]);
}
2840 
TEST(ROUNDU__SCALAR_CVT, positive_qnan) {
  // Every positive quiet NaN must come out of the cvt-based kernel
  // bit-identical to what std::ceil produces for the same input.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(base + offset);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2858 
TEST(ROUNDU__SCALAR_CVT, negative_qnan) {
  // Every negative quiet NaN (sign bit OR'ed onto the positive payload) must
  // come out of the cvt-based kernel bit-identical to std::ceil's result.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      x[offset] = uint32_as_float(UINT32_C(0x80000000) | (base + offset));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected, float_as_uint32(y[offset]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2876 
TEST(ROUNDU__SCALAR_CVT, positive_snan) {
  // Positive signaling NaNs: compare kernel and std::ceil with the quiet bit
  // (0x00400000) masked out, since the two may quiet the sNaN differently.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not +inf.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(y[offset]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2894 
TEST(ROUNDU__SCALAR_CVT, negative_snan) {
  // Negative signaling NaNs: compare kernel and std::ceil with the quiet bit
  // (0x00400000) masked out, since the two may quiet the sNaN differently.
  std::vector<float, AlignedAllocator<float, 64>> x(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> y(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp so the first slot holds the smallest sNaN payload, not infinity.
      const uint32_t bits = std::max<uint32_t>(base + offset, UINT32_C(0x7F800001));
      x[offset] = uint32_as_float(UINT32_C(0x80000000) | bits);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), x.data(), y.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(x[offset]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(y[offset]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(x[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(y[offset]);
    }
  }
}
2912 
TEST(ROUNDU__SCALAR_CVT, positive_snan_to_qnan) {
  // Sweep every positive signaling-NaN bit pattern in [0x7F800001, 0x7FC00000)
  // and require bit-exact agreement with std::ceil — unlike the positive_snan
  // test, the quiet bit is included, so the kernel must quieten sNaN inputs
  // exactly the way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to 0x7F800001 so the very first pattern is a NaN, not +infinity.
      const uint32_t bits = std::max<uint32_t>(block_start + offset, UINT32_C(0x7F800001));
      inputs[offset] = uint32_as_float(bits);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2930 
TEST(ROUNDU__SCALAR_CVT, negative_snan_to_qnan) {
  // Sweep every negative signaling-NaN bit pattern (sign bit OR'ed onto the
  // positive sNaN range [0x7F800001, 0x7FC00000)) and require bit-exact
  // agreement with std::ceil, quiet bit included — the kernel must quieten
  // sNaN inputs exactly the way the reference does.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      // Clamp to 0x7F800001 so the first pattern is a NaN rather than infinity,
      // then set the sign bit to make it negative.
      const uint32_t magnitude = std::max<uint32_t>(block_start + offset, UINT32_C(0x7F800001));
      inputs[offset] = uint32_as_float(UINT32_C(0x80000000) | magnitude);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t offset = 0; offset < kBlockSize; offset++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[offset]));
      const uint32_t observed = float_as_uint32(outputs[offset]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[offset])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2948