1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>
14
15 #include <gtest/gtest.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack/aligned-allocator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/math.h>
22 #include <xnnpack/math-stubs.h>
23
24
25 constexpr int kBlockSize = 1024;
26
27 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDU__SSE_ADDSUB, positive_zero) {
  // Checks that the kernel maps +0.0f to the same bits as std::ceil(+0.0f).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: std::fill was given the integer UINT32_C(0x00000000), which only
  // produced +0.0f through implicit int->float conversion. Reinterpret the
  // bit pattern explicitly so the intent is clear and matches the
  // negative-zero counterpart.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
39
TEST(ROUNDU__SSE_ADDSUB, negative_zero) {
  // Checks that the kernel maps -0.0f to the same bits as std::ceil(-0.0f).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: std::fill was given the integer UINT32_C(0x80000000), which was
  // implicitly CONVERTED to the float value 2147483648.0f rather than
  // reinterpreted as negative zero — the test never actually exercised
  // -0.0f. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
51
TEST(ROUNDU__SSE_ADDSUB, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep the positive subnormal encodings in kBlockSize-sized batches.
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    uint32_t bits = base;
    // Clamp the zero encoding up to the smallest positive subnormal.
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x00000001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
69
TEST(ROUNDU__SSE_ADDSUB, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep the negative subnormal encodings in kBlockSize-sized batches.
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    uint32_t bits = base;
    // Clamp the negative-zero encoding up to the smallest negative subnormal.
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x80000001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
87
TEST(ROUNDU__SSE_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive normal encodings below 2**24 (non-integral range).
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
105
TEST(ROUNDU__SSE_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative normal encodings above -2**24 (non-integral range).
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
123
TEST(ROUNDU__SSE_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive finite encodings from 2**24 upward; these are all
  // already integral and must pass through unchanged.
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
141
TEST(ROUNDU__SSE_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative finite encodings from -2**24 downward; these are all
  // already integral and must pass through unchanged.
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
159
TEST(ROUNDU__SSE_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // All lanes hold +infinity; one lane is checked since they are identical.
  for (float& value : inputs) {
    value = +std::numeric_limits<float>::infinity();
  }
  xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
171
TEST(ROUNDU__SSE_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // All lanes hold -infinity; one lane is checked since they are identical.
  for (float& value : inputs) {
    value = -std::numeric_limits<float>::infinity();
  }
  xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
183
TEST(ROUNDU__SSE_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive quiet-NaN encodings (0x7FC00000..0x7FFFFFFF).
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
201
TEST(ROUNDU__SSE_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep quiet-NaN encodings with the sign bit forced on.
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | bits++);
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
219
TEST(ROUNDU__SSE_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive signaling-NaN encodings, clamping the infinity encoding
  // up to the smallest signaling NaN.
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      // Comparison ignores bit 22 (the NaN quiet bit) on both sides.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
237
TEST(ROUNDU__SSE_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep signaling-NaN encodings with the sign bit forced on, clamping the
  // infinity encoding up to the smallest signaling NaN.
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      // Comparison ignores bit 22 (the NaN quiet bit) on both sides.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
255
TEST(ROUNDU__SSE_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same signaling-NaN sweep as positive_snan, but requires bit-exact
  // agreement with std::ceil (including the quiet bit).
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
273
TEST(ROUNDU__SSE_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same signaling-NaN sweep as negative_snan, but requires bit-exact
  // agreement with std::ceil (including the quiet bit).
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
291 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
292
293 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDU__SSE2_CVT, positive_zero) {
  // Checks that the kernel maps +0.0f to the same bits as std::ceil(+0.0f).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: std::fill was given the integer UINT32_C(0x00000000), which only
  // produced +0.0f through implicit int->float conversion. Reinterpret the
  // bit pattern explicitly so the intent is clear and matches the
  // negative-zero counterpart.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
305
TEST(ROUNDU__SSE2_CVT, negative_zero) {
  // Checks that the kernel maps -0.0f to the same bits as std::ceil(-0.0f).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: std::fill was given the integer UINT32_C(0x80000000), which was
  // implicitly CONVERTED to the float value 2147483648.0f rather than
  // reinterpreted as negative zero — the test never actually exercised
  // -0.0f. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
317
TEST(ROUNDU__SSE2_CVT, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep the positive subnormal encodings in kBlockSize-sized batches.
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    uint32_t bits = base;
    // Clamp the zero encoding up to the smallest positive subnormal.
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x00000001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
335
TEST(ROUNDU__SSE2_CVT, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep the negative subnormal encodings in kBlockSize-sized batches.
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    uint32_t bits = base;
    // Clamp the negative-zero encoding up to the smallest negative subnormal.
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x80000001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
353
TEST(ROUNDU__SSE2_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive normal encodings below 2**24 (non-integral range).
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
371
TEST(ROUNDU__SSE2_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative normal encodings above -2**24 (non-integral range).
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
389
TEST(ROUNDU__SSE2_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive finite encodings from 2**24 upward; these are all
  // already integral and must pass through unchanged.
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
407
TEST(ROUNDU__SSE2_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative finite encodings from -2**24 downward; these are all
  // already integral and must pass through unchanged.
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
425
TEST(ROUNDU__SSE2_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // All lanes hold +infinity; one lane is checked since they are identical.
  for (float& value : inputs) {
    value = +std::numeric_limits<float>::infinity();
  }
  xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
437
TEST(ROUNDU__SSE2_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // All lanes hold -infinity; one lane is checked since they are identical.
  for (float& value : inputs) {
    value = -std::numeric_limits<float>::infinity();
  }
  xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
449
TEST(ROUNDU__SSE2_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive quiet-NaN encodings (0x7FC00000..0x7FFFFFFF).
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] { return uint32_as_float(bits++); });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
467
TEST(ROUNDU__SSE2_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep quiet-NaN encodings with the sign bit forced on.
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | bits++);
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
485
TEST(ROUNDU__SSE2_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive signaling-NaN encodings, clamping the infinity encoding
  // up to the smallest signaling NaN.
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      // Comparison ignores bit 22 (the NaN quiet bit) on both sides.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
503
TEST(ROUNDU__SSE2_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep signaling-NaN encodings with the sign bit forced on, clamping the
  // infinity encoding up to the smallest signaling NaN.
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      // Comparison ignores bit 22 (the NaN quiet bit) on both sides.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
521
TEST(ROUNDU__SSE2_CVT, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same signaling-NaN sweep as positive_snan, but requires bit-exact
  // agreement with std::ceil (including the quiet bit).
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
539
TEST(ROUNDU__SSE2_CVT, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same signaling-NaN sweep as negative_snan, but requires bit-exact
  // agreement with std::ceil (including the quiet bit).
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    uint32_t bits = base;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundu__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; ++idx) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
557 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
558
559 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDU__SSE41, positive_zero) {
  // Verifies the kernel matches std::ceil on +0.0f (including the sign bit).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fill with the float value directly instead of an integer literal: filling
  // a vector<float> with UINT32_C(0x00000000) relied on an implicit
  // int->float conversion that only coincidentally produced +0.0f.
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
571
TEST(ROUNDU__SSE41, negative_zero) {
  // Verifies the kernel matches std::ceil on -0.0f (sign of zero preserved).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: std::fill(..., UINT32_C(0x80000000)) implicitly converted the
  // integer 2^31 to the float 2147483648.0f rather than producing the bit
  // pattern of negative zero, so the test never exercised -0.0f. Fill with
  // -0.0f directly.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
583
TEST(ROUNDU__SSE41, positive_subnormal) {
  // Sweep every positive subnormal bit pattern; the kernel must agree with
  // std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x00800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the zero encoding so every element is a subnormal.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
601
TEST(ROUNDU__SSE41, negative_subnormal) {
  // Sweep every negative subnormal bit pattern; the kernel must agree with
  // std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0x80800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the negative-zero encoding so every element is a subnormal.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
619
TEST(ROUNDU__SSE41, positive_normal) {
  // Sweep positive normal values below 2**24 (where rounding is non-trivial);
  // the kernel must agree with std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x00800000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
637
TEST(ROUNDU__SSE41, negative_normal) {
  // Sweep negative normal values above -2**24 (where rounding is non-trivial);
  // the kernel must agree with std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x80800000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
655
TEST(ROUNDU__SSE41, positive_integral) {
  // Sweep large positive values (>= 2**24) that are already integral; the
  // kernel must return them unchanged, matching std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
673
TEST(ROUNDU__SSE41, negative_integral) {
  // Sweep large negative values (<= -2**24) that are already integral; the
  // kernel must return them unchanged, matching std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
691
TEST(ROUNDU__SSE41, positive_infinity) {
  // +inf must pass through the kernel unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  std::fill(in.begin(), in.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
  const uint32_t expected_bits = float_as_uint32(std::ceil(in[0]));
  const uint32_t observed_bits = float_as_uint32(out[0]);
  ASSERT_EQ(expected_bits, observed_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
}
703
TEST(ROUNDU__SSE41, negative_infinity) {
  // -inf must pass through the kernel unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  std::fill(in.begin(), in.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
  const uint32_t expected_bits = float_as_uint32(std::ceil(in[0]));
  const uint32_t observed_bits = float_as_uint32(out[0]);
  ASSERT_EQ(expected_bits, observed_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
}
715
TEST(ROUNDU__SSE41, positive_qnan) {
  // Sweep all positive quiet-NaN bit patterns; the kernel must propagate the
  // exact NaN payload that std::ceil produces.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
733
TEST(ROUNDU__SSE41, negative_qnan) {
  // Sweep all negative quiet-NaN bit patterns; the kernel must propagate the
  // exact NaN payload that std::ceil produces.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(UINT32_C(0x80000000) | (block + k));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
751
TEST(ROUNDU__SSE41, positive_snan) {
  // Sweep all positive signaling-NaN bit patterns. The comparison masks out
  // the quiet bit (0x00400000), so only the payload and sign must match.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away 0x7F800000 (+inf) so every element is a genuine sNaN.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits & kIgnoreQuietBit, observed_bits & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
769
TEST(ROUNDU__SSE41, negative_snan) {
  // Sweep all negative signaling-NaN bit patterns. The comparison masks out
  // the quiet bit (0x00400000), so only the payload and sign must match.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the infinity encoding, then set the sign bit.
      in[k] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits & kIgnoreQuietBit, observed_bits & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
787
TEST(ROUNDU__SSE41, positive_snan_to_qnan) {
  // Sweep all positive signaling-NaN bit patterns; the kernel must produce
  // exactly the same (quieted) NaN bits as std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away 0x7F800000 (+inf) so every element is a genuine sNaN.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
805
TEST(ROUNDU__SSE41, negative_snan_to_qnan) {
  // Sweep all negative signaling-NaN bit patterns; the kernel must produce
  // exactly the same (quieted) NaN bits as std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the infinity encoding, then set the sign bit.
      in[k] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__sse41(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
823 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
824
825 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEON_ADDSUB, positive_zero) {
  // Verifies the kernel matches std::ceil on +0.0f (including the sign bit).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fill with the float value directly instead of an integer literal: filling
  // a vector<float> with UINT32_C(0x00000000) relied on an implicit
  // int->float conversion that only coincidentally produced +0.0f.
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
837
TEST(ROUNDU__NEON_ADDSUB, negative_zero) {
  // Verifies the kernel matches std::ceil on -0.0f (sign of zero preserved).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: std::fill(..., UINT32_C(0x80000000)) implicitly converted the
  // integer 2^31 to the float 2147483648.0f rather than producing the bit
  // pattern of negative zero, so the test never exercised -0.0f. Fill with
  // -0.0f directly.
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
849
TEST(ROUNDU__NEON_ADDSUB, positive_subnormal) {
  // Sweep every positive subnormal bit pattern; the kernel must agree with
  // std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x00800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the zero encoding so every element is a subnormal.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
867
TEST(ROUNDU__NEON_ADDSUB, negative_subnormal) {
  // Sweep every negative subnormal bit pattern; the kernel must agree with
  // std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0x80800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the negative-zero encoding so every element is a subnormal.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
885
TEST(ROUNDU__NEON_ADDSUB, positive_normal) {
  // Sweep positive normal values below 2**24 (where rounding is non-trivial);
  // the kernel must agree with std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x00800000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
903
TEST(ROUNDU__NEON_ADDSUB, negative_normal) {
  // Sweep negative normal values above -2**24 (where rounding is non-trivial);
  // the kernel must agree with std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x80800000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
921
TEST(ROUNDU__NEON_ADDSUB, positive_integral) {
  // Sweep large positive values (>= 2**24) that are already integral; the
  // kernel must return them unchanged, matching std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
939
TEST(ROUNDU__NEON_ADDSUB, negative_integral) {
  // Sweep large negative values (<= -2**24) that are already integral; the
  // kernel must return them unchanged, matching std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
957
TEST(ROUNDU__NEON_ADDSUB, positive_infinity) {
  // +inf must pass through the kernel unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  std::fill(in.begin(), in.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
  const uint32_t expected_bits = float_as_uint32(std::ceil(in[0]));
  const uint32_t observed_bits = float_as_uint32(out[0]);
  ASSERT_EQ(expected_bits, observed_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
}
969
TEST(ROUNDU__NEON_ADDSUB, negative_infinity) {
  // -inf must pass through the kernel unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  std::fill(in.begin(), in.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
  const uint32_t expected_bits = float_as_uint32(std::ceil(in[0]));
  const uint32_t observed_bits = float_as_uint32(out[0]);
  ASSERT_EQ(expected_bits, observed_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
}
981
TEST(ROUNDU__NEON_ADDSUB, positive_qnan) {
  // Sweep all positive quiet-NaN bit patterns; the kernel must propagate the
  // exact NaN payload that std::ceil produces.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(block + k);
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
999
TEST(ROUNDU__NEON_ADDSUB, negative_qnan) {
  // Sweep all negative quiet-NaN bit patterns; the kernel must propagate the
  // exact NaN payload that std::ceil produces.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      in[k] = uint32_as_float(UINT32_C(0x80000000) | (block + k));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
1017
TEST(ROUNDU__NEON_ADDSUB, positive_snan) {
  // Sweep all positive signaling-NaN bit patterns. The comparison masks out
  // the quiet bit (0x00400000), so only the payload and sign must match.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away 0x7F800000 (+inf) so every element is a genuine sNaN.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits & kIgnoreQuietBit, observed_bits & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
1035
TEST(ROUNDU__NEON_ADDSUB, negative_snan) {
  // Sweep all negative signaling-NaN bit patterns. The comparison masks out
  // the quiet bit (0x00400000), so only the payload and sign must match.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the infinity encoding, then set the sign bit.
      in[k] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits & kIgnoreQuietBit, observed_bits & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
1053
TEST(ROUNDU__NEON_ADDSUB, positive_snan_to_qnan) {
  // Sweep all positive signaling-NaN bit patterns; the kernel must produce
  // exactly the same (quieted) NaN bits as std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away 0x7F800000 (+inf) so every element is a genuine sNaN.
      in[k] = uint32_as_float(std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
1071
TEST(ROUNDU__NEON_ADDSUB, negative_snan_to_qnan) {
  // Sweep all negative signaling-NaN bit patterns; the kernel must produce
  // exactly the same (quieted) NaN bits as std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> in(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> out(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t k = 0; k < kBlockSize; k++) {
      // Clamp away the infinity encoding, then set the sign bit.
      in[k] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + k, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_addsub(kBlockSize * sizeof(float), in.data(), out.data());
    for (uint32_t k = 0; k < kBlockSize; k++) {
      const uint32_t expected_bits = float_as_uint32(std::ceil(in[k]));
      const uint32_t observed_bits = float_as_uint32(out[k]);
      ASSERT_EQ(expected_bits, observed_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(in[k])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected_bits
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed_bits;
    }
  }
}
1089 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1090
1091 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEON_CVT, positive_zero) {
  // Checks that xnn_math_f32_roundu__neon_cvt maps +0.0f to the same result
  // as the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: fill with an explicit bit pattern instead of a raw integer literal.
  // The old code relied on the implicit uint32_t -> float conversion of
  // UINT32_C(0x00000000); that happens to yield +0.0f here, but the intent
  // (exact bit pattern 0x00000000) should not depend on a conversion.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1103
TEST(ROUNDU__NEON_CVT, negative_zero) {
  // Checks that xnn_math_f32_roundu__neon_cvt maps -0.0f to the same result
  // as the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: the old code passed the raw integer UINT32_C(0x80000000) to
  // std::fill on a vector<float>, which implicitly converts it to the float
  // 2147483648.0f rather than -0.0f — the test never actually exercised
  // negative zero. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1115
TEST(ROUNDU__NEON_CVT, positive_subnormal) {
  // Exhaustively checks every positive subnormal input against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so index 0 of the first chunk is the smallest subnormal, not +0.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1133
TEST(ROUNDU__NEON_CVT, negative_subnormal) {
  // Exhaustively checks every negative subnormal input against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so index 0 of the first chunk is a subnormal, not -0 (0x80000000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1151
TEST(ROUNDU__NEON_CVT, positive_normal) {
  // Checks all positive normal inputs below 2**24 (where rounding is
  // non-trivial) against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1169
TEST(ROUNDU__NEON_CVT, negative_normal) {
  // Checks all negative normal inputs with magnitude below 2**24 against
  // std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1187
TEST(ROUNDU__NEON_CVT, positive_integral) {
  // Checks positive values that are already integral (>= 2**24): rounding up
  // must leave them unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1205
TEST(ROUNDU__NEON_CVT, negative_integral) {
  // Checks negative values that are already integral (magnitude >= 2**24):
  // rounding up must leave them unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1223
TEST(ROUNDU__NEON_CVT, positive_infinity) {
  // +infinity must round to the same value std::ceil produces (+infinity).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1235
TEST(ROUNDU__NEON_CVT, negative_infinity) {
  // -infinity must round to the same value std::ceil produces (-infinity).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1247
TEST(ROUNDU__NEON_CVT, positive_qnan) {
  // Checks every positive quiet-NaN bit pattern for bit-exact agreement with
  // the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1265
TEST(ROUNDU__NEON_CVT, negative_qnan) {
  // Checks every negative quiet-NaN bit pattern (positive qNaN range with the
  // sign bit set) for bit-exact agreement with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (base + idx));
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1283
TEST(ROUNDU__NEON_CVT, positive_snan) {
  // Checks positive signaling NaNs. The comparison masks out bit 22 (the NaN
  // quiet bit, 0x00400000) so both quieted and non-quieted results pass.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so the very first entry is a NaN, not +infinity (0x7F800000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1301
TEST(ROUNDU__NEON_CVT, negative_snan) {
  // Checks negative signaling NaNs; comparison ignores bit 22 (the NaN quiet
  // bit) so both quieted and non-quieted results pass.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity), then set the sign bit.
      const uint32_t magnitude = std::max<uint32_t>(base + idx, UINT32_C(0x7F800001));
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | magnitude);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1319
TEST(ROUNDU__NEON_CVT, positive_snan_to_qnan) {
  // Feeds every positive signaling-NaN bit pattern and requires bit-exact
  // agreement with the std::ceil reference (including the quiet bit).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so the very first entry is a NaN, not +infinity (0x7F800000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1337
TEST(ROUNDU__NEON_CVT, negative_snan_to_qnan) {
  // Same as the positive sNaN-to-qNaN test, but with the sign bit forced on.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity), then set the sign bit.
      const uint32_t magnitude = std::max<uint32_t>(base + idx, UINT32_C(0x7F800001));
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | magnitude);
    }
    xnn_math_f32_roundu__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1355 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1356
1357 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDU__NEONV8, positive_zero) {
  // Checks that xnn_math_f32_roundu__neonv8 maps +0.0f to the same result
  // as the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: fill with an explicit bit pattern instead of a raw integer literal.
  // The old code relied on the implicit uint32_t -> float conversion of
  // UINT32_C(0x00000000); that happens to yield +0.0f here, but the intent
  // (exact bit pattern 0x00000000) should not depend on a conversion.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1369
TEST(ROUNDU__NEONV8, negative_zero) {
  // Checks that xnn_math_f32_roundu__neonv8 maps -0.0f to the same result
  // as the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: the old code passed the raw integer UINT32_C(0x80000000) to
  // std::fill on a vector<float>, which implicitly converts it to the float
  // 2147483648.0f rather than -0.0f — the test never actually exercised
  // negative zero. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1381
TEST(ROUNDU__NEONV8, positive_subnormal) {
  // Exhaustively checks every positive subnormal input against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x00000000); base < UINT32_C(0x00800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so index 0 of the first chunk is the smallest subnormal, not +0.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1399
TEST(ROUNDU__NEONV8, negative_subnormal) {
  // Exhaustively checks every negative subnormal input against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x80000000); base < UINT32_C(0x80800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so index 0 of the first chunk is a subnormal, not -0 (0x80000000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1417
TEST(ROUNDU__NEONV8, positive_normal) {
  // Checks all positive normal inputs below 2**24 (where rounding is
  // non-trivial) against std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x00800000); base < UINT32_C(0x4B800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1435
TEST(ROUNDU__NEONV8, negative_normal) {
  // Checks all negative normal inputs with magnitude below 2**24 against
  // std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x80800000); base < UINT32_C(0xCB800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1453
TEST(ROUNDU__NEONV8, positive_integral) {
  // Checks positive values that are already integral (>= 2**24): rounding up
  // must leave them unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x4B800000); base < UINT32_C(0x7F800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1471
TEST(ROUNDU__NEONV8, negative_integral) {
  // Checks negative values that are already integral (magnitude >= 2**24):
  // rounding up must leave them unchanged, matching std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0xCB800000); base < UINT32_C(0xFF800000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1489
TEST(ROUNDU__NEONV8, positive_infinity) {
  // +infinity must round to the same value std::ceil produces (+infinity).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1501
TEST(ROUNDU__NEONV8, negative_infinity) {
  // -infinity must round to the same value std::ceil produces (-infinity).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
1513
TEST(ROUNDU__NEONV8, positive_qnan) {
  // Checks every positive quiet-NaN bit pattern for bit-exact agreement with
  // the std::ceil reference.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(base + idx);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1531
TEST(ROUNDU__NEONV8, negative_qnan) {
  // Checks every negative quiet-NaN bit pattern (positive qNaN range with the
  // sign bit set) for bit-exact agreement with std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7FC00000); base < UINT32_C(0x80000000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (base + idx));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1549
TEST(ROUNDU__NEONV8, positive_snan) {
  // Checks positive signaling NaNs. The comparison masks out bit 22 (the NaN
  // quiet bit, 0x00400000) so both quieted and non-quieted results pass.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so the very first entry is a NaN, not +infinity (0x7F800000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1567
TEST(ROUNDU__NEONV8, negative_snan) {
  // Checks negative signaling NaNs; comparison ignores bit 22 (the NaN quiet
  // bit) so both quieted and non-quieted results pass.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity), then set the sign bit.
      const uint32_t magnitude = std::max<uint32_t>(base + idx, UINT32_C(0x7F800001));
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | magnitude);
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), observed & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1585
TEST(ROUNDU__NEONV8, positive_snan_to_qnan) {
  // Feeds every positive signaling-NaN bit pattern and requires bit-exact
  // agreement with the std::ceil reference (including the quiet bit).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t base = UINT32_C(0x7F800000); base < UINT32_C(0x7FC00000); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp so the very first entry is a NaN, not +infinity (0x7F800000).
      inputs[idx] = uint32_as_float(std::max<uint32_t>(base + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
1603
// Negative signaling-NaN inputs compared bit-exactly against std::ceil of the
// same value, so the kernel must quiet the NaN exactly like the reference does.
TEST(ROUNDU__NEONV8, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) to an sNaN pattern, then set the sign bit.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1621 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1622
1623 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Verifies that the kernel maps +0.0f (bit pattern 0x00000000) to itself.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Reinterpret the bit pattern explicitly: filling with the bare integer
  // literal relied on an implicit uint32_t -> float value conversion.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1635
// Verifies that the kernel preserves -0.0f (bit pattern 0x80000000).
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // BUG FIX: std::fill with the bare integer literal implicitly converted
  // 0x80000000 to +2147483648.0f, so the test never exercised negative zero.
  // Reinterpret the bit pattern instead to actually produce -0.0f.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1647
// Exhaustively checks every positive subnormal bit pattern against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x00000000 (positive zero), which has its own test.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1665
// Exhaustively checks every negative subnormal bit pattern against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x80000000 (negative zero), which has its own test.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1683
// Exhaustively checks positive normal values below 2**24 (where rounding is
// non-trivial) against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1701
// Exhaustively checks negative normal values above -2**24 (where rounding is
// non-trivial) against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1719
// Large positive finite values (already integral in float) must pass through
// unchanged; verified bit-exactly against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1737
// Large negative finite values (already integral in float) must pass through
// unchanged; verified bit-exactly against std::ceil.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1755
// +infinity must round to itself, bit for bit.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float value = +std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), value);
  xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1767
// -infinity must round to itself, bit for bit.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float value = -std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), value);
  xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1779
// Every positive quiet-NaN bit pattern must be reproduced bit-exactly,
// matching std::ceil of the same value.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1797
// Every negative quiet-NaN bit pattern must be reproduced bit-exactly,
// matching std::ceil of the same value.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Same qNaN payloads as the positive test, with the sign bit set.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1815
// Positive signaling-NaN inputs: the kernel may set the quiet bit, so both
// sides of the comparison are masked with bit 22 cleared.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) so every input is a signaling NaN.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output & kIgnoreQuietBit, float_as_uint32(outputs[idx]) & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1833
// Negative signaling-NaN inputs: the kernel may set the quiet bit, so both
// sides of the comparison are masked with bit 22 cleared.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) to an sNaN pattern, then set the sign bit.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output & kIgnoreQuietBit, float_as_uint32(outputs[idx]) & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1851
// Positive signaling-NaN inputs compared bit-exactly against std::ceil of the
// same value, so the kernel must quiet the NaN exactly like the reference does.
TEST(ROUNDU__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) so every input is a signaling NaN.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1869
// Negative signaling-NaN inputs compared bit-exactly against std::ceil of the
// same value, so the kernel must quiet the NaN exactly like the reference does.
TEST(ROUNDU__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) to an sNaN pattern, then set the sign bit.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1887 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1888
1889 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Verifies that the kernel maps +0.0f (bit pattern 0x00000000) to itself.
TEST(ROUNDU__WASMSIMD_CVT, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Reinterpret the bit pattern explicitly: filling with the bare integer
  // literal relied on an implicit uint32_t -> float value conversion.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1901
// Verifies that the kernel preserves -0.0f (bit pattern 0x80000000).
TEST(ROUNDU__WASMSIMD_CVT, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // BUG FIX: std::fill with the bare integer literal implicitly converted
  // 0x80000000 to +2147483648.0f, so the test never exercised negative zero.
  // Reinterpret the bit pattern instead to actually produce -0.0f.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
1913
// Exhaustively checks every positive subnormal bit pattern against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x00000000 (positive zero), which has its own test.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1931
// Exhaustively checks every negative subnormal bit pattern against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp away 0x80000000 (negative zero), which has its own test.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1949
// Exhaustively checks positive normal values below 2**24 (where rounding is
// non-trivial) against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1967
// Exhaustively checks negative normal values above -2**24 (where rounding is
// non-trivial) against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1985
// Large positive finite values (already integral in float) must pass through
// unchanged; verified bit-exactly against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2003
// Large negative finite values (already integral in float) must pass through
// unchanged; verified bit-exactly against std::ceil.
TEST(ROUNDU__WASMSIMD_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2021
// +infinity must round to itself, bit for bit.
TEST(ROUNDU__WASMSIMD_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float value = +std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), value);
  xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2033
// -infinity must round to itself, bit for bit.
TEST(ROUNDU__WASMSIMD_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float value = -std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), value);
  xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2045
// Every positive quiet-NaN bit pattern must be reproduced bit-exactly,
// matching std::ceil of the same value.
TEST(ROUNDU__WASMSIMD_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2063
// Every negative quiet-NaN bit pattern must be reproduced bit-exactly,
// matching std::ceil of the same value.
TEST(ROUNDU__WASMSIMD_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Same qNaN payloads as the positive test, with the sign bit set.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2081
// Positive signaling-NaN inputs: the kernel may set the quiet bit, so both
// sides of the comparison are masked with bit 22 cleared.
TEST(ROUNDU__WASMSIMD_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) so every input is a signaling NaN.
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output & kIgnoreQuietBit, float_as_uint32(outputs[idx]) & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2099
// Negative signaling-NaN inputs: the kernel may set the quiet bit, so both
// sides of the comparison are masked with bit 22 cleared.
TEST(ROUNDU__WASMSIMD_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // Clamp past 0x7F800000 (infinity) to an sNaN pattern, then set the sign bit.
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(reference_output & kIgnoreQuietBit, float_as_uint32(outputs[idx]) & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2117
TEST(ROUNDU__WASMSIMD_CVT, positive_snan_to_qnan) {
  // Positive signaling NaN inputs must be quieted exactly like std::ceil
  // quiets them: here the comparison is exact, quiet bit included.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2135
TEST(ROUNDU__WASMSIMD_CVT, negative_snan_to_qnan) {
  // Negative signaling NaN inputs must be quieted exactly like std::ceil
  // quiets them: the comparison is exact, quiet bit included.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2153 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2154
2155 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDU__WASMSIMD_NATIVE, positive_zero) {
  // Verify that rounding up preserves +0.0f (bit pattern 0x00000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: the integer literal was previously implicitly converted to float by
  // VALUE (0 -> 0.0f, correct only by accident). Reinterpret the IEEE-754 bit
  // pattern explicitly so the intent is unambiguous and consistent with the
  // other bit-pattern-driven tests in this file.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2167
TEST(ROUNDU__WASMSIMD_NATIVE, negative_zero) {
  // Verify that rounding up preserves -0.0f (bit pattern 0x80000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: std::fill with UINT32_C(0x80000000) implicitly converted the
  // integer to float by VALUE (2147483648.0f), so the test never actually
  // exercised negative zero. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2179
TEST(ROUNDU__WASMSIMD_NATIVE, positive_subnormal) {
  // Sweep all positive subnormal bit patterns (clamped away from +0).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x00800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2197
TEST(ROUNDU__WASMSIMD_NATIVE, negative_subnormal) {
  // Sweep all negative subnormal bit patterns (clamped away from -0).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0x80800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2215
TEST(ROUNDU__WASMSIMD_NATIVE, positive_normal) {
  // Sweep positive normal values below 2**24 (where rounding is non-trivial).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00800000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2233
TEST(ROUNDU__WASMSIMD_NATIVE, negative_normal) {
  // Sweep negative normal values above -2**24 (where rounding is non-trivial).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80800000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2251
TEST(ROUNDU__WASMSIMD_NATIVE, positive_integral) {
  // Values >= 2**24 are already integral; rounding up must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2269
TEST(ROUNDU__WASMSIMD_NATIVE, negative_integral) {
  // Values <= -2**24 are already integral; rounding up must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2287
TEST(ROUNDU__WASMSIMD_NATIVE, positive_infinity) {
  // Round-up of +inf must stay +inf, bit-for-bit identical to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float input_value = +std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), input_value);
  xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2299
TEST(ROUNDU__WASMSIMD_NATIVE, negative_infinity) {
  // Round-up of -inf must stay -inf, bit-for-bit identical to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float input_value = -std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), input_value);
  xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2311
TEST(ROUNDU__WASMSIMD_NATIVE, positive_qnan) {
  // Sweep all positive quiet NaN payloads; output must match std::ceil exactly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2329
TEST(ROUNDU__WASMSIMD_NATIVE, negative_qnan) {
  // Sweep all negative quiet NaN payloads; output must match std::ceil exactly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block + idx));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2347
TEST(ROUNDU__WASMSIMD_NATIVE, positive_snan) {
  // Sweep all positive signaling NaNs; the quiet bit (0x00400000) is masked
  // out of the comparison because rounding may quiet the NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2365
TEST(ROUNDU__WASMSIMD_NATIVE, negative_snan) {
  // Sweep all negative signaling NaNs (sign bit ORed in); ignore the quiet
  // bit (0x00400000) in the comparison since rounding may quiet the NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2383
TEST(ROUNDU__WASMSIMD_NATIVE, positive_snan_to_qnan) {
  // Positive signaling NaN inputs must be quieted exactly like std::ceil
  // quiets them: the comparison is exact, quiet bit included.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2401
TEST(ROUNDU__WASMSIMD_NATIVE, negative_snan_to_qnan) {
  // Negative signaling NaN inputs must be quieted exactly like std::ceil
  // quiets them: the comparison is exact, quiet bit included.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2419 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2420
TEST(ROUNDU__SCALAR_ADDSUB, positive_zero) {
  // Verify that rounding up preserves +0.0f (bit pattern 0x00000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: the integer literal was previously implicitly converted to float by
  // VALUE (0 -> 0.0f, correct only by accident). Reinterpret the IEEE-754 bit
  // pattern explicitly so the intent is unambiguous and consistent with the
  // other bit-pattern-driven tests in this file.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x00000000)));
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2432
TEST(ROUNDU__SCALAR_ADDSUB, negative_zero) {
  // Verify that rounding up preserves -0.0f (bit pattern 0x80000000).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Bug fix: std::fill with UINT32_C(0x80000000) implicitly converted the
  // integer to float by VALUE (2147483648.0f), so the test never actually
  // exercised negative zero. Reinterpret the bit pattern instead.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2444
TEST(ROUNDU__SCALAR_ADDSUB, positive_subnormal) {
  // Sweep all positive subnormal bit patterns (clamped away from +0).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00000000); block < UINT32_C(0x00800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2462
TEST(ROUNDU__SCALAR_ADDSUB, negative_subnormal) {
  // Sweep all negative subnormal bit patterns (clamped away from -0).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80000000); block < UINT32_C(0x80800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2480
TEST(ROUNDU__SCALAR_ADDSUB, positive_normal) {
  // Sweep positive normal values below 2**24 (where rounding is non-trivial).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x00800000); block < UINT32_C(0x4B800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2498
TEST(ROUNDU__SCALAR_ADDSUB, negative_normal) {
  // Sweep negative normal values above -2**24 (where rounding is non-trivial).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x80800000); block < UINT32_C(0xCB800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2516
TEST(ROUNDU__SCALAR_ADDSUB, positive_integral) {
  // Values >= 2**24 are already integral; rounding up must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x4B800000); block < UINT32_C(0x7F800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2534
TEST(ROUNDU__SCALAR_ADDSUB, negative_integral) {
  // Values <= -2**24 are already integral; rounding up must be the identity.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0xCB800000); block < UINT32_C(0xFF800000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2552
TEST(ROUNDU__SCALAR_ADDSUB, positive_infinity) {
  // Round-up of +inf must stay +inf, bit-for-bit identical to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float input_value = +std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), input_value);
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2564
TEST(ROUNDU__SCALAR_ADDSUB, negative_infinity) {
  // Round-up of -inf must stay -inf, bit-for-bit identical to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  const float input_value = -std::numeric_limits<float>::infinity();
  std::fill(inputs.begin(), inputs.end(), input_value);
  xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(expected, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2576
TEST(ROUNDU__SCALAR_ADDSUB, positive_qnan) {
  // Sweep all positive quiet NaN payloads; output must match std::ceil exactly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block + idx);
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2594
TEST(ROUNDU__SCALAR_ADDSUB, negative_qnan) {
  // Sweep all negative quiet NaN payloads; output must match std::ceil exactly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7FC00000); block < UINT32_C(0x80000000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block + idx));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected, float_as_uint32(outputs[idx]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2612
TEST(ROUNDU__SCALAR_ADDSUB, positive_snan) {
  // Sweep all positive signaling NaNs; the quiet bit (0x00400000) is masked
  // out of the comparison because rounding may quiet the NaN.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block = UINT32_C(0x7F800000); block < UINT32_C(0x7FC00000); block += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      ASSERT_EQ(expected & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
2630
TEST(ROUNDU__SCALAR_ADDSUB, negative_snan) {
  // Same sweep as positive_snan but with the sign bit set; the quiet bit is
  // ignored in the comparison.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)) | UINT32_C(0x80000000));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & kIgnoreQuietBit, observed & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2648
TEST(ROUNDU__SCALAR_ADDSUB, positive_snan_to_qnan) {
  // Sweep positive signaling-NaN inputs and require *exact* bit equality with
  // std::ceil — i.e. the kernel is expected to quiet an sNaN the same way the
  // reference does (no quiet-bit masking here, unlike positive_snan).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2666
TEST(ROUNDU__SCALAR_ADDSUB, negative_snan_to_qnan) {
  // Negative-sign counterpart of positive_snan_to_qnan: exact bit equality
  // with std::ceil is required, including the quiet bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)) | UINT32_C(0x80000000));
    }
    xnn_math_f32_roundu__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2684
TEST(ROUNDU__SCALAR_CVT, positive_zero) {
  // Round-up of +0.0f must return +0.0f bit-exactly (matches std::ceil).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Fix: fill with the float value +0.0f directly. The previous
  // std::fill(..., UINT32_C(0x00000000)) relied on an implicit uint32->float
  // *value* conversion rather than a bit reinterpretation — benign for +0,
  // but the wrong idiom for installing a bit pattern (and actively wrong in
  // the sibling negative_zero test).
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2696
TEST(ROUNDU__SCALAR_CVT, negative_zero) {
  // Round-up of -0.0f must return -0.0f bit-exactly (matches std::ceil, which
  // preserves the sign of zero).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // BUG FIX: std::fill(..., UINT32_C(0x80000000)) converted the integer
  // 2147483648 to the float 2147483648.0f by *value*, so the test exercised
  // +2^31 instead of negative zero. Reinterpret the bit pattern instead so
  // the inputs really are -0.0f.
  std::fill(inputs.begin(), inputs.end(), uint32_as_float(UINT32_C(0x80000000)));
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::ceil(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
2708
TEST(ROUNDU__SCALAR_CVT, positive_subnormal) {
  // Sweep all positive subnormal bit patterns (clamped to exclude +0); output
  // must match std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x00800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x00000001)));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2726
TEST(ROUNDU__SCALAR_CVT, negative_subnormal) {
  // Sweep all negative subnormal bit patterns (clamped to exclude -0); output
  // must match std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0x80800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x80000001)));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2744
TEST(ROUNDU__SCALAR_CVT, positive_normal) {
  // Sweep positive normal values below 2^24 (0x4B800000), where rounding can
  // still change the value; output must match std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x00800000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2762
TEST(ROUNDU__SCALAR_CVT, negative_normal) {
  // Negative counterpart of positive_normal: sweep negative normals whose
  // magnitude is below 2^24; output must match std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x80800000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2780
TEST(ROUNDU__SCALAR_CVT, positive_integral) {
  // Sweep positive values from 2^24 (0x4B800000) up to +Inf — all already
  // integral, so rounding must return them unchanged (bit-equal to std::ceil).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2798
TEST(ROUNDU__SCALAR_CVT, negative_integral) {
  // Negative counterpart of positive_integral: values of magnitude >= 2^24
  // are already integral and must pass through unchanged.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2816
TEST(ROUNDU__SCALAR_CVT, positive_infinity) {
  // +Inf must round to +Inf, bit-equal to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
2828
TEST(ROUNDU__SCALAR_CVT, negative_infinity) {
  // -Inf must round to -Inf, bit-equal to std::ceil.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t expected = float_as_uint32(std::ceil(inputs[0]));
  const uint32_t observed = float_as_uint32(outputs[0]);
  ASSERT_EQ(expected, observed)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
}
2840
TEST(ROUNDU__SCALAR_CVT, positive_qnan) {
  // Sweep every positive quiet-NaN bit pattern; output must match std::ceil
  // bit-for-bit (NaN payload preserved).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2858
TEST(ROUNDU__SCALAR_CVT, negative_qnan) {
  // Sweep every quiet-NaN bit pattern with the sign bit forced on; output
  // must match std::ceil bit-for-bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float((block_start + idx) | UINT32_C(0x80000000));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2876
TEST(ROUNDU__SCALAR_CVT, positive_snan) {
  // Sweep positive signaling-NaN bit patterns (clamped above +Inf); the quiet
  // bit is masked out on both sides, so quieting behavior is not required.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & kIgnoreQuietBit, observed & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2894
TEST(ROUNDU__SCALAR_CVT, negative_snan) {
  // Same sweep as positive_snan but with the sign bit set; the quiet bit is
  // ignored in the comparison.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  constexpr uint32_t kIgnoreQuietBit = UINT32_C(0xFFBFFFFF);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)) | UINT32_C(0x80000000));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected & kIgnoreQuietBit, observed & kIgnoreQuietBit)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2912
TEST(ROUNDU__SCALAR_CVT, positive_snan_to_qnan) {
  // Sweep positive signaling-NaN inputs and require *exact* bit equality with
  // std::ceil — the kernel is expected to quiet an sNaN the same way the
  // reference does (no quiet-bit masking, unlike positive_snan).
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2930
TEST(ROUNDU__SCALAR_CVT, negative_snan_to_qnan) {
  // Negative-sign counterpart of positive_snan_to_qnan: exact bit equality
  // with std::ceil is required, including the quiet bit.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)) | UINT32_C(0x80000000));
    }
    xnn_math_f32_roundu__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t expected = float_as_uint32(std::ceil(inputs[idx]));
      const uint32_t observed = float_as_uint32(outputs[idx]);
      ASSERT_EQ(expected, observed)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << expected
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << observed;
    }
  }
}
2948