// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>

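// Each test below sweeps a range of IEEE half-precision bit patterns in blocks of
// kBlockSize elements, converts every block with the kernel under test, and compares
// each result bit-exactly against the fp16_ieee_to_fp32_value() reference conversion.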
constexpr int kBlockSize = 1024;

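// Half-precision (binary16) bit-pattern ranges exercised by the test cases:
//   0x0000 / 0x8000                  +0 / -0
//   0x0001-0x03FF, 0x8001-0x83FF     subnormals
//   0x0400-0x7BFF, 0x8400-0xFBFF     normals
//   0x7C00 / 0xFC00                  +infinity / -infinity
//   0x7C01-0x7FFF, 0xFC01-0xFFFF     NaNs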
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(CVT__SSE2_INT16, positive_normal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT16, negative_normal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT16, positive_zero) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
    xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT16, negative_zero) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
    xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT16, positive_subnormal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT16, negative_subnormal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT16, positive_infinity) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT16, negative_infinity) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT16, positive_nan) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT16, negative_nan) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(CVT__SSE2_INT32, positive_normal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT32, negative_normal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT32, positive_zero) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
    xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT32, negative_zero) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
    xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT32, positive_subnormal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT32, negative_subnormal) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT32, positive_infinity) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT32, negative_infinity) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE2_INT32, positive_nan) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE2_INT32, negative_nan) {
    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(CVT__SSE41_INT16, positive_normal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT16, negative_normal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT16, positive_zero) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
    xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT16, negative_zero) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
    xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT16, positive_subnormal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT16, negative_subnormal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT16, positive_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT16, negative_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT16, positive_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT16, negative_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(CVT__SSE41_INT32, positive_normal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT32, negative_normal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT32, positive_zero) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT32, negative_zero) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT32, positive_subnormal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT32, negative_subnormal) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT32, positive_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT32, negative_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__SSE41_INT32, positive_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__SSE41_INT32, negative_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(CVT__F16C, positive_normal) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__F16C, negative_normal) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = n + i;
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__F16C, positive_zero) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__F16C, negative_zero) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__F16C, positive_subnormal) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__F16C, negative_subnormal) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__F16C, positive_infinity) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__F16C, negative_infinity) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(CVT__F16C, positive_nan) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(CVT__F16C, negative_nan) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

878 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(CVT__NEON_INT16,positive_normal)879   TEST(CVT__NEON_INT16, positive_normal) {
880     TEST_REQUIRES_ARM_NEON;
881 
882     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
883     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
884     for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
885       for (uint16_t i = 0; i < kBlockSize; i++) {
886         inputs[i] = n + i;
887       }
888       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
889       for (uint32_t i = 0; i < kBlockSize; i++) {
890         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
891         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
892           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
893           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
894           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
895       }
896     }
897   }
898 
TEST(CVT__NEON_INT16,negative_normal)899   TEST(CVT__NEON_INT16, negative_normal) {
900     TEST_REQUIRES_ARM_NEON;
901 
902     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
903     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
904     for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
905       for (uint16_t i = 0; i < kBlockSize; i++) {
906         inputs[i] = n + i;
907       }
908       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
909       for (uint32_t i = 0; i < kBlockSize; i++) {
910         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
911         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
912           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
913           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
914           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
915       }
916     }
917   }
918 
TEST(CVT__NEON_INT16,positive_zero)919   TEST(CVT__NEON_INT16, positive_zero) {
920     TEST_REQUIRES_ARM_NEON;
921 
922     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
923     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
924     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
925     xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
926     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
927     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
928       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
929       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
930       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
931   }
932 
933   TEST(CVT__NEON_INT16, negative_zero) {
934     TEST_REQUIRES_ARM_NEON;
935 
936     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
937     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
938     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
939     xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
941     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
942       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
943       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
945   }
946 
947   TEST(CVT__NEON_INT16, positive_subnormal) {
948     TEST_REQUIRES_ARM_NEON;
949 
950     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
951     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
952     for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
953       for (uint16_t i = 0; i < kBlockSize; i++) {
954         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
955       }
956       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
957       for (uint32_t i = 0; i < kBlockSize; i++) {
958         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
959         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
960           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
961           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
962           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
963       }
964     }
965   }
966 
967   TEST(CVT__NEON_INT16, negative_subnormal) {
968     TEST_REQUIRES_ARM_NEON;
969 
970     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
971     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
972     for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
973       for (uint16_t i = 0; i < kBlockSize; i++) {
974         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
975       }
976       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
977       for (uint32_t i = 0; i < kBlockSize; i++) {
978         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
979         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
980           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
981           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
982           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
983       }
984     }
985   }
986 
987   TEST(CVT__NEON_INT16, positive_infinity) {
988     TEST_REQUIRES_ARM_NEON;
989 
990     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
991     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
992     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
993     xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
994     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
995     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
996       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
997       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
998       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
999   }
1000 
1001   TEST(CVT__NEON_INT16, negative_infinity) {
1002     TEST_REQUIRES_ARM_NEON;
1003 
1004     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1005     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1006     std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
1007     xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1008     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1009     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1010       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1011       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1012       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1013   }
1014 
1015   TEST(CVT__NEON_INT16, positive_nan) {
1016     TEST_REQUIRES_ARM_NEON;
1017 
1018     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1019     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1020     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1021       for (uint16_t i = 0; i < kBlockSize; i++) {
1022         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
1023       }
1024       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1025       for (uint32_t i = 0; i < kBlockSize; i++) {
1026         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1027         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1028           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1029           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1030           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1031       }
1032     }
1033   }
1034 
1035   TEST(CVT__NEON_INT16, negative_nan) {
1036     TEST_REQUIRES_ARM_NEON;
1037 
1038     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1039     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1041       for (uint16_t i = 0; i < kBlockSize; i++) {
1042         inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
1043       }
1044       xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045       for (uint32_t i = 0; i < kBlockSize; i++) {
1046         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1047         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1048           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1049           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1051       }
1052     }
1053   }
1054 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055 
1056 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
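  // Tests for xnn_math_f16_f32_cvt__neon_int32 (NEON int32 variant); the same
  // fp16 input classes as above are validated against fp16_ieee_to_fp32_value().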
1057   TEST(CVT__NEON_INT32, positive_normal) {
1058     TEST_REQUIRES_ARM_NEON;
1059 
1060     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1061     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1062     for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
1063       for (uint16_t i = 0; i < kBlockSize; i++) {
1064         inputs[i] = n + i;
1065       }
1066       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1067       for (uint32_t i = 0; i < kBlockSize; i++) {
1068         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1069         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1070           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1071           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1072           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1073       }
1074     }
1075   }
1076 
1077   TEST(CVT__NEON_INT32, negative_normal) {
1078     TEST_REQUIRES_ARM_NEON;
1079 
1080     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1081     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1082     for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
1083       for (uint16_t i = 0; i < kBlockSize; i++) {
1084         inputs[i] = n + i;
1085       }
1086       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1087       for (uint32_t i = 0; i < kBlockSize; i++) {
1088         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1089         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1090           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1091           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1092           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1093       }
1094     }
1095   }
1096 
1097   TEST(CVT__NEON_INT32, positive_zero) {
1098     TEST_REQUIRES_ARM_NEON;
1099 
1100     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1101     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1102     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
1103     xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1104     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1105     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1106       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1107       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1108       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1109   }
1110 
1111   TEST(CVT__NEON_INT32, negative_zero) {
1112     TEST_REQUIRES_ARM_NEON;
1113 
1114     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1115     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1116     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
1117     xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1118     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1119     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1120       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1121       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1122       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1123   }
1124 
1125   TEST(CVT__NEON_INT32, positive_subnormal) {
1126     TEST_REQUIRES_ARM_NEON;
1127 
1128     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1129     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1130     for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
1131       for (uint16_t i = 0; i < kBlockSize; i++) {
1132         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
1133       }
1134       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1135       for (uint32_t i = 0; i < kBlockSize; i++) {
1136         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1137         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1138           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1139           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1140           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1141       }
1142     }
1143   }
1144 
1145   TEST(CVT__NEON_INT32, negative_subnormal) {
1146     TEST_REQUIRES_ARM_NEON;
1147 
1148     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1149     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1150     for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
1151       for (uint16_t i = 0; i < kBlockSize; i++) {
1152         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
1153       }
1154       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1155       for (uint32_t i = 0; i < kBlockSize; i++) {
1156         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1157         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1158           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1159           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1160           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1161       }
1162     }
1163   }
1164 
1165   TEST(CVT__NEON_INT32, positive_infinity) {
1166     TEST_REQUIRES_ARM_NEON;
1167 
1168     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1169     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1170     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
1171     xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1172     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1173     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1174       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1175       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1176       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1177   }
1178 
1179   TEST(CVT__NEON_INT32, negative_infinity) {
1180     TEST_REQUIRES_ARM_NEON;
1181 
1182     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1183     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1184     std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
1185     xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1186     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1187     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1188       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1189       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1190       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1191   }
1192 
1193   TEST(CVT__NEON_INT32, positive_nan) {
1194     TEST_REQUIRES_ARM_NEON;
1195 
1196     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1197     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1198     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1199       for (uint16_t i = 0; i < kBlockSize; i++) {
1200         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
1201       }
1202       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1203       for (uint32_t i = 0; i < kBlockSize; i++) {
1204         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1205         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1206           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1207           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1208           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1209       }
1210     }
1211   }
1212 
1213   TEST(CVT__NEON_INT32, negative_nan) {
1214     TEST_REQUIRES_ARM_NEON;
1215 
1216     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1217     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1218     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1219       for (uint16_t i = 0; i < kBlockSize; i++) {
1220         inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
1221       }
1222       xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1223       for (uint32_t i = 0; i < kBlockSize; i++) {
1224         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1225         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1226           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1227           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1228           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1229       }
1230     }
1231   }
1232 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1233 
1234 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
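  // Tests for xnn_math_f16_f32_cvt__neonfp16, which requires the NEON-FP16
  // extension (see TEST_REQUIRES_ARM_NEON_FP16); same fp16 input classes as above.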
1235   TEST(CVT__NEONFP16, positive_normal) {
1236     TEST_REQUIRES_ARM_NEON_FP16;
1237 
1238     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1239     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1240     for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
1241       for (uint16_t i = 0; i < kBlockSize; i++) {
1242         inputs[i] = n + i;
1243       }
1244       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1245       for (uint32_t i = 0; i < kBlockSize; i++) {
1246         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1247         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1248           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1249           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1250           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1251       }
1252     }
1253   }
1254 
1255   TEST(CVT__NEONFP16, negative_normal) {
1256     TEST_REQUIRES_ARM_NEON_FP16;
1257 
1258     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1259     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1260     for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
1261       for (uint16_t i = 0; i < kBlockSize; i++) {
1262         inputs[i] = n + i;
1263       }
1264       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1265       for (uint32_t i = 0; i < kBlockSize; i++) {
1266         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1267         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1268           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1269           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1270           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1271       }
1272     }
1273   }
1274 
1275   TEST(CVT__NEONFP16, positive_zero) {
1276     TEST_REQUIRES_ARM_NEON_FP16;
1277 
1278     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1279     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1280     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
1281     xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1282     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1283     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1284       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1285       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1286       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1287   }
1288 
1289   TEST(CVT__NEONFP16, negative_zero) {
1290     TEST_REQUIRES_ARM_NEON_FP16;
1291 
1292     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1293     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1294     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
1295     xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1296     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1297     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1298       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1299       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1300       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1301   }
1302 
1303   TEST(CVT__NEONFP16, positive_subnormal) {
1304     TEST_REQUIRES_ARM_NEON_FP16;
1305 
1306     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1307     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1308     for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
1309       for (uint16_t i = 0; i < kBlockSize; i++) {
1310         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
1311       }
1312       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1313       for (uint32_t i = 0; i < kBlockSize; i++) {
1314         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1315         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1316           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1317           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1318           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1319       }
1320     }
1321   }
1322 
1323   TEST(CVT__NEONFP16, negative_subnormal) {
1324     TEST_REQUIRES_ARM_NEON_FP16;
1325 
1326     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1327     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1328     for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
1329       for (uint16_t i = 0; i < kBlockSize; i++) {
1330         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
1331       }
1332       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1333       for (uint32_t i = 0; i < kBlockSize; i++) {
1334         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1335         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1336           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1337           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1338           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1339       }
1340     }
1341   }
1342 
1343   TEST(CVT__NEONFP16, positive_infinity) {
1344     TEST_REQUIRES_ARM_NEON_FP16;
1345 
1346     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1347     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1348     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
1349     xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1350     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1351     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1352       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1353       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1354       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1355   }
1356 
1357   TEST(CVT__NEONFP16, negative_infinity) {
1358     TEST_REQUIRES_ARM_NEON_FP16;
1359 
1360     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1361     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362     std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
1363     xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1364     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1365     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1366       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1367       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1368       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1369   }
1370 
1371   TEST(CVT__NEONFP16, positive_nan) {
1372     TEST_REQUIRES_ARM_NEON_FP16;
1373 
1374     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1375     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1376     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1377       for (uint16_t i = 0; i < kBlockSize; i++) {
1378         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
1379       }
1380       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1381       for (uint32_t i = 0; i < kBlockSize; i++) {
1382         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1383         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1384           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1385           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1386           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1387       }
1388     }
1389   }
1390 
1391   TEST(CVT__NEONFP16, negative_nan) {
1392     TEST_REQUIRES_ARM_NEON_FP16;
1393 
1394     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1395     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1396     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1397       for (uint16_t i = 0; i < kBlockSize; i++) {
1398         inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
1399       }
1400       xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1401       for (uint32_t i = 0; i < kBlockSize; i++) {
1402         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1403         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1404           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1405           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1406           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1407       }
1408     }
1409   }
1410 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1411 
1412 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
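  // Tests for xnn_math_f16_f32_cvt__wasmsimd_int16 (WAsm SIMD int16 variant);
  // same fp16 input classes as above.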
1413   TEST(CVT__WASMSIMD_INT16, positive_normal) {
1414     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1415     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416     for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
1417       for (uint16_t i = 0; i < kBlockSize; i++) {
1418         inputs[i] = n + i;
1419       }
1420       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421       for (uint32_t i = 0; i < kBlockSize; i++) {
1422         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1423         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1424           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1425           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1427       }
1428     }
1429   }
1430 
1431   TEST(CVT__WASMSIMD_INT16, negative_normal) {
1432     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1433     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434     for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
1435       for (uint16_t i = 0; i < kBlockSize; i++) {
1436         inputs[i] = n + i;
1437       }
1438       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439       for (uint32_t i = 0; i < kBlockSize; i++) {
1440         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1441         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1442           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1443           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1445       }
1446     }
1447   }
1448 
1449   TEST(CVT__WASMSIMD_INT16, positive_zero) {
1450     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1451     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
1453     xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1454     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1455     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1456       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1457       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1458       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1459   }
1460 
1461   TEST(CVT__WASMSIMD_INT16, negative_zero) {
1462     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1463     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1464     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
1465     xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1466     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1467     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1468       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1469       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1470       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1471   }
1472 
1473   TEST(CVT__WASMSIMD_INT16, positive_subnormal) {
1474     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1475     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1476     for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
1477       for (uint16_t i = 0; i < kBlockSize; i++) {
1478         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
1479       }
1480       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1481       for (uint32_t i = 0; i < kBlockSize; i++) {
1482         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1483         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1484           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1485           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1486           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1487       }
1488     }
1489   }
1490 
1491   TEST(CVT__WASMSIMD_INT16, negative_subnormal) {
1492     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1493     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1494     for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
1495       for (uint16_t i = 0; i < kBlockSize; i++) {
1496         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
1497       }
1498       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1499       for (uint32_t i = 0; i < kBlockSize; i++) {
1500         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1501         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1502           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1503           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1504           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1505       }
1506     }
1507   }
1508 
1509   TEST(CVT__WASMSIMD_INT16, positive_infinity) {
1510     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1511     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1512     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
1513     xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1514     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1515     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1516       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1517       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1518       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1519   }
1520 
1521   TEST(CVT__WASMSIMD_INT16, negative_infinity) {
1522     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1523     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1524     std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
1525     xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1526     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1527     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1528       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1529       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1530       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1531   }
1532 
1533   TEST(CVT__WASMSIMD_INT16, positive_nan) {
1534     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1535     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1536     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1537       for (uint16_t i = 0; i < kBlockSize; i++) {
1538         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
1539       }
1540       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1541       for (uint32_t i = 0; i < kBlockSize; i++) {
1542         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1543         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1544           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1545           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1546           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1547       }
1548     }
1549   }
1550 
1551   TEST(CVT__WASMSIMD_INT16, negative_nan) {
1552     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1553     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1554     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1555       for (uint16_t i = 0; i < kBlockSize; i++) {
1556         inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
1557       }
1558       xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1559       for (uint32_t i = 0; i < kBlockSize; i++) {
1560         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1561         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1562           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1563           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1564           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1565       }
1566     }
1567   }
1568 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1569 
1570 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
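  // Tests for xnn_math_f16_f32_cvt__wasmsimd_int32 (WAsm SIMD int32 variant);
  // same fp16 input classes as above.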
1571   TEST(CVT__WASMSIMD_INT32, positive_normal) {
1572     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1573     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1574     for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) {
1575       for (uint16_t i = 0; i < kBlockSize; i++) {
1576         inputs[i] = n + i;
1577       }
1578       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1579       for (uint32_t i = 0; i < kBlockSize; i++) {
1580         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1581         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1582           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1583           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1584           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1585       }
1586     }
1587   }
1588 
1589   TEST(CVT__WASMSIMD_INT32, negative_normal) {
1590     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1591     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1592     for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) {
1593       for (uint16_t i = 0; i < kBlockSize; i++) {
1594         inputs[i] = n + i;
1595       }
1596       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1597       for (uint32_t i = 0; i < kBlockSize; i++) {
1598         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1599         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1600           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1601           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1602           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1603       }
1604     }
1605   }
1606 
1607   TEST(CVT__WASMSIMD_INT32, positive_zero) {
1608     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1609     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1610     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000));
1611     xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1612     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1613     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1614       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1615       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1616       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1617   }
1618 
1619   TEST(CVT__WASMSIMD_INT32, negative_zero) {
1620     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1621     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1622     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000));
1623     xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1624     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1625     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1626       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1627       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1628       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1629   }
1630 
1631   TEST(CVT__WASMSIMD_INT32, positive_subnormal) {
1632     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1633     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1634     for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) {
1635       for (uint16_t i = 0; i < kBlockSize; i++) {
1636         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001));
1637       }
1638       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1639       for (uint32_t i = 0; i < kBlockSize; i++) {
1640         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1641         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1642           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1643           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1644           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1645       }
1646     }
1647   }
1648 
1649   TEST(CVT__WASMSIMD_INT32, negative_subnormal) {
1650     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1651     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1652     for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) {
1653       for (uint16_t i = 0; i < kBlockSize; i++) {
1654         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001));
1655       }
1656       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1657       for (uint32_t i = 0; i < kBlockSize; i++) {
1658         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1659         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1660           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1661           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1662           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1663       }
1664     }
1665   }
1666 
1667   TEST(CVT__WASMSIMD_INT32, positive_infinity) {
1668     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1669     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1670     std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
1671     xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1672     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1673     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1674       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1675       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1676       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1677   }
1678 
1679   TEST(CVT__WASMSIMD_INT32, negative_infinity) {
1680     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1681     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1682     std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
1683     xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1684     const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
1685     ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
1686       << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0])
1687       << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1688       << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
1689   }
1690 
1691   TEST(CVT__WASMSIMD_INT32, positive_nan) {
1692     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1693     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1694     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1695       for (uint16_t i = 0; i < kBlockSize; i++) {
1696         inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
1697       }
1698       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1699       for (uint32_t i = 0; i < kBlockSize; i++) {
1700         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1701         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1702           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1703           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1704           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1705       }
1706     }
1707   }
1708 
1709   TEST(CVT__WASMSIMD_INT32, negative_nan) {
1710     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
1711     std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1712     for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
1713       for (uint16_t i = 0; i < kBlockSize; i++) {
1714         inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
1715       }
1716       xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1717       for (uint32_t i = 0; i < kBlockSize; i++) {
1718         const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
1719         ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
1720           << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i])
1721           << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1722           << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
1723       }
1724     }
1725   }
1726 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1727