// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>

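// Number of float elements processed per call to each evaluation stub below;
// the stubs take the size in bytes, hence kBlockSize * sizeof(float) at every
// call site.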
constexpr int kBlockSize = 1024;


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
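  // exp(x) is exactly 1.0f for x == ±0.0f; the two zero tests below verify
  // that the kernel preserves this identity for both signs of zero.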
  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
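    // Sweep bit patterns from 0xC2AEAC50 (about -87.34f, roughly ln(FLT_MIN),
    // below which exp(x) has no normal representation) up to 0xFF800000
    // (-infinity); the kernel is expected to flush all of these to +0.0f.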
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
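    // Bit patterns 0x7F800001 through 0x7FFFFFFF are the positive NaN
    // encodings; exp(NaN) must itself be NaN.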
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
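    // Clamp the payload sweep to 0x7FFFFFFF first, then set the sign bit to
    // obtain the negative NaN encodings 0xFF800001 through 0xFFFFFFFF.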
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_zero) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_saturation) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, positive_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__NEONFMA_RR2_P5, negative_nan) {
    TEST_REQUIRES_ARM_NEON_FMA;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(EXPMINUS__AVX2_RR2_P5, negative_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_zero) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_saturation) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, positive_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__AVX2_RR2_P5, negative_nan) {
    TEST_REQUIRES_X86_AVX2;

    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
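  // No TEST_REQUIRES check in this group: SSE2 is part of the baseline
  // instruction set assumed on the x86 targets these tests build for.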
  TEST(EXPMINUS__SSE2_RR2_P5, negative_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_zero) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +0.0f);
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const float reference_output = 1.0f;
    ASSERT_EQ(reference_output, outputs[0])
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_saturation) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = UINT32_C(0x00000000);
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, positive_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  TEST(EXPMINUS__SSE2_RR2_P5, negative_nan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
      }
      xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        ASSERT_TRUE(std::isnan(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


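// The scalar variants below have no ISA requirements and run unguarded on
// every architecture.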
TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}