// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;

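// The tests below exercise the special-case behavior of the f32 expminus
// (e^x for x <= 0) math kernels: signed zeroes must produce exactly 1.0f,
// large-magnitude negative inputs must saturate to +0.0f, and NaNs must
// propagate. Inputs are processed in aligned blocks of kBlockSize elements.
// Bit patterns used throughout:
//   0xC2AEAC50 -- roughly -87.34f (~ln(2^-126)); at and below this input the
//                 kernels flush the result to zero.
//   0xFF800000 -- negative infinity.
//   0x7F800001...0x7FFFFFFF -- positive NaNs; setting the sign bit yields the
//                              corresponding negative NaNs.
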
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_LUT64_P2, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_LUT2048_P1, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__neonfma_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(EXPMINUS__NEONFMA_RR2_P5, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_P5, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__NEONFMA_RR2_P5, negative_saturation) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_P5, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__NEONFMA_RR2_P5, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FMA;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__neonfma_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPMINUS__AVX2_RR2_P5, negative_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__AVX2_RR2_P5, positive_zero) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__AVX2_RR2_P5, negative_saturation) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__AVX2_RR2_P5, positive_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__AVX2_RR2_P5, negative_nan) {
  TEST_REQUIRES_X86_AVX2;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__avx2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(EXPMINUS__SSE2_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SSE2_RR2_P5, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SSE2_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SSE2_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SSE2_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__sse2_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64


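// The scalar variants below have no ISA requirements, so they are compiled
// and run on every architecture without a TEST_REQUIRES_* guard.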
TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT64_P2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut64_p2(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_LUT2048_P1, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_lut2048_p1(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}


TEST(EXPMINUS__SCALAR_RR2_P5, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const float reference_output = 1.0f;
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(reference_output)
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_saturation) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC2AEAC50); n <= UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xFF800000)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = UINT32_C(0x00000000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), n + i));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}

TEST(EXPMINUS__SCALAR_RR2_P5, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(UINT32_C(0x7FFFFFFF), UINT32_C(0x80000000) | (n + i)));
    }
    xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_TRUE(std::isnan(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
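

// A minimal finite-input sanity check (a sketch added for illustration, not
// part of the original special-case suite): for ordinary negative inputs the
// kernel output can be spot-checked against double-precision std::exp. The
// sampling grid and the 1e-6 relative tolerance are assumptions, not
// documented error bounds for this kernel.
TEST(EXPMINUS__SCALAR_RR2_P5, negative_finite_sanity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t i = 0; i < kBlockSize; i++) {
    // Sample a uniform grid in [-87, 0], inside the non-saturating range.
    inputs[i] = -87.0f * static_cast<float>(i) / static_cast<float>(kBlockSize - 1);
  }
  xnn_math_f32_expminus__scalar_rr2_p5(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  for (uint32_t i = 0; i < kBlockSize; i++) {
    const double reference_output = std::exp(static_cast<double>(inputs[i]));
    ASSERT_NEAR(reference_output, static_cast<double>(outputs[i]), 1.0e-6 * reference_output)
      << "input = " << inputs[i];
  }
}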