1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <algorithm> 7 #include <cmath> 8 #include <cstddef> 9 #include <cstdint> 10 #include <cstdlib> 11 #include <iomanip> 12 #include <ios> 13 #include <vector> 14 15 #include <gtest/gtest.h> 16 17 #include <fp16.h> 18 19 #include <xnnpack/aligned-allocator.h> 20 #include <xnnpack/common.h> 21 #include <xnnpack/isa-checks.h> 22 #include <xnnpack/math.h> 23 #include <xnnpack/math-stubs.h> 24 25 26 constexpr int kBlockSize = 1024; 27 28 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(CVT__SSE2_INT16,positive_normal)29 TEST(CVT__SSE2_INT16, positive_normal) { 30 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 31 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 32 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 33 for (uint16_t i = 0; i < kBlockSize; i++) { 34 inputs[i] = n + i; 35 } 36 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 37 for (uint32_t i = 0; i < kBlockSize; i++) { 38 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 39 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 40 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 41 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 42 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 43 } 44 } 45 } 46 TEST(CVT__SSE2_INT16,negative_normal)47 TEST(CVT__SSE2_INT16, negative_normal) { 48 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 49 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 50 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 51 for (uint16_t i = 0; i < kBlockSize; 
i++) { 52 inputs[i] = n + i; 53 } 54 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 55 for (uint32_t i = 0; i < kBlockSize; i++) { 56 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 57 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 58 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 59 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 60 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 61 } 62 } 63 } 64 TEST(CVT__SSE2_INT16,positive_zero)65 TEST(CVT__SSE2_INT16, positive_zero) { 66 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 67 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 68 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 69 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 70 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 71 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 72 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 73 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 74 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 75 } 76 TEST(CVT__SSE2_INT16,negative_zero)77 TEST(CVT__SSE2_INT16, negative_zero) { 78 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 79 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 80 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 81 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 82 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 83 ASSERT_EQ(reference_output, 
float_as_uint32(outputs[0])) 84 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 85 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 86 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 87 } 88 TEST(CVT__SSE2_INT16,positive_subnormal)89 TEST(CVT__SSE2_INT16, positive_subnormal) { 90 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 91 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 92 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 93 for (uint16_t i = 0; i < kBlockSize; i++) { 94 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 95 } 96 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 97 for (uint32_t i = 0; i < kBlockSize; i++) { 98 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 99 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 100 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 101 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 102 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 103 } 104 } 105 } 106 TEST(CVT__SSE2_INT16,negative_subnormal)107 TEST(CVT__SSE2_INT16, negative_subnormal) { 108 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 109 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 110 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 111 for (uint16_t i = 0; i < kBlockSize; i++) { 112 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 113 } 114 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 115 for (uint32_t i = 0; i < kBlockSize; i++) { 116 const uint32_t reference_output = 
float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 117 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 118 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 119 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 120 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 121 } 122 } 123 } 124 TEST(CVT__SSE2_INT16,positive_infinity)125 TEST(CVT__SSE2_INT16, positive_infinity) { 126 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 127 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 128 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 129 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 130 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 131 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 132 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 133 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 134 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 135 } 136 TEST(CVT__SSE2_INT16,negative_infinity)137 TEST(CVT__SSE2_INT16, negative_infinity) { 138 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 139 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 140 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 141 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 142 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 143 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 144 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 145 << ", reference = 0x" << std::hex << std::setw(8) 
<< std::setfill('0') << reference_output 146 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 147 } 148 TEST(CVT__SSE2_INT16,positive_nan)149 TEST(CVT__SSE2_INT16, positive_nan) { 150 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 151 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 152 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 153 for (uint16_t i = 0; i < kBlockSize; i++) { 154 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 155 } 156 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 157 for (uint32_t i = 0; i < kBlockSize; i++) { 158 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 159 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 160 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 161 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 162 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 163 } 164 } 165 } 166 TEST(CVT__SSE2_INT16,negative_nan)167 TEST(CVT__SSE2_INT16, negative_nan) { 168 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 169 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 170 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 171 for (uint16_t i = 0; i < kBlockSize; i++) { 172 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 173 } 174 xnn_math_f16_f32_cvt__sse2_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 175 for (uint32_t i = 0; i < kBlockSize; i++) { 176 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 177 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 178 << "input = 0x" << std::hex << std::setw(4) << 
std::setfill('0') << float_as_uint32(inputs[i]) 179 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 180 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 181 } 182 } 183 } 184 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 185 186 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(CVT__SSE2_INT32,positive_normal)187 TEST(CVT__SSE2_INT32, positive_normal) { 188 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 189 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 190 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 191 for (uint16_t i = 0; i < kBlockSize; i++) { 192 inputs[i] = n + i; 193 } 194 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 195 for (uint32_t i = 0; i < kBlockSize; i++) { 196 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 197 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 198 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 199 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 200 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 201 } 202 } 203 } 204 TEST(CVT__SSE2_INT32,negative_normal)205 TEST(CVT__SSE2_INT32, negative_normal) { 206 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 207 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 208 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 209 for (uint16_t i = 0; i < kBlockSize; i++) { 210 inputs[i] = n + i; 211 } 212 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 213 for (uint32_t i = 0; i < kBlockSize; i++) { 214 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 215 
ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 216 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 217 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 218 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 219 } 220 } 221 } 222 TEST(CVT__SSE2_INT32,positive_zero)223 TEST(CVT__SSE2_INT32, positive_zero) { 224 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 225 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 226 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 227 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 228 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 229 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 230 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 231 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 232 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 233 } 234 TEST(CVT__SSE2_INT32,negative_zero)235 TEST(CVT__SSE2_INT32, negative_zero) { 236 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 237 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 238 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 239 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 240 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 241 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 242 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 243 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 244 << ", optimized = 0x" << 
std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 245 } 246 TEST(CVT__SSE2_INT32,positive_subnormal)247 TEST(CVT__SSE2_INT32, positive_subnormal) { 248 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 249 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 250 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 251 for (uint16_t i = 0; i < kBlockSize; i++) { 252 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 253 } 254 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 255 for (uint32_t i = 0; i < kBlockSize; i++) { 256 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 257 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 258 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 259 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 260 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 261 } 262 } 263 } 264 TEST(CVT__SSE2_INT32,negative_subnormal)265 TEST(CVT__SSE2_INT32, negative_subnormal) { 266 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 267 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 268 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 269 for (uint16_t i = 0; i < kBlockSize; i++) { 270 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 271 } 272 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 273 for (uint32_t i = 0; i < kBlockSize; i++) { 274 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 275 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 276 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 277 << ", reference = 0x" << std::hex << 
std::setw(8) << std::setfill('0') << reference_output 278 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 279 } 280 } 281 } 282 TEST(CVT__SSE2_INT32,positive_infinity)283 TEST(CVT__SSE2_INT32, positive_infinity) { 284 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 285 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 286 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 287 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 288 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 289 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 290 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 291 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 292 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 293 } 294 TEST(CVT__SSE2_INT32,negative_infinity)295 TEST(CVT__SSE2_INT32, negative_infinity) { 296 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 297 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 298 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 299 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 300 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 301 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 302 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 303 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 304 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 305 } 306 TEST(CVT__SSE2_INT32,positive_nan)307 TEST(CVT__SSE2_INT32, positive_nan) { 308 
std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 309 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 310 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 311 for (uint16_t i = 0; i < kBlockSize; i++) { 312 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 313 } 314 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 315 for (uint32_t i = 0; i < kBlockSize; i++) { 316 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 317 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 318 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 319 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 320 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 321 } 322 } 323 } 324 TEST(CVT__SSE2_INT32,negative_nan)325 TEST(CVT__SSE2_INT32, negative_nan) { 326 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 327 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 328 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 329 for (uint16_t i = 0; i < kBlockSize; i++) { 330 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 331 } 332 xnn_math_f16_f32_cvt__sse2_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 333 for (uint32_t i = 0; i < kBlockSize; i++) { 334 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 335 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 336 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 337 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 338 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << 
float_as_uint32(outputs[i]); 339 } 340 } 341 } 342 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 343 344 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(CVT__SSE41_INT16,positive_normal)345 TEST(CVT__SSE41_INT16, positive_normal) { 346 TEST_REQUIRES_X86_SSE41; 347 348 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 350 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 351 for (uint16_t i = 0; i < kBlockSize; i++) { 352 inputs[i] = n + i; 353 } 354 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 355 for (uint32_t i = 0; i < kBlockSize; i++) { 356 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 357 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 358 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 359 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 360 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 361 } 362 } 363 } 364 TEST(CVT__SSE41_INT16,negative_normal)365 TEST(CVT__SSE41_INT16, negative_normal) { 366 TEST_REQUIRES_X86_SSE41; 367 368 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 369 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 370 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 371 for (uint16_t i = 0; i < kBlockSize; i++) { 372 inputs[i] = n + i; 373 } 374 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 375 for (uint32_t i = 0; i < kBlockSize; i++) { 376 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 377 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 378 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << 
float_as_uint32(inputs[i]) 379 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 380 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 381 } 382 } 383 } 384 TEST(CVT__SSE41_INT16,positive_zero)385 TEST(CVT__SSE41_INT16, positive_zero) { 386 TEST_REQUIRES_X86_SSE41; 387 388 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 389 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 390 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 391 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 392 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 393 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 394 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 395 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 396 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 397 } 398 TEST(CVT__SSE41_INT16,negative_zero)399 TEST(CVT__SSE41_INT16, negative_zero) { 400 TEST_REQUIRES_X86_SSE41; 401 402 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 403 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 404 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 405 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 406 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 407 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 408 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 409 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 410 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << 
float_as_uint32(outputs[0]); 411 } 412 TEST(CVT__SSE41_INT16,positive_subnormal)413 TEST(CVT__SSE41_INT16, positive_subnormal) { 414 TEST_REQUIRES_X86_SSE41; 415 416 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 417 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 418 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 419 for (uint16_t i = 0; i < kBlockSize; i++) { 420 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 421 } 422 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 423 for (uint32_t i = 0; i < kBlockSize; i++) { 424 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 425 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 426 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 427 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 428 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 429 } 430 } 431 } 432 TEST(CVT__SSE41_INT16,negative_subnormal)433 TEST(CVT__SSE41_INT16, negative_subnormal) { 434 TEST_REQUIRES_X86_SSE41; 435 436 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 437 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 438 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 439 for (uint16_t i = 0; i < kBlockSize; i++) { 440 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 441 } 442 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 443 for (uint32_t i = 0; i < kBlockSize; i++) { 444 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 445 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 446 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 447 << ", 
reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 448 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 449 } 450 } 451 } 452 TEST(CVT__SSE41_INT16,positive_infinity)453 TEST(CVT__SSE41_INT16, positive_infinity) { 454 TEST_REQUIRES_X86_SSE41; 455 456 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 457 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 458 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 459 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 460 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 461 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 462 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 463 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 464 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 465 } 466 TEST(CVT__SSE41_INT16,negative_infinity)467 TEST(CVT__SSE41_INT16, negative_infinity) { 468 TEST_REQUIRES_X86_SSE41; 469 470 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 471 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 472 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 473 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 474 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 475 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 476 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 477 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 478 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 479 
} 480 TEST(CVT__SSE41_INT16,positive_nan)481 TEST(CVT__SSE41_INT16, positive_nan) { 482 TEST_REQUIRES_X86_SSE41; 483 484 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 485 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 486 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 487 for (uint16_t i = 0; i < kBlockSize; i++) { 488 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 489 } 490 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 491 for (uint32_t i = 0; i < kBlockSize; i++) { 492 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 493 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 494 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 495 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 496 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 497 } 498 } 499 } 500 TEST(CVT__SSE41_INT16,negative_nan)501 TEST(CVT__SSE41_INT16, negative_nan) { 502 TEST_REQUIRES_X86_SSE41; 503 504 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 505 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 506 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 507 for (uint16_t i = 0; i < kBlockSize; i++) { 508 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 509 } 510 xnn_math_f16_f32_cvt__sse41_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 511 for (uint32_t i = 0; i < kBlockSize; i++) { 512 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 513 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 514 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 515 << ", reference = 0x" << std::hex 
<< std::setw(8) << std::setfill('0') << reference_output 516 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 517 } 518 } 519 } 520 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 521 522 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(CVT__SSE41_INT32,positive_normal)523 TEST(CVT__SSE41_INT32, positive_normal) { 524 TEST_REQUIRES_X86_SSE41; 525 526 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 527 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 528 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 529 for (uint16_t i = 0; i < kBlockSize; i++) { 530 inputs[i] = n + i; 531 } 532 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 533 for (uint32_t i = 0; i < kBlockSize; i++) { 534 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 535 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 536 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 537 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 538 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 539 } 540 } 541 } 542 TEST(CVT__SSE41_INT32,negative_normal)543 TEST(CVT__SSE41_INT32, negative_normal) { 544 TEST_REQUIRES_X86_SSE41; 545 546 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 547 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 548 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 549 for (uint16_t i = 0; i < kBlockSize; i++) { 550 inputs[i] = n + i; 551 } 552 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 553 for (uint32_t i = 0; i < kBlockSize; i++) { 554 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 555 
ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 556 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 557 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 558 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 559 } 560 } 561 } 562 TEST(CVT__SSE41_INT32,positive_zero)563 TEST(CVT__SSE41_INT32, positive_zero) { 564 TEST_REQUIRES_X86_SSE41; 565 566 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 567 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 568 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 569 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 570 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 571 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 572 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 573 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 574 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 575 } 576 TEST(CVT__SSE41_INT32,negative_zero)577 TEST(CVT__SSE41_INT32, negative_zero) { 578 TEST_REQUIRES_X86_SSE41; 579 580 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 581 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 582 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 583 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 584 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 585 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 586 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 587 << ", reference = 0x" << std::hex << std::setw(8) 
<< std::setfill('0') << reference_output 588 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 589 } 590 TEST(CVT__SSE41_INT32,positive_subnormal)591 TEST(CVT__SSE41_INT32, positive_subnormal) { 592 TEST_REQUIRES_X86_SSE41; 593 594 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 595 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 596 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 597 for (uint16_t i = 0; i < kBlockSize; i++) { 598 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 599 } 600 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 601 for (uint32_t i = 0; i < kBlockSize; i++) { 602 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 603 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 604 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 605 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 606 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 607 } 608 } 609 } 610 TEST(CVT__SSE41_INT32,negative_subnormal)611 TEST(CVT__SSE41_INT32, negative_subnormal) { 612 TEST_REQUIRES_X86_SSE41; 613 614 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 615 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 616 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 617 for (uint16_t i = 0; i < kBlockSize; i++) { 618 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 619 } 620 xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 621 for (uint32_t i = 0; i < kBlockSize; i++) { 622 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 623 ASSERT_EQ(reference_output, 
float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Fills the whole block with the bit pattern 0x7C00 (positive infinity), converts
  // it, and compares the first output bit for bit against fp16_ieee_to_fp32_value().
  // Failure messages print the raw 16-bit input pattern rather than
  // float_as_uint32() of it, which would show the bits of the integer-to-float
  // converted value instead of the half-precision encoding.
  TEST(CVT__SSE41_INT32, positive_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Same check with the block filled with 0xFC00 (negative infinity).
  TEST(CVT__SSE41_INT32, negative_infinity) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) <<
std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Sweeps the positive NaN bit patterns [0x7C01, 0x8000). std::max clamps the
  // first element to 0x7C01 so that the infinity pattern 0x7C00 is never fed in.
  // Each output must match fp16_ieee_to_fp32_value() bit for bit.
  // Failure messages print the raw 16-bit input pattern rather than
  // float_as_uint32() of it, which would show the bits of the integer-to-float
  // converted value instead of the half-precision encoding.
  TEST(CVT__SSE41_INT32, positive_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Same sweep with the sign bit OR-ed in: inputs are 0x8000 | (n + i), clamped to
  // at least 0xFC01 so that the negative-infinity pattern 0xFC00 is excluded.
  TEST(CVT__SSE41_INT32, negative_nan) {
    TEST_REQUIRES_X86_SSE41;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
      for (uint16_t i = 0; i < kBlockSize; i++) {
        inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01));
      }
      xnn_math_f16_f32_cvt__sse41_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output,
float_as_uint32(outputs[i])) 692 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 693 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 694 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 695 } 696 } 697 } 698 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 699 700 #if XNN_ARCH_X86 || XNN_ARCH_X86_64 TEST(CVT__F16C,positive_normal)701 TEST(CVT__F16C, positive_normal) { 702 TEST_REQUIRES_X86_F16C; 703 704 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 705 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 706 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 707 for (uint16_t i = 0; i < kBlockSize; i++) { 708 inputs[i] = n + i; 709 } 710 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 711 for (uint32_t i = 0; i < kBlockSize; i++) { 712 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 713 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 714 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 715 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 716 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 717 } 718 } 719 } 720 TEST(CVT__F16C,negative_normal)721 TEST(CVT__F16C, negative_normal) { 722 TEST_REQUIRES_X86_F16C; 723 724 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 725 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 726 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 727 for (uint16_t i = 0; i < kBlockSize; i++) { 728 inputs[i] = n + i; 729 } 730 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 731 for (uint32_t i = 0; i < 
kBlockSize; i++) { 732 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 733 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 734 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 735 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 736 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 737 } 738 } 739 } 740 TEST(CVT__F16C,positive_zero)741 TEST(CVT__F16C, positive_zero) { 742 TEST_REQUIRES_X86_F16C; 743 744 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 745 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 746 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 747 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 748 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 749 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 750 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 751 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 752 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 753 } 754 TEST(CVT__F16C,negative_zero)755 TEST(CVT__F16C, negative_zero) { 756 TEST_REQUIRES_X86_F16C; 757 758 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 759 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 760 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 761 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 762 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 763 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 764 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << 
float_as_uint32(inputs[0]) 765 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 766 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 767 } 768 TEST(CVT__F16C,positive_subnormal)769 TEST(CVT__F16C, positive_subnormal) { 770 TEST_REQUIRES_X86_F16C; 771 772 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 773 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 774 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 775 for (uint16_t i = 0; i < kBlockSize; i++) { 776 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 777 } 778 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 779 for (uint32_t i = 0; i < kBlockSize; i++) { 780 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 781 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 782 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 783 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 784 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 785 } 786 } 787 } 788 TEST(CVT__F16C,negative_subnormal)789 TEST(CVT__F16C, negative_subnormal) { 790 TEST_REQUIRES_X86_F16C; 791 792 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 793 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 794 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 795 for (uint16_t i = 0; i < kBlockSize; i++) { 796 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 797 } 798 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 799 for (uint32_t i = 0; i < kBlockSize; i++) { 800 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 801 
ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Fills the whole block with the bit pattern 0x7C00 (positive infinity), converts
  // it, and compares the first output bit for bit against fp16_ieee_to_fp32_value().
  // Failure messages print the raw 16-bit input pattern rather than
  // float_as_uint32() of it, which would show the bits of the integer-to-float
  // converted value instead of the half-precision encoding.
  TEST(CVT__F16C, positive_infinity) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Same check with the block filled with 0xFC00 (negative infinity).
  TEST(CVT__F16C, negative_infinity) {
    TEST_REQUIRES_X86_F16C;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') <<
reference_output 834 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 835 } 836 TEST(CVT__F16C,positive_nan)837 TEST(CVT__F16C, positive_nan) { 838 TEST_REQUIRES_X86_F16C; 839 840 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 841 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 842 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 843 for (uint16_t i = 0; i < kBlockSize; i++) { 844 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 845 } 846 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 847 for (uint32_t i = 0; i < kBlockSize; i++) { 848 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 849 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 850 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 851 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 852 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 853 } 854 } 855 } 856 TEST(CVT__F16C,negative_nan)857 TEST(CVT__F16C, negative_nan) { 858 TEST_REQUIRES_X86_F16C; 859 860 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 861 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 862 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 863 for (uint16_t i = 0; i < kBlockSize; i++) { 864 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 865 } 866 xnn_math_f16_f32_cvt__f16c(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 867 for (uint32_t i = 0; i < kBlockSize; i++) { 868 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 869 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 870 << "input = 0x" << std::hex << std::setw(4) << 
std::setfill('0') << float_as_uint32(inputs[i]) 871 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 872 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 873 } 874 } 875 } 876 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 877 878 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEON_INT16,positive_normal)879 TEST(CVT__NEON_INT16, positive_normal) { 880 TEST_REQUIRES_ARM_NEON; 881 882 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 883 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 884 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 885 for (uint16_t i = 0; i < kBlockSize; i++) { 886 inputs[i] = n + i; 887 } 888 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 889 for (uint32_t i = 0; i < kBlockSize; i++) { 890 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 891 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 892 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 893 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 894 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 895 } 896 } 897 } 898 TEST(CVT__NEON_INT16,negative_normal)899 TEST(CVT__NEON_INT16, negative_normal) { 900 TEST_REQUIRES_ARM_NEON; 901 902 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 903 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 904 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 905 for (uint16_t i = 0; i < kBlockSize; i++) { 906 inputs[i] = n + i; 907 } 908 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 909 for (uint32_t i = 0; i < kBlockSize; i++) { 910 const uint32_t 
reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 911 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 912 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 913 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 914 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 915 } 916 } 917 } 918 TEST(CVT__NEON_INT16,positive_zero)919 TEST(CVT__NEON_INT16, positive_zero) { 920 TEST_REQUIRES_ARM_NEON; 921 922 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 923 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 924 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 925 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 926 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 927 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 928 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 929 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 930 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 931 } 932 TEST(CVT__NEON_INT16,negative_zero)933 TEST(CVT__NEON_INT16, negative_zero) { 934 TEST_REQUIRES_ARM_NEON; 935 936 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 938 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 939 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 940 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 941 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 942 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << 
float_as_uint32(inputs[0]) 943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 945 } 946 TEST(CVT__NEON_INT16,positive_subnormal)947 TEST(CVT__NEON_INT16, positive_subnormal) { 948 TEST_REQUIRES_ARM_NEON; 949 950 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 951 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 952 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 953 for (uint16_t i = 0; i < kBlockSize; i++) { 954 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 955 } 956 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 957 for (uint32_t i = 0; i < kBlockSize; i++) { 958 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 959 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 960 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 961 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 962 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 963 } 964 } 965 } 966 TEST(CVT__NEON_INT16,negative_subnormal)967 TEST(CVT__NEON_INT16, negative_subnormal) { 968 TEST_REQUIRES_ARM_NEON; 969 970 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 971 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 972 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 973 for (uint16_t i = 0; i < kBlockSize; i++) { 974 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 975 } 976 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 977 for (uint32_t i = 0; i < kBlockSize; i++) { 978 const uint32_t reference_output = 
float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
        ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
          << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[i]
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
      }
    }
  }

  // Fills the whole block with the bit pattern 0x7C00 (positive infinity), converts
  // it, and compares the first output bit for bit against fp16_ieee_to_fp32_value().
  // Failure messages print the raw 16-bit input pattern rather than
  // float_as_uint32() of it, which would show the bits of the integer-to-float
  // converted value instead of the half-precision encoding.
  TEST(CVT__NEON_INT16, positive_infinity) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00));
    xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << inputs[0]
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
  }

  // Same check with the block filled with 0xFC00 (negative infinity).
  TEST(CVT__NEON_INT16, negative_infinity) {
    TEST_REQUIRES_ARM_NEON;

    std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
    xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
    ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
      << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') <<
float_as_uint32(inputs[0]) 1011 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1012 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1013 } 1014 TEST(CVT__NEON_INT16,positive_nan)1015 TEST(CVT__NEON_INT16, positive_nan) { 1016 TEST_REQUIRES_ARM_NEON; 1017 1018 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1019 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1020 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1021 for (uint16_t i = 0; i < kBlockSize; i++) { 1022 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 1023 } 1024 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1025 for (uint32_t i = 0; i < kBlockSize; i++) { 1026 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1027 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1028 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1029 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1030 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1031 } 1032 } 1033 } 1034 TEST(CVT__NEON_INT16,negative_nan)1035 TEST(CVT__NEON_INT16, negative_nan) { 1036 TEST_REQUIRES_ARM_NEON; 1037 1038 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1040 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1041 for (uint16_t i = 0; i < kBlockSize; i++) { 1042 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 1043 } 1044 xnn_math_f16_f32_cvt__neon_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1045 for (uint32_t i = 0; i < kBlockSize; i++) { 1046 const uint32_t 
reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1047 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1048 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1051 } 1052 } 1053 } 1054 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1055 1056 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEON_INT32,positive_normal)1057 TEST(CVT__NEON_INT32, positive_normal) { 1058 TEST_REQUIRES_ARM_NEON; 1059 1060 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1061 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1062 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 1063 for (uint16_t i = 0; i < kBlockSize; i++) { 1064 inputs[i] = n + i; 1065 } 1066 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1067 for (uint32_t i = 0; i < kBlockSize; i++) { 1068 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1069 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1070 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1071 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1072 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1073 } 1074 } 1075 } 1076 TEST(CVT__NEON_INT32,negative_normal)1077 TEST(CVT__NEON_INT32, negative_normal) { 1078 TEST_REQUIRES_ARM_NEON; 1079 1080 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1081 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1082 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 1083 for (uint16_t i = 0; i < 
kBlockSize; i++) { 1084 inputs[i] = n + i; 1085 } 1086 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1087 for (uint32_t i = 0; i < kBlockSize; i++) { 1088 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1089 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1090 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1091 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1092 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1093 } 1094 } 1095 } 1096 TEST(CVT__NEON_INT32,positive_zero)1097 TEST(CVT__NEON_INT32, positive_zero) { 1098 TEST_REQUIRES_ARM_NEON; 1099 1100 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1101 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1102 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 1103 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1104 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1105 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1106 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1107 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1108 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1109 } 1110 TEST(CVT__NEON_INT32,negative_zero)1111 TEST(CVT__NEON_INT32, negative_zero) { 1112 TEST_REQUIRES_ARM_NEON; 1113 1114 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1115 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1116 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 1117 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), 
outputs.data()); 1118 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1119 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1120 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1121 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1122 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1123 } 1124 TEST(CVT__NEON_INT32,positive_subnormal)1125 TEST(CVT__NEON_INT32, positive_subnormal) { 1126 TEST_REQUIRES_ARM_NEON; 1127 1128 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1129 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1130 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 1131 for (uint16_t i = 0; i < kBlockSize; i++) { 1132 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 1133 } 1134 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1135 for (uint32_t i = 0; i < kBlockSize; i++) { 1136 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1137 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1138 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1139 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1140 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1141 } 1142 } 1143 } 1144 TEST(CVT__NEON_INT32,negative_subnormal)1145 TEST(CVT__NEON_INT32, negative_subnormal) { 1146 TEST_REQUIRES_ARM_NEON; 1147 1148 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1149 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1150 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 1151 for (uint16_t i = 0; i < kBlockSize; i++) { 1152 
inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 1153 } 1154 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1155 for (uint32_t i = 0; i < kBlockSize; i++) { 1156 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1157 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1158 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1159 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1160 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1161 } 1162 } 1163 } 1164 TEST(CVT__NEON_INT32,positive_infinity)1165 TEST(CVT__NEON_INT32, positive_infinity) { 1166 TEST_REQUIRES_ARM_NEON; 1167 1168 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1169 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1170 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 1171 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1172 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1173 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1174 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1175 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1176 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1177 } 1178 TEST(CVT__NEON_INT32,negative_infinity)1179 TEST(CVT__NEON_INT32, negative_infinity) { 1180 TEST_REQUIRES_ARM_NEON; 1181 1182 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1183 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1184 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 1185 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * 
sizeof(float), inputs.data(), outputs.data()); 1186 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1187 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1188 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1189 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1190 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1191 } 1192 TEST(CVT__NEON_INT32,positive_nan)1193 TEST(CVT__NEON_INT32, positive_nan) { 1194 TEST_REQUIRES_ARM_NEON; 1195 1196 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1197 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1198 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1199 for (uint16_t i = 0; i < kBlockSize; i++) { 1200 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 1201 } 1202 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1203 for (uint32_t i = 0; i < kBlockSize; i++) { 1204 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1205 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1206 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1207 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1208 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1209 } 1210 } 1211 } 1212 TEST(CVT__NEON_INT32,negative_nan)1213 TEST(CVT__NEON_INT32, negative_nan) { 1214 TEST_REQUIRES_ARM_NEON; 1215 1216 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1217 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1218 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1219 for (uint16_t i = 0; i < kBlockSize; 
i++) { 1220 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 1221 } 1222 xnn_math_f16_f32_cvt__neon_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1223 for (uint32_t i = 0; i < kBlockSize; i++) { 1224 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1225 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1226 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1227 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1228 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1229 } 1230 } 1231 } 1232 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1233 1234 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEONFP16,positive_normal)1235 TEST(CVT__NEONFP16, positive_normal) { 1236 TEST_REQUIRES_ARM_NEON_FP16; 1237 1238 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1239 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1240 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 1241 for (uint16_t i = 0; i < kBlockSize; i++) { 1242 inputs[i] = n + i; 1243 } 1244 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1245 for (uint32_t i = 0; i < kBlockSize; i++) { 1246 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1247 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1248 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1249 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1250 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1251 } 1252 } 1253 } 1254 TEST(CVT__NEONFP16,negative_normal)1255 TEST(CVT__NEONFP16, negative_normal) { 1256 TEST_REQUIRES_ARM_NEON_FP16; 1257 1258 
std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1259 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1260 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 1261 for (uint16_t i = 0; i < kBlockSize; i++) { 1262 inputs[i] = n + i; 1263 } 1264 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1265 for (uint32_t i = 0; i < kBlockSize; i++) { 1266 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1267 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1268 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1269 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1270 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1271 } 1272 } 1273 } 1274 TEST(CVT__NEONFP16,positive_zero)1275 TEST(CVT__NEONFP16, positive_zero) { 1276 TEST_REQUIRES_ARM_NEON_FP16; 1277 1278 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1279 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1280 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 1281 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1282 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1283 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1284 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1285 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1286 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1287 } 1288 TEST(CVT__NEONFP16,negative_zero)1289 TEST(CVT__NEONFP16, negative_zero) { 1290 TEST_REQUIRES_ARM_NEON_FP16; 1291 1292 std::vector<uint16_t, 
AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1293 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1294 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 1295 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1296 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1297 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1298 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1299 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1300 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1301 } 1302 TEST(CVT__NEONFP16,positive_subnormal)1303 TEST(CVT__NEONFP16, positive_subnormal) { 1304 TEST_REQUIRES_ARM_NEON_FP16; 1305 1306 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1307 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1308 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 1309 for (uint16_t i = 0; i < kBlockSize; i++) { 1310 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 1311 } 1312 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1313 for (uint32_t i = 0; i < kBlockSize; i++) { 1314 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1315 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1316 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1317 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1318 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1319 } 1320 } 1321 } 1322 TEST(CVT__NEONFP16,negative_subnormal)1323 TEST(CVT__NEONFP16, negative_subnormal) { 1324 TEST_REQUIRES_ARM_NEON_FP16; 1325 1326 std::vector<uint16_t, 
AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1327 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1328 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 1329 for (uint16_t i = 0; i < kBlockSize; i++) { 1330 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 1331 } 1332 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1333 for (uint32_t i = 0; i < kBlockSize; i++) { 1334 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1335 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1336 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1337 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1338 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1339 } 1340 } 1341 } 1342 TEST(CVT__NEONFP16,positive_infinity)1343 TEST(CVT__NEONFP16, positive_infinity) { 1344 TEST_REQUIRES_ARM_NEON_FP16; 1345 1346 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1347 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1348 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 1349 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1350 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1351 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1352 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1353 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1354 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1355 } 1356 TEST(CVT__NEONFP16,negative_infinity)1357 TEST(CVT__NEONFP16, negative_infinity) { 1358 TEST_REQUIRES_ARM_NEON_FP16; 1359 1360 
std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1362 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 1363 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1364 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1365 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1366 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1367 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1368 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1369 } 1370 TEST(CVT__NEONFP16,positive_nan)1371 TEST(CVT__NEONFP16, positive_nan) { 1372 TEST_REQUIRES_ARM_NEON_FP16; 1373 1374 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1375 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1376 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1377 for (uint16_t i = 0; i < kBlockSize; i++) { 1378 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 1379 } 1380 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1381 for (uint32_t i = 0; i < kBlockSize; i++) { 1382 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1383 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1384 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1385 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1386 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1387 } 1388 } 1389 } 1390 TEST(CVT__NEONFP16,negative_nan)1391 TEST(CVT__NEONFP16, negative_nan) { 1392 TEST_REQUIRES_ARM_NEON_FP16; 1393 1394 
std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1395 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1396 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1397 for (uint16_t i = 0; i < kBlockSize; i++) { 1398 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 1399 } 1400 xnn_math_f16_f32_cvt__neonfp16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1401 for (uint32_t i = 0; i < kBlockSize; i++) { 1402 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1403 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1404 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1405 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1406 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1407 } 1408 } 1409 } 1410 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1411 1412 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD TEST(CVT__WASMSIMD_INT16,positive_normal)1413 TEST(CVT__WASMSIMD_INT16, positive_normal) { 1414 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1416 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 1417 for (uint16_t i = 0; i < kBlockSize; i++) { 1418 inputs[i] = n + i; 1419 } 1420 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1421 for (uint32_t i = 0; i < kBlockSize; i++) { 1422 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1423 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1424 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << 
reference_output 1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1427 } 1428 } 1429 } 1430 TEST(CVT__WASMSIMD_INT16,negative_normal)1431 TEST(CVT__WASMSIMD_INT16, negative_normal) { 1432 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1434 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 1435 for (uint16_t i = 0; i < kBlockSize; i++) { 1436 inputs[i] = n + i; 1437 } 1438 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1439 for (uint32_t i = 0; i < kBlockSize; i++) { 1440 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1441 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1442 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1445 } 1446 } 1447 } 1448 TEST(CVT__WASMSIMD_INT16,positive_zero)1449 TEST(CVT__WASMSIMD_INT16, positive_zero) { 1450 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1452 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 1453 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1454 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1455 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1456 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1457 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1458 << ", optimized = 0x" << std::hex << 
std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1459 } 1460 TEST(CVT__WASMSIMD_INT16,negative_zero)1461 TEST(CVT__WASMSIMD_INT16, negative_zero) { 1462 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1463 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1464 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 1465 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1466 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1467 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1468 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1469 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1470 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1471 } 1472 TEST(CVT__WASMSIMD_INT16,positive_subnormal)1473 TEST(CVT__WASMSIMD_INT16, positive_subnormal) { 1474 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1475 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1476 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 1477 for (uint16_t i = 0; i < kBlockSize; i++) { 1478 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 1479 } 1480 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1481 for (uint32_t i = 0; i < kBlockSize; i++) { 1482 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1483 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1484 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1485 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1486 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 
1487 } 1488 } 1489 } 1490 TEST(CVT__WASMSIMD_INT16,negative_subnormal)1491 TEST(CVT__WASMSIMD_INT16, negative_subnormal) { 1492 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1493 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1494 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 1495 for (uint16_t i = 0; i < kBlockSize; i++) { 1496 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 1497 } 1498 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1499 for (uint32_t i = 0; i < kBlockSize; i++) { 1500 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1501 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1502 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1503 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1504 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1505 } 1506 } 1507 } 1508 TEST(CVT__WASMSIMD_INT16,positive_infinity)1509 TEST(CVT__WASMSIMD_INT16, positive_infinity) { 1510 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1511 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1512 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 1513 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1514 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1515 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1516 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1517 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1518 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1519 } 1520 
// NOTE: failure messages in these tests stream the raw 16-bit input pattern.
// float_as_uint32() is applied to float values elsewhere in this file, so
// routing the uint16_t input through it would print a converted value rather
// than the input bits.

// -infinity (0xFC00). Every input is identical; the first converted element
// is compared bit-for-bit against the scalar reference conversion.
TEST(CVT__WASMSIMD_INT16, negative_infinity) {
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00));
  xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << static_cast<uint32_t>(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}

// Positive NaNs, bit patterns 0x7C01..0x7FFF. std::max replaces the 0x7C00
// (+infinity) slot with 0x7C01.
TEST(CVT__WASMSIMD_INT16, positive_nan) {
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) {
    for (uint16_t i = 0; i < kBlockSize; i++) {
      inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01));
    }
    xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << static_cast<uint32_t>(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
TEST(CVT__WASMSIMD_INT16, negative_nan) { 1552 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1553 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1554 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1555 for (uint16_t i = 0; i < kBlockSize; i++) { 1556 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 1557 } 1558 xnn_math_f16_f32_cvt__wasmsimd_int16(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1559 for (uint32_t i = 0; i < kBlockSize; i++) { 1560 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1561 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1562 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1563 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1564 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1565 } 1566 } 1567 } 1568 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD 1569 1570 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD TEST(CVT__WASMSIMD_INT32,positive_normal)1571 TEST(CVT__WASMSIMD_INT32, positive_normal) { 1572 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1573 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1574 for (uint16_t n = UINT16_C(0x0400); n < UINT16_C(0x7C00); n += kBlockSize) { 1575 for (uint16_t i = 0; i < kBlockSize; i++) { 1576 inputs[i] = n + i; 1577 } 1578 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1579 for (uint32_t i = 0; i < kBlockSize; i++) { 1580 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1581 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1582 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1583 << ", reference 
= 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1584 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1585 } 1586 } 1587 } 1588 TEST(CVT__WASMSIMD_INT32,negative_normal)1589 TEST(CVT__WASMSIMD_INT32, negative_normal) { 1590 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1591 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1592 for (uint16_t n = UINT16_C(0x8400); n < UINT16_C(0xFC00); n += kBlockSize) { 1593 for (uint16_t i = 0; i < kBlockSize; i++) { 1594 inputs[i] = n + i; 1595 } 1596 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1597 for (uint32_t i = 0; i < kBlockSize; i++) { 1598 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1599 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1600 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1601 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1602 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1603 } 1604 } 1605 } 1606 TEST(CVT__WASMSIMD_INT32,positive_zero)1607 TEST(CVT__WASMSIMD_INT32, positive_zero) { 1608 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1609 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1610 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x0000)); 1611 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1612 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1613 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1614 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1615 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << 
reference_output 1616 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1617 } 1618 TEST(CVT__WASMSIMD_INT32,negative_zero)1619 TEST(CVT__WASMSIMD_INT32, negative_zero) { 1620 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1621 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1622 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x8000)); 1623 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1624 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1625 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1626 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1627 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1628 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1629 } 1630 TEST(CVT__WASMSIMD_INT32,positive_subnormal)1631 TEST(CVT__WASMSIMD_INT32, positive_subnormal) { 1632 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1633 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1634 for (uint16_t n = 0; n < UINT16_C(0x0400); n += kBlockSize) { 1635 for (uint16_t i = 0; i < kBlockSize; i++) { 1636 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x0001)); 1637 } 1638 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1639 for (uint32_t i = 0; i < kBlockSize; i++) { 1640 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1641 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1642 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1643 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1644 << ", optimized = 0x" << std::hex << 
std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1645 } 1646 } 1647 } 1648 TEST(CVT__WASMSIMD_INT32,negative_subnormal)1649 TEST(CVT__WASMSIMD_INT32, negative_subnormal) { 1650 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1651 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1652 for (uint16_t n = UINT16_C(0x8000); n < UINT16_C(0x8400); n += kBlockSize) { 1653 for (uint16_t i = 0; i < kBlockSize; i++) { 1654 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x8001)); 1655 } 1656 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1657 for (uint32_t i = 0; i < kBlockSize; i++) { 1658 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1659 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1660 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1661 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1662 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1663 } 1664 } 1665 } 1666 TEST(CVT__WASMSIMD_INT32,positive_infinity)1667 TEST(CVT__WASMSIMD_INT32, positive_infinity) { 1668 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1669 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1670 std::fill(inputs.begin(), inputs.end(), UINT16_C(0x7C00)); 1671 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1672 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1673 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1674 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1675 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1676 << ", optimized = 0x" << std::hex << std::setw(8) << 
std::setfill('0') << float_as_uint32(outputs[0]); 1677 } 1678 TEST(CVT__WASMSIMD_INT32,negative_infinity)1679 TEST(CVT__WASMSIMD_INT32, negative_infinity) { 1680 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1681 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1682 std::fill(inputs.begin(), inputs.end(), UINT16_C(0xFC00)); 1683 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1684 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[0])); 1685 ASSERT_EQ(reference_output, float_as_uint32(outputs[0])) 1686 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[0]) 1687 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1688 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]); 1689 } 1690 TEST(CVT__WASMSIMD_INT32,positive_nan)1691 TEST(CVT__WASMSIMD_INT32, positive_nan) { 1692 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1693 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1694 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1695 for (uint16_t i = 0; i < kBlockSize; i++) { 1696 inputs[i] = std::max<uint16_t>(n + i, UINT16_C(0x7C01)); 1697 } 1698 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1699 for (uint32_t i = 0; i < kBlockSize; i++) { 1700 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1701 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1702 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1703 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1704 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1705 } 
1706 } 1707 } 1708 TEST(CVT__WASMSIMD_INT32,negative_nan)1709 TEST(CVT__WASMSIMD_INT32, negative_nan) { 1710 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> inputs(kBlockSize); 1711 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize); 1712 for (uint16_t n = UINT16_C(0x7C00); n < UINT16_C(0x8000); n += kBlockSize) { 1713 for (uint16_t i = 0; i < kBlockSize; i++) { 1714 inputs[i] = std::max<uint16_t>(UINT16_C(0x8000) | (n + i), UINT16_C(0xFC01)); 1715 } 1716 xnn_math_f16_f32_cvt__wasmsimd_int32(kBlockSize * sizeof(float), inputs.data(), outputs.data()); 1717 for (uint32_t i = 0; i < kBlockSize; i++) { 1718 const uint32_t reference_output = float_as_uint32(fp16_ieee_to_fp32_value(inputs[i])); 1719 ASSERT_EQ(reference_output, float_as_uint32(outputs[i])) 1720 << "input = 0x" << std::hex << std::setw(4) << std::setfill('0') << float_as_uint32(inputs[i]) 1721 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output 1722 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]); 1723 } 1724 } 1725 } 1726 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD 1727