1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <algorithm> 7 #include <cmath> 8 #include <cstddef> 9 #include <cstdint> 10 #include <cstdlib> 11 #include <iomanip> 12 #include <ios> 13 #include <vector> 14 15 #include <gtest/gtest.h> 16 17 #include <fp16.h> 18 19 #include <xnnpack/AlignedAllocator.h> 20 #include <xnnpack/common.h> 21 #include <xnnpack/isa-checks.h> 22 #include <xnnpack/math-stubs.h> 23 24 25 constexpr int kBlockSize = 1024; 26 27 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEON,positive_normal)28 TEST(CVT__NEON, positive_normal) { 29 TEST_REQUIRES_ARM_NEON; 30 31 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 32 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 33 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 34 zero_point <= std::numeric_limits<int8_t>::max(); 35 zero_point++) 36 { 37 const uint32_t max_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point)); 38 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 39 for (uint32_t i = 0; i < kBlockSize; i++) { 40 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input)); 41 } 42 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 43 for (uint32_t i = 0; i < kBlockSize; i++) { 44 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 45 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 46 reference_output = std::numeric_limits<int8_t>::max(); 47 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 48 reference_output = std::numeric_limits<int8_t>::min(); 49 } 50 ASSERT_EQ(reference_output, long(outputs[i])) 51 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 52 << ", reference = " << std::dec << reference_output 53 << ", optimized = " << std::dec << int32_t(outputs[i]) 54 << ", zero point = " << std::dec << zero_point; 55 } 56 } 57 } 58 } 59 TEST(CVT__NEON,negative_normal)60 TEST(CVT__NEON, negative_normal) { 61 TEST_REQUIRES_ARM_NEON; 62 63 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 64 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 65 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 66 zero_point <= std::numeric_limits<int8_t>::max(); 67 zero_point++) 68 { 69 const uint32_t max_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min())); 70 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 71 for (uint32_t i = 0; i < kBlockSize; i++) { 72 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 73 } 74 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 75 for (uint32_t i = 0; i < kBlockSize; i++) { 76 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 77 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 78 reference_output = std::numeric_limits<int8_t>::max(); 79 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 80 reference_output = std::numeric_limits<int8_t>::min(); 81 } 82 ASSERT_EQ(reference_output, long(outputs[i])) 83 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 84 << ", reference = " << std::dec << reference_output 85 << ", optimized = " << std::dec << int32_t(outputs[i]) 86 << ", zero point = " << std::dec << zero_point; 87 } 88 } 89 } 90 } 91 TEST(CVT__NEON,positive_saturation)92 TEST(CVT__NEON, positive_saturation) { 93 TEST_REQUIRES_ARM_NEON; 94 95 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 96 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 97 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 98 zero_point <= std::numeric_limits<int8_t>::max(); 99 zero_point++) 100 { 101 const uint32_t min_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point)); 102 const uint32_t max_input = UINT32_C(0x7F800000); 103 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 104 for (uint32_t i = 0; i < kBlockSize; i++) { 105 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input)); 106 } 107 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 108 for (uint32_t i = 0; i < kBlockSize; i++) { 109 const int32_t reference_output = std::numeric_limits<int8_t>::max(); 110 ASSERT_EQ(reference_output, int32_t(outputs[i])) 111 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 112 << ", reference = " << std::dec << reference_output 113 << ", optimized = " << std::dec << int32_t(outputs[i]) 114 << ", zero point = " << std::dec << zero_point; 115 } 116 } 117 } 118 } 119 TEST(CVT__NEON,negative_saturation)120 TEST(CVT__NEON, negative_saturation) { 121 TEST_REQUIRES_ARM_NEON; 122 123 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 124 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 125 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 126 zero_point <= std::numeric_limits<int8_t>::max(); 127 zero_point++) 128 { 129 const uint32_t min_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min())); 130 const uint32_t max_input = UINT32_C(0x7F800000); 131 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 132 for (uint32_t i = 0; i < kBlockSize; i++) { 133 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 134 } 135 xnn_math_f32_qs8_cvt__neon(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 136 for (uint32_t i = 0; i < kBlockSize; i++) { 137 const int32_t reference_output = std::numeric_limits<int8_t>::min(); 138 ASSERT_EQ(reference_output, int32_t(outputs[i])) 139 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 140 << ", reference = " << std::dec << reference_output 141 << ", optimized = " << std::dec << int32_t(outputs[i]) 142 << ", zero point = " << std::dec << zero_point; 143 } 144 } 145 } 146 } 147 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 148 149 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(CVT__NEONV8,positive_normal)150 TEST(CVT__NEONV8, positive_normal) { 151 TEST_REQUIRES_ARM_NEON_V8; 152 153 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 154 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 155 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 156 zero_point <= std::numeric_limits<int8_t>::max(); 157 zero_point++) 158 { 159 const uint32_t max_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point)); 160 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 161 for (uint32_t i = 0; i < kBlockSize; i++) { 162 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input)); 163 } 164 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 165 for (uint32_t i = 0; i < kBlockSize; i++) { 166 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 167 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 168 reference_output = std::numeric_limits<int8_t>::max(); 169 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 170 reference_output = std::numeric_limits<int8_t>::min(); 171 } 172 ASSERT_EQ(reference_output, long(outputs[i])) 173 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 174 << ", reference = " << std::dec << reference_output 175 << ", optimized = " << std::dec << int32_t(outputs[i]) 176 << ", zero point = " << std::dec << zero_point; 177 } 178 } 179 } 180 } 181 TEST(CVT__NEONV8,negative_normal)182 TEST(CVT__NEONV8, negative_normal) { 183 TEST_REQUIRES_ARM_NEON_V8; 184 185 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 186 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 187 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 188 zero_point <= std::numeric_limits<int8_t>::max(); 189 zero_point++) 190 { 191 const uint32_t max_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min())); 192 for (uint32_t n = 0; n < max_input; n += kBlockSize) { 193 for (uint32_t i = 0; i < kBlockSize; i++) { 194 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 195 } 196 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 197 for (uint32_t i = 0; i < kBlockSize; i++) { 198 long reference_output = std::lrintf(inputs[i]) + long(zero_point); 199 if (inputs[i] >= float(std::numeric_limits<long>::max())) { 200 reference_output = std::numeric_limits<int8_t>::max(); 201 } else if (inputs[i] <= float(std::numeric_limits<long>::min())) { 202 reference_output = std::numeric_limits<int8_t>::min(); 203 } 204 ASSERT_EQ(reference_output, long(outputs[i])) 205 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 206 << ", reference = " << std::dec << reference_output 207 << ", optimized = " << std::dec << int32_t(outputs[i]) 208 << ", zero point = " << std::dec << zero_point; 209 } 210 } 211 } 212 } 213 TEST(CVT__NEONV8,positive_saturation)214 TEST(CVT__NEONV8, positive_saturation) { 215 TEST_REQUIRES_ARM_NEON_V8; 216 217 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 218 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 219 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 220 zero_point <= std::numeric_limits<int8_t>::max(); 221 zero_point++) 222 { 223 const uint32_t min_input = fp32_to_bits((float) (std::numeric_limits<int8_t>::max() - zero_point)); 224 const uint32_t max_input = UINT32_C(0x7F800000); 225 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 226 for (uint32_t i = 0; i < kBlockSize; i++) { 227 inputs[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input)); 228 } 229 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 230 for (uint32_t i = 0; i < kBlockSize; i++) { 231 const int32_t reference_output = std::numeric_limits<int8_t>::max(); 232 ASSERT_EQ(reference_output, int32_t(outputs[i])) 233 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 234 << ", reference = " << std::dec << reference_output 235 << ", optimized = " << std::dec << int32_t(outputs[i]) 236 << ", zero point = " << std::dec << zero_point; 237 } 238 } 239 } 240 } 241 TEST(CVT__NEONV8,negative_saturation)242 TEST(CVT__NEONV8, negative_saturation) { 243 TEST_REQUIRES_ARM_NEON_V8; 244 245 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize); 246 std::vector<int8_t, AlignedAllocator<int8_t, 64>> outputs(kBlockSize); 247 for (int32_t zero_point = std::numeric_limits<int8_t>::min(); 248 zero_point <= std::numeric_limits<int8_t>::max(); 249 zero_point++) 250 { 251 const uint32_t min_input = fp32_to_bits((float) (zero_point - std::numeric_limits<int8_t>::min())); 252 const uint32_t max_input = UINT32_C(0x7F800000); 253 for (uint32_t n = min_input; n < max_input; n += kBlockSize) { 254 for (uint32_t i = 0; i < kBlockSize; i++) { 255 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, max_input)); 256 } 257 xnn_math_f32_qs8_cvt__neonv8(kBlockSize * sizeof(int8_t), inputs.data(), outputs.data(), int8_t(zero_point)); 258 for (uint32_t i = 0; i < kBlockSize; i++) { 259 const int32_t reference_output = std::numeric_limits<int8_t>::min(); 260 ASSERT_EQ(reference_output, int32_t(outputs[i])) 261 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i]) 262 << ", reference = " << std::dec << reference_output 263 << ", optimized = " << std::dec << int32_t(outputs[i]) 264 << ", zero point = " << std::dec << zero_point; 265 } 266 } 267 } 268 } 269 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 270