1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cmath> 13 #include <cstddef> 14 #include <cstdlib> 15 #include <functional> 16 #include <limits> 17 #include <random> 18 #include <vector> 19 20 #include <fp16.h> 21 22 #include <xnnpack.h> 23 #include <xnnpack/math.h> 24 #include <xnnpack/microfnptr.h> 25 #include <xnnpack/microparams-init.h> 26 27 28 class VCvtMicrokernelTester { 29 public: batch_size(size_t batch_size)30 inline VCvtMicrokernelTester& batch_size(size_t batch_size) { 31 assert(batch_size != 0); 32 this->batch_size_ = batch_size; 33 return *this; 34 } 35 batch_size()36 inline size_t batch_size() const { 37 return this->batch_size_; 38 } 39 scale(float scale)40 inline VCvtMicrokernelTester& scale(float scale) { 41 assert(scale > 0.0f); 42 assert(std::isnormal(scale)); 43 this->scale_ = scale; 44 return *this; 45 } 46 scale()47 inline float scale() const { 48 return this->scale_; 49 } 50 input_zero_point(int16_t input_zero_point)51 inline VCvtMicrokernelTester& input_zero_point(int16_t input_zero_point) { 52 this->input_zero_point_ = input_zero_point; 53 return *this; 54 } 55 input_zero_point()56 inline int16_t input_zero_point() const { 57 return this->input_zero_point_; 58 } 59 output_zero_point(int16_t output_zero_point)60 inline VCvtMicrokernelTester& output_zero_point(int16_t output_zero_point) { 61 this->output_zero_point_ = output_zero_point; 62 return *this; 63 } 64 output_zero_point()65 inline int16_t output_zero_point() const { 66 return this->output_zero_point_; 67 } 68 qmin(int16_t qmin)69 inline VCvtMicrokernelTester& qmin(int16_t qmin) { 70 this->qmin_ = qmin; 71 return *this; 72 } 73 qmin()74 inline int16_t qmin() const { 75 return this->qmin_; 76 } 77 qmax(int16_t qmax)78 inline VCvtMicrokernelTester& qmax(int16_t qmax) { 79 this->qmax_ = qmax; 80 return *this; 81 } 82 qmax()83 inline int16_t qmax() const { 84 return this->qmax_; 85 } 86 iterations(size_t iterations)87 inline VCvtMicrokernelTester& iterations(size_t iterations) { 88 this->iterations_ = iterations; 89 return *this; 90 } 91 iterations()92 inline size_t iterations() const { 93 return this->iterations_; 94 } 95 96 void Test(xnn_f16_f32_vcvt_ukernel_function vcvt, xnn_init_f16_f32_cvt_params_fn init_params = nullptr) const { 97 std::random_device random_device; 98 auto rng = std::mt19937(random_device()); 99 std::uniform_real_distribution<float> f32dist(-100.0f, 100.0f); 100 101 std::vector<uint16_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 102 std::vector<float> output(batch_size()); 103 for (size_t iteration = 0; iteration < iterations(); iteration++) { 104 std::generate(input.begin(), input.end(), [&]() { return fp16_ieee_from_fp32_value(f32dist(rng)); }); 105 std::fill(output.begin(), output.end(), nanf("")); 106 107 union xnn_f16_f32_cvt_params params; 108 if (init_params) { 109 init_params(¶ms); 110 } 111 112 // Call optimized micro-kernel. 113 vcvt(batch_size() * sizeof(uint16_t), input.data(), output.data(), ¶ms); 114 115 // Verify results. 116 for (size_t i = 0; i < batch_size(); i++) { 117 ASSERT_EQ(float_as_uint32(output[i]), float_as_uint32(fp16_ieee_to_fp32_value(input[i]))) 118 << "at " << i << " / " << batch_size() 119 << ", x[" << i << "] = 0x" << std::hex << std::setw(4) << std::setfill('0') << input[i]; 120 } 121 } 122 } 123 124 void Test(xnn_f32_f16_vcvt_ukernel_function vcvt, xnn_init_f32_f16_cvt_params_fn init_params = nullptr) const { 125 std::random_device random_device; 126 auto rng = std::mt19937(random_device()); 127 std::uniform_real_distribution<float> f32dist(-100.0f, 100.0f); 128 129 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 130 std::vector<uint16_t> output(batch_size()); 131 for (size_t iteration = 0; iteration < iterations(); iteration++) { 132 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 133 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */); 134 135 union xnn_f32_f16_cvt_params params; 136 if (init_params) { 137 init_params(¶ms); 138 } 139 140 // Call optimized micro-kernel. 141 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 142 143 // Verify results. 144 for (size_t i = 0; i < batch_size(); i++) { 145 ASSERT_EQ(output[i], fp16_ieee_from_fp32_value(input[i])) 146 << "at " << i << " / " << batch_size() 147 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(input[i]) 148 << " (" << input[i] << ")"; 149 } 150 } 151 } 152 Test(xnn_f32_qs8_vcvt_ukernel_function vcvt,xnn_init_f32_qs8_cvt_params_fn init_params)153 void Test(xnn_f32_qs8_vcvt_ukernel_function vcvt, xnn_init_f32_qs8_cvt_params_fn init_params) const { 154 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 155 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 156 ASSERT_LT(qmin(), qmax()); 157 158 ASSERT_GE(output_zero_point(), std::numeric_limits<int8_t>::min()); 159 ASSERT_LE(output_zero_point(), std::numeric_limits<int8_t>::max()); 160 161 std::random_device random_device; 162 auto rng = std::mt19937(random_device()); 163 std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f); 164 165 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 166 std::vector<int8_t> output(batch_size()); 167 std::vector<int8_t> output_ref(batch_size()); 168 for (size_t iteration = 0; iteration < iterations(); iteration++) { 169 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 170 std::fill(output.begin(), output.end(), INT8_C(0xA5)); 171 172 union xnn_f32_qs8_cvt_params params; 173 if (init_params) { 174 init_params(¶ms, scale(), output_zero_point(), qmin(), qmax()); 175 } 176 177 // Call optimized micro-kernel. 178 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 179 180 // Compute reference results 181 for (size_t i = 0; i < batch_size(); i++) { 182 float scaled_input = input[i] * scale(); 183 scaled_input = std::min<float>(scaled_input, float(qmax() - output_zero_point())); 184 scaled_input = std::max<float>(scaled_input, float(qmin() - output_zero_point())); 185 output_ref[i] = int8_t(std::lrintf(scaled_input) + long(output_zero_point())); 186 } 187 188 // Verify results. 189 for (size_t i = 0; i < batch_size(); i++) { 190 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 191 << "at " << i << " / " << batch_size() 192 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(input[i]) 193 << " (" << input[i] << ")"; 194 } 195 } 196 } 197 Test(xnn_f32_qu8_vcvt_ukernel_function vcvt,xnn_init_f32_qu8_cvt_params_fn init_params)198 void Test(xnn_f32_qu8_vcvt_ukernel_function vcvt, xnn_init_f32_qu8_cvt_params_fn init_params) const { 199 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 200 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 201 ASSERT_LT(qmin(), qmax()); 202 203 ASSERT_GE(output_zero_point(), std::numeric_limits<uint8_t>::min()); 204 ASSERT_LE(output_zero_point(), std::numeric_limits<uint8_t>::max()); 205 206 std::random_device random_device; 207 auto rng = std::mt19937(random_device()); 208 std::uniform_real_distribution<float> f32dist(-1.0f, 1.0f); 209 210 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 211 std::vector<uint8_t> output(batch_size()); 212 std::vector<uint8_t> output_ref(batch_size()); 213 for (size_t iteration = 0; iteration < iterations(); iteration++) { 214 std::generate(input.begin(), input.end(), [&]() { return f32dist(rng); }); 215 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 216 217 union xnn_f32_qu8_cvt_params params; 218 init_params(¶ms, scale(), output_zero_point(), qmin(), qmax()); 219 220 // Call optimized micro-kernel. 221 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 222 223 // Compute reference results 224 for (size_t i = 0; i < batch_size(); i++) { 225 float scaled_input = input[i] * scale(); 226 scaled_input = std::min<float>(scaled_input, float(qmax() - output_zero_point())); 227 scaled_input = std::max<float>(scaled_input, float(qmin() - output_zero_point())); 228 output_ref[i] = uint8_t(std::lrintf(scaled_input) + long(output_zero_point())); 229 } 230 231 // Verify results. 232 for (size_t i = 0; i < batch_size(); i++) { 233 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 234 << "at " << i << " / " << batch_size() 235 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(input[i]) 236 << " (" << input[i] << ")"; 237 } 238 } 239 } 240 Test(xnn_qs8_vcvt_ukernel_function vcvt,xnn_init_qs8_cvt_params_fn init_params)241 void Test(xnn_qs8_vcvt_ukernel_function vcvt, xnn_init_qs8_cvt_params_fn init_params) const { 242 ASSERT_GE(input_zero_point(), std::numeric_limits<int8_t>::min()); 243 ASSERT_LE(input_zero_point(), std::numeric_limits<int8_t>::max()); 244 ASSERT_GE(output_zero_point(), std::numeric_limits<int8_t>::min()); 245 ASSERT_LE(output_zero_point(), std::numeric_limits<int8_t>::max()); 246 247 std::random_device random_device; 248 auto rng = std::mt19937(random_device()); 249 std::uniform_int_distribution<int32_t> i8dist( 250 std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); 251 252 std::vector<int8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t)); 253 std::vector<int8_t> output(batch_size()); 254 std::vector<int8_t> output_ref(batch_size()); 255 for (size_t iteration = 0; iteration < iterations(); iteration++) { 256 std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); 257 std::fill(output.begin(), output.end(), INT8_C(0xA5)); 258 259 union xnn_qs8_cvt_params params; 260 init_params(¶ms, scale(), input_zero_point(), output_zero_point()); 261 262 // Call optimized micro-kernel. 263 vcvt(batch_size() * sizeof(int8_t), input.data(), output.data(), ¶ms); 264 265 // Compute reference results 266 const int32_t multiplier = (int32_t) lrintf(-256.0f * scale()); 267 for (size_t i = 0; i < batch_size(); i++) { 268 const int32_t input_value = (input_zero_point() - input[i]) << 7; 269 int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point(); 270 output_value = std::min<int32_t>(output_value, std::numeric_limits<int8_t>::max()); 271 output_value = std::max<int32_t>(output_value, std::numeric_limits<int8_t>::min()); 272 output_ref[i] = static_cast<int8_t>(output_value); 273 } 274 275 // Verify results. 276 for (size_t i = 0; i < batch_size(); i++) { 277 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 278 << "at " << i << " / " << batch_size() 279 << ", x[" << i << "] = " << int32_t(input[i]); 280 } 281 } 282 } 283 Test(xnn_qs8_f32_vcvt_ukernel_function vcvt,xnn_init_qs8_f32_cvt_params_fn init_params)284 void Test(xnn_qs8_f32_vcvt_ukernel_function vcvt, xnn_init_qs8_f32_cvt_params_fn init_params) const { 285 ASSERT_GE(input_zero_point(), std::numeric_limits<int8_t>::min()); 286 ASSERT_LE(input_zero_point(), std::numeric_limits<int8_t>::max()); 287 288 std::random_device random_device; 289 auto rng = std::mt19937(random_device()); 290 std::uniform_int_distribution<int32_t> i8dist( 291 std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); 292 293 std::vector<int8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t)); 294 std::vector<float> output(batch_size()); 295 std::vector<float> output_ref(batch_size()); 296 for (size_t iteration = 0; iteration < iterations(); iteration++) { 297 std::generate(input.begin(), input.end(), [&]() { return i8dist(rng); }); 298 std::fill(output.begin(), output.end(), std::nanf("")); 299 300 union xnn_qs8_f32_cvt_params params; 301 init_params(¶ms, scale(), input_zero_point()); 302 303 // Call optimized micro-kernel. 304 vcvt(batch_size() * sizeof(int8_t), input.data(), output.data(), ¶ms); 305 306 // Compute reference results 307 for (size_t i = 0; i < batch_size(); i++) { 308 output_ref[i] = float(int16_t(input[i]) - input_zero_point()) * scale(); 309 } 310 311 // Verify results. 312 for (size_t i = 0; i < batch_size(); i++) { 313 ASSERT_EQ(output[i], output_ref[i]) 314 << "at " << i << " / " << batch_size() 315 << ", x[" << i << "] = " << int32_t(input[i]); 316 } 317 } 318 } 319 Test(xnn_qu8_vcvt_ukernel_function vcvt,xnn_init_qu8_cvt_params_fn init_params)320 void Test(xnn_qu8_vcvt_ukernel_function vcvt, xnn_init_qu8_cvt_params_fn init_params) const { 321 ASSERT_GE(input_zero_point(), std::numeric_limits<uint8_t>::min()); 322 ASSERT_LE(input_zero_point(), std::numeric_limits<uint8_t>::max()); 323 ASSERT_GE(output_zero_point(), std::numeric_limits<uint8_t>::min()); 324 ASSERT_LE(output_zero_point(), std::numeric_limits<uint8_t>::max()); 325 326 std::random_device random_device; 327 auto rng = std::mt19937(random_device()); 328 std::uniform_int_distribution<int32_t> u8dist( 329 std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()); 330 331 std::vector<uint8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 332 std::vector<uint8_t> output(batch_size()); 333 std::vector<uint8_t> output_ref(batch_size()); 334 for (size_t iteration = 0; iteration < iterations(); iteration++) { 335 std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); 336 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 337 338 union xnn_qu8_cvt_params params; 339 init_params(¶ms, scale(), input_zero_point(), output_zero_point()); 340 341 // Call optimized micro-kernel. 342 vcvt(batch_size() * sizeof(uint8_t), input.data(), output.data(), ¶ms); 343 344 // Compute reference results 345 const int32_t multiplier = (int32_t) lrintf(-256.0f * scale()); 346 for (size_t i = 0; i < batch_size(); i++) { 347 const int32_t input_value = (input_zero_point() - input[i]) << 7; 348 int32_t output_value = math_asr_s32(input_value * multiplier + INT32_C(0x4000), 15) + output_zero_point(); 349 output_value = std::min<int32_t>(output_value, std::numeric_limits<uint8_t>::max()); 350 output_value = std::max<int32_t>(output_value, std::numeric_limits<uint8_t>::min()); 351 output_ref[i] = static_cast<uint8_t>(output_value); 352 } 353 354 // Verify results. 355 for (size_t i = 0; i < batch_size(); i++) { 356 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 357 << "at " << i << " / " << batch_size() 358 << ", x[" << i << "] = " << int32_t(input[i]); 359 } 360 } 361 } 362 Test(xnn_qu8_f32_vcvt_ukernel_function vcvt,xnn_init_qu8_f32_cvt_params_fn init_params)363 void Test(xnn_qu8_f32_vcvt_ukernel_function vcvt, xnn_init_qu8_f32_cvt_params_fn init_params) const { 364 ASSERT_GE(input_zero_point(), std::numeric_limits<uint8_t>::min()); 365 ASSERT_LE(input_zero_point(), std::numeric_limits<uint8_t>::max()); 366 367 std::random_device random_device; 368 auto rng = std::mt19937(random_device()); 369 std::uniform_int_distribution<int32_t> u8dist( 370 std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()); 371 372 std::vector<uint8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 373 std::vector<float> output(batch_size()); 374 std::vector<float> output_ref(batch_size()); 375 for (size_t iteration = 0; iteration < iterations(); iteration++) { 376 std::generate(input.begin(), input.end(), [&]() { return u8dist(rng); }); 377 std::fill(output.begin(), output.end(), std::nanf("")); 378 379 union xnn_qu8_f32_cvt_params params; 380 init_params(¶ms, scale(), input_zero_point()); 381 382 // Call optimized micro-kernel. 383 vcvt(batch_size() * sizeof(uint8_t), input.data(), output.data(), ¶ms); 384 385 // Compute reference results 386 for (size_t i = 0; i < batch_size(); i++) { 387 output_ref[i] = float(int16_t(input[i]) - input_zero_point()) * scale(); 388 } 389 390 // Verify results. 391 for (size_t i = 0; i < batch_size(); i++) { 392 ASSERT_EQ(output[i], output_ref[i]) 393 << "at " << i << " / " << batch_size() 394 << ", x[" << i << "] = " << int32_t(input[i]); 395 } 396 } 397 } 398 399 private: 400 float scale_ = 1.75f; 401 int16_t input_zero_point_ = 1; 402 int16_t output_zero_point_ = 5; 403 int16_t qmin_ = std::numeric_limits<int16_t>::min(); 404 int16_t qmax_ = std::numeric_limits<int16_t>::max(); 405 size_t batch_size_ = 1; 406 size_t iterations_ = 15; 407 }; 408