1 // Copyright 2021 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #pragma once 7 8 #include <gtest/gtest.h> 9 10 #include <algorithm> 11 #include <cassert> 12 #include <cmath> 13 #include <cstddef> 14 #include <cstdlib> 15 #include <functional> 16 #include <limits> 17 #include <random> 18 #include <vector> 19 20 #include <fp16.h> 21 22 #include <xnnpack.h> 23 #include <xnnpack/params.h> 24 #include <xnnpack/params-init.h> 25 26 27 class VCvtMicrokernelTester { 28 public: batch_size(size_t batch_size)29 inline VCvtMicrokernelTester& batch_size(size_t batch_size) { 30 assert(batch_size != 0); 31 this->batch_size_ = batch_size; 32 return *this; 33 } 34 batch_size()35 inline size_t batch_size() const { 36 return this->batch_size_; 37 } 38 scale(float scale)39 inline VCvtMicrokernelTester& scale(float scale) { 40 assert(scale > 0.0f); 41 assert(std::isnormal(scale)); 42 this->scale_ = scale; 43 return *this; 44 } 45 scale()46 inline float scale() const { 47 return this->scale_; 48 } 49 zero_point(int16_t zero_point)50 inline VCvtMicrokernelTester& zero_point(int16_t zero_point) { 51 this->zero_point_ = zero_point; 52 return *this; 53 } 54 zero_point()55 inline int16_t zero_point() const { 56 return this->zero_point_; 57 } 58 qmin(int16_t qmin)59 inline VCvtMicrokernelTester& qmin(int16_t qmin) { 60 this->qmin_ = qmin; 61 return *this; 62 } 63 qmin()64 inline int16_t qmin() const { 65 return this->qmin_; 66 } 67 qmax(int16_t qmax)68 inline VCvtMicrokernelTester& qmax(int16_t qmax) { 69 this->qmax_ = qmax; 70 return *this; 71 } 72 qmax()73 inline int16_t qmax() const { 74 return this->qmax_; 75 } 76 iterations(size_t iterations)77 inline VCvtMicrokernelTester& iterations(size_t iterations) { 78 this->iterations_ = iterations; 79 return *this; 80 } 81 iterations()82 inline size_t iterations() const { 83 return this->iterations_; 84 } 85 86 void Test(xnn_f16_f32_vcvt_ukernel_function vcvt, xnn_init_f16_f32_cvt_params_fn init_params = nullptr) const { 87 std::random_device random_device; 88 auto rng = std::mt19937(random_device()); 89 auto distribution = std::uniform_real_distribution<float>(-100.0f, 100.0f); 90 auto f32rng = std::bind(distribution, std::ref(rng)); 91 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng); 92 93 std::vector<uint16_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t)); 94 std::vector<float> output(batch_size()); 95 for (size_t iteration = 0; iteration < iterations(); iteration++) { 96 std::generate(input.begin(), input.end(), std::ref(f16rng)); 97 std::fill(output.begin(), output.end(), nanf("")); 98 99 union xnn_f16_f32_cvt_params params; 100 if (init_params) { 101 init_params(¶ms); 102 } 103 104 // Call optimized micro-kernel. 105 vcvt(batch_size() * sizeof(uint16_t), input.data(), output.data(), ¶ms); 106 107 // Verify results. 108 for (size_t i = 0; i < batch_size(); i++) { 109 ASSERT_EQ(fp32_to_bits(output[i]), fp32_to_bits(fp16_ieee_to_fp32_value(input[i]))) 110 << "at " << i << " / " << batch_size() 111 << ", x[" << i << "] = 0x" << std::hex << std::setw(4) << std::setfill('0') << input[i]; 112 } 113 } 114 } 115 116 void Test(xnn_f32_f16_vcvt_ukernel_function vcvt, xnn_init_f32_f16_cvt_params_fn init_params = nullptr) const { 117 std::random_device random_device; 118 auto rng = std::mt19937(random_device()); 119 auto distribution = std::uniform_real_distribution<float>(-100.0f, 100.0f); 120 auto f32rng = std::bind(distribution, std::ref(rng)); 121 122 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 123 std::vector<uint16_t> output(batch_size()); 124 for (size_t iteration = 0; iteration < iterations(); iteration++) { 125 std::generate(input.begin(), input.end(), std::ref(f32rng)); 126 std::fill(output.begin(), output.end(), UINT16_C(0x7E)); 127 128 union xnn_f32_f16_cvt_params params; 129 if (init_params) { 130 init_params(¶ms); 131 } 132 133 // Call optimized micro-kernel. 134 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 135 136 // Verify results. 137 for (size_t i = 0; i < batch_size(); i++) { 138 ASSERT_EQ(output[i], fp16_ieee_from_fp32_value(input[i])) 139 << "at " << i << " / " << batch_size() 140 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i]) 141 << " (" << input[i] << ")"; 142 } 143 } 144 } 145 Test(xnn_f32_qs8_vcvt_ukernel_function vcvt,xnn_init_f32_qs8_cvt_params_fn init_params)146 void Test(xnn_f32_qs8_vcvt_ukernel_function vcvt, xnn_init_f32_qs8_cvt_params_fn init_params) const { 147 ASSERT_GE(qmin(), std::numeric_limits<int8_t>::min()); 148 ASSERT_LE(qmax(), std::numeric_limits<int8_t>::max()); 149 ASSERT_LT(qmin(), qmax()); 150 151 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 152 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 153 154 std::random_device random_device; 155 auto rng = std::mt19937(random_device()); 156 auto distribution = std::uniform_real_distribution<float>(-1.0f, 1.0f); 157 auto f32rng = std::bind(distribution, std::ref(rng)); 158 159 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 160 std::vector<int8_t> output(batch_size()); 161 std::vector<int8_t> output_ref(batch_size()); 162 for (size_t iteration = 0; iteration < iterations(); iteration++) { 163 std::generate(input.begin(), input.end(), std::ref(f32rng)); 164 std::fill(output.begin(), output.end(), INT8_C(0xA5)); 165 166 union xnn_f32_qs8_cvt_params params; 167 if (init_params) { 168 init_params(¶ms, scale(), zero_point(), qmin(), qmax()); 169 } 170 171 // Call optimized micro-kernel. 172 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 173 174 // Compute reference results 175 for (size_t i = 0; i < batch_size(); i++) { 176 float scaled_input = input[i] * scale(); 177 scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point())); 178 scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point())); 179 output_ref[i] = int8_t(std::lrintf(scaled_input) + long(zero_point())); 180 } 181 182 // Verify results. 183 for (size_t i = 0; i < batch_size(); i++) { 184 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 185 << "at " << i << " / " << batch_size() 186 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i]) 187 << " (" << input[i] << ")"; 188 } 189 } 190 } 191 Test(xnn_f32_qu8_vcvt_ukernel_function vcvt,xnn_init_f32_qu8_cvt_params_fn init_params)192 void Test(xnn_f32_qu8_vcvt_ukernel_function vcvt, xnn_init_f32_qu8_cvt_params_fn init_params) const { 193 ASSERT_GE(qmin(), std::numeric_limits<uint8_t>::min()); 194 ASSERT_LE(qmax(), std::numeric_limits<uint8_t>::max()); 195 ASSERT_LT(qmin(), qmax()); 196 197 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 198 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 199 200 std::random_device random_device; 201 auto rng = std::mt19937(random_device()); 202 auto distribution = std::uniform_real_distribution<float>(-1.0f, 1.0f); 203 auto f32rng = std::bind(distribution, std::ref(rng)); 204 205 std::vector<float> input(batch_size() + XNN_EXTRA_BYTES / sizeof(float)); 206 std::vector<uint8_t> output(batch_size()); 207 std::vector<uint8_t> output_ref(batch_size()); 208 for (size_t iteration = 0; iteration < iterations(); iteration++) { 209 std::generate(input.begin(), input.end(), std::ref(f32rng)); 210 std::fill(output.begin(), output.end(), UINT8_C(0xA5)); 211 212 union xnn_f32_qu8_cvt_params params; 213 init_params(¶ms, scale(), zero_point(), qmin(), qmax()); 214 215 // Call optimized micro-kernel. 216 vcvt(batch_size() * sizeof(float), input.data(), output.data(), ¶ms); 217 218 // Compute reference results 219 for (size_t i = 0; i < batch_size(); i++) { 220 float scaled_input = input[i] * scale(); 221 scaled_input = std::min<float>(scaled_input, float(qmax() - zero_point())); 222 scaled_input = std::max<float>(scaled_input, float(qmin() - zero_point())); 223 output_ref[i] = uint8_t(std::lrintf(scaled_input) + long(zero_point())); 224 } 225 226 // Verify results. 227 for (size_t i = 0; i < batch_size(); i++) { 228 ASSERT_EQ(int32_t(output[i]), int32_t(output_ref[i])) 229 << "at " << i << " / " << batch_size() 230 << ", x[" << i << "] = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(input[i]) 231 << " (" << input[i] << ")"; 232 } 233 } 234 } 235 Test(xnn_qs8_f32_vcvt_ukernel_function vcvt,xnn_init_qs8_f32_cvt_params_fn init_params)236 void Test(xnn_qs8_f32_vcvt_ukernel_function vcvt, xnn_init_qs8_f32_cvt_params_fn init_params) const { 237 ASSERT_GE(zero_point(), std::numeric_limits<int8_t>::min()); 238 ASSERT_LE(zero_point(), std::numeric_limits<int8_t>::max()); 239 240 std::random_device random_device; 241 auto rng = std::mt19937(random_device()); 242 auto distribution = 243 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()); 244 auto i8rng = std::bind(distribution, std::ref(rng)); 245 246 std::vector<int8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(int8_t)); 247 std::vector<float> output(batch_size()); 248 std::vector<float> output_ref(batch_size()); 249 for (size_t iteration = 0; iteration < iterations(); iteration++) { 250 std::generate(input.begin(), input.end(), std::ref(i8rng)); 251 std::fill(output.begin(), output.end(), std::nanf("")); 252 253 union xnn_qs8_f32_cvt_params params; 254 init_params(¶ms, scale(), zero_point()); 255 256 // Call optimized micro-kernel. 257 vcvt(batch_size() * sizeof(int8_t), input.data(), output.data(), ¶ms); 258 259 // Compute reference results 260 for (size_t i = 0; i < batch_size(); i++) { 261 output_ref[i] = float(int16_t(input[i]) - zero_point()) * scale(); 262 } 263 264 // Verify results. 265 for (size_t i = 0; i < batch_size(); i++) { 266 ASSERT_EQ(output[i], output_ref[i]) 267 << "at " << i << " / " << batch_size() 268 << ", x[" << i << "] = " << int32_t(input[i]); 269 } 270 } 271 } 272 Test(xnn_qu8_f32_vcvt_ukernel_function vcvt,xnn_init_qu8_f32_cvt_params_fn init_params)273 void Test(xnn_qu8_f32_vcvt_ukernel_function vcvt, xnn_init_qu8_f32_cvt_params_fn init_params) const { 274 ASSERT_GE(zero_point(), std::numeric_limits<uint8_t>::min()); 275 ASSERT_LE(zero_point(), std::numeric_limits<uint8_t>::max()); 276 277 std::random_device random_device; 278 auto rng = std::mt19937(random_device()); 279 auto distribution = 280 std::uniform_int_distribution<int32_t>(std::numeric_limits<uint8_t>::min(), std::numeric_limits<uint8_t>::max()); 281 auto u8rng = std::bind(distribution, std::ref(rng)); 282 283 std::vector<uint8_t> input(batch_size() + XNN_EXTRA_BYTES / sizeof(uint8_t)); 284 std::vector<float> output(batch_size()); 285 std::vector<float> output_ref(batch_size()); 286 for (size_t iteration = 0; iteration < iterations(); iteration++) { 287 std::generate(input.begin(), input.end(), std::ref(u8rng)); 288 std::fill(output.begin(), output.end(), std::nanf("")); 289 290 union xnn_qu8_f32_cvt_params params; 291 init_params(¶ms, scale(), zero_point()); 292 293 // Call optimized micro-kernel. 294 vcvt(batch_size() * sizeof(uint8_t), input.data(), output.data(), ¶ms); 295 296 // Compute reference results 297 for (size_t i = 0; i < batch_size(); i++) { 298 output_ref[i] = float(int16_t(input[i]) - zero_point()) * scale(); 299 } 300 301 // Verify results. 302 for (size_t i = 0; i < batch_size(); i++) { 303 ASSERT_EQ(output[i], output_ref[i]) 304 << "at " << i << " / " << batch_size() 305 << ", x[" << i << "] = " << int32_t(input[i]); 306 } 307 } 308 } 309 310 private: 311 float scale_ = 1.75f; 312 int16_t zero_point_ = 1; 313 int16_t qmin_ = std::numeric_limits<int16_t>::min(); 314 int16_t qmax_ = std::numeric_limits<int16_t>::max(); 315 size_t batch_size_ = 1; 316 size_t iterations_ = 15; 317 }; 318