1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 9 #pragma once 10 11 #include <gtest/gtest.h> 12 13 #include <cstddef> 14 #include <cstdlib> 15 16 #include <algorithm> 17 #include <cfloat> 18 #include <cmath> 19 #include <functional> 20 #include <random> 21 #include <vector> 22 23 #include <xnnpack/params.h> 24 #include <xnnpack/scalar-utils.h> 25 26 27 class RequantizationTester { 28 public: s(uint32_t s)29 inline RequantizationTester& s(uint32_t s) { 30 this->s_ = s; 31 return *this; 32 } 33 s()34 inline uint32_t s() const { 35 return this->s_; 36 } 37 scale()38 inline float scale() const { 39 return ldexpf(1.0f, -s()); 40 } 41 zeroPoint(int32_t zeroPoint)42 inline RequantizationTester& zeroPoint(int32_t zeroPoint) { 43 this->zeroPoint_ = zeroPoint; 44 return *this; 45 } 46 zeroPoint()47 inline int32_t zeroPoint() const { 48 return this->zeroPoint_; 49 } 50 qmin(uint8_t qmin)51 inline RequantizationTester& qmin(uint8_t qmin) { 52 this->qmin_ = qmin; 53 return *this; 54 } 55 qmin()56 inline uint8_t qmin() const { 57 return this->qmin_; 58 } 59 qmax(uint8_t qmax)60 inline RequantizationTester& qmax(uint8_t qmax) { 61 this->qmax_ = qmax; 62 return *this; 63 } 64 qmax()65 inline uint8_t qmax() const { 66 return this->qmax_; 67 } 68 iterations(size_t iterations)69 inline RequantizationTester& iterations(size_t iterations) { 70 this->iterations_ = iterations; 71 return *this; 72 } 73 iterations()74 inline size_t iterations() const { 75 return this->iterations_; 76 } 77 78 /* 79 * Test that requantization of numbers ((i - zero point) * 2**s) with 80 * - scale = exp2(-s) 81 * - zero point in [0, 255] 82 * - no output clamping 83 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 84 */ testExactDivideByPO2(requantization_function requantize)85 void testExactDivideByPO2(requantization_function requantize) const { 86 ASSERT_GE(zeroPoint(), 0); 87 ASSERT_LE(zeroPoint(), 255); 88 89 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 90 ASSERT_GE(s(), 1); 91 ASSERT_LT(s(), 32); 92 93 std::vector<int32_t> inputs(256); 94 std::vector<uint8_t> outputs(inputs.size()); 95 const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zeroPoint(); 96 const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zeroPoint(); 97 for (int32_t i = 0; i < 256; i++) { 98 const int32_t clampedI = std::max(minI, std::min(maxI, i)); 99 inputs[i] = int32_t(uint32_t(clampedI - zeroPoint()) << s()); 100 } 101 requantize(inputs.size(), inputs.data(), 102 scale(), zeroPoint(), qmin(), qmax(), 103 outputs.data()); 104 for (int32_t i = 0; i < 256; i++) { 105 const int32_t clampedI = std::max(minI, std::min(maxI, i)); 106 ASSERT_EQ(clampedI, outputs[i]) << "i = " << i << ", clamped i = " << clampedI << 107 ", min i = " << minI << ", max i = " << maxI << 108 ", s = " << s() << ", zero point = " << zeroPoint(); 109 } 110 } 111 112 /* 113 * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with 114 * - scale = exp2(-s) 115 * - zero point in [1, 255] 116 * - no output clamping 117 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 118 */ testDivideByPO2WithRoundingUp(requantization_function requantize)119 void testDivideByPO2WithRoundingUp(requantization_function requantize) { 120 ASSERT_GE(zeroPoint(), 0); 121 ASSERT_LE(zeroPoint(), 255); 122 123 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 124 ASSERT_GE(s(), 1); 125 ASSERT_LT(s(), 32); 126 127 std::vector<int32_t> inputs(256); 128 std::vector<uint8_t> outputs(inputs.size()); 129 for (int32_t i = 0; i < 256; i++) { 130 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) - 131 (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint()); 132 inputs[i] = int32_t(input); 133 } 134 requantize(inputs.size(), inputs.data(), 135 scale(), zeroPoint(), qmin(), qmax(), 136 outputs.data()); 137 for (int32_t i = 0; i < 256; i++) { 138 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) - 139 (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint()); 140 if (int32_t(input) == input) { 141 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input << 142 ", s = " << s() << ", zero point = " << zeroPoint(); 143 } 144 } 145 } 146 147 /* 148 * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with 149 * - scale = exp2(-s) 150 * - zero point in [1, 255] 151 * - no output clamping 152 * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow. 153 */ testDivideByPO2WithRoundingDown(requantization_function requantize)154 void testDivideByPO2WithRoundingDown(requantization_function requantize) { 155 ASSERT_GE(zeroPoint(), 0); 156 ASSERT_LE(zeroPoint(), 255); 157 158 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 159 ASSERT_GE(s(), 1); 160 ASSERT_LT(s(), 32); 161 162 std::vector<int32_t> inputs(256); 163 std::vector<uint8_t> outputs(inputs.size()); 164 for (int32_t i = 0; i < 256; i++) { 165 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) + 166 (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint()); 167 inputs[i] = int32_t(input); 168 } 169 requantize(inputs.size(), inputs.data(), 170 scale(), zeroPoint(), qmin(), qmax(), 171 outputs.data()); 172 for (int32_t i = 0; i < 256; i++) { 173 const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) + 174 (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint()); 175 if (int32_t(input) == input) { 176 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input << 177 ", s = " << s() << ", zero point = " << zeroPoint(); 178 } 179 } 180 } 181 testDivideByPO2WithRoundingAway(requantization_function requantize)182 void testDivideByPO2WithRoundingAway(requantization_function requantize) { 183 ASSERT_GE(zeroPoint(), 0); 184 ASSERT_LE(zeroPoint(), 255); 185 186 /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */ 187 ASSERT_GE(s(), 1); 188 ASSERT_LT(s(), 32); 189 190 std::vector<int32_t> inputs(256); 191 std::vector<uint8_t> outputs(inputs.size()); 192 for (int32_t i = 0; i < 256; i++) { 193 int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()); 194 if (input > 0) { 195 input -= INT64_C(1) << (s() - 1); 196 } else if (input < 0) { 197 input += INT64_C(1) << (s() - 1); 198 } 199 inputs[i] = int32_t(input); 200 } 201 requantize(inputs.size(), inputs.data(), 202 scale(), zeroPoint(), qmin(), qmax(), 203 outputs.data()); 204 for (uint32_t i = 0; i < 256; i++) { 205 int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()); 206 if (input > 0) { 207 input -= INT64_C(1) << (s() - 1); 208 } else if (input < 0) { 209 input += INT64_C(1) << (s() - 1); 210 } 211 if (int32_t(input) == input) { 212 ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input << 213 ", s = " << s() << ", zero point = " << zeroPoint(); 214 } 215 } 216 } 217 testSpecialCases(requantization_function requantize)218 void testSpecialCases(requantization_function requantize) { 219 std::vector<int32_t> inputs(256); 220 std::vector<uint8_t> outputs(inputs.size()); 221 222 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min()); 223 for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) { 224 requantize( 225 inputs.size(), 226 inputs.data(), 227 ldexpf(1.0f, -32) /* scale */, 228 zeroPoint /* zero point */, 229 std::numeric_limits<uint8_t>::min(), 230 std::numeric_limits<uint8_t>::max(), 231 outputs.data()); 232 ASSERT_EQ(std::max(int32_t(0), zeroPoint - 1), *std::min_element(outputs.cbegin(), outputs.cend())); 233 } 234 235 std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max()); 236 requantize( 237 inputs.size(), 238 inputs.data(), 239 0x1.FFFFFEp-1f /* scale */, 240 std::numeric_limits<uint8_t>::max() /* zero point */, 241 std::numeric_limits<uint8_t>::min(), 242 std::numeric_limits<uint8_t>::max(), 243 outputs.data()); 244 for (size_t i = 0; i < inputs.size(); i++) { 245 ASSERT_EQ(std::numeric_limits<uint8_t>::max(), outputs[i]); 246 } 247 } 248 testRandomCasesPrecise(requantization_function requantize)249 void testRandomCasesPrecise(requantization_function requantize) { 250 std::random_device random_device; 251 std::mt19937 mtRng(random_device()); 252 for (size_t iteration = 0; iteration < iterations(); iteration++) { 253 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng); 254 255 std::vector<int32_t> inputs(4096); 256 std::vector<uint8_t> outputs(inputs.size()); 257 258 const uint8_t zeroPoint = UINT8_C(128); 259 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 260 const float scale = scaleDistribution(mtRng); 261 for (size_t i = 0; i < inputs.size(); i++) { 262 const uint8_t approximateOutput = rng(); 263 const int32_t input = int32_t(double(approximateOutput) / double(scale)); 264 inputs[i] = input; 265 } 266 267 requantize( 268 inputs.size(), inputs.data(), scale, zeroPoint, 269 std::numeric_limits<uint8_t>::min(), 270 std::numeric_limits<uint8_t>::max(), 271 outputs.data()); 272 273 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */ 274 ASSERT_NE( 275 *std::max_element(outputs.cbegin(), outputs.cend()), 276 *std::min_element(outputs.cbegin(), outputs.cend())); 277 278 for (size_t i = 0; i < inputs.size(); i++) { 279 const uint8_t referenceOutput = 280 scalar_requantize_precise( 281 inputs[i], scale, zeroPoint, 282 std::numeric_limits<uint8_t>::min(), 283 std::numeric_limits<uint8_t>::max()); 284 ASSERT_EQ(uint32_t(referenceOutput), uint32_t(outputs[i])); 285 } 286 } 287 } 288 testRandomCasesApproximate(requantization_function requantize)289 void testRandomCasesApproximate(requantization_function requantize) { 290 std::random_device random_device; 291 std::mt19937 mtRng(random_device()); 292 for (size_t iteration = 0; iteration < iterations(); iteration++) { 293 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng); 294 295 std::vector<int32_t> inputs(4096); 296 std::vector<uint8_t> outputs(inputs.size()); 297 298 const uint8_t zeroPoint = UINT8_C(128); 299 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 300 const float scale = scaleDistribution(mtRng); 301 for (size_t i = 0; i < inputs.size(); i++) { 302 const uint8_t approximateOutput = rng(); 303 const int32_t input = int32_t(double(approximateOutput) / double(scale)); 304 inputs[i] = input; 305 } 306 307 requantize( 308 inputs.size(), inputs.data(), scale, zeroPoint, 309 std::numeric_limits<uint8_t>::min(), 310 std::numeric_limits<uint8_t>::max(), 311 outputs.data()); 312 313 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */ 314 ASSERT_NE( 315 *std::max_element(outputs.cbegin(), outputs.cend()), 316 *std::min_element(outputs.cbegin(), outputs.cend())); 317 318 for (size_t i = 0; i < inputs.size(); i++) { 319 const double referenceOutput = 320 RequantizationTester::requantizeApproximate( 321 inputs[i], scale, zeroPoint, 322 std::numeric_limits<uint8_t>::min(), 323 std::numeric_limits<uint8_t>::max()); 324 ASSERT_LE(fabs(referenceOutput - double(outputs[i])), 0.55) << 325 "input = " << inputs[i] << 326 ", output = " << uint32_t(outputs[i]) << ", reference output = " << referenceOutput; 327 } 328 } 329 } 330 testRandomCasesAgainstReference(requantization_function requantize,requantization_function requantizeReference)331 void testRandomCasesAgainstReference(requantization_function requantize, requantization_function requantizeReference) { 332 std::random_device random_device; 333 std::mt19937 mtRng(random_device()); 334 for (size_t iteration = 0; iteration < iterations(); iteration++) { 335 auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng); 336 337 std::vector<int32_t> inputs(4096); 338 std::vector<uint8_t> outputs(inputs.size()); 339 std::vector<uint8_t> referenceOutputs(inputs.size()); 340 341 const uint8_t zeroPoint = UINT8_C(128); 342 std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f); 343 const float scale = scaleDistribution(mtRng); 344 for (size_t i = 0; i < inputs.size(); i++) { 345 const uint8_t approximateOutput = rng(); 346 const int32_t input = int32_t(double(approximateOutput) / double(scale)); 347 inputs[i] = input; 348 } 349 350 requantize( 351 inputs.size(), inputs.data(), scale, zeroPoint, 352 std::numeric_limits<uint8_t>::min(), 353 std::numeric_limits<uint8_t>::max(), 354 outputs.data()); 355 356 requantizeReference( 357 inputs.size(), inputs.data(), scale, zeroPoint, 358 std::numeric_limits<uint8_t>::min(), 359 std::numeric_limits<uint8_t>::max(), 360 referenceOutputs.data()); 361 362 /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */ 363 ASSERT_NE( 364 *std::max_element(outputs.cbegin(), outputs.cend()), 365 *std::min_element(outputs.cbegin(), outputs.cend())); 366 367 for (size_t i = 0; i < inputs.size(); i++) { 368 ASSERT_EQ(uint32_t(referenceOutputs[i]), uint32_t(outputs[i])); 369 } 370 } 371 } 372 shiftLeft(int64_t w,uint32_t n)373 static inline int64_t shiftLeft(int64_t w, uint32_t n) { 374 return (int64_t) ((uint64_t) w << n); 375 } 376 requantizeApproximate(int32_t value,float scale,uint8_t zeroPoint,uint8_t qmin,uint8_t qmax)377 static inline double requantizeApproximate( 378 int32_t value, 379 float scale, 380 uint8_t zeroPoint, 381 uint8_t qmin, 382 uint8_t qmax) 383 { 384 assert(scale < 1.0f); 385 assert(scale >= 0x1.0p-32f); 386 387 double clampedValue = double(value) * double(scale) + double(zeroPoint); 388 389 const double fmin = double(qmin); 390 if (clampedValue < fmin) { 391 clampedValue = fmin; 392 } 393 394 const double fmax = double(qmax); 395 if (clampedValue > fmax) { 396 clampedValue = fmax; 397 } 398 399 return clampedValue; 400 } 401 402 private: 403 size_t zeroPoint_{0}; 404 size_t s_{1}; 405 uint8_t qmin_{std::numeric_limits<uint8_t>::min()}; 406 uint8_t qmax_{std::numeric_limits<uint8_t>::max()}; 407 size_t iterations_{1}; 408 }; 409