• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
#include <gtest/gtest.h>

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack/params.h>
#include <xnnpack/scalar-utils.h>
25 
26 
27 class RequantizationTester {
28  public:
s(uint32_t s)29   inline RequantizationTester& s(uint32_t s) {
30     this->s_ = s;
31     return *this;
32   }
33 
s()34   inline uint32_t s() const {
35     return this->s_;
36   }
37 
scale()38   inline float scale() const {
39     return ldexpf(1.0f, -s());
40   }
41 
zeroPoint(int32_t zeroPoint)42   inline RequantizationTester& zeroPoint(int32_t zeroPoint) {
43     this->zeroPoint_ = zeroPoint;
44     return *this;
45   }
46 
zeroPoint()47   inline int32_t zeroPoint() const {
48     return this->zeroPoint_;
49   }
50 
qmin(uint8_t qmin)51   inline RequantizationTester& qmin(uint8_t qmin) {
52     this->qmin_ = qmin;
53     return *this;
54   }
55 
qmin()56   inline uint8_t qmin() const {
57     return this->qmin_;
58   }
59 
qmax(uint8_t qmax)60   inline RequantizationTester& qmax(uint8_t qmax) {
61     this->qmax_ = qmax;
62     return *this;
63   }
64 
qmax()65   inline uint8_t qmax() const {
66     return this->qmax_;
67   }
68 
iterations(size_t iterations)69   inline RequantizationTester& iterations(size_t iterations) {
70     this->iterations_ = iterations;
71     return *this;
72   }
73 
iterations()74   inline size_t iterations() const {
75     return this->iterations_;
76   }
77 
78   /*
79    * Test that requantization of numbers ((i - zero point) * 2**s) with
80    * - scale = exp2(-s)
81    * - zero point in [0, 255]
82    * - no output clamping
83    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
84    */
testExactDivideByPO2(requantization_function requantize)85   void testExactDivideByPO2(requantization_function requantize) const {
86     ASSERT_GE(zeroPoint(), 0);
87     ASSERT_LE(zeroPoint(), 255);
88 
89     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
90     ASSERT_GE(s(), 1);
91     ASSERT_LT(s(), 32);
92 
93     std::vector<int32_t> inputs(256);
94     std::vector<uint8_t> outputs(inputs.size());
95     const int32_t maxI = (uint32_t(std::numeric_limits<int32_t>::max()) >> s()) + zeroPoint();
96     const int32_t minI = -(-uint32_t(std::numeric_limits<int32_t>::min()) >> s()) + zeroPoint();
97     for (int32_t i = 0; i < 256; i++) {
98       const int32_t clampedI = std::max(minI, std::min(maxI, i));
99       inputs[i] = int32_t(uint32_t(clampedI - zeroPoint()) << s());
100     }
101     requantize(inputs.size(), inputs.data(),
102         scale(), zeroPoint(), qmin(), qmax(),
103         outputs.data());
104     for (int32_t i = 0; i < 256; i++) {
105       const int32_t clampedI = std::max(minI, std::min(maxI, i));
106       ASSERT_EQ(clampedI, outputs[i]) << "i = " << i << ", clamped i = " << clampedI <<
107         ", min i = " << minI << ", max i = " << maxI <<
108         ", s = " << s() << ", zero point = " << zeroPoint();
109     }
110   }
111 
112   /*
113    * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with
114    * - scale = exp2(-s)
115    * - zero point in [1, 255]
116    * - no output clamping
117    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
118    */
testDivideByPO2WithRoundingUp(requantization_function requantize)119   void testDivideByPO2WithRoundingUp(requantization_function requantize) {
120     ASSERT_GE(zeroPoint(), 0);
121     ASSERT_LE(zeroPoint(), 255);
122 
123     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
124     ASSERT_GE(s(), 1);
125     ASSERT_LT(s(), 32);
126 
127     std::vector<int32_t> inputs(256);
128     std::vector<uint8_t> outputs(inputs.size());
129     for (int32_t i = 0; i < 256; i++) {
130       const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
131         (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
132       inputs[i] = int32_t(input);
133     }
134     requantize(inputs.size(), inputs.data(),
135         scale(), zeroPoint(), qmin(), qmax(),
136         outputs.data());
137     for (int32_t i = 0; i < 256; i++) {
138       const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) -
139         (INT64_C(1) << (s() - 1)) + (int64_t) (i <= zeroPoint());
140       if (int32_t(input) == input) {
141         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
142           ", s = " << s() << ", zero point = " << zeroPoint();
143       }
144     }
145   }
146 
147   /*
148    * Test that requantization of numbers (i * 2**s + sign(i - zero point) * 2**(s-1)) with
149    * - scale = exp2(-s)
150    * - zero point in [1, 255]
151    * - no output clamping
152    * produces exactly i, provided that ((i - zero point) * 2**s) does not overflow.
153    */
testDivideByPO2WithRoundingDown(requantization_function requantize)154   void testDivideByPO2WithRoundingDown(requantization_function requantize) {
155     ASSERT_GE(zeroPoint(), 0);
156     ASSERT_LE(zeroPoint(), 255);
157 
158     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
159     ASSERT_GE(s(), 1);
160     ASSERT_LT(s(), 32);
161 
162     std::vector<int32_t> inputs(256);
163     std::vector<uint8_t> outputs(inputs.size());
164     for (int32_t i = 0; i < 256; i++) {
165       const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
166         (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
167       inputs[i] = int32_t(input);
168     }
169     requantize(inputs.size(), inputs.data(),
170         scale(), zeroPoint(), qmin(), qmax(),
171         outputs.data());
172     for (int32_t i = 0; i < 256; i++) {
173       const int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s()) +
174         (INT64_C(1) << (s() - 1)) - (int64_t) (i >= zeroPoint());
175       if (int32_t(input) == input) {
176         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
177           ", s = " << s() << ", zero point = " << zeroPoint();
178       }
179     }
180   }
181 
testDivideByPO2WithRoundingAway(requantization_function requantize)182   void testDivideByPO2WithRoundingAway(requantization_function requantize) {
183     ASSERT_GE(zeroPoint(), 0);
184     ASSERT_LE(zeroPoint(), 255);
185 
186     /* Note: need s >= 1 to ensure scale = exp2(-s) < 1.0 */
187     ASSERT_GE(s(), 1);
188     ASSERT_LT(s(), 32);
189 
190     std::vector<int32_t> inputs(256);
191     std::vector<uint8_t> outputs(inputs.size());
192     for (int32_t i = 0; i < 256; i++) {
193       int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
194       if (input > 0) {
195         input -= INT64_C(1) << (s() - 1);
196       } else if (input < 0) {
197         input += INT64_C(1) << (s() - 1);
198       }
199       inputs[i] = int32_t(input);
200     }
201     requantize(inputs.size(), inputs.data(),
202         scale(), zeroPoint(), qmin(), qmax(),
203         outputs.data());
204     for (uint32_t i = 0; i < 256; i++) {
205       int64_t input = RequantizationTester::shiftLeft(i - zeroPoint(), s());
206       if (input > 0) {
207         input -= INT64_C(1) << (s() - 1);
208       } else if (input < 0) {
209         input += INT64_C(1) << (s() - 1);
210       }
211       if (int32_t(input) == input) {
212         ASSERT_EQ(i, uint32_t(outputs[i])) << "i = " << i << ", input = " << input <<
213           ", s = " << s() << ", zero point = " << zeroPoint();
214       }
215     }
216   }
217 
testSpecialCases(requantization_function requantize)218   void testSpecialCases(requantization_function requantize) {
219     std::vector<int32_t> inputs(256);
220     std::vector<uint8_t> outputs(inputs.size());
221 
222     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::min());
223     for (int32_t zeroPoint = 0; zeroPoint < 256; zeroPoint++) {
224       requantize(
225           inputs.size(),
226           inputs.data(),
227           ldexpf(1.0f, -32) /* scale */,
228           zeroPoint /* zero point */,
229           std::numeric_limits<uint8_t>::min(),
230           std::numeric_limits<uint8_t>::max(),
231           outputs.data());
232       ASSERT_EQ(std::max(int32_t(0), zeroPoint - 1), *std::min_element(outputs.cbegin(), outputs.cend()));
233     }
234 
235     std::fill(inputs.begin(), inputs.end(), std::numeric_limits<int32_t>::max());
236     requantize(
237         inputs.size(),
238         inputs.data(),
239         0x1.FFFFFEp-1f /* scale */,
240         std::numeric_limits<uint8_t>::max() /* zero point */,
241         std::numeric_limits<uint8_t>::min(),
242         std::numeric_limits<uint8_t>::max(),
243         outputs.data());
244     for (size_t i = 0; i < inputs.size(); i++) {
245       ASSERT_EQ(std::numeric_limits<uint8_t>::max(), outputs[i]);
246     }
247   }
248 
testRandomCasesPrecise(requantization_function requantize)249   void testRandomCasesPrecise(requantization_function requantize) {
250     std::random_device random_device;
251     std::mt19937 mtRng(random_device());
252     for (size_t iteration = 0; iteration < iterations(); iteration++) {
253       auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
254 
255       std::vector<int32_t> inputs(4096);
256       std::vector<uint8_t> outputs(inputs.size());
257 
258       const uint8_t zeroPoint = UINT8_C(128);
259       std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
260       const float scale = scaleDistribution(mtRng);
261       for (size_t i = 0; i < inputs.size(); i++) {
262         const uint8_t approximateOutput = rng();
263         const int32_t input = int32_t(double(approximateOutput) / double(scale));
264         inputs[i] = input;
265       }
266 
267       requantize(
268         inputs.size(), inputs.data(), scale, zeroPoint,
269         std::numeric_limits<uint8_t>::min(),
270         std::numeric_limits<uint8_t>::max(),
271         outputs.data());
272 
273       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
274       ASSERT_NE(
275         *std::max_element(outputs.cbegin(), outputs.cend()),
276         *std::min_element(outputs.cbegin(), outputs.cend()));
277 
278       for (size_t i = 0; i < inputs.size(); i++) {
279         const uint8_t referenceOutput =
280           scalar_requantize_precise(
281             inputs[i], scale, zeroPoint,
282             std::numeric_limits<uint8_t>::min(),
283             std::numeric_limits<uint8_t>::max());
284         ASSERT_EQ(uint32_t(referenceOutput), uint32_t(outputs[i]));
285       }
286     }
287   }
288 
testRandomCasesApproximate(requantization_function requantize)289   void testRandomCasesApproximate(requantization_function requantize) {
290     std::random_device random_device;
291     std::mt19937 mtRng(random_device());
292     for (size_t iteration = 0; iteration < iterations(); iteration++) {
293       auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
294 
295       std::vector<int32_t> inputs(4096);
296       std::vector<uint8_t> outputs(inputs.size());
297 
298       const uint8_t zeroPoint = UINT8_C(128);
299       std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
300       const float scale = scaleDistribution(mtRng);
301       for (size_t i = 0; i < inputs.size(); i++) {
302         const uint8_t approximateOutput = rng();
303         const int32_t input = int32_t(double(approximateOutput) / double(scale));
304         inputs[i] = input;
305       }
306 
307       requantize(
308         inputs.size(), inputs.data(), scale, zeroPoint,
309         std::numeric_limits<uint8_t>::min(),
310         std::numeric_limits<uint8_t>::max(),
311         outputs.data());
312 
313       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
314       ASSERT_NE(
315         *std::max_element(outputs.cbegin(), outputs.cend()),
316         *std::min_element(outputs.cbegin(), outputs.cend()));
317 
318       for (size_t i = 0; i < inputs.size(); i++) {
319         const double referenceOutput =
320           RequantizationTester::requantizeApproximate(
321             inputs[i], scale, zeroPoint,
322             std::numeric_limits<uint8_t>::min(),
323             std::numeric_limits<uint8_t>::max());
324         ASSERT_LE(fabs(referenceOutput - double(outputs[i])), 0.55) <<
325           "input = " << inputs[i] <<
326           ", output = " << uint32_t(outputs[i]) << ", reference output = " << referenceOutput;
327       }
328     }
329   }
330 
testRandomCasesAgainstReference(requantization_function requantize,requantization_function requantizeReference)331   void testRandomCasesAgainstReference(requantization_function requantize, requantization_function requantizeReference) {
332     std::random_device random_device;
333     std::mt19937 mtRng(random_device());
334     for (size_t iteration = 0; iteration < iterations(); iteration++) {
335       auto rng = std::bind(std::uniform_int_distribution<uint8_t>(), mtRng);
336 
337       std::vector<int32_t> inputs(4096);
338       std::vector<uint8_t> outputs(inputs.size());
339       std::vector<uint8_t> referenceOutputs(inputs.size());
340 
341       const uint8_t zeroPoint = UINT8_C(128);
342       std::uniform_real_distribution<float> scaleDistribution(0x1.000000p-23f, 0x1.FFFFFEp-1f);
343       const float scale = scaleDistribution(mtRng);
344       for (size_t i = 0; i < inputs.size(); i++) {
345         const uint8_t approximateOutput = rng();
346         const int32_t input = int32_t(double(approximateOutput) / double(scale));
347         inputs[i] = input;
348       }
349 
350       requantize(
351         inputs.size(), inputs.data(), scale, zeroPoint,
352         std::numeric_limits<uint8_t>::min(),
353         std::numeric_limits<uint8_t>::max(),
354         outputs.data());
355 
356       requantizeReference(
357         inputs.size(), inputs.data(), scale, zeroPoint,
358         std::numeric_limits<uint8_t>::min(),
359         std::numeric_limits<uint8_t>::max(),
360         referenceOutputs.data());
361 
362       /* Ensure that outputs are not all identical, as in this case Test doesn't validate much */
363       ASSERT_NE(
364         *std::max_element(outputs.cbegin(), outputs.cend()),
365         *std::min_element(outputs.cbegin(), outputs.cend()));
366 
367       for (size_t i = 0; i < inputs.size(); i++) {
368         ASSERT_EQ(uint32_t(referenceOutputs[i]), uint32_t(outputs[i]));
369       }
370     }
371   }
372 
shiftLeft(int64_t w,uint32_t n)373   static inline int64_t shiftLeft(int64_t w, uint32_t n) {
374     return (int64_t) ((uint64_t) w << n);
375   }
376 
requantizeApproximate(int32_t value,float scale,uint8_t zeroPoint,uint8_t qmin,uint8_t qmax)377   static inline double requantizeApproximate(
378     int32_t value,
379     float scale,
380     uint8_t zeroPoint,
381     uint8_t qmin,
382     uint8_t qmax)
383   {
384     assert(scale < 1.0f);
385     assert(scale >= 0x1.0p-32f);
386 
387     double clampedValue = double(value) * double(scale) + double(zeroPoint);
388 
389     const double fmin = double(qmin);
390     if (clampedValue < fmin) {
391       clampedValue = fmin;
392     }
393 
394     const double fmax = double(qmax);
395     if (clampedValue > fmax) {
396       clampedValue = fmax;
397     }
398 
399     return clampedValue;
400   }
401 
402  private:
403   size_t zeroPoint_{0};
404   size_t s_{1};
405   uint8_t qmin_{std::numeric_limits<uint8_t>::min()};
406   uint8_t qmax_{std::numeric_limits<uint8_t>::max()};
407   size_t iterations_{1};
408 };
409