/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <cmath>
#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace {

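// Requantizes `values_quantized` (qint32 values in [input_min, input_max])
// into quint8 values in [output_min, output_max], using the reference
// implementation when `eigen_device` is null and the Eigen-based one
// otherwise, then checks each result against a
// QuantizedToFloat/FloatToQuantized round-trip to within `tolerance`
// quantized steps.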
void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, float input_min,
                        float input_max, float output_min, float output_max,
                        const std::vector<qint32>& values_quantized,
                        int tolerance = 1) {
  const int values_count = values_quantized.size();
  std::vector<quint8> expected_values;
  expected_values.reserve(values_count);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    expected_values.push_back(FloatToQuantized<quint8>(
        QuantizedToFloat(values_quantized[value_index], input_min, input_max),
        output_min, output_max));
  }

  Tensor i_tensor =
      tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
  Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
  auto output_values = o_tensor.flat<quint8>();

  if (eigen_device == nullptr) {
    auto input_array = i_tensor.flat<qint32>();
    RequantizeManyInNewRange(input_array.data(), input_array.size(), input_min,
                             input_max, output_min, output_max,
                             output_values.data());
  } else {
    RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
        *eigen_device, i_tensor, input_min, input_max, output_min, output_max,
        &o_tensor);
  }

  const string tolerance_str = strings::StrCat("+-", tolerance);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    int e = expected_values[value_index];
    int v = output_values(value_index);
    ASSERT_TRUE(std::abs(e - v) <= tolerance)
        << "actual=" << v << ", expected=" << e << tolerance_str
        << ", values_quantized[" << value_index
        << "]=" << values_quantized[value_index] << ", input_min=" << input_min
        << ", input_max=" << input_max << ", output_min=" << output_min
        << ", output_max=" << output_max << ", value_index=" << value_index;
  }
}

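// Same check in the opposite direction: requantizes quint8 inputs into the
// qint32 range. The default tolerance of 256 out of 2^32 quantized steps is
// roughly what float precision (about 2^-24 relative error) can guarantee.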
void TestRequantizeMany8To32Bit(float input_min, float input_max,
                                float output_min, float output_max,
                                const std::vector<quint8>& values_quantized,
                                int tolerance = 256) {
  const int values_count = values_quantized.size();
  std::vector<qint32> expected_values;
  expected_values.reserve(values_count);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    expected_values.push_back(FloatToQuantized<qint32>(
        QuantizedToFloat(values_quantized[value_index], input_min, input_max),
        output_min, output_max));
  }

  const Tensor i_tensor =
      tensorflow::test::AsTensor(gtl::ArraySlice<quint8>(values_quantized));
  Tensor o_tensor(DT_QINT32, TensorShape{values_count});
  auto output_values = o_tensor.flat<qint32>();

  const auto input_array = i_tensor.flat<quint8>();
  RequantizeManyInNewRange(input_array.data(), input_array.size(), input_min,
                           input_max, output_min, output_max,
                           output_values.data());

  const string tolerance_str = strings::StrCat("+-", tolerance);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    const qint32 e = expected_values[value_index];
    const qint32 v = output_values(value_index);
    ASSERT_TRUE(std::abs(e - v) <= tolerance)
        << "actual=" << v << ", expected=" << e << tolerance_str
        << ", values_quantized[" << value_index
        << "]=" << values_quantized[value_index] << ", input_min=" << input_min
        << ", input_max=" << input_max << ", output_min=" << output_min
        << ", output_max=" << output_max << ", value_index=" << value_index;
  }
}

// If eigen_device is NULL, then the reference implementation is tested.
void TestRequantizeManyInNewRange32To8Bit(
    Eigen::ThreadPoolDevice* eigen_device) {
  if (true) {  // Scope block so that low/high/vals can be redeclared below.
    // These are the float values we're going to test the conversions on.
    const size_t values_count = 6;
    const float values[values_count] = {0.0f,  0.45f,  1.0f,
                                        -1.0f, 127.0f, 255.0f};
    // These are the input and output ranges we'll test.
    const size_t ranges_count = 6;
    const float ranges[ranges_count][4] = {
        {0.0f, 255.0f, 0.0f, 255.0f},    //
        {0.0f, 1.0f, 0.0f, 1.0f},        //
        {-1.0f, 1.0f, -1.0f, 1.0f},      //
        {-1.0f, 1.0f, -255.0f, 255.0f},  //
        {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
        {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
    };
    for (size_t i = 0; i < ranges_count; ++i) {
      const auto& r = ranges[i];
      std::vector<qint32> values_quantized;
      for (size_t value_index = 0; value_index < values_count; ++value_index) {
        const float v = values[value_index];
        values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1]));
      }
      TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3],
                         values_quantized);
    }

    // Test with many different values in the input quantized range.
    qint32 low = Eigen::NumTraits<qint32>::lowest();
    qint32 high = Eigen::NumTraits<qint32>::highest();
    std::vector<qint32> vals{low, high};
    int num_steps = 14419;
    qint32 step = static_cast<int32>((1LL << 32) / num_steps);
    qint32 v = low + static_cast<qint32>(1);
    for (int i = 0; i < num_steps; ++i) {
      vals.push_back(v);
      v += step;
    }
    TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals);
    TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals);
    TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f,
                       vals);
    TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f,
                       12345678.0f, vals);
  }
  // Test when the input range is large and output range is small.
  // Use all quantized values where the float is in the output range.
  const float out_min = -29.1234f;
  const float out_max = 23.1234f;
  const float in_min = -1e6f;
  const float in_max = 1e6f;

  qint32 low = FloatToQuantized<qint32>(out_min, in_min, in_max);
  qint32 high = FloatToQuantized<qint32>(out_max, in_min, in_max);
  std::vector<qint32> vals;
  for (int32_t i = low; i <= high; ++i) vals.push_back(i);
  TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals);
}

void TestRequantizeManyInNewRange8To32Bit() {
  // These are the float values we're going to test the conversions on.
  const size_t values_count = 6;
  const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f};
  // These are the input and output ranges we'll test.
  const size_t ranges_count = 6;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},    //
      {0.0f, 1.0f, 0.0f, 1.0f},        //
      {-1.0f, 1.0f, -1.0f, 1.0f},      //
      {-1.0f, 1.0f, -255.0f, 255.0f},  //
      {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
      {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
  };
  for (size_t i = 0; i < ranges_count; ++i) {
    const auto& r = ranges[i];
    std::vector<quint8> values_quantized;
    for (size_t value_index = 0; value_index < values_count; ++value_index) {
      const float v = values[value_index];
      values_quantized.push_back(FloatToQuantized<quint8>(v, r[0], r[1]));
    }
    TestRequantizeMany8To32Bit(r[0], r[1], r[2], r[3], values_quantized);
  }

  // Test with every possible 8-bit value in the input quantized range.
  int low = Eigen::NumTraits<quint8>::lowest();
  int high = Eigen::NumTraits<quint8>::highest();
  std::vector<quint8> vals;
  for (int val = low; val <= high; ++val) {
    vals.push_back(val);
  }
  TestRequantizeMany8To32Bit(-1.0f, 1.0f, -1.0f, 1.0f, vals);
  TestRequantizeMany8To32Bit(-255.0f, 255.0f, -255.0f, 255.0f, vals);
  TestRequantizeMany8To32Bit(-1.0f, 1.0f, -12345678.0f, 12345678.0f, vals);
  TestRequantizeMany8To32Bit(-1.0f, 12345678.0f, -12345678.0f, 12345678.0f,
                             vals);
}

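// Runs the reference and Eigen implementations of RequantizeManyInNewRange
// over the same random inputs and checks that they agree to within one
// quantized step.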
template <typename InputType, typename OutputType>
void TestRequantizeManyInNewRangeEigenVsNonEigen() {
  thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       2 /* num_threads */);

  const size_t ranges_count = 6;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},    //
      {0.0f, 1.0f, 0.0f, 1.0f},        //
      {-1.0f, 1.0f, -1.0f, 1.0f},      //
      {-1.0f, 1.0f, -255.0f, 255.0f},  //
      {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
      {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
  };

  // Random values.
  for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
    const float input_min = ranges[range_index][0];
    const float input_max = ranges[range_index][1];
    const float output_min = ranges[range_index][2];
    const float output_max = ranges[range_index][3];
    const int values_count = 10000;
    random::PhiloxRandom philox(testing::RandomSeed(), 17);
    random::SimplePhilox rnd(&philox);
    std::vector<InputType> values_quantized;
    for (int i = 0; i < values_count; ++i) {
      float v = (rnd.RandFloat() * (input_max - input_min)) + input_min;
      values_quantized.push_back(
          FloatToQuantized<InputType>(v, input_min, input_max));
    }

    Tensor i_tensor = tensorflow::test::AsTensor(
        gtl::ArraySlice<InputType>(values_quantized));
    const auto i_array = i_tensor.flat<InputType>();
    Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(),
                          TensorShape{values_count});
    auto output_values_eigen = o_tensor_eigen.flat<OutputType>();
    Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(),
                        TensorShape{values_count});
    auto output_values_ref = o_tensor_ref.flat<OutputType>();

    RequantizeManyInNewRange(i_array.data(), i_array.size(), input_min,
                             input_max, output_min, output_max,
                             output_values_ref.data());
    RequantizeManyInNewRangeUsingEigen<InputType, OutputType>(
        eigen_device, i_tensor, input_min, input_max, output_min, output_max,
        &o_tensor_eigen);

    const int tolerance = 1;
    for (int i = 0; i < values_count; ++i) {
      auto expected = output_values_ref(i);
      auto actual = output_values_eigen(i);
      // The Eigen computation uses floats for constants and intermediate
      // results instead of doubles, so results can differ by 1 or 2 in some
      // cases (e.g., input value 144.062744140625, min -1, max 255, type
      // quint8).
      ASSERT_TRUE(std::abs(expected - actual) <= tolerance)
          << "expected=" << expected << " actual=" << actual
          << " tolerance=" << tolerance << " v=" << values_quantized[i]
          << " i=" << i << " input_min=" << input_min
          << " input_max=" << input_max
          << " input_type=" << DataTypeString(DataTypeToEnum<InputType>::v())
          << " output_type=" << DataTypeString(DataTypeToEnum<OutputType>::v());
    }
  }
}

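// Micro-benchmark: times `iterations` passes of RequantizeManyInNewRange
// (reference or Eigen path) over `num_elements` random values and logs the
// throughput in millions of elements per second.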
template <typename InputType, typename OutputType>
void TimeRequantizeManyInNewRange(int64_t num_elements, int64_t iterations,
                                  bool use_eigen) {
  const float input_min = -100.0f;
  const float input_max = 100.0f;
  const float output_min = -1000000.0f;
  const float output_max = 1000000.0f;

  random::PhiloxRandom philox(testing::RandomSeed(), 17);
  random::SimplePhilox rnd(&philox);
  std::vector<InputType> values_quantized;
  for (int i = 0; i < num_elements; ++i) {
    float v = (rnd.RandFloat() * (input_max - input_min)) + input_min;
    values_quantized.push_back(
        FloatToQuantized<InputType>(v, input_min, input_max));
  }

  thread::ThreadPool threadpool(Env::Default(), "test", 4 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       4 /* num_threads */);

  Tensor i_tensor =
      tensorflow::test::AsTensor(gtl::ArraySlice<InputType>(values_quantized));
  const auto i_array = i_tensor.flat<InputType>();
  Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(),
                        TensorShape{num_elements});
  Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(),
                      TensorShape{num_elements});
  auto output_values_ref = o_tensor_ref.flat<OutputType>();

  int64_t total_duration = 0;
  for (int i = 0; i < iterations; ++i) {
    const int64_t start_time = Env::Default()->NowMicros();
    if (use_eigen) {
      RequantizeManyInNewRangeUsingEigen<InputType, OutputType>(
          eigen_device, i_tensor, input_min, input_max, output_min, output_max,
          &o_tensor_eigen);
    } else {
      RequantizeManyInNewRange<InputType, OutputType>(
          i_array.data(), i_array.size(), input_min, input_max, output_min,
          output_max, output_values_ref.data());
    }
    const int64_t end_time = Env::Default()->NowMicros();
    total_duration += end_time - start_time;
  }
  const int64_t one_run_duration = total_duration / iterations;

  const int64_t num_ops = num_elements;

  // total_duration is in microseconds, so ops per microsecond is the same as
  // millions of ops per second.
  const double million_ops_per_second =
      (iterations * num_ops) / static_cast<double>(total_duration);

  LOG(INFO) << "TimeRequantizeManyInNewRange: " << num_elements
            << (use_eigen ? " eigen" : " ref") << ": iterations=" << iterations
            << ", MOps/s=" << million_ops_per_second
            << ", one_run_duration=" << one_run_duration
            << ", total_duration=" << total_duration;
}

template <typename T>
void TestFloatToQuantizedInPlaceUsingEigen(
    Eigen::ThreadPoolDevice* eigen_device) {
  // These are the float ranges we're going to test the conversions on.
  typedef std::pair<float, float> FPair;
  for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f),  //
                                              FPair(-1.0f, 1.0f),      //
                                              FPair(-1.0f, 255.0f),    //
                                              FPair(0.0f, 1e6),        //
                                              FPair(0.0f, 1.0f),       //
                                              FPair(-31.0f, 13.0f)}) {
    const float f_min = min_and_max.first;
    const float f_max = min_and_max.second;
    const float f_range = f_max - f_min;
    const int values_count = 50000;
    Tensor input(DT_FLOAT, TensorShape{values_count});
    auto input_array = input.flat<float>();
    for (int i = 0; i < values_count; ++i) {
      input_array(i) = f_min + f_range * i / (values_count - 1);
    }

    Tensor output(DataTypeToEnum<T>::v(), TensorShape{values_count});
    FloatTensorToQuantizedInPlaceUsingEigen<T>(*eigen_device, input, f_min,
                                               f_max, &output);
    auto output_array = output.flat<T>();

    const int tolerance = 1;
    for (int i = 0; i < values_count; ++i) {
      int32_t expected = FloatToQuantized<T>(input_array(i), f_min, f_max);
      int32_t actual = output_array(i);

      // The Eigen computation uses floats for constants and intermediate
      // results instead of doubles, so results can differ by 1 or 2 in some
      // cases (e.g., input value 144.062744140625, min -1, max 255, type
      // quint8).
      ASSERT_TRUE(std::abs(expected - actual) <= tolerance)
          << "expected=" << expected << " actual=" << actual
          << " tolerance=" << tolerance << " v=" << input_array(i)
          << " i=" << i << " f_min=" << f_min << " f_max=" << f_max
          << " type=" << DataTypeString(DataTypeToEnum<T>::v());
    }
  }
}

template <typename T>
void TestQuantizedToFloatInPlaceUsingEigen(
    Eigen::ThreadPoolDevice* eigen_device) {
  // These are the float ranges we're going to test the conversions on.
  typedef std::pair<float, float> FPair;
  for (FPair min_and_max : std::vector<FPair>{
           FPair(-255.0f, 255.0f),
           FPair(-1.0f, 1.0f),
           FPair(-1.0f, 255.0f),
           FPair(0.0f, 1e6),
           FPair(0.0f, 1.0f),
           FPair(-31.0f, 13.0f),
           FPair(-5.89505e+08, 5.89505e+08),
       }) {
    const float f_min = min_and_max.first;
    const float f_max = min_and_max.second;
    const int values_count = sizeof(T) == 1 ? 256 : 50000;
    Tensor input(DataTypeToEnum<T>::v(), TensorShape{values_count});
    auto input_array = input.flat<T>();
    const double q_range =
        static_cast<double>(Eigen::NumTraits<T>::highest()) -
        Eigen::NumTraits<T>::lowest();
    for (int i = 0; i < values_count; ++i) {
      if (sizeof(T) == 1) {
        input_array(i) = Eigen::NumTraits<T>::lowest() + i;
      } else {
        int64_t offset = static_cast<int64_t>(q_range / values_count * i);
        input_array(i) = static_cast<int32>(
            std::min<int64_t>(Eigen::NumTraits<T>::lowest() + offset,
                              Eigen::NumTraits<T>::highest()));
      }
    }

    Tensor output(DT_FLOAT, TensorShape{values_count});
    QuantizedTensorToFloatInPlaceUsingEigen<T>(*eigen_device, input, f_min,
                                               f_max, &output);
    auto output_array = output.flat<float>();
    const double range = static_cast<double>(f_max) - f_min;
    for (int i = 0; i < values_count; ++i) {
      float expected = QuantizedToFloat<T>(input_array(i), f_min, f_max);
      float actual = output_array(i);
      ASSERT_NEAR(expected, actual, range * 1.1e-7)
          << "expected=" << expected << " actual=" << actual
          << " v=" << input_array(i) << " i=" << i << " f_min=" << f_min
          << " f_max=" << f_max
          << " type=" << DataTypeString(DataTypeToEnum<T>::v());
    }
  }
}

}  // namespace

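// FloatToQuantized, in effect, linearly rescales [min, max] onto the full
// range of the target type with round-to-nearest, i.e. roughly
//   q = lowest + round((f - min) * (2^bits - 1) / (max - min)).
// For example, FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f) is
// round(0.5 * 255) = 128, as asserted below.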
void TestFloatToQuantized() {
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(1.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(1.0f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(2.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(-128.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(127.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(1.0f, 1.0f, 256.0f));
  EXPECT_EQ(quint8(127), FloatToQuantized<quint8>(128.0f, 1.0f, 256.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(256.0f, 1.0f, 256.0f));

  const int int32_min = std::numeric_limits<int>::min();
  const int int32_max = std::numeric_limits<int>::max();

  EXPECT_EQ(qint32(int32_min),
            FloatToQuantized<qint32>(-128.0f, -128.0f, 128.0f));
  EXPECT_EQ(qint32(0), FloatToQuantized<qint32>(0.0f, -128.0f, 128.0f));
  EXPECT_EQ(qint32(int32_max),
            FloatToQuantized<qint32>(128.0f, -128.0f, 128.0f));
}

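// QuantizedToFloat applies the inverse mapping, roughly
//   f = min + (q - lowest) * (max - min) / (2^bits - 1),
// so the 1/255 tolerances below allow up to one quantized step of error for
// these 8-bit ranges.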
void TestQuantizedToFloat() {
  EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 1.0f)), 1 / 255.0f);
  EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 2.0f)), 1 / 255.0f);
  EXPECT_LT(fabsf(0.5f - QuantizedToFloat<quint8>(127, 0.0f, 1.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(127, 0.0f, 2.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(255, 0.0f, 1.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(2.0f - QuantizedToFloat<quint8>(255, 0.0f, 2.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(0, 1.0f, 256.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(128.0f - QuantizedToFloat<quint8>(127, 1.0f, 256.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(256.0f - QuantizedToFloat<quint8>(255, 1.0f, 256.0f)),
            1 / 255.0f);

  const int int32_min = std::numeric_limits<int>::min();
  const int int32_max = std::numeric_limits<int>::max();

  EXPECT_NEAR(-1.0f, QuantizedToFloat<qint32>(qint32(int32_min), -1.0f, 1.0f),
              1e-5f);
  EXPECT_NEAR(0.0f, QuantizedToFloat<qint32>(qint32(0), -1.0f, 1.0f), 1e-5f);
  EXPECT_NEAR(1.0f, QuantizedToFloat<qint32>(qint32(int32_max), -1.0f, 1.0f),
              1e-5f);

  EXPECT_NEAR(32.0f, QuantizedToFloat<qint32>(qint32(32), int32_min, int32_max),
              1.0);
}

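// Round-tripping each quint8 value through QuantizedToFloat and back through
// FloatToQuantized must reproduce the original value exactly; a systematic
// offset here would accumulate as bias when quantized ops are chained.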
void TestAvoidBias() {
  for (int i = 0; i < 256; ++i) {
    const float as_float = QuantizedToFloat<quint8>(i, 0.0f, 2.0f);
    const int back_to_int = FloatToQuantized<quint8>(as_float, 0.0f, 2.0f);
    EXPECT_EQ(i, back_to_int);
  }

  // All perfectly representable floats should survive quantization, even
  // if we pick a range where min is not itself perfectly representable.
  const float min = -0.1375f;
  const float max = 1.1385f;
  const float step_size = (max - min) / 255.0f;
  const float tolerance = step_size / 1000.0f;
  // This is the smallest perfectly representable float in the range.
  float first_float = std::ceil(min / step_size) * step_size;
  for (float f = first_float; f <= max; f += step_size) {
    const int as_int = FloatToQuantized<quint8>(f, min, max);
    const float back_to_float = QuantizedToFloat<quint8>(as_int, min, max);
    EXPECT_NEAR(f, back_to_float, tolerance);
  }
}

void TestRequantizeInNewRange() {
  // These are the float values we're going to test the conversions on.
  const size_t values_count = 6;
  const float values[values_count] = {0.0f, 0.5f, 1.0f, -1.0f, 127.0f, 255.0f};
  // These are the input and output ranges we'll test.
  const size_t ranges_count = 4;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},
      {0.0f, 1.0f, 0.0f, 1.0f},
      {-1.0f, 1.0f, -1.0f, 1.0f},
      {-1.0f, 1.0f, -255.0f, 255.0f},
  };
  for (size_t value_index = 0; value_index < values_count; ++value_index) {
    const float value_float = values[value_index];
    for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
      const float input_min = ranges[range_index][0];
      const float input_max = ranges[range_index][1];
      const float output_min = ranges[range_index][2];
      const float output_max = ranges[range_index][3];
      const quint8 input_value =
          FloatToQuantized<quint8>(value_float, input_min, input_max);
      // Here we convert the quantized input value to what we expect
      // to get in the output range.
      const qint32 expected_value = FloatToQuantized<qint32>(
          QuantizedToFloat(input_value, input_min, input_max), output_min,
          output_max);
      EXPECT_EQ(expected_value,
                (RequantizeInNewRange<quint8, qint32>(
                    input_value, input_min, input_max, output_min, output_max)))
          << "value_float=" << value_float << ", input_min=" << input_min
          << ", input_max=" << input_max << ", output_min=" << output_min
          << ", output_max=" << output_max;
    }
  }
}

void TestRequantizeInNewRangeRealData() {
  const float input_min = -0.739539f;
  const float input_max = 0.641057f;
  const float output_min = -2381.49f;
  const float output_max = 2207.6f;

  // Start with a value that can be perfectly represented in 8 bits. This
  // ensures minimal quantization error, and allows us to use EXPECT_LT below.
  const float value_as_float =
      QuantizedToFloat<quint8>(83, input_min, input_max);

  const quint8 value_as_quint8 =
      FloatToQuantized<quint8>(value_as_float, input_min, input_max);
  EXPECT_EQ(quint8(83), value_as_quint8);
  const qint32 actual_output = RequantizeInNewRange<quint8, qint32>(
      value_as_quint8, input_min, input_max, output_min, output_max);
  const qint32 value_as_qint32 =
      FloatToQuantized<qint32>(value_as_float, output_min, output_max);
  EXPECT_LT(std::abs(value_as_qint32 - actual_output), 10);
}

void TestRequantizeInNewRange32To8Bit() {
  // These are the float values we're going to test the conversions on.
  const size_t values_count = 6;
  const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f};
  // These are the input and output ranges we'll test.
  const size_t ranges_count = 4;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},
      {0.0f, 1.0f, 0.0f, 1.0f},
      {-1.0f, 1.0f, -1.0f, 1.0f},
      {-1.0f, 1.0f, -255.0f, 255.0f},
  };
  for (size_t value_index = 0; value_index < values_count; ++value_index) {
    const float value_float = values[value_index];
    for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
      const float input_min = ranges[range_index][0];
      const float input_max = ranges[range_index][1];
      const float output_min = ranges[range_index][2];
      const float output_max = ranges[range_index][3];
      const qint32 input_value =
          FloatToQuantized<qint32>(value_float, input_min, input_max);
      // Here we convert the quantized input value to what we expect
      // to get in the output range.
      const quint8 expected_value = FloatToQuantized<quint8>(
          QuantizedToFloat(input_value, input_min, input_max), output_min,
          output_max);
      EXPECT_EQ(expected_value,
                (RequantizeInNewRange<qint32, quint8>(
                    input_value, input_min, input_max, output_min, output_max)))
          << "input_value=" << input_value << ", value_float=" << value_float
          << ", input_min=" << input_min << ", input_max=" << input_max
          << ", output_min=" << output_min << ", output_max=" << output_max;
    }
  }
}

void TestRequantizeManyInNewRange32To8Bit() {
  TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */);
}

void TestRequantizeManyInNewRange32To8BitUsingEigen() {
  thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       2 /* num_threads */);
  TestRequantizeManyInNewRange32To8Bit(&eigen_device);
}

void TestRequantizeManyInNewRange32To8BitEigenVsNonEigen() {
  TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>();
}

void TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen() {
  TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, qint8>();
}

void TestFloatTensorToQuantized() {
  const int input_width = 3;
  const int input_height = 3;
  const float input_min = 0.0f;
  const float input_max = 255.0f;
  Tensor input(DT_FLOAT, TensorShape({input_height, input_width}));
  test::FillValues<float>(&input, {1.0f, -1.0f, 10.0f, 10.25f, 127.0f, 255.0f,
                                   512.0f, 0.0f, 23.0f});
  Tensor expected(DT_QUINT8, TensorShape({input_height, input_width}));
  test::FillValues<quint8>(&expected, {1, 0, 10, 10, 127, 255, 255, 0, 23});
  Tensor output = FloatTensorToQuantized<quint8>(input, input_min, input_max);
  test::ExpectTensorEqual<quint8>(expected, output);
}

// Verify that FloatTensorToQuantizedInPlaceUsingEigen gives the same results
// as FloatToQuantized.
void TestFloatToQuantizedInPlaceUsingEigen() {
  thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       2 /* num_threads */);

  TestFloatToQuantizedInPlaceUsingEigen<quint8>(&eigen_device);
  TestFloatToQuantizedInPlaceUsingEigen<qint8>(&eigen_device);
  TestFloatToQuantizedInPlaceUsingEigen<quint16>(&eigen_device);
  TestFloatToQuantizedInPlaceUsingEigen<qint16>(&eigen_device);
}

void TestOverflowWithEigen() {
  thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       2 /* num_threads */);

  const int num_vals = 4;
  const float input_min = 0.0f;
  const float input_max = 2400.0f;
  TensorShape shape({num_vals});
  Tensor input(DT_FLOAT, shape);
  test::FillValues<float>(&input, {-100.f, 0.f, 2400.0f, 2400.0f});
  Tensor expected(DT_QINT32, shape);
  // Note that the positive expected values are not the highest int32 value,
  // because the implementation does a bounds check using float, not int32.
  test::FillValues<qint32>(
      &expected,
      {static_cast<int32>(-2147483648), static_cast<int32>(-2147483648),
       static_cast<int32>(2147483520), static_cast<int32>(2147483520)});

  FloatToQuantizedStruct<qint32> f2q(input_min, input_max);
  Tensor output(DT_QINT32, shape);
  auto input_array = input.flat<float>();
  output.flat<qint32>() = QUANTIZE_WITH_EIGEN(input_array, f2q, qint32);
  test::ExpectTensorEqual<qint32>(expected, output);
}

void TestQuantizedTensorToFloat() {
  const int input_width = 3;
  const int input_height = 3;
  const float input_min = -128.0f;
  const float input_max = 127.0f;
  Tensor input(DT_QUINT8, TensorShape({input_height, input_width}));
  test::FillValues<quint8>(&input, {0, 128, 255, 23, 24, 25, 243, 244, 245});
  Tensor expected(DT_FLOAT, TensorShape({input_height, input_width}));
  test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f,
                                      -103.0f, 115.0f, 116.0f, 117.0f});
  Tensor output = QuantizedTensorToFloat<quint8>(input, input_min, input_max);
  test::ExpectTensorEqual<float>(expected, output);

  // Test for signed 32 bit.
  // Note that we cannot use input mins and maxes that match the range because
  // floats have 7 too few bits of mantissa accuracy to represent 2**31-1
  // exactly. Also there is no good fraction to use because 2**31-1 is a
  // Mersenne prime.
  Tensor input32(DT_QINT32, TensorShape({input_height, input_width}));

  // Use a quantizer centered at 0.
  float input_range = 1LL << 25;
  int64_t num_levels = (1LL << 32) - 1;
  float step_size =
      static_cast<float>(static_cast<double>(input_range) / num_levels);
  float q_compatible_min_value =
      roundf(-(input_range / 2.0) / step_size) * step_size;
  float q_compatible_max_value = q_compatible_min_value + input_range;
  test::FillValues<qint32>(&input32, {-16384, 0, 16256, -13440, -13312, -13184,
                                      14720, 14848, 14976});

  Tensor output32 = QuantizedTensorToFloat<qint32>(
      input32, q_compatible_min_value, q_compatible_max_value);
  test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f,
                                      -103.0f, 115.0f, 116.0f, 117.0f});
  // The quantization error in going between 1<<25 and 1<<32 levels.
  const double kTolerance = .5 / 128.0;
  test::ExpectTensorNear<float>(expected, output32, kTolerance);
}

// Verify that QuantizedTensorToFloatInPlaceUsingEigen gives the same results
// as QuantizedToFloat.
void TestQuantizedToFloatInPlaceUsingEigen() {
  thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
  Eigen::ThreadPoolDevice eigen_device(threadpool.AsEigenThreadPool(),
                                       2 /* num_threads */);

  TestQuantizedToFloatInPlaceUsingEigen<quint8>(&eigen_device);
  TestQuantizedToFloatInPlaceUsingEigen<qint8>(&eigen_device);
  TestQuantizedToFloatInPlaceUsingEigen<quint16>(&eigen_device);
  TestQuantizedToFloatInPlaceUsingEigen<qint16>(&eigen_device);
  TestQuantizedToFloatInPlaceUsingEigen<qint32>(&eigen_device);
}

void BenchmarkRequantizeManyInNewRange() {
  TimeRequantizeManyInNewRange<qint32, quint8>(1000, 1000, false);
  TimeRequantizeManyInNewRange<qint32, quint8>(1000, 1000, true);
  TimeRequantizeManyInNewRange<qint32, quint8>(100000, 100, false);
  TimeRequantizeManyInNewRange<qint32, quint8>(100000, 100, true);
  TimeRequantizeManyInNewRange<qint32, quint8>(1000000, 10, false);
  TimeRequantizeManyInNewRange<qint32, quint8>(1000000, 10, true);

  TimeRequantizeManyInNewRange<quint8, qint32>(1000, 1000, false);
  TimeRequantizeManyInNewRange<quint8, qint32>(1000, 1000, true);
  TimeRequantizeManyInNewRange<quint8, qint32>(100000, 100, false);
  TimeRequantizeManyInNewRange<quint8, qint32>(100000, 100, true);
  TimeRequantizeManyInNewRange<quint8, qint32>(1000000, 10, false);
  TimeRequantizeManyInNewRange<quint8, qint32>(1000000, 10, true);
}

#ifdef QUANTIZATION_UTILS_USE_NEON
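// The NEON helpers under test divide both lanes of an int64x2_t by 2^POW:
// Divide64x2Pow truncates toward zero, while Divide64x2PowRound rounds half
// away from zero, matching the scalar reference expressions used in
// TestDivide64x2PowAll below.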
template <int POW>
void TestDivide64x2Pow(int64 val, int64 ref) {
  const int64x2_t val_64x2 = vmovq_n_s64(val);
  const int64x2_t ret = Divide64x2Pow<POW>(val_64x2);
  // TODO(b/70947959) Change back to int64 when possible
  int64_t rets[2];
  vst1q_s64(rets, ret);
  EXPECT_EQ(rets[0], ref);
  EXPECT_EQ(rets[1], ref);
  VLOG(1) << "div: val " << val << ", " << ref;
}

template <int POW>
void TestDivide64x2PowRound(int64 val, int64 ref) {
  const int64x2_t val_64x2 = vmovq_n_s64(val);
  const int64x2_t shifted = Divide64x2PowRound<POW>(val_64x2);
  // TODO(b/70947959) Change back to int64 when possible
  int64_t rets[2];
  vst1q_s64(rets, shifted);
  EXPECT_EQ(rets[0], ref) << "in = " << val << ", POW = " << POW
                          << ", act = " << rets[0] << ", ref = " << ref;
  EXPECT_EQ(rets[1], ref);
  VLOG(1) << "div round: " << val << ", " << rets[0];
}

void TestDivide64x2PowAll() {
  for (int64 i = 0; i < 1000; ++i) {
    TestDivide64x2PowRound<1>(
        i, static_cast<int64_t>(static_cast<float>(i) / 2.0f + 0.5f));
    TestDivide64x2PowRound<1>(
        -i, static_cast<int64_t>(static_cast<float>(-i) / 2.0f - 0.5f));
    TestDivide64x2PowRound<2>(
        i, static_cast<int64_t>(static_cast<float>(i) / 4.0f + 0.5f));
    TestDivide64x2PowRound<2>(
        -i, static_cast<int64_t>(static_cast<float>(-i) / 4.0f - 0.5f));
    TestDivide64x2PowRound<4>(
        i, static_cast<int64_t>(static_cast<float>(i) / 16.0f + 0.5f));
    TestDivide64x2PowRound<4>(
        -i, static_cast<int64_t>(static_cast<float>(-i) / 16.0f - 0.5f));
    TestDivide64x2PowRound<8>(
        i, static_cast<int64_t>(static_cast<float>(i) / 256.0f + 0.5f));
    TestDivide64x2PowRound<8>(
        -i, static_cast<int64_t>(static_cast<float>(-i) / 256.0f - 0.5f));
    TestDivide64x2PowRound<16>(
        i, static_cast<int64_t>(static_cast<float>(i) / 65536.0f + 0.5f));
    TestDivide64x2PowRound<16>(
        -i, static_cast<int64_t>(static_cast<float>(-i) / 65536.0f - 0.5f));
  }

  TestDivide64x2Pow<2>(100, 25);
  TestDivide64x2Pow<2>(-100, -25);
  TestDivide64x2Pow<4>(100, 6);
  TestDivide64x2Pow<4>(-100, -6);

  for (int64 i = 0; i < 1000; ++i) {
    TestDivide64x2Pow<1>(i, i / 2);
    TestDivide64x2Pow<1>(-i, -i / 2);
    TestDivide64x2Pow<2>(i, i / 4);
    TestDivide64x2Pow<2>(-i, -i / 4);
    TestDivide64x2Pow<4>(i, i / 16);
    TestDivide64x2Pow<4>(-i, -i / 16);
    TestDivide64x2Pow<8>(i, i / 256);
    TestDivide64x2Pow<8>(-i, -i / 256);
    TestDivide64x2Pow<16>(i, i / 65536);
    TestDivide64x2Pow<16>(-i, -i / 65536);
  }
}

uint8x8_t To8x8(uint8 val) { return vmov_n_u8(val); }

int16x8_t To16x8(int16 val) { return vmovq_n_s16(val); }

int32x2_t To32x2(int32 val) {
  int32 vals[2];
  vals[0] = val;
  vals[1] = val;
  return vld1_s32(vals);
}

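// Scalar reference for the NEON lerp kernels: x_lerp and y_lerp are
// fixed-point fractions scaled by 2^RESOLUTION, intermediate sums carry
// RESOLUTION extra bits, and the final shift back adds half a step first to
// round to nearest.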
template <int RESOLUTION, typename T_CALC>
T_CALC ComputeRefLerp(T_CALC top_left, T_CALC top_right, T_CALC bottom_left,
                      T_CALC bottom_right, T_CALC x_lerp, T_CALC y_lerp) {
  constexpr T_CALC RESOLUTION_POW = (1 << RESOLUTION);
  const T_CALC top =
      top_left * RESOLUTION_POW + (top_right - top_left) * x_lerp;
  const T_CALC bottom =
      bottom_left * RESOLUTION_POW + (bottom_right - bottom_left) * x_lerp;
  const T_CALC out = top + (bottom - top) / RESOLUTION_POW * y_lerp;
  return (out + RESOLUTION_POW / 2) / RESOLUTION_POW;
}

template <int RESOLUTION>
void TestComputeLerp8x8(uint8 top_left, uint8 top_right, uint8 bottom_left,
                        uint8 bottom_right, int16 x_lerp, int16 y_lerp) {
  uint8x8_t top_left8x8 = To8x8(top_left);
  uint8x8_t top_right8x8 = To8x8(top_right);
  uint8x8_t bottom_left8x8 = To8x8(bottom_left);
  uint8x8_t bottom_right8x8 = To8x8(bottom_right);
  int16x8_t x_lerp16x8 = To16x8(x_lerp);
  int16x8_t y_lerp16x8 = To16x8(y_lerp);
  const uint8x8_t ret =
      ComputeLerp8x8<RESOLUTION>(top_left8x8, top_right8x8, bottom_left8x8,
                                 bottom_right8x8, x_lerp16x8, y_lerp16x8);

  uint8 rets[8];
  vst1_u8(rets, ret);

  const int16 ref = ComputeRefLerp<RESOLUTION, int16>(
      static_cast<int16>(top_left), static_cast<int16>(top_right),
      static_cast<int16>(bottom_left), static_cast<int16>(bottom_right),
      x_lerp, y_lerp);

  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(ref, static_cast<int16>(rets[i]));
  }

  VLOG(1) << "Lerp(8): " << static_cast<int>(top_left) << ", "
          << static_cast<int>(top_right) << ", "
          << static_cast<int>(bottom_left) << ", "
          << static_cast<int>(bottom_right) << ", " << x_lerp << ", " << y_lerp
          << ", " << static_cast<int>(rets[0]) << ", " << ref;
}

template <int RESOLUTION>
void TestComputeLerp32x2(int32 top_left, int32 top_right, int32 bottom_left,
                         int32 bottom_right, int32 x_lerp, int32 y_lerp) {
  int32x2_t top_left32x2 = To32x2(top_left);
  int32x2_t top_right32x2 = To32x2(top_right);
  int32x2_t bottom_left32x2 = To32x2(bottom_left);
  int32x2_t bottom_right32x2 = To32x2(bottom_right);
  int32x2_t x_lerp32x2 = To32x2(x_lerp);
  int32x2_t y_lerp32x2 = To32x2(y_lerp);
  const int32x2_t ret =
      ComputeLerp32x2<RESOLUTION>(top_left32x2, top_right32x2, bottom_left32x2,
                                  bottom_right32x2, x_lerp32x2, y_lerp32x2);
  int32 rets[2];
  vst1_s32(rets, ret);
  const int64 ref = ComputeRefLerp<RESOLUTION, int64>(
      static_cast<int64_t>(top_left), static_cast<int64_t>(top_right),
      static_cast<int64_t>(bottom_left), static_cast<int64_t>(bottom_right),
      static_cast<int64_t>(x_lerp), static_cast<int64_t>(y_lerp));
  EXPECT_EQ(static_cast<int64_t>(rets[0]), ref);
  VLOG(1) << "Lerp(32): " << top_left << ", " << top_right << ", "
          << bottom_left << ", " << bottom_right << ", " << x_lerp << ", "
          << y_lerp << ", " << rets[0] << ", " << ref;
}

void TestComputeLerp4xAll() {
  constexpr int32 RESOLUTION_32 = 30;
  constexpr int32 RESOLUTION_MULT_32 = (1 << RESOLUTION_32);
  constexpr int32 HALF_32 = RESOLUTION_MULT_32 / 2;
  TestComputeLerp32x2<RESOLUTION_32>(100, 200, 300, 400, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(100, 100, 200, 200, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 200, 100, 100, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(100, 200, 100, 200, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 100, 200, 100, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 200, 200, 200, HALF_32, HALF_32);

  constexpr int32 RESOLUTION_8 = 7;
  constexpr int32 RESOLUTION_MULT_8 = (1 << RESOLUTION_8);
  constexpr int32 HALF_8 = RESOLUTION_MULT_8 / 2;
  TestComputeLerp8x8<RESOLUTION_8>(10, 20, 30, 40, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(100, 100, 200, 200, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 200, 100, 100, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(100, 200, 100, 200, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 100, 200, 100, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 200, 200, 200, HALF_8, HALF_8);
}

#endif  // QUANTIZATION_UTILS_USE_NEON

}  // namespace tensorflow

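// Wraps each standalone Test* function above in an individual gtest case.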
#define RUN_TEST(t) \
  TEST(QuantizationUtilsTest, t) { tensorflow::t(); }

RUN_TEST(TestFloatToQuantized);
RUN_TEST(TestQuantizedToFloat);
RUN_TEST(TestAvoidBias);
RUN_TEST(TestRequantizeInNewRange);
RUN_TEST(TestRequantizeInNewRangeRealData);
RUN_TEST(TestRequantizeInNewRange32To8Bit);
RUN_TEST(TestRequantizeManyInNewRange32To8Bit);
RUN_TEST(TestRequantizeManyInNewRange32To8BitUsingEigen);
RUN_TEST(TestRequantizeManyInNewRange32To8BitEigenVsNonEigen);
RUN_TEST(TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen);
RUN_TEST(TestFloatTensorToQuantized);
RUN_TEST(TestRequantizeManyInNewRange8To32Bit);
RUN_TEST(TestFloatToQuantizedInPlaceUsingEigen);
RUN_TEST(TestOverflowWithEigen);
RUN_TEST(TestQuantizedTensorToFloat);
RUN_TEST(TestQuantizedToFloatInPlaceUsingEigen);

#if defined(__ANDROID__)

RUN_TEST(BenchmarkRequantizeManyInNewRange);

#ifdef QUANTIZATION_UTILS_USE_NEON

RUN_TEST(TestDivide64x2PowAll);
RUN_TEST(TestComputeLerp4xAll);

#endif  // QUANTIZATION_UTILS_USE_NEON

#endif  // __ANDROID__

int main(int argc, char** argv) {
  // On Linux, add: absl::SetFlag(&FLAGS_logtostderr, true);
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}