• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/compiler/mlir/lite/quantization/numerical_utils.h"
16 
17 #include <assert.h>
18 
19 #include <algorithm>
20 #include <cmath>
21 #include <limits>
22 
23 #include "absl/types/optional.h"
24 
25 namespace mlir {
26 namespace quant {
27 
28 // This method is adopted from TFLite:
29 // ["tensorflow/lite/kernels/internal/quantization_util.cc"]
QuantizeMultiplier(double double_multiplier)30 QuantizedMultiplier QuantizeMultiplier(double double_multiplier) {
31   if (double_multiplier < 1e-6) {
32     return {0, 0};
33   }
34 
35   int32_t shift;
36   const double q = frexp(double_multiplier, &shift);
37   auto q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
38   assert(q_fixed <= (1ll << 31));
39   if (q_fixed == (1ll << 31)) {
40     q_fixed /= 2;
41     ++shift;
42   }
43   assert(q_fixed <= std::numeric_limits<int32_t>::max());
44   // A shift amount smaller than -31 would cause all bits to be shifted out
45   // and thus all results would be zero. We implement that instead with
46   // q_fixed==0, so as to avoid hitting issues with right-shift
47   // operations with shift amounts greater than 31. Note that this happens
48   // roughly when abs(double_multiplier) < 2^-31 and the present handling means
49   // that we're effectively flushing tiny double_multiplier's to zero.
50   // We could conceivably handle values in the range (roughly) [32, 63]
51   // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
52   // the present handling is just doing 'flush denormals to zero'. We could
53   // reconsider and actually generate nonzero denormals if a need arises.
54   if (shift < -31) {
55     shift = 0;
56     q_fixed = 0;
57   }
58   return {static_cast<int32_t>(q_fixed), shift};
59 }
60 
CalculateQuantizedRange(double scale,int32_t zero_point,absl::optional<double> rmin,absl::optional<double> rmax,int32_t qmin,int32_t qmax)61 QuantizedRange CalculateQuantizedRange(double scale, int32_t zero_point,
62                                        absl::optional<double> rmin,
63                                        absl::optional<double> rmax,
64                                        int32_t qmin, int32_t qmax) {
65   auto quantize = [scale, zero_point](float f) {
66     return zero_point + static_cast<int32_t>(std::round(f / scale));
67   };
68 
69   if (rmin.has_value() && rmax.has_value()) {
70     return {std::max(qmin, quantize(rmin.value())),
71             std::min(qmax, quantize(rmax.value()))};
72   } else if (rmin.has_value()) {
73     return {std::max(qmin, quantize(rmin.value())), qmax};
74   } else if (rmax.has_value()) {
75     return {qmin, std::min(qmax, quantize(rmax.value()))};
76   } else {
77     return {qmin, qmax};
78   }
79 }
80 
81 }  // namespace quant
82 }  // namespace mlir
83