• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/kernels/uniform_quant_ops/math_utils.h"
16 
17 #include <algorithm>
18 #include <cmath>
19 
20 #include "tensorflow/core/platform/errors.h"
21 
22 namespace tensorflow {
23 
24 using errors::InvalidArgument;
25 
26 // Reference:
27 // https://github.com/tensorflow/tensorflow/blob/57946ceb4b6119d6d0f49abbb2e3d1636a3b83a0/tensorflow/lite/kernels/internal/quantization_util.cc#L53
28 // Where double_multiplier >= 0 and TFLITE_EMULATE_FLOAT is not defined.
QuantizeMultiplier(double double_multiplier,int32_t & quantized_multiplier,int32_t & shift)29 Status QuantizeMultiplier(double double_multiplier,
30                           int32_t& quantized_multiplier, int32_t& shift) {
31   if (!isfinite(double_multiplier) || double_multiplier <= 0) {
32     return InvalidArgument(
33         "double_multiplier must be a poisitive finite number. Given ",
34         double_multiplier);
35   }
36   const double q = std::frexp(double_multiplier, &shift);
37   auto q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
38   if (q_fixed == (1LL << 31)) {
39     q_fixed /= 2;
40     ++shift;
41   }
42   if (shift < -31) {
43     shift = 0;
44     q_fixed = 0;
45   }
46   if (shift > 30) {
47     shift = 30;
48     q_fixed = (1LL << 31) - 1;
49   }
50   quantized_multiplier = static_cast<int32_t>(q_fixed);
51   return OkStatus();
52 }
53 
AsymmetricQuantize(const Tensor & tensor,int apply_offset,int apply_size,int32_t quantization_min_val,int32_t quantization_max_val,float & scale,int32 & zero_point,Tensor & quantized_tensor)54 void AsymmetricQuantize(const Tensor& tensor, int apply_offset, int apply_size,
55                         int32_t quantization_min_val,
56                         int32_t quantization_max_val, float& scale,
57                         int32& zero_point, Tensor& quantized_tensor) {
58   Eigen::DSizes<Eigen::Index, 1> apply_offset_array{apply_offset};
59   Eigen::DSizes<Eigen::Index, 1> apply_size_array{apply_size};
60 
61   auto tensor_slice =
62       tensor.flat<float>().slice(apply_offset_array, apply_size_array);
63   auto quantized_tensor_slice = quantized_tensor.flat<qint8>().slice(
64       apply_offset_array, apply_size_array);
65 
66   Eigen::Tensor<float, 0, Eigen::RowMajor> tensor_slice_min =
67       tensor_slice.minimum();
68   Eigen::Tensor<float, 0, Eigen::RowMajor> tensor_slice_max =
69       tensor_slice.maximum();
70   const double rmin = static_cast<double>(std::min(0.0f, tensor_slice_min()));
71   const double rmax = static_cast<double>(std::max(0.0f, tensor_slice_max()));
72   const double qmin_double = quantization_min_val;
73   const double qmax_double = quantization_max_val;
74 
75   float inv_scale = 0;
76   scale = (rmax - rmin) / (qmax_double - qmin_double);
77   if (rmax - rmin != 0) {
78     // Re-calculate the inverse instead of using (1./scale), to avoid loss of
79     // precision.
80     inv_scale = (qmax_double - qmin_double) / (rmax - rmin);
81   }
82   if (scale == 0 || !std::isfinite(inv_scale)) {
83     quantized_tensor_slice.setZero();
84     scale = 1.0;
85     zero_point = 0;
86     return;
87   }
88 
89   const double zero_point_from_min = qmin_double - rmin / scale;
90   const double zero_point_from_max = qmax_double - rmax / scale;
91   const double zero_point_from_min_error =
92       std::abs(qmin_double) + std::abs(rmin / scale);
93   const double zero_point_from_max_error =
94       std::abs(qmax_double) + std::abs(rmax / scale);
95   const double zero_point_double =
96       zero_point_from_min_error < zero_point_from_max_error
97           ? zero_point_from_min
98           : zero_point_from_max;
99 
100   int8_t nudged_zero_point = 0;
101   if (zero_point_double <= qmin_double) {
102     nudged_zero_point = quantization_min_val;
103   } else if (zero_point_double >= qmax_double) {
104     nudged_zero_point = quantization_max_val;
105   } else {
106     nudged_zero_point = static_cast<int8_t>(round(zero_point_double));
107   }
108   zero_point = nudged_zero_point;
109 
110   AffineQuantize(tensor_slice, inv_scale, zero_point, quantization_min_val,
111                  quantization_max_val, quantized_tensor_slice);
112 }
113 
114 }  // namespace tensorflow
115