/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/xnnpack/test_util.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/types.h"
23 
24 namespace tflite {
25 namespace xnnpack {
26 
QuantizeInt8(float value,int32_t zero_point,float scale)27 int8_t QuantizeInt8(float value, int32_t zero_point, float scale) {
28   static constexpr int32_t min_val = std::numeric_limits<int8_t>::min();
29   static constexpr int32_t max_val = std::numeric_limits<int8_t>::max();
30 
31   int32_t unclamped =
32       static_cast<int32_t>(TfLiteRound(value / scale)) + zero_point;
33   int32_t clamped = std::min(std::max(unclamped, min_val), max_val);
34   return static_cast<int8_t>(clamped);
35 }
36 
// Per-channel variant of QuantizeInt8: quantizes a dense tensor of `shape`
// where each index along `quantized_dimension` carries its own quantization
// parameters.
//
// `scale` and `zero_point` are parallel per-channel arrays; presumably of
// length shape[quantized_dimension] — confirm against callers. `input_data`
// and `output_data` hold one element per coordinate of `shape`, laid out as
// flattened by ReducedOutputOffset.
void QuantizeInt8PerChannel(const float* scale, const int64_t* zero_point,
                            int32_t quantized_dimension,
                            const float* input_data, int8_t* output_data,
                            const std::vector<int32_t>& shape) {
  const int32_t num_dims = shape.size();
  const int32_t* dims_data = shape.data();
  // Multi-dimensional coordinate of the element currently visited; starts at
  // the all-zeros index and is advanced by NextIndex below.
  std::vector<int> current_dim(num_dims, 0);

  do {
    // Flatten the current coordinate into a linear element offset.
    const size_t offset =
        ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
                            current_dim.data(), 0, nullptr);
    // The coordinate along the quantized dimension selects which
    // (scale, zero_point) pair applies to this element.
    const int channel_idx = current_dim[quantized_dimension];
    // NOTE(review): zero_point is int64_t but QuantizeInt8 takes int32_t —
    // values are implicitly narrowed; int8 zero points fit comfortably.
    output_data[offset] = QuantizeInt8(
        input_data[offset], zero_point[channel_idx], scale[channel_idx]);
  } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
                     current_dim.data()));
}
55 
// Returns the int8 affine-quantization scale that spans [min, max]:
// (max - min) / 255. For a degenerate interval (min == max) the magnitude
// of the single value is used as the range instead; if that value is also
// zero, the returned scale is 0.
float GetInt8QuantizationScaleFromMinMax(float min, float max) {
  // Width of the int8 quantized domain: 127 - (-128) = 255.
  static constexpr float kQuantizedRange =
      static_cast<float>(std::numeric_limits<int8_t>::max()) -
      static_cast<float>(std::numeric_limits<int8_t>::min());

  float span = max - min;
  if (span == 0) {
    // Empty interval: fall back to the value's own magnitude.
    span = std::max(std::abs(max), std::abs(min));
  }
  return span / kQuantizedRange;
}
68 
GetInt8QuantizationScale(const std::vector<float> & data)69 float GetInt8QuantizationScale(const std::vector<float>& data) {
70   return GetInt8QuantizationScaleFromMinMax(
71       *std::max_element(data.begin(), data.end()),
72       *std::min_element(data.begin(), data.end()));
73 }
74 
GetInt8QuantizationScalePerChannel(const float * data,int32_t quantized_dimension,const std::vector<int32_t> & shape)75 std::vector<float> GetInt8QuantizationScalePerChannel(
76     const float* data, int32_t quantized_dimension,
77     const std::vector<int32_t>& shape) {
78   const int32_t num_dims = shape.size();
79   const int32_t* dims_data = shape.data();
80   const int32_t channel_dim_size = shape[quantized_dimension];
81   std::vector<float> min(channel_dim_size, std::numeric_limits<float>::max());
82   std::vector<float> max(channel_dim_size, std::numeric_limits<float>::min());
83   std::vector<int> current_dim(num_dims, 0);
84 
85   do {
86     const size_t offset =
87         ReducedOutputOffset(num_dims, reinterpret_cast<const int*>(dims_data),
88                             current_dim.data(), 0, nullptr);
89     const int channel_idx = current_dim[quantized_dimension];
90     const float val = data[offset];
91     min[channel_idx] = std::min(min[channel_idx], val);
92     max[channel_idx] = std::max(max[channel_idx], val);
93   } while (NextIndex(num_dims, reinterpret_cast<const int*>(dims_data),
94                      current_dim.data()));
95 
96   std::vector<float> scale;
97   scale.reserve(channel_dim_size);
98   for (int32_t idx = 0; idx < channel_dim_size; ++idx) {
99     scale.push_back(GetInt8QuantizationScaleFromMinMax(min[idx], max[idx]));
100   }
101   return scale;
102 }
103 
104 }  // namespace xnnpack
105 }  // namespace tflite
106