/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>

#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/round.h"

namespace tflite {

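// Wraps QuantizeMultiplier and zeroes out the result when the computed shift
// would make RoundingDivideByPOT fail (shift < -31).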
void GuardedQuantizeMultiplier(double effective_output_scale,
                               int32_t* significand, int* shift) {
  QuantizeMultiplier(effective_output_scale, significand, shift);
  // Additional guard to make sure RoundingDivideByPOT does not fail.
  if (*shift < -31) {
    // If shift is less than -31, RoundingDivideByPOT fails. This happens when
    // min and max are close and small. For this particular case, both
    // significand and shift are set to zero.
    *significand = 0;
    *shift = 0;
  }
}

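// Populates the quantization parameters needed by quantized convolution
// kernels: per-channel multipliers and shifts derived from the filter's
// affine quantization scales, and (for the legacy uint8 path) the scalar
// multiplier, shift, and activation range.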
TfLiteStatus PopulateConvolutionQuantizationParams(
    TfLiteContext* context, const TfLiteTensor* input,
    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
    int32_t* output_activation_min, int32_t* output_activation_max,
    int32_t* per_channel_multiplier, int* per_channel_shift) {
  TF_LITE_ENSURE_EQ(context, input->quantization.type,
                    kTfLiteAffineQuantization);
  TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                    kTfLiteAffineQuantization);
  // TODO(jianlijianli): Enable bias type check and bias scale == input scale
  // * filter scale for each channel in affine quantization once bias
  // quantization is properly populated.
  // TF_LITE_ENSURE_EQ(context, bias->quantization.type,
  // kTfLiteAffineQuantization);

  // Check data type.
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  TF_LITE_ENSURE(context, affine_quantization);
  TF_LITE_ENSURE(context, affine_quantization->scale);
  const bool is_per_channel = affine_quantization->scale->size > 1;
  if (is_per_channel) {
    //  Currently only Int8 is supported for per channel quantization.
    TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(
        context, affine_quantization->scale->size,
        filter->dims->data[affine_quantization->quantized_dimension]);
  }

  // Populate multiplier and shift using affine quantization.
  const int num_channels = affine_quantization->scale->size;
  const float input_scale = input->params.scale;
  const float output_scale = output->params.scale;
  const float* filter_scales = affine_quantization->scale->data;
  for (int i = 0; i < num_channels; ++i) {
    const double filter_scale = static_cast<double>(filter_scales[i]);
    const double effective_output_scale = static_cast<double>(input_scale) *
                                          filter_scale /
                                          static_cast<double>(output_scale);
    int32_t significand;
    int shift;
    GuardedQuantizeMultiplier(effective_output_scale, &significand, &shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = shift;
  }

  // Populate scalar quantization parameters.
  // This check on legacy quantization parameters is kept only for backward
  // compatibility.
  if (input->type == kTfLiteUInt8) {
    // Check bias scale == input scale * filter scale.
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    int exponent;

    // Populate quantization parameters with multiplier and shift.
    QuantizeMultiplier(real_multiplier, multiplier, &exponent);
    *shift = -exponent;
    CalculateActivationRangeUint8(activation, output, output_activation_min,
                                  output_activation_max);
  }
  return kTfLiteOk;
}

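// Computes the real convolution multiplier as
// (input_scale * filter_scale) / output_scale, after checking that the bias
// scale matches input_scale * filter_scale within a small tolerance.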
TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                              const TfLiteTensor* input,
                                              const TfLiteTensor* filter,
                                              const TfLiteTensor* bias,
                                              TfLiteTensor* output,
                                              double* multiplier) {
  const double input_product_scale = input->params.scale * filter->params.scale;
  const double bias_scale = bias->params.scale;
  const double output_scale = output->params.scale;

  // TODO(ahentz): The following conditions must be guaranteed by the training
  // pipeline.
  TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <=
                              1e-6 * std::min(input_product_scale, bias_scale));
  TF_LITE_ENSURE(context, input_product_scale >= 0);

  *multiplier = input_product_scale / output_scale;

  return kTfLiteOk;
}

namespace {
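// Narrows the representable quantized range [qmin, qmax] according to the
// fused activation (ReLU, ReLU6, ReLU-1), using the output tensor's scale and
// zero point to quantize the activation bounds.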
void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
                                           int32_t qmin, int32_t qmax,
                                           TfLiteTensor* output,
                                           int32_t* act_min, int32_t* act_max) {
  const auto scale = output->params.scale;
  const auto zero_point = output->params.zero_point;

  auto quantize = [scale, zero_point](float f) {
    return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
  };

  if (activation == kTfLiteActRelu) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = qmax;
  } else if (activation == kTfLiteActRelu6) {
    *act_min = std::max(qmin, quantize(0.0));
    *act_max = std::min(qmax, quantize(6.0));
  } else if (activation == kTfLiteActRelu1) {
    *act_min = std::max(qmin, quantize(-1.0));
    *act_max = std::min(qmax, quantize(1.0));
  } else {
    *act_min = qmin;
    *act_max = qmax;
  }
}
}  // namespace

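// Determines the activation range for a quantized output tensor, choosing the
// base range from the tensor type (uint8, int8, or int16) and returning an
// error for any other type.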
TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
                                               TfLiteFusedActivation activation,
                                               TfLiteTensor* output,
                                               int32_t* act_min,
                                               int32_t* act_max) {
  int32_t qmin = 0;
  int32_t qmax = 0;
  if (output->type == kTfLiteUInt8) {
    qmin = std::numeric_limits<uint8_t>::min();
    qmax = std::numeric_limits<uint8_t>::max();
  } else if (output->type == kTfLiteInt8) {
    qmin = std::numeric_limits<int8_t>::min();
    qmax = std::numeric_limits<int8_t>::max();
  } else if (output->type == kTfLiteInt16) {
    qmin = std::numeric_limits<int16_t>::min();
    qmax = std::numeric_limits<int16_t>::max();
  } else {
    TF_LITE_ENSURE(context, false);
  }

  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
                                        act_max);
  return kTfLiteOk;
}

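// Uint8-specific variant of the activation-range calculation; used by the
// legacy scalar quantization path above.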
void CalculateActivationRangeUint8(TfLiteFusedActivation activation,
                                   TfLiteTensor* output, int32_t* act_min,
                                   int32_t* act_max) {
  const int32_t qmin = std::numeric_limits<uint8_t>::min();
  const int32_t qmax = std::numeric_limits<uint8_t>::max();

  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
                                        act_max);
}

void CalculateActivationRangeInt8(TfLiteFusedActivation activation,
                                  TfLiteTensor* output, int32_t* act_min,
                                  int32_t* act_max) {
  const int32_t qmin = std::numeric_limits<int8_t>::min();
  const int32_t qmax = std::numeric_limits<int8_t>::max();

  CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
                                        act_max);
}

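// Returns true if the two tensors have identical dimensions.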
bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
  return TfLiteIntArrayEqual(input1->dims, input2->dims);
}

// TODO(petewarden): Having macros around this is ugly, look at other strategies
// before replicating this approach elsewhere.
#ifndef TF_LITE_STATIC_MEMORY
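// Computes the broadcast output shape for two inputs using NumPy-style
// broadcasting: dimensions are aligned from the right, and each pair must be
// equal or have one side equal to 1.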
TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
                                        const TfLiteTensor* input1,
                                        const TfLiteTensor* input2,
                                        TfLiteIntArray** output_shape) {
  int64_t dims1 = NumDimensions(input1);
  int64_t dims2 = NumDimensions(input2);
  int64_t out_dims = std::max(dims1, dims2);
  if (NumElements(input1) == 0) {
    *output_shape = TfLiteIntArrayCopy(input1->dims);
    return kTfLiteOk;
  }
  std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
      TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
  for (int i = 0; i < out_dims; ++i) {
    int64_t d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
    int64_t d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
    TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
    shape->data[out_dims - i - 1] = std::max(d1, d2);
  }
  *output_shape = shape.release();
  return kTfLiteOk;
}
#endif  // TF_LITE_STATIC_MEMORY

}  // namespace tflite