/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/kernel_util.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <memory>

#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/round.h"

namespace tflite {

GuardedQuantizeMultiplier(double effective_output_scale,int32_t * significand,int * shift)26 void GuardedQuantizeMultiplier(double effective_output_scale,
27 int32_t* significand, int* shift) {
28 QuantizeMultiplier(effective_output_scale, significand, shift);
29 // Additional guard to make sure RoundingDivideByPOT does not fail.
30 if (*shift < -31) {
31 // If shift is less than -31, RoundingDivideByPOT fails. This happens when
32 // min and max are close and small. For this particular case, both
33 // significand and shift are set to zero.
34 *significand = 0;
35 *shift = 0;
36 }
37 }
38
PopulateConvolutionQuantizationParams(TfLiteContext * context,const TfLiteTensor * input,const TfLiteTensor * filter,const TfLiteTensor * bias,TfLiteTensor * output,const TfLiteFusedActivation & activation,int32_t * multiplier,int * shift,int32_t * output_activation_min,int32_t * output_activation_max,int32_t * per_channel_multiplier,int * per_channel_shift)39 TfLiteStatus PopulateConvolutionQuantizationParams(
40 TfLiteContext* context, const TfLiteTensor* input,
41 const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
42 const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
43 int32_t* output_activation_min, int32_t* output_activation_max,
44 int32_t* per_channel_multiplier, int* per_channel_shift) {
45 TF_LITE_ENSURE_EQ(context, input->quantization.type,
46 kTfLiteAffineQuantization);
47 TF_LITE_ENSURE_EQ(context, filter->quantization.type,
48 kTfLiteAffineQuantization);
49 // TODO(jianlijianli): Enable bias type check and bias scale == input scale
50 // * filter scale for each channel in affine quantization once bias
51 // quantization is properly populated.
52 // TF_LITE_ENSURE_EQ(context, bias->quantization.type,
53 // kTfLiteAffineQuantization);
54
55 // Check data type.
56 const auto* affine_quantization =
57 reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
58 TF_LITE_ENSURE(context, affine_quantization);
59 TF_LITE_ENSURE(context, affine_quantization->scale);
60 const bool is_per_channel = affine_quantization->scale->size > 1;
61 if (is_per_channel) {
62 // Currently only Int8 is supported for per channel quantization.
63 TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8);
64 TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
65 TF_LITE_ENSURE_EQ(
66 context, affine_quantization->scale->size,
67 filter->dims->data[affine_quantization->quantized_dimension]);
68 }
69
70 // Populate multiplier and shift using affine quantization.
71 const int num_channels = affine_quantization->scale->size;
72 const float input_scale = input->params.scale;
73 const float output_scale = output->params.scale;
74 const float* filter_scales = affine_quantization->scale->data;
75 for (int i = 0; i < num_channels; ++i) {
76 const double filter_scale = static_cast<double>(filter_scales[i]);
77 const double effective_output_scale = static_cast<double>(input_scale) *
78 filter_scale /
79 static_cast<double>(output_scale);
80 int32_t significand;
81 int shift;
82 GuardedQuantizeMultiplier(effective_output_scale, &significand, &shift);
83 per_channel_multiplier[i] = significand;
84 per_channel_shift[i] = shift;
85 }
86
87 // Populate scalar quantization parameters.
88 // This check on legacy quantization parameters is kept only for backward
89 // compatibility.
90 if (input->type == kTfLiteUInt8) {
91 // Check bias scale == input scale * filter scale.
92 double real_multiplier = 0.0;
93 TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
94 context, input, filter, bias, output, &real_multiplier));
95 int exponent;
96
97 // Populate quantization parameteters with multiplier and shift.
98 QuantizeMultiplier(real_multiplier, multiplier, &exponent);
99 *shift = -exponent;
100 CalculateActivationRangeUint8(activation, output, output_activation_min,
101 output_activation_max);
102 }
103 return kTfLiteOk;
104 }
105
GetQuantizedConvolutionMultipler(TfLiteContext * context,const TfLiteTensor * input,const TfLiteTensor * filter,const TfLiteTensor * bias,TfLiteTensor * output,double * multiplier)106 TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
107 const TfLiteTensor* input,
108 const TfLiteTensor* filter,
109 const TfLiteTensor* bias,
110 TfLiteTensor* output,
111 double* multiplier) {
112 const double input_product_scale = input->params.scale * filter->params.scale;
113 const double bias_scale = bias->params.scale;
114 const double output_scale = output->params.scale;
115
116 // TODO(ahentz): The following conditions must be guaranteed by the training
117 // pipeline.
118 TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <=
119 1e-6 * std::min(input_product_scale, bias_scale));
120 TF_LITE_ENSURE(context, input_product_scale >= 0);
121
122 *multiplier = input_product_scale / output_scale;
123
124 return kTfLiteOk;
125 }
126
127 namespace {
CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,int32_t qmin,int32_t qmax,TfLiteTensor * output,int32_t * act_min,int32_t * act_max)128 void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation,
129 int32_t qmin, int32_t qmax,
130 TfLiteTensor* output,
131 int32_t* act_min, int32_t* act_max) {
132 const auto scale = output->params.scale;
133 const auto zero_point = output->params.zero_point;
134
135 auto quantize = [scale, zero_point](float f) {
136 return zero_point + static_cast<int32_t>(TfLiteRound(f / scale));
137 };
138
139 if (activation == kTfLiteActRelu) {
140 *act_min = std::max(qmin, quantize(0.0));
141 *act_max = qmax;
142 } else if (activation == kTfLiteActRelu6) {
143 *act_min = std::max(qmin, quantize(0.0));
144 *act_max = std::min(qmax, quantize(6.0));
145 } else if (activation == kTfLiteActRelu1) {
146 *act_min = std::max(qmin, quantize(-1.0));
147 *act_max = std::min(qmax, quantize(1.0));
148 } else {
149 *act_min = qmin;
150 *act_max = qmax;
151 }
152 }
153 } // namespace
154
CalculateActivationRangeQuantized(TfLiteContext * context,TfLiteFusedActivation activation,TfLiteTensor * output,int32_t * act_min,int32_t * act_max)155 TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context,
156 TfLiteFusedActivation activation,
157 TfLiteTensor* output,
158 int32_t* act_min,
159 int32_t* act_max) {
160 int32_t qmin = 0;
161 int32_t qmax = 0;
162 if (output->type == kTfLiteUInt8) {
163 qmin = std::numeric_limits<uint8_t>::min();
164 qmax = std::numeric_limits<uint8_t>::max();
165 } else if (output->type == kTfLiteInt8) {
166 qmin = std::numeric_limits<int8_t>::min();
167 qmax = std::numeric_limits<int8_t>::max();
168 } else if (output->type == kTfLiteInt16) {
169 qmin = std::numeric_limits<int16_t>::min();
170 qmax = std::numeric_limits<int16_t>::max();
171 } else {
172 TF_LITE_ENSURE(context, false);
173 }
174
175 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
176 act_max);
177 return kTfLiteOk;
178 }
179
CalculateActivationRangeUint8(TfLiteFusedActivation activation,TfLiteTensor * output,int32_t * act_min,int32_t * act_max)180 void CalculateActivationRangeUint8(TfLiteFusedActivation activation,
181 TfLiteTensor* output, int32_t* act_min,
182 int32_t* act_max) {
183 const int32_t qmin = std::numeric_limits<uint8_t>::min();
184 const int32_t qmax = std::numeric_limits<uint8_t>::max();
185
186 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
187 act_max);
188 }
189
CalculateActivationRangeInt8(TfLiteFusedActivation activation,TfLiteTensor * output,int32_t * act_min,int32_t * act_max)190 void CalculateActivationRangeInt8(TfLiteFusedActivation activation,
191 TfLiteTensor* output, int32_t* act_min,
192 int32_t* act_max) {
193 const int32_t qmin = std::numeric_limits<int8_t>::min();
194 const int32_t qmax = std::numeric_limits<int8_t>::max();
195
196 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min,
197 act_max);
198 }
199
HaveSameShapes(const TfLiteTensor * input1,const TfLiteTensor * input2)200 bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) {
201 return TfLiteIntArrayEqual(input1->dims, input2->dims);
202 }
203
204 // TODO(petewarden): Having macros around this is ugly, look at other strategies
205 // before replicating this approach elsewhere.
206 #ifndef TF_LITE_STATIC_MEMORY
CalculateShapeForBroadcast(TfLiteContext * context,const TfLiteTensor * input1,const TfLiteTensor * input2,TfLiteIntArray ** output_shape)207 TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context,
208 const TfLiteTensor* input1,
209 const TfLiteTensor* input2,
210 TfLiteIntArray** output_shape) {
211 int64_t dims1 = NumDimensions(input1);
212 int64_t dims2 = NumDimensions(input2);
213 int64_t out_dims = std::max(dims1, dims2);
214 if (NumElements(input1) == 0) {
215 *output_shape = TfLiteIntArrayCopy(input1->dims);
216 return kTfLiteOk;
217 }
218 std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape(
219 TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree);
220 for (int i = 0; i < out_dims; ++i) {
221 int64_t d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1);
222 int64_t d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1);
223 TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1);
224 shape->data[out_dims - i - 1] = std::max(d1, d2);
225 }
226 *output_shape = shape.release();
227 return kTfLiteOk;
228 }
229 #endif // TF_LITE_STATIC_MEMORY
230
}  // namespace tflite