/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <cmath>
#include <cstdint>
#include <memory>
#include <string>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/round.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"

namespace tflite {
namespace optimize {
namespace utils {

namespace {
const int8_t kMinQuantizedValue = -127;
const int8_t kMaxQuantizedValue = 127;
}  // namespace

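// Computes the total number of elements in the tensor's shape, returning an
// error if any dimension is non-positive or if the product would overflow
// uint64.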
TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
  *num_elements = 1;
  for (const int64_t dim : tensor.shape) {
    if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
      return kTfLiteError;
    }
    *num_elements *= dim;
  }
  return kTfLiteOk;
}

// Nudge min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
//
// Although this code originates from FakeQuantization in quantized training,
// we may deviate from that implementation as we please since we do not fine
// tune the weights with quantized training.
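// Illustrative example (not part of the original source): with min = -1.0f,
// max = 3.0f and an int8 range of [-128, 127], scale = 4.0f / 255 ~ 0.0157f
// and zero_point = round(-128 + 1.0f / scale) = round(-64.25) = -64, so the
// float value 0.0f maps exactly to the quantized value -64.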
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params) {
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(static_cast<float>(min), 0.0f);
  max = std::max(static_cast<float>(max), 0.0f);
  const float scale = (max - min) / (quant_max_float - quant_min_float);
  // Scale can be zero if min and max are exactly 0.0f.
  float zero_point_from_min = quant_min_float;
  if (scale != 0) {
    zero_point_from_min = quant_min_float - min / scale;
  }
  int64_t zero_point;
  if (zero_point_from_min < quant_min_float) {
    zero_point = static_cast<int64_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    zero_point = static_cast<int64_t>(quant_max);
  } else {
    zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
  }
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
}

// Set the max and min quantization parameters for a single tensor given its
// values.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params) {
  const auto minmax = std::minmax_element(input, input + input_size);
  quantization_params->min.assign(1, *minmax.first);
  quantization_params->max.assign(1, *minmax.second);
}

TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int32_t>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter) {
  if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
    error_reporter->Report(
        "Min or max already present in tensor quantization params.");
    return kTfLiteError;
  }
  if (dimension.size() != 4) {
    error_reporter->Report("Expected tensor with four dimensions, but got %d.",
                           dimension.size());
    return kTfLiteError;
  }
  if (channel_dim_index > 3) {
    error_reporter->Report(
        "Expected channel_dim_index to be less than four, but got %d.",
        channel_dim_index);
    return kTfLiteError;
  }
  const int32_t channel_dim_size = dimension[channel_dim_index];
  quantization_params->quantized_dimension = channel_dim_index;
  quantization_params->min = std::vector<float>(channel_dim_size);
  quantization_params->max = std::vector<float>(channel_dim_size);
  std::vector<bool> has_min_max_value(channel_dim_size, false);
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};

  // Compute min/max ranges per channel.
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          const float val = input[Offset(tensor_dims, indices)];
          if (has_min_max_value[channel_idx]) {
            if (quantization_params->min[channel_idx] > val) {
              quantization_params->min[channel_idx] = val;
            } else if (quantization_params->max[channel_idx] < val) {
              quantization_params->max[channel_idx] = val;
            }
          } else {
            quantization_params->min[channel_idx] = val;
            quantization_params->max[channel_idx] = val;
            has_min_max_value[channel_idx] = true;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}

// Populates the scales vector based on max and min values of quant_params.
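// Each symmetric scale is max(|min|, |max|) / 127. Illustrative example (not
// part of the original source): min = -0.3f, max = 0.5f gives
// scale = 0.5f / 127 ~ 0.0039f.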
TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                                          std::vector<float>* scales,
                                          ErrorReporter* error_reporter) {
  // Check that max and min values are present and their sizes match.
  if (quant_params->min.empty() || quant_params->max.empty()) {
    error_reporter->Report("Max and min values are not populated.");
    return kTfLiteError;
  }
  if (quant_params->min.size() != quant_params->max.size()) {
    error_reporter->Report("Dimensions of max and min values do not match.");
    return kTfLiteError;
  }
  if (scales->size() != quant_params->min.size()) {
    error_reporter->Report("Provided scale vector has incorrect size.");
    return kTfLiteError;
  }

  // num_channels is calculated from min.size() to infer whether quantization
  // is per axis.
  int num_channels = quant_params->min.size();
  // Calculate scales per channel.
  for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
    const float half_range = std::max(std::abs(quant_params->min[channel_idx]),
                                      std::abs(quant_params->max[channel_idx]));
    scales->at(channel_idx) = half_range / kMaxQuantizedValue;
  }
  return kTfLiteOk;
}

// Checks that the bias is quantized to within the middle half of the
// allowable bit range determined by the scales of the input and weight tensors.
// If this condition is not satisfied, the scale of the weights is increased in
// order to prevent overflow. The scale of the bias is not set here, only the
// min/max.
// The quant_params are the quantization parameters that correspond to the
// weight tensor.
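// Sketch of the overflow condition checked below (not part of the original
// source): a bias value is quantized with scale input_scale * weight_scale,
// so it falls outside the "middle half" of the int32 range when
// |bias| >= 0.5 * input_scale * weight_scale * INT32_MAX; in that case the
// weight min/max are widened so the resulting bias scale can hold the value.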
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter) {
  // TODO(dmolitor) Allow adjusting activation scale.
  // TODO(dmolitor) Tighten scale adjustment.
  // TODO(dmolitor) Test using a separate strategy for scales of 0.
  const int32_t kScale = std::numeric_limits<int32_t>::max();
  if (quant_params == nullptr) {
    error_reporter->Report("Missing max and min values for weight tensor.");
    return kTfLiteError;
  }
  // channel_dim_size is calculated from min.size() to infer whether
  // quantization is per axis.
  int channel_dim_size = quant_params->min.size();
  if (channel_dim_size == 0) {
    error_reporter->Report(
        "Missing weight scales. Unable to check compatibility with bias "
        "scale.");
    return kTfLiteError;
  }

  std::vector<float> weight_scales(channel_dim_size);
  TF_LITE_ENSURE_STATUS(GetSymmetricScalesFromMaxMin(
      quant_params, &weight_scales, error_reporter));

  // Per channel quantization
  if (channel_dim_size > 1) {
    for (size_t i = 0; i < channel_dim_size; ++i) {
      // Current scale is not compatible with bias. Adjust max/min values.
      if (std::abs(bias_data[i]) >=
          0.5 * input_scale * weight_scales[i] * kScale) {
        quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
                               (kMaxQuantizedValue / input_scale);
        quant_params->min[i] = -quant_params->max[i];
      }
    }
    // Per layer quantization
  } else if (channel_dim_size == 1) {
    const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
    const float bias_half_range =
        std::max(std::abs(*minmax.first), std::abs(*minmax.second));

    // Need to adjust weight min/max; not compatible with bias.
    if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
      quant_params->min[0] =
          2.0 * bias_half_range / kScale * (kMinQuantizedValue / input_scale);
      quant_params->max[0] =
          2.0 * bias_half_range / kScale * (kMaxQuantizedValue / input_scale);
    }
  }
  return kTfLiteOk;
}

// Per-channel quantizes a tensor at the given index and fills both scales and
// quantized values.
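// Illustrative example (not part of the original source): a channel whose
// recorded range is [-2.0f, 6.0f] gets scale 6.0f / 127 ~ 0.0472f, so a value
// of 3.0f in that channel quantizes to round(3.0f * 127 / 6.0f) = 64.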
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter) {
  if (tensor == nullptr) {
    error_reporter->Report("Cannot quantize. Tensor is null.");
    return kTfLiteError;
  }
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];
  // Fill per channel max and min values if needed.
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  if (!HasMinMax(tensor)) {
    TF_LITE_ENSURE_STATUS(
        FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
                             tensor->quantization.get(), error_reporter));
  }

  // Calculate scales per channel using max and min values from tensor.
  std::vector<float> scale_invs(channel_dim_size);
  const float half_scale = kMaxQuantizedValue;
  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
    const float half_range =
        std::max(std::abs(tensor->quantization->min[channel_idx]),
                 std::abs(tensor->quantization->max[channel_idx]));
    output_scales->at(channel_idx) = half_range / half_scale;
    if (half_range == 0) {
      scale_invs[channel_idx] = 0;
    } else {
      scale_invs[channel_idx] = half_scale / half_range;
    }
  }

  // Quantize the input values.
  SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
                                    channel_dim_index, output_value);
  return kTfLiteOk;
}

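// Symmetrically quantizes the float contents of the tensor's buffer to int16
// with the given scaling factor and a zero point of 0, then updates the model
// buffer, quantization parameters, and tensor type.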
TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int16_t> final_buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int16_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value =
        static_cast<int32_t>(TfLiteRound(float_data[i] * scaling_factor_inv));
    final_buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int16_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT16, model, tensor,
                               error_reporter);
}

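// Quantizes each value with the inverse scale of its channel and clamps the
// result to [-127, 127], writing the int8 results into output_value.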
void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value) {
  // Quantize the values.
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          int index = Offset(tensor_dims, indices);
          const float val = input[index];
          const int32_t quantized_value =
              static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
          output_value->at(index) = std::min<int8_t>(
              kMaxQuantizedValue,
              std::max<int8_t>(kMinQuantizedValue, quantized_value));
        }
      }
    }
  }
}

// Quantize the tensor using the max and min values recorded in its quantization
// parameters. Applies per-layer quantization.
TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
                                               ErrorReporter* error_reporter) {
  if (model == nullptr || tensor == nullptr) {
    error_reporter->Report("No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    error_reporter->Report("Missing buffer.");
    return kTfLiteError;
  }

  if (!HasMinMax(tensor)) {
    error_reporter->Report("Missing min or max values for quantization.");
    return kTfLiteError;
  }
  if (tensor->quantization->min.size() != 1 ||
      tensor->quantization->max.size() != 1) {
    error_reporter->Report("Expected single entry in max and min.");
    return kTfLiteError;
  }

  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  // Quantize tensor using recorded min and max values.
  float scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(
      float_data, num_elements, quantized_buffer.data(),
      tensor->quantization->min[0], tensor->quantization->max[0],
      &scaling_factor);
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);
  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

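// Symmetrically quantizes a float tensor to int8 per-layer, computing the
// min/max and scale directly from the buffer contents.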
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  float min_value, max_value, scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
                                        quantized_buffer.data(), &min_value,
                                        &max_value, &scaling_factor);

  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

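// Converts the tensor's float buffer to half precision (float16) and updates
// the tensor type accordingly.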
TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }

  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  // Copy single byte buffer data to float vector to guard against misalignment.
  std::vector<float> float_vector(num_elements);
  uint8_t* first = buffer->data.data();
  std::copy(first, first + buffer->data.size(),
            reinterpret_cast<uint8_t*>(float_vector.data()));

  // Transform float data to float16.
  std::vector<Eigen::half> quantized_buffer;
  quantized_buffer.resize(num_elements);
  std::transform(
      float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
      [](float a) { return Eigen::half_impl::float_to_half_rtne(a); });

  char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(
      half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);

  // Update the tensor type.
  tensor->type = TensorType_FLOAT16;

  return kTfLiteOk;
}

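// Writes the given scales, zero points, and quantized dimension into the
// tensor's quantization parameters, replaces the tensor's buffer contents with
// buffer_data, and sets the tensor to output_type.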
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter) {
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    error_reporter->Report(
        "Received zero_point of size %d and scales of size %d. "
        "These sizes should match.",
        zero_point.size(), scales.size());
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);
  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}

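// Symmetrically quantizes a four-dimensional float tensor to int8 per channel
// along channel_dim_index, then stores the resulting scales and data via
// AddQuantizationParams.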
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter) {
  if (tensor->shape.size() != 4) {
    error_reporter->Report(
        "SymmetricQuantizeTensorPerChannel requires tensor with four "
        "dimensions, but got %d dimension(s).",
        tensor->shape.size());
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_input_data =
      reinterpret_cast<const float*>(buffer->data.data());

  // Create container for output scale and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
      tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
      error_reporter));

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor, error_reporter);
}

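// Quantizes bias data to int32 per-layer using the given scaling factor and a
// zero point of 0.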
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int32_t> final_buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int32_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value = tflite::SafeCast<int32_t>(
        TfLiteRound(float_data[i] * scaling_factor_inv));
    final_buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int32_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT32, model, tensor,
                               error_reporter);
}

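// Quantizes bias data to int32 per channel, where each channel's scale is the
// product of the input scale and the corresponding weight scale.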
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (size_t i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int32_t> final_buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int32_t>::max();

  for (int32_t channel_idx = 0; channel_idx < number_of_dimension;
       channel_idx++) {
    float scaling_factor = scales[channel_idx];
    float scaling_factor_inv = (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
    const int32_t quantized_value = tflite::SafeCast<int32_t>(
        TfLiteRound(float_data[channel_idx] * scaling_factor_inv));
    final_buffer[channel_idx] =
        std::min(kScale, std::max(-kScale, quantized_value));
  }

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int32_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, 0, uint8_buffer, buffer_size,
                               TensorType_INT32, model, tensor, error_reporter);
}

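// Quantizes a weight (or constant) tensor, choosing per-channel quantization,
// per-layer quantization from recorded min/max, or per-layer quantization
// computed from the buffer contents, in that order of preference.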
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter) {
  // TODO(suharshs): Currently we conflate quantizing weights and constants. It's
  // possible that the right thing to do is to asymmetrically quantize the
  // weight. Add support for this.
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
                                             error_reporter);
  } else if (HasMinMax(tensor)) {
    // Quantize using recorded min/max values.
    return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
  } else {
    // Quantize using min/max from buffer.
    return SymmetricQuantizeTensor(model, tensor);
  }
}

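// Returns the effective scale for an operator: the product of the scales of
// the listed input tensors, the listed intermediate tensors, and any extra
// multiplicative factors.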
float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors) {
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0; i < input_index.size(); ++i) {
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0; i < intermediate_index.size(); ++i) {
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0; i < factors.size(); ++i) {
    scale *= factors[i];
  }
  return scale;
}

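// Asymmetrically quantizes an activation tensor to int8 using its recorded
// min/max values.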
void QuantizeActivation(TensorT* tensor) {
  GetAsymmetricQuantizationParams(
      tensor->quantization->min[0], tensor->quantization->max[0],
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
      tensor->quantization.get());
  tensor->type = TensorType_INT8;
}

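// Sets up int16 quantization for an activation tensor with the given scale and
// a zero point of 0.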
TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32_t zero_point = 0;
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

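// Returns the smallest exponent pot such that 2^pot covers max(|min|, |max|),
// capped at 10 by the fixed iteration count below.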
int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  for (int i = 0; i < 10; i++) {
    if (std::pow(2, pot) < range) {
      pot++;
    }
  }
  return pot;
}

}  // namespace utils
}  // namespace optimize
}  // namespace tflite