/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"

namespace tflite {
namespace optimize {
namespace utils {

namespace {
const int8_t kMinQuantizedValue = -127;
const int8_t kMaxQuantizedValue = 127;
}  // namespace
TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
  *num_elements = 1;
  for (const int64_t dim : tensor.shape) {
    if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
      return kTfLiteError;
    }
    *num_elements *= dim;
  }
  return kTfLiteOk;
}

// Nudge min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
//
// Although this code originates from FakeQuantization in quantized training,
// we may deviate from that implementation as we please since we do not fine
// tune the weights with quantized training.
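//
// For example, with min = -1.0f, max = 1.55f and the int8 range [-128, 127]:
// scale = (1.55 - (-1.0)) / 255 = 0.01 and
// zero_point = round(-128 - (-1.0 / 0.01)) = -28, so the real value 0.0 maps
// exactly onto the quantized value -28.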
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params) {
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(static_cast<float>(min), 0.0f);
  max = std::max(static_cast<float>(max), 0.0f);
  const float scale = (max - min) / (quant_max_float - quant_min_float);
  // Scale can be zero if min and max are exactly 0.0f.
  float zero_point_from_min = quant_min_float;
  if (scale != 0) {
    zero_point_from_min = quant_min_float - min / scale;
  }
  int64_t zero_point;
  if (zero_point_from_min < quant_min_float) {
    zero_point = static_cast<int64_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    zero_point = static_cast<int64_t>(quant_max);
  } else {
    zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
  }
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
}

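// Computes symmetric quantization parameters: the [min, max] range is widened
// to include 0, scale = max(|min|, |max|) / half_quant_range, and the zero
// point is fixed at 0.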
void GetSymmetricQuantizationParams(
    float min, float max, const int half_quant_range,
    QuantizationParametersT* quantization_params) {
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);
  const float scale = std::max(std::abs(max), std::abs(min)) / half_quant_range;
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, 0);
}

TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter) {
  if (activations_type == TensorType_INT8) {
    GetAsymmetricQuantizationParams(
        tensor->quantization->min[0], tensor->quantization->max[0],
        std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
        quantization_params);
  } else if (activations_type == TensorType_INT16) {
    const int half_quantized_range = 32767;
    GetSymmetricQuantizationParams(tensor->quantization->min[0],
                                   tensor->quantization->max[0],
                                   half_quantized_range, quantization_params);
  } else {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Unsupported activation type for quantize-activation: %d",
        activations_type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}

// Sets the max and min quantization parameters for a single tensor given its
// values.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params) {
  const auto minmax = std::minmax_element(input, input + input_size);
  quantization_params->min.assign(1, *minmax.first);
  quantization_params->max.assign(1, *minmax.second);
}

TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int32_t>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter) {
  if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Min or max already present in tensor quantization params.");
    return kTfLiteError;
  }
  if (dimension.size() != 4) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected tensor with four dimensions, but got %d.",
                         dimension.size());
    return kTfLiteError;
  }
  if (channel_dim_index > 3) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Expected channel_dim_index to be less than four, but got %d.",
        channel_dim_index);
    return kTfLiteError;
  }
  const int32_t channel_dim_size = dimension[channel_dim_index];
  quantization_params->quantized_dimension = channel_dim_index;
  quantization_params->min = std::vector<float>(channel_dim_size);
  quantization_params->max = std::vector<float>(channel_dim_size);
  std::vector<bool> has_min_max_value(channel_dim_size, false);
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};

  // Compute min and max ranges per channel.
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          const float val = input[Offset(tensor_dims, indices)];
          if (has_min_max_value[channel_idx]) {
            if (quantization_params->min[channel_idx] > val) {
              quantization_params->min[channel_idx] = val;
            } else if (quantization_params->max[channel_idx] < val) {
              quantization_params->max[channel_idx] = val;
            }
          } else {
            quantization_params->min[channel_idx] = val;
            quantization_params->max[channel_idx] = val;
            has_min_max_value[channel_idx] = true;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}

// Populates the scales vector based on max and min values of quant_params.
TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                                          std::vector<float>* scales,
                                          ErrorReporter* error_reporter) {
  // Check that max and min values are present and their sizes match.
  if (quant_params->min.empty() || quant_params->max.empty()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Max and min values are not populated.");
    return kTfLiteError;
  }
  if (quant_params->min.size() != quant_params->max.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Dimensions of max and min values do not match.");
    return kTfLiteError;
  }
  if (scales->size() != quant_params->min.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Provided scale vector has incorrect size.");
    return kTfLiteError;
  }

  // num_channels is calculated from min.size() to infer whether quantization
  // is per axis.
  int num_channels = quant_params->min.size();
  // Calculate scales per channel.
  for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
    const float half_range = std::max(std::abs(quant_params->min[channel_idx]),
                                      std::abs(quant_params->max[channel_idx]));
    scales->at(channel_idx) = half_range / kMaxQuantizedValue;
  }
  return kTfLiteOk;
}

// Checks that the bias is quantized to within the middle half of the
// allowable bit range determined by the scales of the input and weight
// tensors. If this condition is not satisfied, the scale of the weights is
// increased in order to prevent overflow. The scale of the bias is not set
// here, only the min/max.
// The quant_params are the quantization parameters that correspond to the
// weight tensor.
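//
// Concretely, the bias is later quantized as bias / (input_scale *
// weight_scale) into 32 bits, so a bias value with
// |bias| >= 0.5 * input_scale * weight_scale * INT32_MAX would land in the
// outer half of the int32 range. Widening the weight min/max enlarges the
// weight scale (and therefore the bias scale), pulling the quantized bias
// back toward the middle of the range.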
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter) {
  // TODO(dmolitor) Allow adjusting activation scale.
  // TODO(dmolitor) Tighten scale adjustment.
  // TODO(dmolitor) Test using a separate strategy for scales of 0.
  const int32_t kScale = std::numeric_limits<int32_t>::max();
  if (quant_params == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing max and min values for weight tensor.");
    return kTfLiteError;
  }
  // channel_dim_size is calculated from min.size() to infer whether
  // quantization is per axis.
  int channel_dim_size = quant_params->min.size();
  if (channel_dim_size == 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Missing weight scales. Unable to check compatibility with bias "
        "scale.");
    return kTfLiteError;
  }

  std::vector<float> weight_scales(channel_dim_size);
  TF_LITE_ENSURE_STATUS(GetSymmetricScalesFromMaxMin(
      quant_params, &weight_scales, error_reporter));

  // Per channel quantization
  if (channel_dim_size > 1) {
    for (int i = 0; i < channel_dim_size; ++i) {
      // Current scale is not compatible with bias. Adjust max/min values.
      if (std::abs(bias_data[i]) >=
          0.5 * input_scale * weight_scales[i] * kScale) {
        quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
                               (kMaxQuantizedValue / input_scale);
        quant_params->min[i] = -quant_params->max[i];
      }
    }
    // Per layer quantization
  } else if (channel_dim_size == 1) {
    const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
    const float bias_half_range =
        std::max(std::abs(*minmax.first), std::abs(*minmax.second));

    // Need to adjust weight min/max; not compatible with bias.
    if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
      quant_params->min[0] =
          2.0 * bias_half_range / kScale * (kMinQuantizedValue / input_scale);
      quant_params->max[0] =
          2.0 * bias_half_range / kScale * (kMaxQuantizedValue / input_scale);
    }
  }
  return kTfLiteOk;
}

// Quantizes a tensor per channel at the given index and fills both the scales
// and the quantized values.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter) {
  if (tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Cannot quantize. Tensor is null.");
    return kTfLiteError;
  }
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];
  // Fill per channel max and min values if needed.
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  if (!HasMinMax(tensor)) {
    TF_LITE_ENSURE_STATUS(
        FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
                             tensor->quantization.get(), error_reporter));
  }

  // Calculate scales per channel using max and min values from tensor.
  std::vector<float> scale_invs(channel_dim_size);
  const float half_scale = kMaxQuantizedValue;
  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
    const float half_range =
        std::max(std::abs(tensor->quantization->min[channel_idx]),
                 std::abs(tensor->quantization->max[channel_idx]));
    output_scales->at(channel_idx) = half_range / half_scale;
    if (half_range == 0) {
      scale_invs[channel_idx] = 0;
    } else {
      scale_invs[channel_idx] = half_scale / half_range;
    }
  }

  // Quantize the input values.
  SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
                                    channel_dim_index, output_value);
  return kTfLiteOk;
}

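// Symmetrically quantizes float values to int16 with the given scaling factor,
// clamping results to [-32767, 32767].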
std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
  std::vector<int16_t> buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int16_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value =
        static_cast<int32_t>(TfLiteRound(data[i] * scaling_factor_inv));
    buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

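// Quantizes a tensor's float buffer to int16 with the given scaling factor and
// writes the quantized data, scale, and zero point back into the model.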
TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricQuantizeFloatsToInt16(float_data, num_elements, scaling_factor);
  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int16_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT16, model, tensor,
                               error_reporter);
}

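// Quantizes the values of a 4-D tensor to int8 using the given per-channel
// inverse scales, clamping to [-127, 127].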
void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value) {
  // Quantize the values.
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          int index = Offset(tensor_dims, indices);
          const float val = input[index];
          const int32_t quantized_value =
              static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
          output_value->at(index) = std::min<int8_t>(
              kMaxQuantizedValue,
              std::max<int8_t>(kMinQuantizedValue, quantized_value));
        }
      }
    }
  }
}

// Quantize the tensor using the max and min values recorded in its
// quantization parameters. Applies per-layer quantization.
TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
                                               ErrorReporter* error_reporter) {
  if (model == nullptr || tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Missing buffer.");
    return kTfLiteError;
  }

  if (!HasMinMax(tensor)) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing min or max values for quantization.");
    return kTfLiteError;
  }
  if (tensor->quantization->min.size() != 1 ||
      tensor->quantization->max.size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected single entry in max and min.");
    return kTfLiteError;
  }

  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  // Quantize tensor using recorded min and max values.
  float scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(
      float_data, num_elements, quantized_buffer.data(),
      tensor->quantization->min[0], tensor->quantization->max[0],
      &scaling_factor);
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);
  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

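// Symmetrically quantizes a tensor to int8 per layer, computing the min and
// max directly from the float buffer contents.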
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  float min_value, max_value, scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
                                        quantized_buffer.data(), &min_value,
                                        &max_value, &scaling_factor);

  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

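// Converts a tensor's float32 buffer to float16, clamping values to the
// representable float16 range [-65504, 65504] before conversion.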
TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }

  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  // Copy single byte buffer data to float vector to guard against
  // misalignment.
  std::vector<float> float_vector(num_elements);
  uint8_t* first = buffer->data.data();
  std::copy(first, first + buffer->data.size(),
            reinterpret_cast<uint8_t*>(float_vector.data()));

  // Transform float data to float16.
  std::vector<Eigen::half> quantized_buffer;
  quantized_buffer.resize(num_elements);
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  std::transform(float_vector.begin(), float_vector.end(),
                 quantized_buffer.begin(), [=](float a) {
                   float clamped = std::min(std::max(a, kMinFloat16Value),
                                            kMaxFloat16Value);
                   return Eigen::half_impl::float_to_half_rtne(clamped);
                 });

  char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(
      half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);

  // Update the tensor type.
  tensor->type = TensorType_FLOAT16;

  return kTfLiteOk;
}

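// Stores the given scales, zero points, quantized dimension, and quantized
// buffer on the tensor and model, and sets the tensor type to output_type.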
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter) {
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Received zero_point of size %d and scales of size %d. "
        "These sizes should match.",
        zero_point.size(), scales.size());
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);
  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}

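// Symmetrically quantizes a four-dimensional tensor to int8 per channel along
// channel_dim_index and records the per-channel scales on the tensor.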
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter) {
  if (tensor->shape.size() != 4) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "SymmetricQuantizeTensorPerChannel requires tensor with four "
        "dimensions, but got %d dimension(s).",
        tensor->shape.size());
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_input_data =
      reinterpret_cast<const float*>(buffer->data.data());

  // Create container for output scale and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
      tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
      error_reporter));

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor, error_reporter);
}

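// Symmetrically quantizes bias values to BiasType (int32_t or int64_t),
// clamping to the type's symmetric range. With a single scale the same
// scaling factor is applied to every element (per-layer bias); with multiple
// scales, scales[idx] is applied to element idx (per-channel bias).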
template <class BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales) {
  std::vector<BiasType> buffer(num_elements);
  const BiasType kScale = std::numeric_limits<BiasType>::max();
  float scaling_factor_inv_per_layer = (scales[0] == 0) ? 0 : 1.0 / scales[0];

  for (int32_t idx = 0; idx < num_elements; idx++) {
    float scaling_factor_inv =
        scales.size() == 1 ? scaling_factor_inv_per_layer
                           : ((scales[idx] == 0) ? 0 : 1.0 / scales[idx]);
    const BiasType quantized_value =
        tflite::SafeCast<BiasType>(TfLiteRound(data[idx] * scaling_factor_inv));
    buffer[idx] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

template std::vector<std::int32_t> SymmetricBiasQuantize<std::int32_t>(
    const float* data, uint64_t num_elements, const std::vector<float>& scales);

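// Quantizes a bias tensor using a single per-layer scaling factor and writes
// the result back into the model as INT32 or INT64, depending on BiasType.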
template <class BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer = SymmetricBiasQuantize<BiasType>(float_data, num_elements,
                                                      {scaling_factor});

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, output_type, model, tensor,
                               error_reporter);
}

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

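// Quantizes a bias tensor per channel, using input_scale * weight_scales[i] as
// the scale for channel i, and writes the result back into the model as INT32
// or INT64, depending on BiasType.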
template <class BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricBiasQuantize<BiasType>(float_data, num_elements, scales);

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<int64_t> zero_point(scales.size(), 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_point, 0, uint8_buffer, buffer_size,
                               output_type, model, tensor, error_reporter);
}

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter) {
  // TODO(suharshs): Currently we conflate quantizing weights and constants.
  // It's possible that the right thing to do is to asymmetrically quantize the
  // weight. Add support for this.
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
                                             error_reporter);
  } else if (HasMinMax(tensor)) {
    // Quantize using recorded min/max values.
    return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
  } else {
    // Quantize using min/max from buffer.
    return SymmetricQuantizeTensor(model, tensor);
  }
}

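// Returns the effective scale of an operator: the product of the scales of the
// selected input tensors, the selected intermediate tensors, and any extra
// factors.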
float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors) {
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0, end = input_index.size(); i < end; ++i) {
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = factors.size(); i < end; ++i) {
    scale *= factors[i];
  }
  return scale;
}

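// Derives quantization parameters for an activation tensor from its recorded
// min/max and sets its type to activations_type (INT8 or INT16).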
TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter) {
  TF_LITE_ENSURE_STATUS(GetQuantizationParams(
      tensor, activations_type, tensor->quantization.get(), error_reporter));
  tensor->type = activations_type;
  return kTfLiteOk;
}

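// Assigns the given scale and a zero point of 0 to an activation tensor and
// marks it as INT16, replacing any existing quantization parameters.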
TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32_t zero_point = 0;
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

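// Returns the smallest exponent `pot` such that 2^pot >= max(|min|, |max|),
// searching exponents up to 10.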
int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  for (int i = 0; i < 10; i++) {
    if (std::pow(2, pot) < range) {
      pot++;
    }
  }
  return pot;
}

}  // namespace utils
}  // namespace optimize
}  // namespace tflite