• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/tools/optimize/quantization_utils.h"
16 
17 #include <cmath>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 
22 #include "absl/memory/memory.h"
23 #include "third_party/eigen3/Eigen/Core"
24 #include "tensorflow/lite/c/common.h"
25 #include "tensorflow/lite/core/api/error_reporter.h"
26 #include "tensorflow/lite/kernels/internal/quantization_util.h"
27 #include "tensorflow/lite/kernels/internal/round.h"
28 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
29 #include "tensorflow/lite/kernels/internal/types.h"
30 #include "tensorflow/lite/minimal_logging.h"
31 #include "tensorflow/lite/schema/schema_generated.h"
32 #include "tensorflow/lite/tools/optimize/model_utils.h"
33 
34 namespace tflite {
35 namespace optimize {
36 namespace utils {
37 
38 namespace {
39 const int8_t kMinQuantizedValue = -127;
40 const int8_t kMaxQuantizedValue = 127;
41 }  // namespace
42 
NumElements(const TensorT & tensor,uint64_t * num_elements)43 TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
44   *num_elements = 1;
45   for (const int64_t dim : tensor.shape) {
46     if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
47       return kTfLiteError;
48     }
49     *num_elements *= dim;
50   }
51   return kTfLiteOk;
52 }
53 
54 // Nudge min and max so that floating point 0 falls exactly on a quantized
55 // value, returning the nudges scale and zero_point.
56 //
57 // Although this code originates from FakeQuantization in quantized training,
58 // we may deviate from that implementation as we please since we do not fine
59 // tune the weights with quantized training.
GetAsymmetricQuantizationParams(float min,float max,const int quant_min,const int quant_max,QuantizationParametersT * quantization_params)60 void GetAsymmetricQuantizationParams(
61     float min, float max, const int quant_min, const int quant_max,
62     QuantizationParametersT* quantization_params) {
63   const float quant_min_float = static_cast<float>(quant_min);
64   const float quant_max_float = static_cast<float>(quant_max);
65   // Adjust the boundaries to guarantee 0 is included.
66   min = std::min(static_cast<float>(min), 0.0f);
67   max = std::max(static_cast<float>(max), 0.0f);
68   const float scale = (max - min) / (quant_max_float - quant_min_float);
69   // Scale can be zero if min and max are exactly 0.0f.
70   float zero_point_from_min = quant_min_float;
71   if (scale != 0) {
72     zero_point_from_min = quant_min_float - min / scale;
73   }
74   int64_t zero_point;
75   if (zero_point_from_min < quant_min_float) {
76     zero_point = static_cast<int64_t>(quant_min);
77   } else if (zero_point_from_min > quant_max_float) {
78     zero_point = static_cast<int64_t>(quant_max);
79   } else {
80     zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
81   }
82   quantization_params->min = std::vector<float>(1, min);
83   quantization_params->max = std::vector<float>(1, max);
84   quantization_params->scale = std::vector<float>(1, scale);
85   quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
86 }
87 
88 // Set the max and min quantization parameter for a single tensor given its
89 // values.
FillSingleMinMax(const float * const input,const uint64_t input_size,QuantizationParametersT * quantization_params)90 void FillSingleMinMax(const float* const input, const uint64_t input_size,
91                       QuantizationParametersT* quantization_params) {
92   const auto minmax = std::minmax_element(input, input + input_size);
93   quantization_params->min.assign(1, *minmax.first);
94   quantization_params->max.assign(1, *minmax.second);
95 }
96 
FillPerChannelMinMax(const float * const input,const std::vector<int32_t> & dimension,int32_t channel_dim_index,QuantizationParametersT * quantization_params,ErrorReporter * error_reporter)97 TfLiteStatus FillPerChannelMinMax(const float* const input,
98                                   const std::vector<int32_t>& dimension,
99                                   int32_t channel_dim_index,
100                                   QuantizationParametersT* quantization_params,
101                                   ErrorReporter* error_reporter) {
102   if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
103     error_reporter->Report(
104         "Min or max already present in tensor quantization params.");
105     return kTfLiteError;
106   }
107   if (dimension.size() != 4) {
108     error_reporter->Report("Expected tensor with four dimensions, but got %d.",
109                            dimension.size());
110     return kTfLiteError;
111   }
112   if (channel_dim_index > 3) {
113     error_reporter->Report(
114         "Expected channel_dim_index to be less than four, but got %d.",
115         channel_dim_index);
116     return kTfLiteError;
117   }
118   const int32_t channel_dim_size = dimension[channel_dim_index];
119   quantization_params->quantized_dimension = channel_dim_index;
120   quantization_params->min = std::vector<float>(channel_dim_size);
121   quantization_params->max = std::vector<float>(channel_dim_size);
122   std::vector<bool> has_min_max_value(channel_dim_size, false);
123   int indices[4];
124   RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
125                            dimension[3]};
126 
127   // Compute min max ranges per channel
128   for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
129     for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
130       for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
131         for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
132           int channel_idx = indices[channel_dim_index];
133           const float val = input[Offset(tensor_dims, indices)];
134           if (has_min_max_value[channel_idx]) {
135             if (quantization_params->min[channel_idx] > val) {
136               quantization_params->min[channel_idx] = val;
137             } else if (quantization_params->max[channel_idx] < val) {
138               quantization_params->max[channel_idx] = val;
139             }
140           } else {
141             quantization_params->min[channel_idx] = val;
142             quantization_params->max[channel_idx] = val;
143             has_min_max_value[channel_idx] = true;
144           }
145         }
146       }
147     }
148   }
149   return kTfLiteOk;
150 }
151 
152 // Populates the scales vector based on max and min values of quant_params
GetSymmetricScalesFromMaxMin(QuantizationParametersT * quant_params,std::vector<float> * scales,ErrorReporter * error_reporter)153 TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
154                                           std::vector<float>* scales,
155                                           ErrorReporter* error_reporter) {
156   // Check that max and min values are present and their sizes match.
157   if (quant_params->min.empty() || quant_params->max.empty()) {
158     error_reporter->Report("Max and min values are not populated.");
159     return kTfLiteError;
160   }
161   if (quant_params->min.size() != quant_params->max.size()) {
162     error_reporter->Report("Dimensions of max and min values do not match.");
163     return kTfLiteError;
164   }
165   if (scales->size() != quant_params->min.size()) {
166     error_reporter->Report("Provided scale vector has incorrect size.");
167     return kTfLiteError;
168   }
169 
170   // num_channels is calculated from min.size() to infer whether quantization
171   // is per axis.
172   int num_channels = quant_params->min.size();
173   // Calculate scales per channel.
174   for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
175     const float half_range = std::max(std::abs(quant_params->min[channel_idx]),
176                                       std::abs(quant_params->max[channel_idx]));
177     scales->at(channel_idx) = half_range / kMaxQuantizedValue;
178   }
179   return kTfLiteOk;
180 }
181 
182 // Checks that the bias is quantized to within the middle half of the
183 // allowable bit range determined by the scales of the input and weight tensors.
184 // If this condition is not satisfied, the scale of the weights is increased in
185 // order to prevent overflow. The scale of the bias is not set here, only the
186 // min/max.
187 // The quant_params are the quantization parameters that correspond to the
188 // weight tensor.
AdjustWeightsForBiasScale(QuantizationParametersT * quant_params,const float * bias_data,const size_t bias_size,const float input_scale,ErrorReporter * error_reporter)189 TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
190                                        const float* bias_data,
191                                        const size_t bias_size,
192                                        const float input_scale,
193                                        ErrorReporter* error_reporter) {
194   // TODO(dmolitor) Allow adjusting activation scale.
195   // TODO(dmolitor) Tighten scale adjustment.
196   // TODO(dmolitor) Test using a separate strategy for scales of 0.
197   const int32_t kScale = std::numeric_limits<int32_t>::max();
198   if (quant_params == nullptr) {
199     error_reporter->Report("Missing max and min values for weight tensor.");
200     return kTfLiteError;
201   }
202   // channel_dim_size is calculated from min.size() to infer whether
203   // quantization is per axis
204   int channel_dim_size = quant_params->min.size();
205   if (channel_dim_size == 0) {
206     error_reporter->Report(
207         "Missing weight scales. Unable to check compatibility with bias "
208         "scale.");
209     return kTfLiteError;
210   }
211 
212   std::vector<float> weight_scales(channel_dim_size);
213   TF_LITE_ENSURE_STATUS(GetSymmetricScalesFromMaxMin(
214       quant_params, &weight_scales, error_reporter));
215 
216   // Per channel quantization
217   if (channel_dim_size > 1) {
218     for (size_t i = 0; i < channel_dim_size; ++i) {
219       // Current scale is not compatible with bias. Adjust max/min values.
220       if (std::abs(bias_data[i]) >=
221           0.5 * input_scale * weight_scales[i] * kScale) {
222         quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
223                                (kMaxQuantizedValue / input_scale);
224         quant_params->min[i] = -quant_params->max[i];
225       }
226     }
227     // Per layer quantization
228   } else if (channel_dim_size == 1) {
229     const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
230     const float bias_half_range =
231         std::max(std::abs(*minmax.first), std::abs(*minmax.second));
232 
233     // Need to adjust weight min/max; not compatible with bias.
234     if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
235       quant_params->min[0] =
236           2.0 * bias_half_range / kScale * (kMinQuantizedValue / input_scale);
237       quant_params->max[0] =
238           2.0 * bias_half_range / kScale * (kMaxQuantizedValue / input_scale);
239     }
240   }
241   return kTfLiteOk;
242 }
243 
244 // Per-channel quantize a tensor at the given index and fills both scales and
245 // quantized values.
SymmetricPerChannelQuantization(TensorT * tensor,const float * const input,int32_t channel_dim_index,std::vector<float> * output_scales,std::vector<int8_t> * output_value,ErrorReporter * error_reporter)246 TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
247                                              const float* const input,
248                                              int32_t channel_dim_index,
249                                              std::vector<float>* output_scales,
250                                              std::vector<int8_t>* output_value,
251                                              ErrorReporter* error_reporter) {
252   if (tensor == nullptr) {
253     error_reporter->Report("Cannot quantize. Tensor is null.");
254     return kTfLiteError;
255   }
256   const int32_t channel_dim_size = tensor->shape[channel_dim_index];
257   // Fill per channel max and min values if needed
258   if (tensor->quantization == nullptr) {
259     tensor->quantization = absl::make_unique<QuantizationParametersT>();
260   }
261   if (!HasMinMax(tensor)) {
262     TF_LITE_ENSURE_STATUS(
263         FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
264                              tensor->quantization.get(), error_reporter));
265   }
266 
267   // Calculate scales per channel using max and min values from tensor.
268   std::vector<float> scale_invs(channel_dim_size);
269   const float half_scale = kMaxQuantizedValue;
270   for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
271     const float half_range =
272         std::max(std::abs(tensor->quantization->min[channel_idx]),
273                  std::abs(tensor->quantization->max[channel_idx]));
274     output_scales->at(channel_idx) = half_range / half_scale;
275     if (half_range == 0) {
276       scale_invs[channel_idx] = 0;
277     } else {
278       scale_invs[channel_idx] = half_scale / half_range;
279     }
280   }
281 
282   // Quantize the input values.
283   SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
284                                     channel_dim_index, output_value);
285   return kTfLiteOk;
286 }
287 
SymmetricQuantizeFloatsToInt16(ModelT * model,TensorT * tensor,float scaling_factor,ErrorReporter * error_reporter)288 TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
289                                             float scaling_factor,
290                                             ErrorReporter* error_reporter) {
291   // Compute the inverse of scale.
292   const float scaling_factor_inv =
293       (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
294 
295   const BufferT* buffer = model->buffers[tensor->buffer].get();
296   const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
297   uint64_t num_elements;
298   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
299 
300   std::vector<int16_t> final_buffer(num_elements);
301   const int32_t kScale = std::numeric_limits<int16_t>::max();
302 
303   for (size_t i = 0; i < num_elements; i++) {
304     const int32_t quantized_value =
305         static_cast<int32_t>(TfLiteRound(float_data[i] * scaling_factor_inv));
306     final_buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
307   }
308 
309   // Set the buffers and output type.
310   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
311   size_t buffer_size = num_elements * sizeof(int16_t);
312   std::vector<float> scales(1, scaling_factor);
313   std::vector<int64_t> zero_points(1, 0);
314   return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
315                                buffer_size, TensorType_INT16, model, tensor,
316                                error_reporter);
317 }
318 
SymmetricPerChannelQuantizeValues(const float * const input,const std::vector<float> & scales_inv,const std::vector<int32_t> & dimension,int32_t channel_dim_index,std::vector<int8_t> * output_value)319 void SymmetricPerChannelQuantizeValues(const float* const input,
320                                        const std::vector<float>& scales_inv,
321                                        const std::vector<int32_t>& dimension,
322                                        int32_t channel_dim_index,
323                                        std::vector<int8_t>* output_value) {
324   // Quantize the values.
325   int indices[4];
326   RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
327                            dimension[3]};
328   for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
329     for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
330       for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
331         for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
332           int channel_idx = indices[channel_dim_index];
333           int index = Offset(tensor_dims, indices);
334           const float val = input[index];
335           const int32_t quantized_value =
336               static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
337           output_value->at(index) = std::min<int8_t>(
338               kMaxQuantizedValue,
339               std::max<int8_t>(kMinQuantizedValue, quantized_value));
340         }
341       }
342     }
343   }
344 }
345 
346 // Quantize the tensor using the max and min values recorded in its quantization
347 // parameters. Applies per-layer quantization.
SymmetricQuantizeTensorFromMinMax(ModelT * model,TensorT * tensor,ErrorReporter * error_reporter)348 TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
349                                                ErrorReporter* error_reporter) {
350   if (model == nullptr || tensor == nullptr) {
351     error_reporter->Report("No tensor to quantize.");
352     return kTfLiteError;
353   }
354 
355   BufferT* buffer = model->buffers[tensor->buffer].get();
356   if (buffer == nullptr) {
357     error_reporter->Report("Missing buffer.");
358     return kTfLiteError;
359   }
360 
361   if (!HasMinMax(tensor)) {
362     error_reporter->Report("Missing min or max values for quantization.");
363     return kTfLiteError;
364   }
365   if (tensor->quantization->min.size() != 1 ||
366       tensor->quantization->max.size() != 1) {
367     error_reporter->Report("Expected single entry in max and min.");
368     return kTfLiteError;
369   }
370 
371   const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
372   uint64_t num_elements;
373   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
374 
375   std::vector<int8_t> quantized_buffer;
376   quantized_buffer.resize(num_elements);
377 
378   // Quantize tensor using recorded min and max values
379   float scaling_factor;
380   tensor_utils::SymmetricQuantizeFloats(
381       float_data, num_elements, quantized_buffer.data(),
382       tensor->quantization->min[0], tensor->quantization->max[0],
383       &scaling_factor);
384   tensor->quantization->scale = std::vector<float>(1, scaling_factor);
385   tensor->quantization->zero_point = std::vector<int64_t>(1, 0);
386 
387   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
388   model->buffers[tensor->buffer]->data.assign(uint8_buffer,
389                                               uint8_buffer + num_elements);
390   // Update the tensor type.
391   tensor->type = TensorType_INT8;
392 
393   return kTfLiteOk;
394 }
395 
SymmetricQuantizeTensor(ModelT * model,TensorT * tensor)396 TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
397   if (model == nullptr || tensor == nullptr) {
398     TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
399     return kTfLiteError;
400   }
401 
402   BufferT* buffer = model->buffers[tensor->buffer].get();
403   if (buffer == nullptr) {
404     TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
405     return kTfLiteError;
406   }
407   const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
408   uint64_t num_elements;
409   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
410 
411   std::vector<int8_t> quantized_buffer;
412   quantized_buffer.resize(num_elements);
413 
414   float min_value, max_value, scaling_factor;
415   tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
416                                         quantized_buffer.data(), &min_value,
417                                         &max_value, &scaling_factor);
418 
419   if (tensor->quantization == nullptr) {
420     tensor->quantization = absl::make_unique<QuantizationParametersT>();
421   }
422   tensor->quantization->scale = std::vector<float>(1, scaling_factor);
423   tensor->quantization->zero_point = std::vector<int64_t>(1, 0);
424 
425   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
426   model->buffers[tensor->buffer]->data.assign(uint8_buffer,
427                                               uint8_buffer + num_elements);
428 
429   // Update the tensor type.
430   tensor->type = TensorType_INT8;
431 
432   return kTfLiteOk;
433 }
434 
QuantizeTensorFloat16(ModelT * model,TensorT * tensor)435 TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
436   if (model == nullptr || tensor == nullptr) {
437     TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
438     return kTfLiteError;
439   }
440 
441   BufferT* buffer = model->buffers[tensor->buffer].get();
442   if (buffer == nullptr) {
443     TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
444     return kTfLiteError;
445   }
446 
447   uint64_t num_elements;
448   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
449 
450   // Copy single byte buffer data to float vector to guard against misalignment.
451   std::vector<float> float_vector(num_elements);
452   uint8_t* first = buffer->data.data();
453   std::copy(first, first + buffer->data.size(),
454             reinterpret_cast<uint8_t*>(float_vector.data()));
455 
456   // Transform float data to float16.
457   std::vector<Eigen::half> quantized_buffer;
458   quantized_buffer.resize(num_elements);
459   std::transform(
460       float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
461       [](float a) { return Eigen::half_impl::float_to_half_rtne(a); });
462 
463   char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
464   model->buffers[tensor->buffer]->data.assign(
465       half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);
466 
467   // Update the tensor type.
468   tensor->type = TensorType_FLOAT16;
469 
470   return kTfLiteOk;
471 }
472 
AddQuantizationParams(const std::vector<float> & scales,const std::vector<int64_t> & zero_point,int quantized_dimension,const uint8_t * buffer_data,size_t buffer_size,TensorType output_type,ModelT * model,TensorT * tensor,ErrorReporter * error_reporter)473 TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
474                                    const std::vector<int64_t>& zero_point,
475                                    int quantized_dimension,
476                                    const uint8_t* buffer_data,
477                                    size_t buffer_size, TensorType output_type,
478                                    ModelT* model, TensorT* tensor,
479                                    ErrorReporter* error_reporter) {
480   if (tensor->quantization == nullptr) {
481     tensor->quantization = absl::make_unique<QuantizationParametersT>();
482   }
483   tensor->quantization->scale.assign(scales.begin(), scales.end());
484   if (zero_point.size() != scales.size()) {
485     error_reporter->Report(
486         "Received zero_point of size %d and scales of size %d. "
487         "These sizes should match.",
488         zero_point.size(), scales.size());
489     return kTfLiteError;
490   }
491   tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
492   tensor->quantization->quantized_dimension = quantized_dimension;
493   model->buffers[tensor->buffer]->data.assign(buffer_data,
494                                               buffer_data + buffer_size);
495   // Update the tensor type.
496   tensor->type = output_type;
497   return kTfLiteOk;
498 }
499 
SymmetricQuantizeTensorPerChannel(ModelT * model,TensorT * tensor,int32_t channel_dim_index,ErrorReporter * error_reporter)500 TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
501                                                int32_t channel_dim_index,
502                                                ErrorReporter* error_reporter) {
503   if (tensor->shape.size() != 4) {
504     error_reporter->Report(
505         "SymmetricQuantizeTensorPerChannel requires tensor with four "
506         "dimensions, but got %d dimension(s).",
507         tensor->shape.size());
508     return kTfLiteError;
509   }
510 
511   // Get dimensions.
512   uint64_t num_elements;
513   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
514   const int32_t channel_dim_size = tensor->shape[channel_dim_index];
515 
516   // Get input float data.
517   const BufferT* buffer = model->buffers[tensor->buffer].get();
518   const float* float_input_data =
519       reinterpret_cast<const float*>(buffer->data.data());
520 
521   // Create container for output scale and output data.
522   std::vector<float> scales(channel_dim_size);
523   std::vector<int8_t> final_buffer(num_elements);
524 
525   // Quantize the input data with respect to channel_dim_index.
526   TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
527       tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
528       error_reporter));
529 
530   // Set the buffers and output type.
531   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
532   const size_t buffer_size = num_elements * sizeof(int8_t);
533   std::vector<int64_t> zero_point(scales.size(), 0);
534   return AddQuantizationParams(scales, zero_point, channel_dim_index,
535                                uint8_buffer, buffer_size, TensorType_INT8,
536                                model, tensor, error_reporter);
537 }
538 
SymmetricPerLayerBiasQuantize(ModelT * model,TensorT * tensor,float scaling_factor,ErrorReporter * error_reporter)539 TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
540                                            float scaling_factor,
541                                            ErrorReporter* error_reporter) {
542   // Compute the inverse of scale.
543   const float scaling_factor_inv =
544       (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
545 
546   const BufferT* buffer = model->buffers[tensor->buffer].get();
547   const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
548   uint64_t num_elements;
549   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
550 
551   std::vector<int32_t> final_buffer(num_elements);
552   const int32_t kScale = std::numeric_limits<int32_t>::max();
553 
554   for (size_t i = 0; i < num_elements; i++) {
555     const int32_t quantized_value = tflite::SafeCast<int32_t>(
556         TfLiteRound(float_data[i] * scaling_factor_inv));
557     final_buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
558   }
559 
560   // Set the buffers and output type.
561   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
562   size_t buffer_size = num_elements * sizeof(int32_t);
563   std::vector<float> scales(1, scaling_factor);
564   std::vector<int64_t> zero_points(1, 0);
565   return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
566                                buffer_size, TensorType_INT32, model, tensor,
567                                error_reporter);
568 }
569 
SymmetricPerChannelBiasQuantize(ModelT * model,TensorT * tensor,float input_scale,const float * weight_scales,int number_of_dimension,ErrorReporter * error_reporter)570 TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
571                                              float input_scale,
572                                              const float* weight_scales,
573                                              int number_of_dimension,
574                                              ErrorReporter* error_reporter) {
575   // Compute scales.
576   std::vector<float> scales(number_of_dimension);
577   for (size_t i = 0; i < number_of_dimension; i++) {
578     scales[i] = input_scale * weight_scales[i];
579   }
580 
581   const BufferT* buffer = model->buffers[tensor->buffer].get();
582   const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
583   uint64_t num_elements;
584   TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
585 
586   std::vector<int32_t> final_buffer(num_elements);
587   const int32_t kScale = std::numeric_limits<int32_t>::max();
588 
589   for (int32_t channel_idx = 0; channel_idx < number_of_dimension;
590        channel_idx++) {
591     float scaling_factor = scales[channel_idx];
592     float scaling_factor_inv = (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
593     const int32_t quantized_value = tflite::SafeCast<int32_t>(
594         TfLiteRound(float_data[channel_idx] * scaling_factor_inv));
595     final_buffer[channel_idx] =
596         std::min(kScale, std::max(-kScale, quantized_value));
597   }
598 
599   // Set the buffers and output type.
600   uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
601   size_t buffer_size = num_elements * sizeof(int32_t);
602   std::vector<int64_t> zero_point(scales.size(), 0);
603   return AddQuantizationParams(scales, zero_point, 0, uint8_buffer, buffer_size,
604                                TensorType_INT32, model, tensor, error_reporter);
605 }
606 
QuantizeWeight(ModelT * model,TensorT * tensor,bool per_channel,int per_axis_index,ErrorReporter * error_reporter)607 TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
608                             int per_axis_index, ErrorReporter* error_reporter) {
609   // TODO(suharshs): Currently we conflate quantizing weights and constants. Its
610   // possible that the right thing to do is asymmetric quantize the weight. Add
611   // support for this.
612   if (per_channel) {
613     return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
614                                              error_reporter);
615   } else if (HasMinMax(tensor)) {
616     // Quantize using recorded min/max values.
617     return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
618   } else {
619     // Quantize using min/max from buffer.
620     return SymmetricQuantizeTensor(model, tensor);
621   }
622 }
623 
GetEffectiveScale(ModelT * model,SubGraphT * subgraph,int op_idx,std::vector<int> input_index,std::vector<int> intermediate_index,std::vector<float> factors)624 float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
625                         std::vector<int> input_index,
626                         std::vector<int> intermediate_index,
627                         std::vector<float> factors) {
628   float scale = 1.0f;
629   OperatorT* op = subgraph->operators[op_idx].get();
630   for (int i = 0; i < input_index.size(); ++i) {
631     const int index_local = input_index[i];
632     const int index_global = op->inputs[index_local];
633     const TensorT* tensor = subgraph->tensors[index_global].get();
634     scale *= tensor->quantization->scale[0];
635   }
636   for (int i = 0; i < intermediate_index.size(); ++i) {
637     const int index_local = intermediate_index[i];
638     const int index_global = op->intermediates[index_local];
639     const TensorT* tensor = subgraph->tensors[index_global].get();
640     scale *= tensor->quantization->scale[0];
641   }
642   for (int i = 0; i < factors.size(); ++i) {
643     scale *= factors[i];
644   }
645   return scale;
646 }
647 
QuantizeActivation(TensorT * tensor)648 void QuantizeActivation(TensorT* tensor) {
649   GetAsymmetricQuantizationParams(
650       tensor->quantization->min[0], tensor->quantization->max[0],
651       std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
652       tensor->quantization.get());
653   tensor->type = TensorType_INT8;
654 }
655 
QuantizeActivationToInt16(TensorT * tensor,float scale)656 TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
657   const int32 zero_point = 0;
658   tensor->quantization = absl::make_unique<QuantizationParametersT>();
659   tensor->quantization->scale.push_back(scale);
660   tensor->quantization->zero_point.push_back(zero_point);
661   tensor->type = TensorType_INT16;
662   return kTfLiteOk;
663 }
664 
// Returns the smallest power-of-two exponent `pot` (capped at 10) such that
// 2^pot covers the symmetric range max(|min|, |max|).
int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  while (pot < 10 && std::pow(2, pot) < range) {
    ++pot;
  }
  return pot;
}
675 
676 }  // namespace utils
677 }  // namespace optimize
678 }  // namespace tflite
679