/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <cmath>
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"

namespace tflite {
namespace optimize {
namespace utils {

namespace {
const int8_t kMinQuantizedValue = -127;
const int8_t kMaxQuantizedValue = 127;
}  // namespace

TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
  *num_elements = 1;
  for (const int64_t dim : tensor.shape) {
    if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
      return kTfLiteError;
    }
    *num_elements *= dim;
  }
  return kTfLiteOk;
}

// Nudge min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
//
// Although this code originates from FakeQuantization in quantized training,
// we may deviate from that implementation as we please since we do not fine
// tune the weights with quantized training.
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params) {
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(static_cast<float>(min), 0.0f);
  max = std::max(static_cast<float>(max), 0.0f);
  const float scale = (max - min) / (quant_max_float - quant_min_float);
  // Scale can be zero if min and max are exactly 0.0f.
  float zero_point_from_min = quant_min_float;
  if (scale != 0) {
    zero_point_from_min = quant_min_float - min / scale;
  }
  int64_t zero_point;
  if (zero_point_from_min < quant_min_float) {
    zero_point = static_cast<int64_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    zero_point = static_cast<int64_t>(quant_max);
  } else {
    zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
  }
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
}
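// Worked example (added comment, not part of the original source): with
// min = -1.0f, max = 3.0f and the int8 range [-128, 127],
// scale = (3.0f - (-1.0f)) / 255 ~ 0.015686 and
// zero_point_from_min = -128 - (-1.0f / 0.015686) ~ -64.25, which rounds to a
// zero point of -64, so the float value 0.0f maps exactly onto a grid point.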

void GetSymmetricQuantizationParams(
    float min, float max, const int half_quant_range,
    QuantizationParametersT* quantization_params) {
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);
  const float scale = std::max(std::abs(max), std::abs(min)) / half_quant_range;
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, 0);
}
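// Worked example (added comment): for int16 activations the caller below
// passes half_quant_range = 32767, so min = -2.5f, max = 4.0f yields
// scale = 4.0f / 32767 with a zero point of 0; the symmetric grid then spans
// roughly [-4.0, 4.0] even though the observed minimum was only -2.5.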

TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter) {
  if (activations_type == TensorType_INT8) {
    GetAsymmetricQuantizationParams(
        tensor->quantization->min[0], tensor->quantization->max[0],
        std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
        quantization_params);
  } else if (activations_type == TensorType_INT16) {
    const int half_quantized_range = 32767;
    GetSymmetricQuantizationParams(tensor->quantization->min[0],
                                   tensor->quantization->max[0],
                                   half_quantized_range, quantization_params);
  } else {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Unsupported activation type for quantize-activation: %d",
        activations_type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}

// Set the max and min quantization parameter for a single tensor given its
// values.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params) {
  const auto minmax = std::minmax_element(input, input + input_size);
  quantization_params->min.assign(1, *minmax.first);
  quantization_params->max.assign(1, *minmax.second);
}

TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int32_t>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter) {
  if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Min or max already present in tensor quantization params.");
    return kTfLiteError;
  }
  if (dimension.size() != 4) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected tensor with four dimensions, but got %d.",
                         dimension.size());
    return kTfLiteError;
  }
  if (channel_dim_index > 3) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Expected channel_dim_index to be less than four, but got %d.",
        channel_dim_index);
    return kTfLiteError;
  }
  const int32_t channel_dim_size = dimension[channel_dim_index];
  quantization_params->quantized_dimension = channel_dim_index;
  quantization_params->min = std::vector<float>(channel_dim_size);
  quantization_params->max = std::vector<float>(channel_dim_size);
  std::vector<bool> has_min_max_value(channel_dim_size, false);
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};

  // Compute min max ranges per channel
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          const float val = input[Offset(tensor_dims, indices)];
          if (has_min_max_value[channel_idx]) {
            if (quantization_params->min[channel_idx] > val) {
              quantization_params->min[channel_idx] = val;
            } else if (quantization_params->max[channel_idx] < val) {
              quantization_params->max[channel_idx] = val;
            }
          } else {
            quantization_params->min[channel_idx] = val;
            quantization_params->max[channel_idx] = val;
            has_min_max_value[channel_idx] = true;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}
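// Example (added comment; the layout is an assumption, not stated in this
// file): a Conv2D filter in TFLite is typically laid out as
// {output_channels, height, width, input_channels}, so a per-channel caller
// passes channel_dim_index == 0 and min[c] / max[c] end up holding the value
// range of output filter c.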

// Populates the scales vector based on max and min values of quant_params
TfLiteStatus GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                                          std::vector<float>* scales,
                                          ErrorReporter* error_reporter) {
  // Check that max and min values are present and their sizes match.
  if (quant_params->min.empty() || quant_params->max.empty()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Max and min values are not populated.");
    return kTfLiteError;
  }
  if (quant_params->min.size() != quant_params->max.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Dimensions of max and min values do not match.");
    return kTfLiteError;
  }
  if (scales->size() != quant_params->min.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Provided scale vector has incorrect size.");
    return kTfLiteError;
  }

  // num_channels is calculated from min.size() to infer whether quantization
  // is per axis.
  int num_channels = quant_params->min.size();
  // Calculate scales per channel.
  for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
    const float half_range = std::max(std::abs(quant_params->min[channel_idx]),
                                      std::abs(quant_params->max[channel_idx]));
    scales->at(channel_idx) = half_range / kMaxQuantizedValue;
  }
  return kTfLiteOk;
}
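// Example (added comment): with per-layer quantization min and max each hold a
// single entry; min = -6.0f, max = 3.0f gives a symmetric int8 scale of
// max(6.0f, 3.0f) / 127 ~ 0.0472.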

// Checks that the bias is quantized to within the middle half of the
// allowable bit range determined by the scales of the input and weight tensors.
// If this condition is not satisfied, the scale of the weights is increased in
// order to prevent overflow. The scale of the bias is not set here, only the
// min/max.
// The quant_params are the quantization parameters that correspond to the
// weight tensor.
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter) {
  // TODO(dmolitor) Allow adjusting activation scale.
  // TODO(dmolitor) Tighten scale adjustment.
  // TODO(dmolitor) Test using a separate strategy for scales of 0.
  const int32_t kScale = std::numeric_limits<int32_t>::max();
  if (quant_params == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing max and min values for weight tensor.");
    return kTfLiteError;
  }
  // channel_dim_size is calculated from min.size() to infer whether
  // quantization is per axis
  int channel_dim_size = quant_params->min.size();
  if (channel_dim_size == 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Missing weight scales. Unable to check compatibility with bias "
        "scale.");
    return kTfLiteError;
  }

  std::vector<float> weight_scales(channel_dim_size);
  TF_LITE_ENSURE_STATUS(GetSymmetricScalesFromMaxMin(
      quant_params, &weight_scales, error_reporter));

  // Per channel quantization
  if (channel_dim_size > 1) {
    for (int i = 0; i < channel_dim_size; ++i) {
      // Current scale is not compatible with bias. Adjust max/min values.
      if (std::abs(bias_data[i]) >=
          0.5 * input_scale * weight_scales[i] * kScale) {
        quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
                               (kMaxQuantizedValue / input_scale);
        quant_params->min[i] = -quant_params->max[i];
      }
    }
    // Per layer quantization
  } else if (channel_dim_size == 1) {
    const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
    const float bias_half_range =
        std::max(std::abs(*minmax.first), std::abs(*minmax.second));

    // Need to adjust weight min/max; not compatible with bias.
    if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
      quant_params->min[0] =
          2.0 * bias_half_range / kScale * (kMinQuantizedValue / input_scale);
      quant_params->max[0] =
          2.0 * bias_half_range / kScale * (kMaxQuantizedValue / input_scale);
    }
  }
  return kTfLiteOk;
}
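// Rationale (added comment, my reading of the check above): the bias is later
// quantized to int32 with scale input_scale * weight_scale, so the largest
// representable bias magnitude is roughly
// input_scale * weight_scale * std::numeric_limits<int32_t>::max(). Once
// |bias| reaches half of that bound, the weight min/max are widened so the
// derived weight scale grows and the quantized bias lands near the middle of
// the int32 range instead of saturating.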

// Per-channel quantizes a tensor at the given index and fills both scales and
// quantized values.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter) {
  if (tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Cannot quantize. Tensor is null.");
    return kTfLiteError;
  }
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];
  // Fill per channel max and min values if needed
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  if (!HasMinMax(tensor)) {
    TF_LITE_ENSURE_STATUS(
        FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
                             tensor->quantization.get(), error_reporter));
  }

  // Calculate scales per channel using max and min values from tensor.
  std::vector<float> scale_invs(channel_dim_size);
  const float half_scale = kMaxQuantizedValue;
  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
    const float half_range =
        std::max(std::abs(tensor->quantization->min[channel_idx]),
                 std::abs(tensor->quantization->max[channel_idx]));
    output_scales->at(channel_idx) = half_range / half_scale;
    if (half_range == 0) {
      scale_invs[channel_idx] = 0;
    } else {
      scale_invs[channel_idx] = half_scale / half_range;
    }
  }

  // Quantize the input values.
  SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
                                    channel_dim_index, output_value);
  return kTfLiteOk;
}

std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
  std::vector<int16_t> buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int16_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value =
        static_cast<int32_t>(TfLiteRound(data[i] * scaling_factor_inv));
    buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricQuantizeFloatsToInt16(float_data, num_elements, scaling_factor);
  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int16_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT16, model, tensor,
                               error_reporter);
}

void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value) {
  // Quantize the values.
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          int index = Offset(tensor_dims, indices);
          const float val = input[index];
          const int32_t quantized_value =
              static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
          output_value->at(index) = std::min<int8_t>(
              kMaxQuantizedValue,
              std::max<int8_t>(kMinQuantizedValue, quantized_value));
        }
      }
    }
  }
}

// Quantize the tensor using the max and min values recorded in its quantization
// parameters. Applies per-layer quantization.
TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
                                               ErrorReporter* error_reporter) {
  if (model == nullptr || tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Missing buffer.");
    return kTfLiteError;
  }

  if (!HasMinMax(tensor)) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing min or max values for quantization.");
    return kTfLiteError;
  }
  if (tensor->quantization->min.size() != 1 ||
      tensor->quantization->max.size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected single entry in max and min.");
    return kTfLiteError;
  }

  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  // Quantize tensor using recorded min and max values
  float scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(
      float_data, num_elements, quantized_buffer.data(),
      tensor->quantization->min[0], tensor->quantization->max[0],
      &scaling_factor);
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);
  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  float min_value, max_value, scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
                                        quantized_buffer.data(), &min_value,
                                        &max_value, &scaling_factor);

  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }

  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  // Copy single byte buffer data to float vector to guard against misalignment.
  std::vector<float> float_vector(num_elements);
  uint8_t* first = buffer->data.data();
  std::copy(first, first + buffer->data.size(),
            reinterpret_cast<uint8_t*>(float_vector.data()));

  // Transform float data to float16.
  std::vector<Eigen::half> quantized_buffer;
  quantized_buffer.resize(num_elements);
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  std::transform(float_vector.begin(), float_vector.end(),
                 quantized_buffer.begin(), [=](float a) {
                   float clamped = std::min(std::max(a, kMinFloat16Value),
                                            kMaxFloat16Value);
                   return Eigen::half_impl::float_to_half_rtne(clamped);
                 });

  char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(
      half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);

  // Update the tensor type.
  tensor->type = TensorType_FLOAT16;

  return kTfLiteOk;
}

TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter) {
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Received zero_point of size %d and scales of size %d. "
        "These sizes should match.",
        zero_point.size(), scales.size());
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(), zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);
  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter) {
  if (tensor->shape.size() != 4) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "SymmetricQuantizeTensorPerChannel requires tensor with four "
        "dimensions, but got %d dimension(s).",
        tensor->shape.size());
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_input_data =
      reinterpret_cast<const float*>(buffer->data.data());

  // Create container for output scale and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
      tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
      error_reporter));

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor, error_reporter);
}

template <class BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales) {
  std::vector<BiasType> buffer(num_elements);
  const BiasType kScale = std::numeric_limits<BiasType>::max();
  float scaling_factor_inv_per_layer = (scales[0] == 0) ? 0 : 1.0 / scales[0];

  for (int32_t idx = 0; idx < num_elements; idx++) {
    float scaling_factor_inv =
        scales.size() == 1 ? scaling_factor_inv_per_layer
                           : ((scales[idx] == 0) ? 0 : 1.0 / scales[idx]);
    const BiasType quantized_value =
        tflite::SafeCast<BiasType>(TfLiteRound(data[idx] * scaling_factor_inv));
    buffer[idx] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

template std::vector<std::int32_t> SymmetricBiasQuantize<std::int32_t>(
    const float* data, uint64_t num_elements, const std::vector<float>& scales);

template <class BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer = SymmetricBiasQuantize<BiasType>(float_data, num_elements,
                                                      {scaling_factor});

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, output_type, model, tensor,
                               error_reporter);
}

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template <class BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data = reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricBiasQuantize<BiasType>(float_data, num_elements, scales);

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<int64_t> zero_point(scales.size(), 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_point, 0, uint8_buffer, buffer_size,
                               output_type, model, tensor, error_reporter);
}

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);
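
// Why the bias scale is input_scale * weight_scale (added comment, my
// explanation): in the integer kernels the accumulator holds
// sum(q_input * q_weight), whose real-valued unit is input_scale *
// weight_scale, so the bias must be quantized with that same scale before it
// can be added directly to the accumulator.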

TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter) {
  // TODO(suharshs): Currently we conflate quantizing weights and constants. It's
  // possible that the right thing to do is to asymmetrically quantize the
  // weight. Add support for this.
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
                                             error_reporter);
  } else if (HasMinMax(tensor)) {
    // Quantize using recorded min/max values.
    return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
  } else {
    // Quantize using min/max from buffer.
    return SymmetricQuantizeTensor(model, tensor);
  }
}

float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors) {
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0, end = input_index.size(); i < end; ++i) {
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = factors.size(); i < end; ++i) {
    scale *= factors[i];
  }
  return scale;
}

TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter) {
  TF_LITE_ENSURE_STATUS(GetQuantizationParams(
      tensor, activations_type, tensor->quantization.get(), error_reporter));
  tensor->type = activations_type;
  return kTfLiteOk;
}

TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32 zero_point = 0;
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

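// Added descriptive comment: returns the smallest non-negative exponent pot
// such that 2^pot covers max(|min|, |max|); the search runs at most 10 steps,
// so the result is capped at 10 even when the range exceeds 2^10.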
int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  for (int i = 0; i < 10; i++) {
    if (std::pow(2, pot) < range) {
      pot++;
    }
  }
  return pot;
}

}  // namespace utils
}  // namespace optimize
}  // namespace tflite