/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <vector>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/kernels/internal/cppmath.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/tools/optimize/model_utils.h"

namespace tflite {
namespace optimize {
namespace utils {

namespace {
const int8_t kMinQuantizedValue = -127;
const int8_t kMaxQuantizedValue = 127;
}  // namespace

TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements) {
  *num_elements = 1;
  for (const int64_t dim : tensor.shape) {
    if (dim <= 0 || *num_elements > UINT64_MAX / static_cast<uint64_t>(dim)) {
      return kTfLiteError;
    }
    *num_elements *= dim;
  }
  return kTfLiteOk;
}

// Nudge min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
//
// Although this code originates from FakeQuantization in quantized training,
// we may deviate from that implementation as we please since we do not fine
// tune the weights with quantized training.
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params) {
  const float quant_min_float = static_cast<float>(quant_min);
  const float quant_max_float = static_cast<float>(quant_max);
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(static_cast<float>(min), 0.0f);
  max = std::max(static_cast<float>(max), 0.0f);
  const float scale = (max - min) / (quant_max_float - quant_min_float);
  // Scale can be zero if min and max are exactly 0.0f.
  float zero_point_from_min = quant_min_float;
  if (scale != 0) {
    zero_point_from_min = quant_min_float - min / scale;
  }
  int64_t zero_point;
  if (zero_point_from_min < quant_min_float) {
    zero_point = static_cast<int64_t>(quant_min);
  } else if (zero_point_from_min > quant_max_float) {
    zero_point = static_cast<int64_t>(quant_max);
  } else {
    zero_point = static_cast<int64_t>(std::round(zero_point_from_min));
  }
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, zero_point);
}

void GetSymmetricQuantizationParams(
    float min, float max, const int half_quant_range,
    QuantizationParametersT* quantization_params) {
  // Adjust the boundaries to guarantee 0 is included.
  min = std::min(min, 0.0f);
  max = std::max(max, 0.0f);
  const float scale =
      std::max(std::abs(max), std::abs(min)) / half_quant_range;
  quantization_params->min = std::vector<float>(1, min);
  quantization_params->max = std::vector<float>(1, max);
  quantization_params->scale = std::vector<float>(1, scale);
  quantization_params->zero_point = std::vector<int64_t>(1, 0);
}

TfLiteStatus GetQuantizationParams(TensorT* tensor,
                                   TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter) {
  if (activations_type == TensorType_INT8) {
    GetAsymmetricQuantizationParams(
        tensor->quantization->min[0], tensor->quantization->max[0],
        std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
        quantization_params);
  } else if (activations_type == TensorType_INT16) {
    const int half_quantized_range = 32767;
    GetSymmetricQuantizationParams(tensor->quantization->min[0],
                                   tensor->quantization->max[0],
                                   half_quantized_range, quantization_params);
  } else {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Unsupported activation type for quantize-activation: %d",
        activations_type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}

// Set the max and min quantization parameter for a single tensor given its
// values.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params) {
  const auto minmax = std::minmax_element(input, input + input_size);
  quantization_params->min.assign(1, *minmax.first);
  quantization_params->max.assign(1, *minmax.second);
}

TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int32_t>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter) {
  if (!quantization_params->min.empty() || !quantization_params->max.empty()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Min or max already present in tensor quantization params.");
    return kTfLiteError;
  }
  if (dimension.size() != 4) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected tensor with four dimensions, but got %d.",
                         dimension.size());
    return kTfLiteError;
  }
  if (channel_dim_index > 3) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Expected channel_dim_index to be less than four, but got %d.",
        channel_dim_index);
    return kTfLiteError;
  }
  const int32_t channel_dim_size = dimension[channel_dim_index];
  quantization_params->quantized_dimension = channel_dim_index;
  quantization_params->min = std::vector<float>(channel_dim_size);
  quantization_params->max = std::vector<float>(channel_dim_size);
  std::vector<bool> has_min_max_value(channel_dim_size, false);
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};

  // Compute min max ranges per channel
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          const float val = input[Offset(tensor_dims, indices)];
          if (has_min_max_value[channel_idx]) {
            if (quantization_params->min[channel_idx] > val) {
              quantization_params->min[channel_idx] = val;
            } else if (quantization_params->max[channel_idx] < val) {
              quantization_params->max[channel_idx] = val;
            }
          } else {
            quantization_params->min[channel_idx] = val;
            quantization_params->max[channel_idx] = val;
            has_min_max_value[channel_idx] = true;
          }
        }
      }
    }
  }
  return kTfLiteOk;
}

// Populates the scales vector based on max and min values of quant_params
TfLiteStatus
GetSymmetricScalesFromMaxMin(QuantizationParametersT* quant_params,
                             std::vector<float>* scales,
                             ErrorReporter* error_reporter) {
  // Check that max and min values are present and their sizes match.
  if (quant_params->min.empty() || quant_params->max.empty()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Max and min values are not populated.");
    return kTfLiteError;
  }
  if (quant_params->min.size() != quant_params->max.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Dimensions of max and min values do not match.");
    return kTfLiteError;
  }
  if (scales->size() != quant_params->min.size()) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Provided scale vector has incorrect size.");
    return kTfLiteError;
  }

  // num_channels is calculated from min.size() to infer whether quantization
  // is per axis.
  int num_channels = quant_params->min.size();
  // Calculate scales per channel.
  for (int channel_idx = 0; channel_idx < num_channels; ++channel_idx) {
    const float half_range =
        std::max(std::abs(quant_params->min[channel_idx]),
                 std::abs(quant_params->max[channel_idx]));
    scales->at(channel_idx) = half_range / kMaxQuantizedValue;
  }
  return kTfLiteOk;
}

// Checks that the bias is quantized to within the middle half of the
// allowable bit range determined by the scales of the input and weight
// tensors. If this condition is not satisfied, the scale of the weights is
// increased in order to prevent overflow. The scale of the bias is not set
// here, only the min/max.
// The quant_params are the quantization parameters that correspond to the
// weight tensor.
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter) {
  // TODO(dmolitor) Allow adjusting activation scale.
  // TODO(dmolitor) Tighten scale adjustment.
  // TODO(dmolitor) Test using a separate strategy for scales of 0.
  const int32_t kScale = std::numeric_limits<int32_t>::max();
  if (quant_params == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing max and min values for weight tensor.");
    return kTfLiteError;
  }
  // channel_dim_size is calculated from min.size() to infer whether
  // quantization is per axis.
  int channel_dim_size = quant_params->min.size();
  if (channel_dim_size == 0) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Missing weight scales. Unable to check compatibility with bias "
        "scale.");
    return kTfLiteError;
  }

  std::vector<float> weight_scales(channel_dim_size);
  TF_LITE_ENSURE_STATUS(
      GetSymmetricScalesFromMaxMin(quant_params, &weight_scales,
                                   error_reporter));

  // Per channel quantization
  if (channel_dim_size > 1) {
    for (int i = 0; i < channel_dim_size; ++i) {
      // Current scale is not compatible with bias. Adjust max/min values.
      if (std::abs(bias_data[i]) >=
          0.5 * input_scale * weight_scales[i] * kScale) {
        quant_params->max[i] = 2.0 * std::abs(bias_data[i]) / kScale *
                               (kMaxQuantizedValue / input_scale);
        quant_params->min[i] = -quant_params->max[i];
      }
    }
    // Per layer quantization
  } else if (channel_dim_size == 1) {
    const auto minmax = std::minmax_element(bias_data, bias_data + bias_size);
    const float bias_half_range =
        std::max(std::abs(*minmax.first), std::abs(*minmax.second));

    // Need to adjust weight min/max; not compatible with bias.
    if (bias_half_range / kScale >= 0.5 * input_scale * weight_scales[0]) {
      quant_params->min[0] = 2.0 * bias_half_range / kScale *
                             (kMinQuantizedValue / input_scale);
      quant_params->max[0] = 2.0 * bias_half_range / kScale *
                             (kMaxQuantizedValue / input_scale);
    }
  }
  return kTfLiteOk;
}

// Per-channel quantizes a tensor at the given index and fills both scales and
// quantized values.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter) {
  if (tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Cannot quantize. Tensor is null.");
    return kTfLiteError;
  }
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];
  // Fill per channel max and min values if needed.
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  if (!HasMinMax(tensor)) {
    TF_LITE_ENSURE_STATUS(
        FillPerChannelMinMax(input, tensor->shape, channel_dim_index,
                             tensor->quantization.get(), error_reporter));
  }

  // Calculate scales per channel using max and min values from tensor.
  std::vector<float> scale_invs(channel_dim_size);
  const float half_scale = kMaxQuantizedValue;
  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
    const float half_range =
        std::max(std::abs(tensor->quantization->min[channel_idx]),
                 std::abs(tensor->quantization->max[channel_idx]));
    output_scales->at(channel_idx) = half_range / half_scale;
    if (half_range == 0) {
      scale_invs[channel_idx] = 0;
    } else {
      scale_invs[channel_idx] = half_scale / half_range;
    }
  }

  // Quantize the input values.
  SymmetricPerChannelQuantizeValues(input, scale_invs, tensor->shape,
                                    channel_dim_index, output_value);
  return kTfLiteOk;
}

std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor) {
  // Compute the inverse of scale.
  const float scaling_factor_inv =
      (scaling_factor == 0) ? 0 : 1.0 / scaling_factor;
  std::vector<int16_t> buffer(num_elements);
  const int32_t kScale = std::numeric_limits<int16_t>::max();

  for (size_t i = 0; i < num_elements; i++) {
    const int32_t quantized_value =
        static_cast<int32_t>(TfLiteRound(data[i] * scaling_factor_inv));
    buffer[i] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data =
      reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricQuantizeFloatsToInt16(float_data, num_elements, scaling_factor);
  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(int16_t);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, TensorType_INT16, model, tensor,
                               error_reporter);
}

void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value) {
  // Quantize the values.
  int indices[4];
  RuntimeShape tensor_dims{dimension[0], dimension[1], dimension[2],
                           dimension[3]};
  for (indices[0] = 0; indices[0] < dimension[0]; indices[0]++) {
    for (indices[1] = 0; indices[1] < dimension[1]; indices[1]++) {
      for (indices[2] = 0; indices[2] < dimension[2]; indices[2]++) {
        for (indices[3] = 0; indices[3] < dimension[3]; indices[3]++) {
          int channel_idx = indices[channel_dim_index];
          int index = Offset(tensor_dims, indices);
          const float val = input[index];
          const int32_t quantized_value =
              static_cast<int32_t>(TfLiteRound(val * scales_inv[channel_idx]));
          output_value->at(index) = std::min<int8_t>(
              kMaxQuantizedValue,
              std::max<int8_t>(kMinQuantizedValue, quantized_value));
        }
      }
    }
  }
}

// Quantize the tensor using the max and min values recorded in its
// quantization parameters. Applies per-layer quantization.
TfLiteStatus SymmetricQuantizeTensorFromMinMax(ModelT* model, TensorT* tensor,
                                               ErrorReporter* error_reporter) {
  if (model == nullptr || tensor == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TF_LITE_REPORT_ERROR(error_reporter, "Missing buffer.");
    return kTfLiteError;
  }

  if (!HasMinMax(tensor)) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Missing min or max values for quantization.");
    return kTfLiteError;
  }
  if (tensor->quantization->min.size() != 1 ||
      tensor->quantization->max.size() != 1) {
    TF_LITE_REPORT_ERROR(error_reporter,
                         "Expected single entry in max and min.");
    return kTfLiteError;
  }

  const float* float_data =
      reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  // Quantize the tensor using the recorded min and max values.
  float scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(
      float_data, num_elements, quantized_buffer.data(),
      tensor->quantization->min[0], tensor->quantization->max[0],
      &scaling_factor);
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;

  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }
  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }
  const float* float_data =
      reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  std::vector<int8_t> quantized_buffer;
  quantized_buffer.resize(num_elements);

  float min_value, max_value, scaling_factor;
  tensor_utils::SymmetricQuantizeFloats(float_data, num_elements,
                                        quantized_buffer.data(), &min_value,
                                        &max_value, &scaling_factor);

  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale = std::vector<float>(1, scaling_factor);
  tensor->quantization->zero_point = std::vector<int64_t>(1, 0);

  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(uint8_buffer,
                                              uint8_buffer + num_elements);

  // Update the tensor type.
  tensor->type = TensorType_INT8;
  return kTfLiteOk;
}

TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
  if (model == nullptr || tensor == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "No tensor to quantize.");
    return kTfLiteError;
  }

  BufferT* buffer = model->buffers[tensor->buffer].get();
  if (buffer == nullptr) {
    TFLITE_LOG(TFLITE_LOG_ERROR, "Missing buffer.");
    return kTfLiteError;
  }

  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  // Copy the single-byte buffer data to a float vector to guard against
  // misalignment.
  std::vector<float> float_vector(num_elements);
  uint8_t* first = buffer->data.data();
  std::copy(first, first + buffer->data.size(),
            reinterpret_cast<uint8_t*>(float_vector.data()));

  // Transform float data to float16.
  std::vector<Eigen::half> quantized_buffer;
  quantized_buffer.resize(num_elements);
  constexpr float kMaxFloat16Value = 65504.f;
  constexpr float kMinFloat16Value = -65504.f;
  std::transform(
      float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
      [=](float a) {
        float clamped = std::min(std::max(a, kMinFloat16Value),
                                 kMaxFloat16Value);
        return Eigen::half_impl::float_to_half_rtne(clamped);
      });

  char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
  model->buffers[tensor->buffer]->data.assign(
      half_buffer, half_buffer + sizeof(Eigen::half) * num_elements);

  // Update the tensor type.
  tensor->type = TensorType_FLOAT16;

  return kTfLiteOk;
}

TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter) {
  if (tensor->quantization == nullptr) {
    tensor->quantization = absl::make_unique<QuantizationParametersT>();
  }
  tensor->quantization->scale.assign(scales.begin(), scales.end());
  if (zero_point.size() != scales.size()) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "Received zero_point of size %d and scales of size %d. "
        "These sizes should match.",
        zero_point.size(), scales.size());
    return kTfLiteError;
  }
  tensor->quantization->zero_point.assign(zero_point.begin(),
                                          zero_point.end());
  tensor->quantization->quantized_dimension = quantized_dimension;
  model->buffers[tensor->buffer]->data.assign(buffer_data,
                                              buffer_data + buffer_size);
  // Update the tensor type.
  tensor->type = output_type;
  return kTfLiteOk;
}

TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter) {
  if (tensor->shape.size() != 4) {
    TF_LITE_REPORT_ERROR(
        error_reporter,
        "SymmetricQuantizeTensorPerChannel requires tensor with four "
        "dimensions, but got %d dimension(s).",
        tensor->shape.size());
    return kTfLiteError;
  }

  // Get dimensions.
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));
  const int32_t channel_dim_size = tensor->shape[channel_dim_index];

  // Get input float data.
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_input_data =
      reinterpret_cast<const float*>(buffer->data.data());

  // Create containers for the output scales and output data.
  std::vector<float> scales(channel_dim_size);
  std::vector<int8_t> final_buffer(num_elements);

  // Quantize the input data with respect to channel_dim_index.
  TF_LITE_ENSURE_STATUS(SymmetricPerChannelQuantization(
      tensor, float_input_data, channel_dim_index, &scales, &final_buffer,
      error_reporter));

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  const size_t buffer_size = num_elements * sizeof(int8_t);
  std::vector<int64_t> zero_point(scales.size(), 0);
  return AddQuantizationParams(scales, zero_point, channel_dim_index,
                               uint8_buffer, buffer_size, TensorType_INT8,
                               model, tensor, error_reporter);
}

template <typename BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales) {
  std::vector<BiasType> buffer(num_elements);
  const BiasType kScale = std::numeric_limits<BiasType>::max();
  float scaling_factor_inv_per_layer = (scales[0] == 0) ? 0 : 1.0 / scales[0];

  for (int32_t idx = 0; idx < num_elements; idx++) {
    float scaling_factor_inv =
        scales.size() == 1 ? scaling_factor_inv_per_layer
                           : ((scales[idx] == 0) ? 0 : 1.0 / scales[idx]);
    const BiasType quantized_value = tflite::SafeCast<BiasType>(
        TfLiteRound(data[idx] * scaling_factor_inv));
    buffer[idx] = std::min(kScale, std::max(-kScale, quantized_value));
  }
  return buffer;
}

template std::vector<std::int32_t> SymmetricBiasQuantize<std::int32_t>(
    const float* data, uint64_t num_elements,
    const std::vector<float>& scales);

template <typename BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter) {
  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data =
      reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer = SymmetricBiasQuantize<BiasType>(float_data, num_elements,
                                                      {scaling_factor});

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<float> scales(1, scaling_factor);
  std::vector<int64_t> zero_points(1, 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value
                         ? TensorType_INT32
                         : TensorType_INT64;
  return AddQuantizationParams(scales, zero_points, 0, uint8_buffer,
                               buffer_size, output_type, model, tensor,
                               error_reporter);
}

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerLayerBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float scaling_factor,
    ErrorReporter* error_reporter);

template <typename BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter) {
  // Compute scales.
  std::vector<float> scales(number_of_dimension);
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }

  const BufferT* buffer = model->buffers[tensor->buffer].get();
  const float* float_data =
      reinterpret_cast<const float*>(buffer->data.data());
  uint64_t num_elements;
  TF_LITE_ENSURE_STATUS(NumElements(*tensor, &num_elements));

  auto final_buffer =
      SymmetricBiasQuantize<BiasType>(float_data, num_elements, scales);

  // Set the buffers and output type.
  uint8_t* uint8_buffer = reinterpret_cast<uint8_t*>(final_buffer.data());
  size_t buffer_size = num_elements * sizeof(BiasType);
  std::vector<int64_t> zero_point(scales.size(), 0);

  auto output_type = std::is_same<BiasType, std::int32_t>::value ?
      TensorType_INT32 : TensorType_INT64;
  return AddQuantizationParams(scales, zero_point, 0, uint8_buffer,
                               buffer_size, output_type, model, tensor,
                               error_reporter);
}

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int32_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

template TfLiteStatus SymmetricPerChannelBiasQuantize<std::int64_t>(
    ModelT* model, TensorT* tensor, float input_scale,
    const float* weight_scales, int number_of_dimension,
    ErrorReporter* error_reporter);

TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index,
                            ErrorReporter* error_reporter) {
  // TODO(suharshs): Currently we conflate quantizing weights and constants.
  // It's possible that the right thing to do is asymmetric quantize the
  // weight. Add support for this.
  if (per_channel) {
    return SymmetricQuantizeTensorPerChannel(model, tensor, per_axis_index,
                                             error_reporter);
  } else if (HasMinMax(tensor)) {
    // Quantize using recorded min/max values.
    return SymmetricQuantizeTensorFromMinMax(model, tensor, error_reporter);
  } else {
    // Quantize using min/max from buffer.
    return SymmetricQuantizeTensor(model, tensor);
  }
}

float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors) {
  float scale = 1.0f;
  OperatorT* op = subgraph->operators[op_idx].get();
  for (int i = 0, end = input_index.size(); i < end; ++i) {
    const int index_local = input_index[i];
    const int index_global = op->inputs[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
    const int index_local = intermediate_index[i];
    const int index_global = op->intermediates[index_local];
    const TensorT* tensor = subgraph->tensors[index_global].get();
    scale *= tensor->quantization->scale[0];
  }
  for (int i = 0, end = factors.size(); i < end; ++i) {
    scale *= factors[i];
  }
  return scale;
}

TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter) {
  TF_LITE_ENSURE_STATUS(GetQuantizationParams(
      tensor, activations_type, tensor->quantization.get(), error_reporter));
  tensor->type = activations_type;
  return kTfLiteOk;
}

TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale) {
  const int32_t zero_point = 0;
  tensor->quantization = absl::make_unique<QuantizationParametersT>();
  tensor->quantization->scale.push_back(scale);
  tensor->quantization->zero_point.push_back(zero_point);
  tensor->type = TensorType_INT16;
  return kTfLiteOk;
}

int GetPowerOfTwoScale(float min, float max) {
  const float range = std::max(std::abs(min), std::abs(max));
  int pot = 0;
  for (int i = 0; i < 10; i++) {
    if (std::pow(2, pot) < range) {
      pot++;
    }
  }
  return pot;
}

}  // namespace utils
}  // namespace optimize
}  // namespace tflite