/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_
#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_

#include <cstdint>
#include <vector>

#include "tensorflow/lite/context.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {
namespace optimize {
namespace utils {

// Returns the number of elements in the given tensor.
TfLiteStatus NumElements(const TensorT& tensor, uint64_t* num_elements);

// Populates the scale and zero point for quantization parameters.
//
// Nudges min and max so that floating point 0 falls exactly on a quantized
// value, returning the nudged scale and zero_point.
void GetAsymmetricQuantizationParams(
    float min, float max, const int quant_min, const int quant_max,
    QuantizationParametersT* quantization_params);

// Populates the single total max and min values for a tensor.
void FillSingleMinMax(const float* const input, const uint64_t input_size,
                      QuantizationParametersT* quantization_params);

// Populates the max and min values for per-channel quantization.
TfLiteStatus FillPerChannelMinMax(const float* const input,
                                  const std::vector<int>& dimension,
                                  int32_t channel_dim_index,
                                  QuantizationParametersT* quantization_params,
                                  ErrorReporter* error_reporter);

// Per-channel quantizes a tensor at the given index and returns both scales
// and quantized values.
// Parameters:
// - tensor is the tensor to be quantized, needed to access associated
//   quantization parameters.
// - input is the float input data to be quantized.
// - channel_dim_index is the channel index within "dimension".
//   dimension[channel_dim_index] gives the number of channels.
// - output_scales is the output scales, the size of which equals the number
//   of channels.
// - output_value is the output data, the size of which equals the number of
//   inputs.
TfLiteStatus SymmetricPerChannelQuantization(TensorT* tensor,
                                             const float* const input,
                                             int32_t channel_dim_index,
                                             std::vector<float>* output_scales,
                                             std::vector<int8_t>* output_value,
                                             ErrorReporter* error_reporter);

// Quantizes the values given an array of scales.
void SymmetricPerChannelQuantizeValues(const float* const input,
                                       const std::vector<float>& scales_inv,
                                       const std::vector<int32_t>& dimension,
                                       int32_t channel_dim_index,
                                       std::vector<int8_t>* output_value);

// Quantizes a tensor using symmetric quantization with the min and max
// elements of the tensor.
TfLiteStatus SymmetricQuantizeTensor(ModelT* model, TensorT* tensor);

// Quantizes a tensor to float16.
TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor);
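// Worked example for the symmetric per-channel routines above. This is an
// illustrative sketch, not part of the original documentation; `tensor` and
// `error_reporter` are assumed to be set up elsewhere, and the tensor is
// assumed to have shape {2, 2} with the channel as the first dimension. For
// symmetric int8 quantization, each channel's scale is its absolute maximum
// divided by 127, and values are rounded and clamped to [-127, 127]:
//
//   // Channel 0 holds {-2.54f, 1.0f}  -> scale = 2.54f / 127 = ~0.02f.
//   // Channel 1 holds {-0.52f, 5.08f} -> scale = 5.08f / 127 = ~0.04f.
//   const std::vector<float> input = {-2.54f, 1.0f, -0.52f, 5.08f};
//   std::vector<float> output_scales;  // Becomes approximately {0.02, 0.04}.
//   std::vector<int8_t> output_value;  // Becomes {-127, 50, -13, 127}.
//   SymmetricPerChannelQuantization(tensor, input.data(),
//                                   /*channel_dim_index=*/0, &output_scales,
//                                   &output_value, error_reporter);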
// Adds quantization parameters to the given tensor.
TfLiteStatus AddQuantizationParams(const std::vector<float>& scales,
                                   const std::vector<int64_t>& zero_point,
                                   int quantized_dimension,
                                   const uint8_t* buffer_data,
                                   size_t buffer_size, TensorType output_type,
                                   ModelT* model, TensorT* tensor,
                                   ErrorReporter* error_reporter);

// Populates the scales vector based on the max and min values of quant_params.
TfLiteStatus GetSymmetricScalesFromMaxMin(
    QuantizationParametersT* quant_params, std::vector<float>* scales,
    ErrorReporter* error_reporter);

// Adjusts the scale of weights if it is incompatible with the bias scale and
// likely to cause overflow.
TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
                                       const float* bias_data,
                                       const size_t bias_size,
                                       const float input_scale,
                                       ErrorReporter* error_reporter);

// Quantizes a tensor using per-channel quantization.
TfLiteStatus SymmetricQuantizeTensorPerChannel(ModelT* model, TensorT* tensor,
                                               int32_t channel_dim_index,
                                               ErrorReporter* error_reporter);

// Symmetrically quantizes floats to 16 bits.
TfLiteStatus SymmetricQuantizeFloatsToInt16(ModelT* model, TensorT* tensor,
                                            float scaling_factor,
                                            ErrorReporter* error_reporter);

std::vector<int16_t> SymmetricQuantizeFloatsToInt16(const float* data,
                                                    uint64_t num_elements,
                                                    float scaling_factor);

// Symmetrically quantizes the bias for per-layer ops (e.g. FullyConnected).
template <typename BiasType>
TfLiteStatus SymmetricPerLayerBiasQuantize(ModelT* model, TensorT* tensor,
                                           float scaling_factor,
                                           ErrorReporter* error_reporter);

// Symmetrically quantizes the bias for ops like Conv and DepthwiseConv.
// The scale of the bias is weight_per_channel_scale[channel] * input_scale.
template <typename BiasType>
TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                             float input_scale,
                                             const float* weight_scales,
                                             int number_of_dimension,
                                             ErrorReporter* error_reporter);

template <typename BiasType>
std::vector<BiasType> SymmetricBiasQuantize(const float* data,
                                            uint64_t num_elements,
                                            const std::vector<float>& scales);

// Quantizes a weight tensor with or without per-channel quantization.
TfLiteStatus QuantizeWeight(ModelT* model, TensorT* tensor, bool per_channel,
                            int per_axis_index, ErrorReporter* error_reporter);

// Gets the effective scale by combining the input scale, intermediate scales,
// and factors.
float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                        std::vector<int> input_index,
                        std::vector<int> intermediate_index,
                        std::vector<float> factors);

// Returns quantization parameters depending on the activations type.
TfLiteStatus GetQuantizationParams(TensorT* tensor,
                                   TensorType activations_type,
                                   QuantizationParametersT* quantization_params,
                                   ErrorReporter* error_reporter);

// Quantizes an activation.
TfLiteStatus QuantizeActivation(TensorT* tensor, TensorType activations_type,
                                ErrorReporter* error_reporter);

// Quantizes an activation to 16 bits.
TfLiteStatus QuantizeActivationToInt16(TensorT* tensor, float scale);

// Gets the power-of-two scale for min and max for the symmetric quantization
// case.
int GetPowerOfTwoScale(float min, float max);

}  // namespace utils
}  // namespace optimize
}  // namespace tflite

#endif  // TENSORFLOW_LITE_TOOLS_OPTIMIZE_QUANTIZATION_UTILS_H_
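// Usage note for SymmetricPerChannelBiasQuantize (illustrative values; a
// sketch, not part of the original header): the quantization scale of the
// bias in channel c is weight_scales[c] * input_scale. With
// input_scale == 0.5f and weight_scales == {0.1f, 0.2f}, the per-channel bias
// scales are {0.05f, 0.1f}, so a float bias of 1.0f in channel 0 is stored as
// round(1.0f / 0.05f) == 20, clamped to the BiasType range.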