syntax = "proto3";

package tensorflow.quantization;

option cc_enable_arenas = true;

// This file contains the definition of TF GraphDef-level mixed-precision
// quantization configuration. The configuration will be used in the
// quantization path to determine the following factors:
// 1) What will be the quantization method for the model.
// 2) What will be the default quantization precision for the model.
// 3) What will be the quantization precision for each unit (nodes / ops) in
//    the model.

// TODO(b/240220915): Add a checker for the quantization configuration.
// There will be inconsistencies in the quantization configuration that users
// write. Also, users can write an invalid quantization configuration.
// Therefore, our quantization path will perform validation check for the
// configuration in the future.

// Model quantization method for optimization.
//
// Various techniques for model quantization are defined within this message
// along with a field that specifies a method to be used for a particular
// quantization request.
message QuantizationMethod {
  // Quantization methods that are supported as a stable API.
  enum Method {
    // This should never be used. Using this will generally result in an error.
    METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  }

  // Experimental quantization methods.
  // These methods are either not implemented or provided with an unstable
  // behavior.
  enum ExperimentalMethod {
    // This should never be used. Using this will generally result in an error.
    EXPERIMENTAL_METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified

    // Static range quantization. Quantized tensor values' ranges are
    // statically determined.
    STATIC_RANGE = 1;

    // Dynamic range quantization. Quantized tensor values' ranges are
    // determined in the graph executions. The weights are quantized during
    // conversion.
    DYNAMIC_RANGE = 2;
  }

  // Quantization method is either experimental or non-experimental method.
  // Exactly one of the two fields below may be set.
  oneof method_oneof {
    Method method = 1;
    ExperimentalMethod experimental_method = 2;
  }
}

// Quantization precisions. If the specified quantization
// precision is not available, our quantizer needs to raise an error.
enum QuantizationPrecision {
  // This should never be used. Using this will generally result in an error.
  PRECISION_UNSPECIFIED = 0;
  // Full precision (do not quantize).
  PRECISION_FULL = 1;
  // Weight 4-bit and activation 4-bit quantization.
  PRECISION_W4A4 = 2;
  // Weight 4-bit and activation 8-bit quantization.
  PRECISION_W4A8 = 3;
  // Weight 8-bit and activation 8-bit quantization.
  PRECISION_W8A8 = 4;
}

// Unit (either nodes or ops at this moment) wise quantization method for
// mixed bit precision quantization. It contains the name of the unit,
// the granularity of the unit, and the quantization method for each unit.
message UnitWiseQuantizationPrecision {
  // Quantization unit granularity.
  enum UnitType {
    // This should never be used. Using this will generally result in an error.
    UNIT_UNSPECIFIED = 0;
    // Node-wise quantization unit.
    UNIT_NODE = 1;
    // Op-wise quantization unit.
    UNIT_OP = 2;
  }

  // Field number 4 is unused (presumably a removed field); reserve it so it
  // can never be reused with a different wire meaning.
  reserved 4;

  // Available quantization unit. Currently node-wise and op-wise are
  // available quantization units.
  UnitType unit_type = 1;
  // Uniqueness isn't guaranteed across SavedModels but within each function
  // def's level, uniqueness is guaranteed. Updated
  // the configuration interfaces to reflect such circumstances.
  // If users do not need to guarantee uniqueness func_name can be omitted.
  string func_name = 2;
  // Name of the unit (node or op, per unit_type) this precision applies to.
  string unit_name = 3;

  // Quantization option information for the current unit.
  // TODO(b/241322587): Support specifying quantization method for each unit of
  // TF GraphDef.
  QuantizationPrecision quantization_precision = 5;
}

// List of supported opsets to deploy the quantized model.
// The quantized model contains different set of ops depending on the opset.
enum OpSet {
  // This should never be used. Using this will generally result in an error.
  OP_SET_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  // Uses TF ops that mimic quantization behavior. Used when the corresponding
  // integer op is not yet present.
  TF = 1;
  // Uses TF XLA ops.
  XLA = 2;
  // Uses TF Uniform Quantized ops.
  UNIFORM_QUANTIZED = 3;
}

// Defines various options to specify and control the behavior of the
// quantizer. It consists of
// 1) Model-wise quantization configuration as a default configuration. If it
//    is None, the default configuration is "do not quantize the model".
// 2) A set of supported operations.
// 3) Unit wise quantization precision.
// 4) Target hardware name.
// NOTE(review): item 4 ("target hardware name") has no corresponding field in
// this message yet — confirm whether it is planned or the list is stale.
message QuantizationOptions {
  // The default quantization configuration for the model. If the below
  // unit-wise configuration does not exist, we use this default quantization
  // configuration for the entire model. If the below unit-wise configuration
  // exists, this default one will become the quantization configuration for
  // units that are not specified in unit-wise configurations.
  QuantizationMethod quantization_method = 1;

  // Target opset for the quantized model; the emitted set of ops depends on
  // this value (see OpSet above).
  OpSet op_set = 2;

  // Default quantization precision applied to units that have no unit-wise
  // entry below.
  QuantizationPrecision quantization_precision = 3;

  // Quantization precision for each unit. Units can become either
  // nodes or ops, and the mixture of those different units are allowed.
  // If there are conflicts or ambiguity in this unit-wise precision, our
  // quantizer will raise an error.
  repeated UnitWiseQuantizationPrecision unit_wise_quantization_precision = 4;

  // Minimum number of weight elements to apply quantization. Currently only
  // supported for Post-training Dynamic Range Quantization. By default, it is
  // set to 1024. To disable this, set the value to -1 explicitly.
  int64 min_num_elements_for_weights = 5;
}