• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1syntax = "proto3";
2
3package tensorflow.quantization;
4
5option cc_enable_arenas = true;
6
// This file contains the definition of TF GraphDef-level mixed-precision
// quantization configuration. The configuration will be used in the
// quantization path to determine the following factors:
// 1) What will be the quantization method for the model.
// 2) What will be the default quantization precision for the model.
// 3) What will be the quantization precision for each unit (nodes / ops) in
//    the model.

// TODO(b/240220915): Add a checker for the quantization configuration.
// There will be inconsistencies in the quantization configuration that users
// write. Also, users can write an invalid quantization configuration.
// Therefore, our quantization path will perform a validation check for the
// configuration in the future.

// Model quantization method for optimization.
//
// Various techniques for model quantization are defined within this message
// along with a field that specifies a method to be used for a particular
// quantization request.
message QuantizationMethod {
  // Quantization methods that are supported as a stable API.
  enum Method {
    // This should never be used. Using this will generally result in an error.
    METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  }

  // Experimental quantization methods.
  // These methods are either not implemented or provided with an unstable
  // behavior.
  enum ExperimentalMethod {
    // This should never be used. Using this will generally result in an error.
    EXPERIMENTAL_METHOD_UNSPECIFIED = 0;  // go/do-include-enum-unspecified

    // Static range quantization. Quantized tensor values' ranges are
    // statically determined.
    STATIC_RANGE = 1;

    // Dynamic range quantization. Quantized tensor values' ranges are
    // determined in the graph executions. The weights are quantized during
    // conversion.
    DYNAMIC_RANGE = 2;
  }

  // Quantization method is either an experimental or a non-experimental
  // method; at most one of the two may be set.
  oneof method_oneof {
    Method method = 1;
    ExperimentalMethod experimental_method = 2;
  }
}

// Quantization precisions. If the specified quantization
// precision is not available, our quantizer needs to raise an error.
enum QuantizationPrecision {
  // Default value; treated as "precision not specified".
  PRECISION_UNSPECIFIED = 0;
  // Full precision (do not quantize).
  PRECISION_FULL = 1;
  // 4-bit weight and 4-bit activation quantization.
  PRECISION_W4A4 = 2;
  // 4-bit weight and 8-bit activation quantization.
  PRECISION_W4A8 = 3;
  // 8-bit weight and 8-bit activation quantization.
  PRECISION_W8A8 = 4;
}

// Unit (either nodes or ops at this moment) wise quantization method for
// mixed bit precision quantization. It contains the name of the unit,
// the granularity of the unit, and the quantization precision for each unit.
message UnitWiseQuantizationPrecision {
  // Quantization unit granularity.
  enum UnitType {
    // This should never be used. Using this will generally result in an error.
    UNIT_UNSPECIFIED = 0;
    // A single named node in the GraphDef.
    UNIT_NODE = 1;
    // An op type; the precision applies to all nodes of that op.
    UNIT_OP = 2;
  }

  // Field number 4 is skipped in the numbering below (1, 2, 3, 5).
  // Reserve it so a future field cannot accidentally reuse the wire number.
  // NOTE(review): presumably 4 belonged to a deleted field — confirm against
  // schema history before assigning it to anything new.
  reserved 4;

  // Available quantization unit. Currently node-wise and op-wise are
  // available quantization units.
  UnitType unit_type = 1;
  // Uniqueness isn't guaranteed across SavedModels, but within each
  // function def's level, uniqueness is guaranteed. The configuration
  // interfaces were updated to reflect such circumstances.
  // If users do not need to guarantee uniqueness, func_name can be omitted.
  string func_name = 2;
  // Name of the node or op this precision applies to.
  string unit_name = 3;

  // Quantization precision for the current unit.
  // TODO(b/241322587): Support specifying quantization method for each unit of
  // TF GraphDef.
  QuantizationPrecision quantization_precision = 5;
}

// List of supported opsets to deploy the quantized model.
// The quantized model contains a different set of ops depending on the opset.
// NOTE(review): the non-zero values are not prefixed with the enum type name
// (e.g. OP_SET_TF); renaming them now would break generated code across all
// languages, so they are kept as-is.
enum OpSet {
  OP_SET_UNSPECIFIED = 0;  // go/do-include-enum-unspecified
  // Uses TF ops that mimic quantization behavior. Used when the corresponding
  // integer op is not yet present.
  TF = 1;
  // Uses TF XLA ops.
  XLA = 2;
  // Uses TF Uniform Quantized ops.
  UNIFORM_QUANTIZED = 3;
}

// Defines various options to specify and control the behavior of the
// quantizer. It consists of
// 1) Model-wise quantization configuration as a default configuration. If it
//    is None, the default configuration is "do not quantize the model".
// 2) A set of supported operations.
// 3) Unit-wise quantization precision.
// 4) Target hardware name.
message QuantizationOptions {
  // The default quantization configuration for the model. If the unit-wise
  // configuration below does not exist, this default quantization
  // configuration is used for the entire model. If the unit-wise
  // configuration below exists, this default one becomes the quantization
  // configuration for units that are not specified in unit-wise
  // configurations.
  QuantizationMethod quantization_method = 1;

  // Opset to use for the ops in the quantized model.
  OpSet op_set = 2;

  // Default quantization precision applied to the model.
  QuantizationPrecision quantization_precision = 3;

  // Quantization precision for each unit. Units can be either
  // nodes or ops, and a mixture of those different units is allowed.
  // If there are conflicts or ambiguity in this unit-wise precision, our
  // quantizer will raise an error.
  repeated UnitWiseQuantizationPrecision unit_wise_quantization_precision = 4;

  // Minimum number of weight elements to apply quantization. Currently only
  // supported for Post-training Dynamic Range Quantization. By default, it is
  // set to 1024. To disable this, set the value to -1 explicitly.
  int64 min_num_elements_for_weights = 5;
}