// Copyright 2022 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  // Default value; the file format was not specified.
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // Tensorflow's mobile inference model.
  // third_party/tensorflow/lite/schema/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations, that
// are not normally encoded in model files and in general may not be thought
// of as properties of models, instead describing how models are to be
// processed in the context of the present tooling job.
//
// Next ID to use: 51.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;

  // Similar to inference_type, but allows to control specifically the
  // quantization of input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, strings arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value, std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, strings arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-numbers representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-numbers arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-numbers arrays retain the same type in the
  //      output file as they have in the input file.
  //
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported Tensorflow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly annotated
  // with FakeQuant* ops on at least the edges and may contain additional ops on
  // the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller will not be
  // deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost of
  // accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false];

  // Full filepath of folder to dump the graphs at various stages of processing
  // GraphViz .dot files. Preferred over --output_format=GRAPHVIZ_DOT in order
  // to keep the requirements of the output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider to rename the following 2 flags and don't call it
  // "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shape. When set to False, the converter will generate
  // runtime memory offsets for activation Tensors (with 128 bits alignment)
  // and error out on models with undetermined Tensor shape. (Default: True)
  optional bool allow_dynamic_tensors = 30 [default = true];

  // Full filepath of the folder to dump conversion logs. This includes a global
  // view of the conversion process, and user can choose to submit those logs.
  optional string conversion_summary_dir = 31;

  // String representing the custom ops OpDefs that are included in the
  // GraphDef.
  // Deprecated do not use.
  repeated string custom_opdefs = 32 [deprecated = true];

  // Name of user's defined Tensorflow ops required in the TensorFlow Lite
  // runtime. These ops will be supported as select TensorFlow ops.
  repeated string select_user_tf_ops = 33;

  // Whether to enable tflite resource variables during conversion or not.
  // Note: This is an experimental feature.
  optional bool enable_tflite_resource_variables = 34 [default = true];

  // Whether to unfold tf.BatchMatMul to a set of tfl.fully_connected ops. If
  // not, translate to tfl.batch_matmul.
  // WARNING: Experimental interface, subject to change.
  optional bool unfold_batchmatmul = 35 [default = true];

  // Whether to lower static Tensor List ops to builtin ops. If not, use Flex
  // tensor list ops.
  // WARNING: Experimental interface, subject to change.
  optional bool lower_tensor_list_ops = 36 [default = true];

  // The accumulation type to use when quantize_to_float16 is true. Typical
  // choices would be either float16 or float32.
  optional IODataType accumulation_type = 37;

  // Whether this model supports inference in bfloat16.
  // Note: This is an experimental feature.
  optional bool allow_bfloat16 = 38 [default = false];

  // If true, automatically adds all tf ops into the model as select Tensorflow
  // ops.
  optional bool allow_all_select_tf_ops = 39;

  // Whether to unfold large splat constant tensors in the flatbuffer to reduce
  // model size.
  optional bool unfold_large_splat_constant = 40 [default = false];

  // Name of TFLite backends which are needed to check compatibility.
  // WARNING: Experimental interface, subject to change.
  repeated string supported_backends = 41;

  // Whether to force to use batch size one when the batch size is None during
  // lowering tensor list ops.
  optional bool default_to_single_batch_in_tensor_list_ops = 42
      [default = false];

  // Disable per_channel quantization for dynamic range quantization.
  // Note: This is an experimental feature
  optional bool disable_per_channel_quantization = 43 [default = false];

  // If false, the old TOCO dynamic range quantization is used.
  // Note: This is an experimental feature
  optional bool enable_mlir_dynamic_range_quantizer = 44 [default = false];

  // When the output model is used for TF Quantization, this flag indicates the
  // mode of TF Quantization. Ex: DEFAULT, LEGACY_INTEGER,...
  optional string tf_quantization_mode = 45;

  // Disable inferring tensor range for quantization.
  // Note: This is an experimental feature
  optional bool disable_infer_tensor_range = 46 [default = false];

  // Enable using num bits set in fake quant attributes for quantization.
  // Note: This is an experimental feature
  optional bool use_fake_quant_num_bits = 47 [default = false];

  // Enable converting to DynamicUpdateSlice op (for ops like TensorListSetItem)
  // Note: This is an experimental feature
  optional bool enable_dynamic_update_slice = 48 [default = false];

  // Whether to preserve `TF::AssertOp`.
  optional bool preserve_assert_op = 49 [default = false];

  // Whether to ensure each function has a single use.
  optional bool guarantee_all_funcs_one_use = 50 [default = false];
}