// Copyright 2022 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // TensorFlow's mobile inference model.
  // third_party/tensorflow/lite/schema/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations. They are
// not normally encoded in model files and in general are not properties of
// models; instead, they describe how models are to be processed in the
// context of the present tooling job.
//
// Next ID to use: 51.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;

  // Similar to inference_type, but specifically controls the quantization of
  // input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By
  // "real-number" we mean float arrays and quantized arrays. This excludes
  // plain integer arrays, string arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap as
  // input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e. the
  // uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value and std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-number representation (quantization)
  // from what the input file used. For any other type of array, changing
  // the data type would not make sense.
  //
  // Specifically:
  //   - If FLOAT, then real-number arrays will be of type float in
  //     the output file. If they were quantized in the input file, then
  //     they get dequantized.
  //   - If QUANTIZED_UINT8, then real-number arrays will be quantized
  //     as uint8 in the output file. If they were float in the input file,
  //     then they get quantized.
  //   - If not set, then all real-number arrays retain the same type in the
  //     output file as they have in the input file.
  //
  optional IODataType inference_type = 4;
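
  // Illustrative sketch only (an assumption about typical usage, not part of
  // the schema): for the vision-model case described above, with a uint8
  // bitmap input but floating-point inference, a conversion job might set
  // these two fields in a TocoFlags textproto roughly as follows:
  //
  //   inference_input_type: QUANTIZED_UINT8
  //   inference_type: FLOAT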

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activation array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;
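
  // Purely illustrative values (an assumption, not a recommendation): a quick
  // performance experiment might supply a placeholder range such as the
  // following, accepting the accuracy loss described above.
  //
  //   default_ranges_min: -6.0
  //   default_ranges_max: 6.0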

  // Ignore and discard FakeQuant nodes. For instance, this can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations that prevent the graph
  // transformations necessary to generate inference code for these graphs.
  // Such graphs should be fixed, but as a temporary work-around, setting this
  // reorder_across_fake_quant flag allows toco to perform the necessary graph
  // transformations on them, at the cost of no longer faithfully matching
  // inference and training arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported TensorFlow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //   - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //   - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly
  // annotated with FakeQuant* ops on at least the edges and may contain
  // additional ops on the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation,
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging such weights to 1 to allow proceeding, with
  // moderate inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller than this
  // will not be deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but this reduces model size (at the
  // cost of accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false];

  // Full filepath of the folder in which to dump GraphViz .dot files of the
  // graph at various stages of processing. Preferred over
  // --output_format=GRAPHVIZ_DOT so that the requested output file format is
  // still honored.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider renaming the following 2 flags and not calling
  // them "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shapes. When set to false, the converter will generate
  // runtime memory offsets for activation Tensors (with 128-bit alignment)
  // and error out on models with undetermined Tensor shapes. (Default: true)
  optional bool allow_dynamic_tensors = 30 [default = true];

  // Full filepath of the folder to dump conversion logs. This includes a
  // global view of the conversion process, and the user can choose to submit
  // those logs.
  optional string conversion_summary_dir = 31;

  // String representing the custom ops OpDefs that are included in the
  // GraphDef.
  // Deprecated: do not use.
  repeated string custom_opdefs = 32 [deprecated = true];

  // Names of user-defined TensorFlow ops required in the TensorFlow Lite
  // runtime. These ops will be supported as select TensorFlow ops.
  repeated string select_user_tf_ops = 33;

  // Whether to enable tflite resource variables during conversion.
  // Note: This is an experimental feature.
  optional bool enable_tflite_resource_variables = 34 [default = true];

  // Whether to unfold tf.BatchMatMul to a set of tfl.fully_connected ops. If
  // not, translate to tfl.batch_matmul.
  // WARNING: Experimental interface, subject to change.
  optional bool unfold_batchmatmul = 35 [default = true];

  // Whether to lower static Tensor List ops to builtin ops. If not, use Flex
  // tensor list ops.
  // WARNING: Experimental interface, subject to change.
  optional bool lower_tensor_list_ops = 36 [default = true];

  // The accumulation type to use when quantize_to_float16 is true. Typical
  // choices would be either float16 or float32.
  optional IODataType accumulation_type = 37;

  // Whether this model supports inference in bfloat16.
  // Note: This is an experimental feature.
  optional bool allow_bfloat16 = 38 [default = false];

  // If true, automatically adds all TF ops into the model as select
  // TensorFlow ops.
  optional bool allow_all_select_tf_ops = 39;

  // Whether to unfold large splat constant tensors in the flatbuffer to
  // reduce model size.
  optional bool unfold_large_splat_constant = 40 [default = false];

  // Names of the TFLite backends against which compatibility should be
  // checked.
  // WARNING: Experimental interface, subject to change.
  repeated string supported_backends = 41;

  // Whether to force a batch size of one when the batch size is None while
  // lowering tensor list ops.
  optional bool default_to_single_batch_in_tensor_list_ops = 42
      [default = false];

  // Disable per_channel quantization for dynamic range quantization.
  // Note: This is an experimental feature.
  optional bool disable_per_channel_quantization = 43 [default = false];

  // If false, the old TOCO dynamic range quantization is used.
  // Note: This is an experimental feature.
  optional bool enable_mlir_dynamic_range_quantizer = 44 [default = false];

  // When the output model is used for TF Quantization, this flag indicates
  // the mode of TF Quantization, e.g. DEFAULT, LEGACY_INTEGER, ...
  optional string tf_quantization_mode = 45;

  // Disable inferring tensor range for quantization.
  // Note: This is an experimental feature.
  optional bool disable_infer_tensor_range = 46 [default = false];

  // Enable using the number of bits set in fake quant attributes for
  // quantization.
  // Note: This is an experimental feature.
  optional bool use_fake_quant_num_bits = 47 [default = false];

  // Enable converting to the DynamicUpdateSlice op (for ops like
  // TensorListSetItem).
  // Note: This is an experimental feature.
  optional bool enable_dynamic_update_slice = 48 [default = false];

  // Whether to preserve `TF::AssertOp`.
  optional bool preserve_assert_op = 49 [default = false];

  // Whether to ensure each function has a single use.
  optional bool guarantee_all_funcs_one_use = 50 [default = false];
}
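
// Illustrative example only (an assumption about typical usage, not part of
// the schema): a TocoFlags textproto for a float GraphDef-to-TFLite
// conversion with post-training weight quantization might look roughly like:
//
//   input_format: TENSORFLOW_GRAPHDEF
//   output_format: TFLITE
//   inference_type: FLOAT
//   post_training_quantize: true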