// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // Tensorflow's mobile inference model.
  // third_party/tensorflow/contrib/tflite/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations, that
// are not normally encoded in model files and in general may not be thought
// of as properties of models, instead describing how models are to be
// processed in the context of the present tooling job.
//
// Next ID to use: 29.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;

  // Similar to inference_type, but allows to control specifically the
  // quantization of input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By
  // "real-number" we mean float arrays, and quantized arrays. This excludes
  // plain integer arrays, strings arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring
  // floating-point inference. For such image models, the uint8 input is
  // quantized, i.e. the uint8 values are interpreted as real numbers, and
  // the quantization parameters used for such input arrays are their
  // mean_value, std_value parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, strings arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-numbers representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-numbers arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-numbers arrays retain the same type in the
  //      output file as they have in the input file.
  //
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported Tensorflow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly
  // annotated with FakeQuant* ops on at least the edges and may contain
  // additional ops on the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller will not
  // be deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost
  // of accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false];

  // Full filepath of folder to dump the graphs at various stages of
  // processing GraphViz .dot files. Preferred over --output_format=
  // GRAPHVIZ_DOT in order to keep the requirements of the output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted
  // float model. Model size will be reduced and there will be latency
  // improvements (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow
  // ops.
  // TODO(ycling): Consider to rename the following 2 flags and don't call it
  // "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];
}