// Copyright 2022 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  // Default value; the file format was not specified.
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // Tensorflow's mobile inference model.
  // third_party/tensorflow/lite/schema/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations, that
// are not normally encoded in model files and in general may not be thought
// of as properties of models, instead describing how models are to be
// processed in the context of the present tooling job.
//
// Next ID to use: 51.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;

  // Similar to inference_type, but allows to control specifically the
  // quantization of input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, strings arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value, std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, strings arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-numbers representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-numbers arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-numbers arrays retain the same type in the
  //      output file as they have in the input file.
  //
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported Tensorflow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly annotated
  // with FakeQuant* ops on at least the edges and may contain additional ops on
  // the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller will not be
  // deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost of
  // accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false];

  // Full filepath of folder to dump the graphs at various stages of processing
  // GraphViz .dot files. Preferred over --output_format=GRAPHVIZ_DOT in order
  // to keep the requirements of the output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider to rename the following 2 flags and don't call it
  // "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shape. When set to False, the converter will generate
  // runtime memory offsets for activation Tensors (with 128 bits alignment)
  // and error out on models with undetermined Tensor shape. (Default: True)
  optional bool allow_dynamic_tensors = 30 [default = true];

  // Full filepath of the folder to dump conversion logs. This includes a global
  // view of the conversion process, and user can choose to submit those logs.
  optional string conversion_summary_dir = 31;

  // String representing the custom ops OpDefs that are included in the
  // GraphDef.
  // Deprecated do not use.
  repeated string custom_opdefs = 32 [deprecated = true];

  // Name of user's defined Tensorflow ops required in the TensorFlow Lite
  // runtime. These ops will be supported as select TensorFlow ops.
  repeated string select_user_tf_ops = 33;

  // Whether to enable tflite resource variables during conversion or not.
  // Note: This is an experimental feature.
  optional bool enable_tflite_resource_variables = 34 [default = true];

  // Whether to unfold tf.BatchMatMul to a set of tfl.fully_connected ops. If
  // not, translate to tfl.batch_matmul.
  // WARNING: Experimental interface, subject to change.
  optional bool unfold_batchmatmul = 35 [default = true];

  // Whether to lower static Tensor List ops to builtin ops. If not, use Flex
  // tensor list ops.
  // WARNING: Experimental interface, subject to change.
  optional bool lower_tensor_list_ops = 36 [default = true];

  // The accumulation type to use when quantize_to_float16 is true. Typical
  // choices would be either float16 or float32.
  optional IODataType accumulation_type = 37;

  // Whether this model supports inference in bfloat16.
  // Note: This is an experimental feature.
  optional bool allow_bfloat16 = 38 [default = false];

  // If true, automatically adds all tf ops into the model as select Tensorflow
  // ops.
  optional bool allow_all_select_tf_ops = 39;

  // Whether to unfold large splat constant tensors in the flatbuffer to reduce
  // model size.
  optional bool unfold_large_splat_constant = 40 [default = false];

  // Name of TFLite backends which are needed to check compatibility.
  // WARNING: Experimental interface, subject to change.
  repeated string supported_backends = 41;

  // Whether to force to use batch size one when the batch size is None during
  // lowering tensor list ops.
  optional bool default_to_single_batch_in_tensor_list_ops = 42
      [default = false];

  // Disable per_channel quantization for dynamic range quantization.
  // Note: This is an experimental feature
  optional bool disable_per_channel_quantization = 43 [default = false];

  // If false, the old TOCO dynamic range quantization is used.
  // Note: This is an experimental feature
  optional bool enable_mlir_dynamic_range_quantizer = 44 [default = false];

  // When the output model is used for TF Quantization, this flag indicates the
  // mode of TF Quantization. Ex: DEFAULT, LEGACY_INTEGER,...
  optional string tf_quantization_mode = 45;

  // Disable inferring tensor range for quantization.
  // Note: This is an experimental feature
  optional bool disable_infer_tensor_range = 46 [default = false];

  // Enable using num bits set in fake quant attributes for quantization.
  // Note: This is an experimental feature
  optional bool use_fake_quant_num_bits = 47 [default = false];

  // Enable converting to DynamicUpdateSlice op (for ops like TensorListSetItem)
  // Note: This is an experimental feature
  optional bool enable_dynamic_update_slice = 48 [default = false];

  // Whether to preserve `TF::AssertOp`.
  optional bool preserve_assert_op = 49 [default = false];

  // Whether to ensure each function has a single use.
  optional bool guarantee_all_funcs_one_use = 50 [default = false];
}