1syntax = "proto3"; 2 3package tensorflow; 4 5import "tensorflow/core/framework/attr_value.proto"; 6import "tensorflow/core/protobuf/verifier_config.proto"; 7 8option cc_enable_arenas = true; 9option java_outer_classname = "RewriterConfigProtos"; 10option java_multiple_files = true; 11option java_package = "org.tensorflow.framework"; 12option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; 13 14message AutoParallelOptions { 15 bool enable = 1; 16 int32 num_replicas = 2; 17} 18 19message ScopedAllocatorOptions { 20 // If present, only perform optimization for these ops. 21 repeated string enable_op = 1; 22} 23 24message RewriterConfig { 25 // Graph rewriting is experimental and subject to change, not covered by any 26 // API stability guarantees. 27 28 // Configuration options for the meta-optimizer. Unless otherwise noted, these 29 // configuration options do not apply to explicitly triggered optimization 30 // passes in the optimizers field. 31 32 enum Toggle { 33 DEFAULT = 0; 34 ON = 1; 35 OFF = 2; 36 // Enable some aggressive optimizations that use assumptions that TF graphs 37 // may break. For example, assume the shape of a placeholder matches its 38 // actual feed. 39 AGGRESSIVE = 3; 40 // Run MLIR pass if there's one implemented in TFG, do nothing otherwise. 41 // I.e., if there's no corresponding TFG pass, it's an OFF. This is supposed 42 // to be mapped with `ON` and there's no `AGGRESSIVE` in MLIR pass now. 43 EXPERIMENTAL_MLIR = 4; 44 // Run both MLIR and Grappler passes consecutively and MLIR pass will come 45 // first. 46 EXPERIMENTAL_BOTH = 5; 47 } 48 49 // Enum for layout conversion between NCHW and NHWC on CPU. Default is OFF. 50 enum CpuLayout { 51 NO_CONVERSION_ON_CPU = 0; 52 NCHW_TO_NHWC = 1; 53 NHWC_TO_NCHW = 2; 54 } 55 56 // Enum controlling the number of times to run optimizers. The default is to 57 // run them twice. 58 enum NumIterationsType { 59 DEFAULT_NUM_ITERS = 0; 60 ONE = 1; 61 TWO = 2; 62 } 63 64 // CPU Conversion settings between NHCW and NCHW. 65 CpuLayout cpu_layout_conversion = 50; 66 67 // Optimize tensor layouts (default is ON) 68 // e.g. This will try to use NCHW layout on GPU which is faster. 69 Toggle layout_optimizer = 1; 70 // Fold constants (default is ON) 71 // Statically infer the value of tensors when possible, and materialize the 72 // result using constants. 73 Toggle constant_folding = 3; 74 // Shape optimizations (default is ON) 75 // Simplify computations made on shapes. 76 Toggle shape_optimization = 13; 77 // Remapping (default is ON) 78 // Remap subgraphs onto more efficient implementations. 79 Toggle remapping = 14; 80 // Common subgraph elimination (default is ON) 81 // e.g. Simplify arithmetic ops; merge ops with same value (like constants). 82 Toggle common_subgraph_elimination = 24; 83 // Arithmetic optimizations (default is ON) 84 // e.g. Simplify arithmetic ops; merge ops with same value (like constants). 85 Toggle arithmetic_optimization = 7; 86 // Control dependency optimizations (default is ON). 87 // Remove redundant control dependencies, which may enable other optimization. 88 Toggle dependency_optimization = 8; 89 // Loop optimizations (default is ON). 90 Toggle loop_optimization = 9; 91 // Function optimizations (default is ON). 92 Toggle function_optimization = 10; 93 // Strips debug-related nodes from the graph (off by default). 
  // Strips debug-related nodes from the graph (off by default).
  Toggle debug_stripper = 11;
  // If true, don't remove unnecessary ops from the graph.
  bool disable_model_pruning = 2;
  // Try to allocate some independent Op outputs contiguously in order to
  // merge or eliminate downstream Ops (off by default).
  Toggle scoped_allocator_optimization = 15;
  // Force small ops onto the CPU (default is OFF).
  Toggle pin_to_host_optimization = 18;
  // Enable the swap of kernel implementations based on the device placement
  // (default is ON).
  Toggle implementation_selector = 22;
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPU, which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
  // Optimize data types for oneDNN (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  // Note: this option is deprecated; it is replaced by
  // auto_mixed_precision_onednn_bfloat16.
  Toggle auto_mixed_precision_mkl = 25;
  // Optimize data types for oneDNN (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  // Note: this is equivalent to the deprecated option auto_mixed_precision_mkl.
  Toggle auto_mixed_precision_onednn_bfloat16 = 31;
  // Emulate a model using data type float16 on CPU (default is OFF).
  // This will try to emulate the float16 inputs and outputs of an operator
  // on CPU to have better correlation with float16 on GPU; however, the
  // computation in the operator is based on float32.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_cpu = 29;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;
  // Optimizers registered by plugin (default is ON).
  Toggle use_plugin_optimizers = 28;
  // Conditional code motion (default is ON).
  Toggle experimental_conditional_code_motion = 30;

  // Controls how many times the optimizers in the meta optimizer are run
  // (default is twice).
  NumIterationsType meta_optimizer_iterations = 12;

  // The minimum number of nodes in a graph to optimize. For smaller graphs,
  // optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;

  // Disable optimizations that assume compressed tensors. Note that this flag
  // is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding quantization emulation ops such as FakeQuantWithMinMax*
  // and QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite
  // converter) have to extract quantization configs (e.g. min/max range,
  // number of bits, and per-channel) from the quantization emulation ops. Note
  // that this flag is experimental and may be removed in the future. See
  // b/174138564 for more details.
  bool experimental_disable_folding_quantization_emulation = 27;
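
  // For illustration, a configuration that turns on mixed precision on GPU and
  // limits the meta-optimizer to a single pass could be expressed as the
  // following textproto (a sketch; the values are arbitrary examples):
  //   auto_mixed_precision: ON
  //   meta_optimizer_iterations: ONE
  //   min_graph_nodes: -1
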
  enum MemOptType {
    // The default setting (SCHEDULING and SWAPPING HEURISTICS only).
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // Driven by heuristics. The behavior of these heuristics is subject to
    // change. Currently includes experimental recomputation and swapping
    // heuristics. Manual annotations are respected, but additional nodes are
    // selected automatically.

    // The swapping heuristic will move a tensor from the GPU to the CPU and
    // move it back when needed to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // The recomputation heuristic will recompute ops (such as Relu activation)
    // during backprop instead of storing them, reducing peak memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // The scheduling heuristic will split big ops such as AddN and try to
    // enforce a schedule of the new computations that decreases peak memory
    // usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the optimizers
  // field.
  MemOptType memory_optimization = 4;
  // A node name scope for node names which are valid outputs of
  // recomputations. Inputs to nodes that match this scope may be recomputed
  // (subject either to manual annotation of those input nodes or to manual
  // annotation and heuristics, depending on memory_optimization), but the
  // nodes themselves will not be recomputed. This matches any sub-scopes as
  // well, meaning the scope can appear not just as a top-level scope. For
  // example, if the value is "gradients/" (the default), it will match the
  // node names "gradients/foo" and "foo/gradients/bar", but not
  // "foo_gradients/".
  string memory_optimizer_target_node_name_scope = 6;
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If less than or equal to 0 (the default value), the optimizer
  // will never time out.
  int64 meta_optimizer_timeout_ms = 20;

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any failing optimization pass will cause the MetaOptimizer to
  // stop with an error. By default, or when set to false, failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing the
  // meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration options
  // (the auto_parallel field) apply to manually requested optimization passes
  // ("autoparallel"). Memory optimization passes ("memory") invoked here are
  // not configurable (in contrast to memory optimization passes through the
  // meta-optimizer) and act only on manual op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run afterwards, in the order in which they were
  // specified. An illustrative sketch follows the CustomGraphOptimizer
  // message below.
  repeated string optimizers = 100;

  // Message to describe a custom graph optimizer and its parameters.
  message CustomGraphOptimizer {
    string name = 1;
    map<string, AttrValue> parameter_map = 2;
  }
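
  // For illustration, an explicit optimizer schedule plus one plugin-provided
  // custom optimizer could be expressed as the following textproto. The pass
  // names "memory" and "autoparallel" are the ones referenced above;
  // "MyCustomOptimizer" and its parameter are hypothetical placeholders:
  //   optimizers: "memory"
  //   optimizers: "autoparallel"
  //   auto_parallel { enable: true num_replicas: 2 }
  //   custom_optimizers {
  //     name: "MyCustomOptimizer"
  //     parameter_map {
  //       key: "level"
  //       value { i: 2 }
  //     }
  //   }
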
  // List of CustomGraphOptimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
}
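
// For illustration, a more complete RewriterConfig written as textproto. In
// TensorFlow this message is typically reached through the graph options of
// the session configuration proto (a sketch; the values, including the
// enable_op name, are arbitrary examples rather than recommendations):
//   meta_optimizer_iterations: TWO
//   memory_optimization: SCHEDULING_HEURISTICS
//   memory_optimizer_target_node_name_scope: "gradients/"
//   scoped_allocator_optimization: ON
//   scoped_allocator_opts { enable_op: "CollectiveReduce" }
//   fail_on_optimizer_errors: true
//   meta_optimizer_timeout_ms: 60000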