syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/attr_value.proto";
import "tensorflow/core/protobuf/verifier_config.proto";

option cc_enable_arenas = true;
option java_outer_classname = "RewriterConfigProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

// Options for the AutoParallel optimization pass. Consumed through
// RewriterConfig.auto_parallel.
message AutoParallelOptions {
  // NOTE(review): presumably switches the AutoParallel pass on — confirm
  // against the optimizer implementation.
  bool enable = 1;
  // NOTE(review): presumably the number of replicas the pass creates —
  // confirm against the optimizer implementation.
  int32 num_replicas = 2;
}

// Options consumed through RewriterConfig.scoped_allocator_opts.
message ScopedAllocatorOptions {
  // If present, only perform optimization for these ops.
  repeated string enable_op = 1;
}

message RewriterConfig {
  // Graph rewriting is experimental and subject to change, not covered by any
  // API stability guarantees.

  // Configuration options for the meta-optimizer. Unless otherwise noted, these
  // configuration options do not apply to explicitly triggered optimization
  // passes in the optimizers field.

  // Switch type shared by the per-optimizer fields below. DEFAULT is the zero
  // value, so a field that is never set reads as DEFAULT; what DEFAULT means
  // (ON or OFF) is stated in the comment of each field that uses this enum.
  enum Toggle {
    DEFAULT = 0;
    ON = 1;
    OFF = 2;
    // Enable some aggressive optimizations that use assumptions that TF graphs
    // may break. For example, assume the shape of a placeholder matches its
    // actual feed.
    AGGRESSIVE = 3;
  }

  // Enum for layout conversion between NCHW and NHWC on CPU. The default
  // (zero value) is NO_CONVERSION_ON_CPU.
  enum CpuLayout {
    NO_CONVERSION_ON_CPU = 0;
    NCHW_TO_NHWC = 1;
    NHWC_TO_NCHW = 2;
  }

  // Enum controlling the number of times to run optimizers. The default is to
  // run them twice.
  enum NumIterationsType {
    DEFAULT_NUM_ITERS = 0;
    ONE = 1;
    TWO = 2;
  }

  // CPU Conversion settings between NHWC and NCHW.
  CpuLayout cpu_layout_conversion = 50;

  // Optimize tensor layouts (default is ON)
  // e.g. This will try to use NCHW layout on GPU which is faster.
  Toggle layout_optimizer = 1;
  // Fold constants (default is ON)
  // Statically infer the value of tensors when possible, and materialize the
  // result using constants.
  Toggle constant_folding = 3;
  // Shape optimizations (default is ON)
  // Simplify computations made on shapes.
  Toggle shape_optimization = 13;
  // Remapping (default is ON)
  // Remap subgraphs onto more efficient implementations.
  Toggle remapping = 14;
  // Common subgraph elimination (default is ON)
  // e.g. Merge ops with same value (like constants).
  Toggle common_subgraph_elimination = 24;
  // Arithmetic optimizations (default is ON)
  // e.g. Simplify arithmetic ops; merge ops with same value (like constants).
  Toggle arithmetic_optimization = 7;
  // Control dependency optimizations (default is ON).
  // Remove redundant control dependencies, which may enable other optimization.
  Toggle dependency_optimization = 8;
  // Loop optimizations (default is ON).
  Toggle loop_optimization = 9;
  // Function optimizations (default is ON).
  Toggle function_optimization = 10;
  // Strips debug-related nodes from the graph (off by default).
  Toggle debug_stripper = 11;
  // If true, don't remove unnecessary ops from the graph
  bool disable_model_pruning = 2;
  // Try to allocate some independent Op outputs contiguously in order to
  // merge or eliminate downstream Ops (off by default).
  Toggle scoped_allocator_optimization = 15;
  // Force small ops onto the CPU (default is OFF).
  Toggle pin_to_host_optimization = 18;
  // Enable the swap of kernel implementations based on the device placement
  // (default is ON).
  Toggle implementation_selector = 22;
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPU which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
  // Optimize data types for MKL (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_mkl = 25;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;

  // Controls how many times we run the optimizers in meta optimizer.
  // NOTE(review): this comment used to say "default is once", which
  // contradicts the NumIterationsType comment ("run them twice"). Confirm the
  // real default against the meta-optimizer implementation and keep the two
  // comments in agreement.
  NumIterationsType meta_optimizer_iterations = 12;

  // The minimum number of nodes in a graph to optimize. For smaller graphs,
  // optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;

  // Disable optimizations that assume compressed tensors. Note that this flag
  // is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding quantization emulation ops such as FakeQuantWithMinMax* and
  // QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite converter)
  // have to extract quantization configs (e.g. min/max range, number of bits,
  // and per-channel) from the quantization emulation ops. Note that this flag
  // is experimental and may be removed in the future. See b/174138564 for more
  // details.
  bool experimental_disable_folding_quantization_emulation = 27;

  // Selects how the meta-optimizer's memory optimization is driven. Value
  // numbers are not in ascending order (HEURISTICS = 3 is declared last);
  // the numbers are part of the wire format and must not be changed.
  enum MemOptType {
    // The default setting (SCHEDULING and SWAPPING HEURISTICS only)
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // The values below are driven by heuristics. The behavior of these
    // heuristics is subject to change. Currently includes an experimental
    // recomputation and swapping heuristics. Manual annotations are respected,
    // but additional nodes are selected automatically.

    // Swapping heuristic will move a tensor from the GPU to the CPU and move
    // it back when needed to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // Recomputation heuristics will recompute ops (such as Relu activation)
    // during backprop instead of storing them, reducing peak memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // Scheduling will split big ops such as AddN and try to enforce a schedule
    // of the new computations that decreases peak memory usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the optimizers
  // field.
  MemOptType memory_optimization = 4;
  // A node name scope for node names which are valid outputs of recomputations.
  // Inputs to nodes that match this scope may be recomputed (subject either to
  // manual annotation of those input nodes or to manual annotation and
  // heuristics depending on memory_optimization), but the nodes themselves will
  // not be recomputed. This matches any sub-scopes as well, meaning the scope
  // can appear not just as a top-level scope. For example, if the value is
  // "gradients/", the default, it will match node name "gradients/foo",
  // "foo/gradients/bar", but not "foo_gradients/".
  string memory_optimizer_target_node_name_scope = 6;
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If equal to 0 the system picks a default (currently 5 minutes).
  // If less than 0 the optimizer will never time out.
  int64 meta_optimizer_timeout_ms = 20;

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any optimization pass failing will cause the MetaOptimizer to
  // stop with an error. By default - or when set to false, failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  // Options for the scoped allocator optimization.
  // NOTE(review): presumably only consulted when
  // scoped_allocator_optimization is enabled — confirm against the optimizer.
  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing the
  // meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration options
  // (the auto_parallel field) apply to manually requested optimization passes
  // ("autoparallel"). Memory optimization passes ("memory") invoked here are
  // not configurable (in contrast to memory optimization passes through the
  // meta-optimizer) and act only on manual op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run after - in the order that they were specified.
  repeated string optimizers = 100;

  // Message to describe custom graph optimizer and its parameters
  message CustomGraphOptimizer {
    // Name of the custom graph optimizer.
    // NOTE(review): presumably the name the optimizer is registered under —
    // confirm against the optimizer registry.
    string name = 1;
    // String-keyed AttrValue parameters for the optimizer. Map iteration
    // order is undefined, so consumers must not rely on it.
    map<string, AttrValue> parameter_map = 2;
  }

  // list of CustomGraphOptimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
}