1syntax = "proto3"; 2 3package tensorflow; 4 5import "tensorflow/core/framework/attr_value.proto"; 6import "tensorflow/core/protobuf/verifier_config.proto"; 7 8option cc_enable_arenas = true; 9option java_outer_classname = "RewriterConfigProtos"; 10option java_multiple_files = true; 11option java_package = "org.tensorflow.framework"; 12option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; 13 14message AutoParallelOptions { 15 bool enable = 1; 16 int32 num_replicas = 2; 17} 18 19message ScopedAllocatorOptions { 20 // If present, only perform optimization for these ops. 21 repeated string enable_op = 1; 22} 23 24message RewriterConfig { 25 // Graph rewriting is experimental and subject to change, not covered by any 26 // API stability guarantees. 27 28 // Configuration options for the meta-optimizer. Unless otherwise noted, these 29 // configuration options do not apply to explicitly triggered optimization 30 // passes in the optimizers field. 31 32 enum Toggle { 33 DEFAULT = 0; 34 ON = 1; 35 OFF = 2; 36 // Enable some aggressive optimizations that use assumptions that TF graphs 37 // may break. For example, assume the shape of a placeholder matches its 38 // actual feed. 39 AGGRESSIVE = 3; 40 } 41 42 // Enum for layout conversion between NCHW and NHWC on CPU. Default is OFF. 43 enum CpuLayout { 44 NO_CONVERSION_ON_CPU = 0; 45 NCHW_TO_NHWC = 1; 46 NHWC_TO_NCHW = 2; 47 } 48 49 // Enum controlling the number of times to run optimizers. The default is to 50 // run them twice. 51 enum NumIterationsType { 52 DEFAULT_NUM_ITERS = 0; 53 ONE = 1; 54 TWO = 2; 55 } 56 57 // CPU Conversion settings between NHCW and NCHW. 58 CpuLayout cpu_layout_conversion = 50; 59 60 // Optimize tensor layouts (default is ON) 61 // e.g. This will try to use NCHW layout on GPU which is faster. 62 Toggle layout_optimizer = 1; 63 // Fold constants (default is ON) 64 // Statically infer the value of tensors when possible, and materialize the 65 // result using constants. 66 Toggle constant_folding = 3; 67 // Shape optimizations (default is ON) 68 // Simplify computations made on shapes. 69 Toggle shape_optimization = 13; 70 // Remapping (default is ON) 71 // Remap subgraphs onto more efficient implementations. 72 Toggle remapping = 14; 73 // Common subgraph elimination (default is ON) 74 // e.g. Simplify arithmetic ops; merge ops with same value (like constants). 75 Toggle common_subgraph_elimination = 24; 76 // Arithmetic optimizations (default is ON) 77 // e.g. Simplify arithmetic ops; merge ops with same value (like constants). 78 Toggle arithmetic_optimization = 7; 79 // Control dependency optimizations (default is ON). 80 // Remove redundant control dependencies, which may enable other optimization. 81 Toggle dependency_optimization = 8; 82 // Loop optimizations (default is ON). 83 Toggle loop_optimization = 9; 84 // Function optimizations (default is ON). 85 Toggle function_optimization = 10; 86 // Strips debug-related nodes from the graph (off by default). 87 Toggle debug_stripper = 11; 88 // If true, don't remove unnecessary ops from the graph 89 bool disable_model_pruning = 2; 90 // Try to allocate some independent Op outputs contiguously in order to 91 // merge or eliminate downstream Ops (off by default). 92 Toggle scoped_allocator_optimization = 15; 93 // Force small ops onto the CPU (default is OFF). 94 Toggle pin_to_host_optimization = 18; 95 // Enable the swap of kernel implementations based on the device placement 96 // (default is ON). 
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPU, which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
  // Optimize data types for MKL (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_mkl = 25;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;
  // Optimizers registered by plugin (default is ON).
  Toggle use_plugin_optimizers = 28;

  // Controls how many times we run the optimizers in the meta optimizer
  // (the default is to run them twice; see NumIterationsType above).
  NumIterationsType meta_optimizer_iterations = 12;

  // The minimum number of nodes a graph must have for the optimizers to
  // run. For smaller graphs, optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;

  // Disable optimizations that assume compressed tensors. Note that this
  // flag is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding quantization emulation ops such as FakeQuantWithMinMax*
  // and QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite
  // converter) have to extract quantization configs (e.g. min/max range,
  // number of bits, and per-channel) from the quantization emulation ops.
  // Note that this flag is experimental and may be removed in the future.
  // See b/174138564 for more details.
  bool experimental_disable_folding_quantization_emulation = 27;

  enum MemOptType {
    // The default setting (SCHEDULING_HEURISTICS and SWAPPING_HEURISTICS
    // only).
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // Driven by heuristics. The behavior of these heuristics is subject to
    // change. Currently includes experimental recomputation and swapping
    // heuristics. Manual annotations are respected, but additional nodes are
    // selected automatically.

    // The swapping heuristic will move a tensor from the GPU to the CPU and
    // move it back when needed to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // Recomputation heuristics will recompute ops (such as Relu activation)
    // during backprop instead of storing them, reducing peak memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // Scheduling will split big ops such as AddN and try to enforce a
    // schedule of the new computations that decreases peak memory usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the
  // optimizers field.
  MemOptType memory_optimization = 4;
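
  // For example (illustrative text format only): swapping-based memory
  // optimization with a single meta-optimizer pass and no small-graph
  // skipping might be expressed as:
  //
  //   memory_optimization: SWAPPING_HEURISTICS
  //   meta_optimizer_iterations: ONE
  //   min_graph_nodes: -1  # never skip optimization, even for small graphs
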
  // A node name scope for node names which are valid outputs of
  // recomputations. Inputs to nodes that match this scope may be recomputed
  // (subject either to manual annotation of those input nodes or to manual
  // annotation and heuristics, depending on memory_optimization), but the
  // nodes themselves will not be recomputed. This matches any sub-scopes as
  // well, meaning the scope can appear not just as a top-level scope. For
  // example, if the value is "gradients/" (the default), it will match the
  // node names "gradients/foo" and "foo/gradients/bar", but not
  // "foo_gradients/".
  string memory_optimizer_target_node_name_scope = 6;
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If less than or equal to 0 (the default value), the
  // optimizer will never time out.
  int64 meta_optimizer_timeout_ms = 20;

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any optimization pass failing will cause the MetaOptimizer to
  // stop with an error. By default, or when set to false, failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing
  // the meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration
  // options (the auto_parallel field) apply to manually requested
  // optimization passes ("autoparallel"). Memory optimization passes
  // ("memory") invoked here are not configurable (in contrast to memory
  // optimization passes through the meta-optimizer) and act only on manual
  // op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run afterwards, in the order in which they were
  // specified. See the illustrative example after this message.
  repeated string optimizers = 100;

  // Message to describe a custom graph optimizer and its parameters.
  message CustomGraphOptimizer {
    string name = 1;
    map<string, AttrValue> parameter_map = 2;
  }

  // List of custom graph optimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
}
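
// Putting it together: a hedged, illustrative text-format RewriterConfig
// using the manual optimizers schedule described above. "autoparallel" and
// "memory" are the pass names mentioned in the field comments;
// "MyCustomOptimizer" and its "threshold" parameter are hypothetical and
// assume a matching optimizer has been registered with Grappler under that
// name.
//
//   optimizers: "autoparallel"
//   optimizers: "memory"
//   auto_parallel { enable: true num_replicas: 2 }
//   custom_optimizers {
//     name: "MyCustomOptimizer"  # hypothetical
//     parameter_map {
//       key: "threshold"
//       value { i: 16 }
//     }
//   }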