syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/attr_value.proto";
import "tensorflow/core/protobuf/verifier_config.proto";

option cc_enable_arenas = true;
option java_outer_classname = "RewriterConfigProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

message AutoParallelOptions {
  bool enable = 1;
  int32 num_replicas = 2;
}
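
// Illustrative only: as a textproto, an AutoParallelOptions message that
// replicates the graph across two replicas would read
//   enable: true
//   num_replicas: 2
// (consumed via the RewriterConfig.auto_parallel field below).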

message ScopedAllocatorOptions {
  // If present, only perform optimization for these ops.
  repeated string enable_op = 1;
}
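
// Illustrative only: enable_op is repeated, so optimization can be limited to
// specific ops; the op name below is an assumption for the sketch, not part
// of the schema:
//   enable_op: "CollectiveReduce"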

message RewriterConfig {
  // Graph rewriting is experimental and subject to change, not covered by any
  // API stability guarantees.

  // Configuration options for the meta-optimizer. Unless otherwise noted, these
  // configuration options do not apply to explicitly triggered optimization
  // passes in the optimizers field.

  enum Toggle {
    DEFAULT = 0;
    ON = 1;
    OFF = 2;
    // Enable some aggressive optimizations that rely on assumptions which TF
    // graphs may violate. For example, assume that the shape of a placeholder
    // matches its actual feed.
    AGGRESSIVE = 3;
    // Run the MLIR pass if one is implemented in TFG; do nothing otherwise,
    // i.e. if there is no corresponding TFG pass this behaves like OFF. This
    // is meant to map to `ON`; there is currently no `AGGRESSIVE` MLIR pass.
    EXPERIMENTAL_MLIR = 4;
    // Run both the MLIR and Grappler passes consecutively; the MLIR pass
    // runs first.
    EXPERIMENTAL_BOTH = 5;
  }
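
  // Illustrative only: any Toggle-typed field below can be set by enum name
  // in a textproto, e.g.
  //   layout_optimizer: OFF
  //   debug_stripper: ON
  // Leaving a field unset (DEFAULT) keeps the optimizer's built-in default.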

  // Enum for layout conversion between NCHW and NHWC on CPU. The default
  // (NO_CONVERSION_ON_CPU) performs no conversion.
  enum CpuLayout {
    NO_CONVERSION_ON_CPU = 0;
    NCHW_TO_NHWC = 1;
    NHWC_TO_NCHW = 2;
  }
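
  // Illustrative only: converting NCHW ops to NHWC on CPU in a textproto:
  //   cpu_layout_conversion: NCHW_TO_NHWC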

  // Enum controlling the number of times to run optimizers. The default is to
  // run them twice.
  enum NumIterationsType {
    DEFAULT_NUM_ITERS = 0;
    ONE = 1;
    TWO = 2;
  }

  // CPU conversion settings between NHWC and NCHW.
  CpuLayout cpu_layout_conversion = 50;

  // Optimize tensor layouts (default is ON)
  // e.g. this will try to use the NCHW layout on GPUs, which is faster.
  Toggle layout_optimizer = 1;
  // Fold constants (default is ON)
  // Statically infer the value of tensors when possible, and materialize the
  // result using constants.
  Toggle constant_folding = 3;
  // Shape optimizations (default is ON)
  // Simplify computations made on shapes.
  Toggle shape_optimization = 13;
  // Remapping (default is ON)
  // Remap subgraphs onto more efficient implementations.
  Toggle remapping = 14;
  // Common subgraph elimination (default is ON)
  // e.g. merge ops that compute the same value (like identical constants).
  Toggle common_subgraph_elimination = 24;
  // Arithmetic optimizations (default is ON)
  // e.g. simplify arithmetic ops and expressions.
  Toggle arithmetic_optimization = 7;
  // Control dependency optimizations (default is ON).
  // Remove redundant control dependencies, which may enable other
  // optimizations.
  Toggle dependency_optimization = 8;
  // Loop optimizations (default is ON).
  Toggle loop_optimization = 9;
  // Function optimizations (default is ON).
  Toggle function_optimization = 10;
  // Strips debug-related nodes from the graph (off by default).
  Toggle debug_stripper = 11;
  // If true, don't remove unnecessary ops from the graph.
  bool disable_model_pruning = 2;
  // Try to allocate some independent Op outputs contiguously in order to
  // merge or eliminate downstream Ops (off by default).
  Toggle scoped_allocator_optimization = 15;
  // Force small ops onto the CPU (default is OFF).
  Toggle pin_to_host_optimization = 18;
  // Enable the swap of kernel implementations based on the device placement
  // (default is ON).
  Toggle implementation_selector = 22;
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPUs, which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
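
  // Illustrative only: enabling GPU mixed precision in a textproto is just
  //   auto_mixed_precision: ON
  // (subject to the loss-scaling caveat above).
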
  // Optimize data types for oneDNN (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  // Note: this option is deprecated; it is replaced by
  // auto_mixed_precision_onednn_bfloat16.
  Toggle auto_mixed_precision_mkl = 25;
  // Optimize data types for oneDNN (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  // Note: this is equivalent to the deprecated option
  // auto_mixed_precision_mkl.
  Toggle auto_mixed_precision_onednn_bfloat16 = 31;
  // Emulate a model using data type float16 on CPU (default is OFF).
  // This will try to emulate the float16 inputs and outputs of an operator
  // on CPU to correlate better with float16 on GPU; however, the computation
  // inside the operator is still performed in float32.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_cpu = 29;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;
  // Optimizers registered by plugin (default is ON)
  Toggle use_plugin_optimizers = 28;
  // Conditional code motion (default is ON).
  Toggle experimental_conditional_code_motion = 30;

  // Controls how many times we run the optimizers in the meta optimizer
  // (default is twice, per NumIterationsType above).
  NumIterationsType meta_optimizer_iterations = 12;
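
  // Illustrative only: to run the meta-optimizer's passes only once:
  //   meta_optimizer_iterations: ONE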

  // The minimum number of nodes in a graph for the optimizer to run on it.
  // For smaller graphs, optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;
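
  // Illustrative only: forcing optimization even for very small graphs:
  //   min_graph_nodes: -1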

  // Disable optimizations that assume compressed tensors. Note that this flag
  // is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding quantization emulation ops such as FakeQuantWithMinMax* and
  // QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite converter)
  // have to extract quantization configs (e.g. min/max range, number of bits,
  // and per-channel) from the quantization emulation ops. Note that this flag
  // is experimental and may be removed in the future. See b/174138564 for more
  // details.
  bool experimental_disable_folding_quantization_emulation = 27;

  enum MemOptType {
    // The default setting (currently SCHEDULING_HEURISTICS and
    // SWAPPING_HEURISTICS only).
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // Driven by heuristics. The behavior of these heuristics is subject to
    // change. Currently includes experimental recomputation and swapping
    // heuristics. Manual annotations are respected, but additional nodes are
    // selected automatically.

    // The swapping heuristic will move a tensor from the GPU to the CPU and
    // move it back when needed, to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // The recomputation heuristic will recompute ops (such as Relu
    // activations) during backprop instead of storing them, reducing peak
    // memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // Scheduling will split big ops such as AddN and try to enforce a schedule
    // for the new computations that decreases peak memory usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the optimizers
  // field.
  MemOptType memory_optimization = 4;
  // A node name scope for node names which are valid outputs of recomputations.
  // Inputs to nodes that match this scope may be recomputed (subject either to
  // manual annotation of those input nodes or to manual annotation and
  // heuristics depending on memory_optimization), but the nodes themselves will
  // not be recomputed. This matches any sub-scopes as well, meaning the scope
  // does not have to appear as a top-level scope. For example, if the value is
  // "gradients/" (the default), it will match the node names "gradients/foo"
  // and "foo/gradients/bar", but not "foo_gradients/bar".
  string memory_optimizer_target_node_name_scope = 6;
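
  // Illustrative only: enabling recomputation for the default gradient scope
  // in a textproto:
  //   memory_optimization: RECOMPUTATION_HEURISTICS
  //   memory_optimizer_target_node_name_scope: "gradients/"
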
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If less than or equal to 0 (the default), the optimizer will
  // never time out.
  int64 meta_optimizer_timeout_ms = 20;
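
  // Illustrative only: giving the meta-optimizer a one-minute budget:
  //   meta_optimizer_timeout_ms: 60000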

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any optimization pass failing will cause the MetaOptimizer to
  // stop with an error. By default, or when set to false, failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing the
  // meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration options
  // (the auto_parallel field) apply to manually requested optimization passes
  // ("autoparallel"). Memory optimization passes ("memory") invoked here are
  // not configurable (in contrast to memory optimization passes through the
  // meta-optimizer) and act only on manual op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run afterwards, in the order in which they were
  // specified.
  repeated string optimizers = 100;
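
  // Illustrative only, using the pass names mentioned above ("memory",
  // "autoparallel"); the set of registered names is an implementation detail
  // of the runtime:
  //   optimizers: "memory"
  //   optimizers: "autoparallel"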

  // Message to describe a custom graph optimizer and its parameters.
  message CustomGraphOptimizer {
    string name = 1;
    map<string, AttrValue> parameter_map = 2;
  }

  // List of CustomGraphOptimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;
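
  // Illustrative only: "MyOptimizer" and its parameter are hypothetical; the
  // map values use AttrValue fields (here `i`, its integer field):
  //   custom_optimizers {
  //     name: "MyOptimizer"
  //     parameter_map {
  //       key: "opt_level"
  //       value { i: 2 }
  //     }
  //   }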

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
}
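
// Illustrative only: a RewriterConfig is typically consumed embedded in a
// ConfigProto as graph_options.rewrite_options. A textproto sketch combining
// several of the fields above:
//   rewrite_options {
//     layout_optimizer: ON
//     memory_optimization: SWAPPING_HEURISTICS
//     meta_optimizer_iterations: TWO
//     auto_parallel { enable: true num_replicas: 2 }
//   }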