syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/attr_value.proto";
import "tensorflow/core/protobuf/verifier_config.proto";

option cc_enable_arenas = true;
option java_outer_classname = "RewriterConfigProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

message AutoParallelOptions {
  bool enable = 1;
  int32 num_replicas = 2;
}
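
// Example (text-format sketch, not part of this file): these options are set
// through the auto_parallel field of RewriterConfig, defined below.
//   auto_parallel { enable: true num_replicas: 2 }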

message ScopedAllocatorOptions {
  // If present, only perform optimization for these ops.
  repeated string enable_op = 1;
}
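
// Example (text-format sketch; "CollectiveReduce" is an assumed op name, not
// something this file prescribes): restrict the scoped allocator pass to a
// single op type via the scoped_allocator_opts field of RewriterConfig.
//   scoped_allocator_opts { enable_op: "CollectiveReduce" }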

message RewriterConfig {
  // Graph rewriting is experimental and subject to change, not covered by any
  // API stability guarantees.

  // Configuration options for the meta-optimizer. Unless otherwise noted, these
  // configuration options do not apply to explicitly triggered optimization
  // passes in the optimizers field.

  enum Toggle {
    DEFAULT = 0;
    ON = 1;
    OFF = 2;
    // Enable aggressive optimizations that rely on assumptions which some TF
    // graphs may violate. For example, assume the shape of a placeholder
    // matches its actual feed.
    AGGRESSIVE = 3;
  }
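
  // Example (text-format sketch): each Toggle-typed field below accepts these
  // values directly; for instance, forcing a single pass off while leaving the
  // rest at their defaults:
  //   constant_folding: OFF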

  // Enum for layout conversion between NCHW and NHWC on CPU. Default is
  // NO_CONVERSION_ON_CPU.
  enum CpuLayout {
    NO_CONVERSION_ON_CPU = 0;
    NCHW_TO_NHWC = 1;
    NHWC_TO_NCHW = 2;
  }
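
  // Example (text-format sketch): request NCHW-to-NHWC conversion on CPU via
  // the cpu_layout_conversion field below.
  //   cpu_layout_conversion: NCHW_TO_NHWC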

  // Enum controlling the number of times to run optimizers. The default is to
  // run them twice.
  enum NumIterationsType {
    DEFAULT_NUM_ITERS = 0;
    ONE = 1;
    TWO = 2;
  }
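
  // Example (text-format sketch): run the optimizers a single time via the
  // meta_optimizer_iterations field below.
  //   meta_optimizer_iterations: ONE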

  // CPU conversion settings between NHWC and NCHW.
  CpuLayout cpu_layout_conversion = 50;

  // Optimize tensor layouts (default is ON)
  // e.g. This will try to use NCHW layout on GPU, which is faster.
  Toggle layout_optimizer = 1;
  // Fold constants (default is ON)
  // Statically infer the value of tensors when possible, and materialize the
  // result using constants.
  Toggle constant_folding = 3;
  // Shape optimizations (default is ON)
  // Simplify computations made on shapes.
  Toggle shape_optimization = 13;
  // Remapping (default is ON)
  // Remap subgraphs onto more efficient implementations.
  Toggle remapping = 14;
  // Common subgraph elimination (default is ON)
  // e.g. Merge ops with the same value (like constants).
  Toggle common_subgraph_elimination = 24;
  // Arithmetic optimizations (default is ON)
  // e.g. Simplify arithmetic ops; merge ops with same value (like constants).
  Toggle arithmetic_optimization = 7;
  // Control dependency optimizations (default is ON).
  // Remove redundant control dependencies, which may enable other
  // optimizations.
  Toggle dependency_optimization = 8;
  // Loop optimizations (default is ON).
  Toggle loop_optimization = 9;
  // Function optimizations (default is ON).
  Toggle function_optimization = 10;
  // Strips debug-related nodes from the graph (off by default).
  Toggle debug_stripper = 11;
  // If true, don't remove unnecessary ops from the graph.
  bool disable_model_pruning = 2;
  // Try to allocate some independent Op outputs contiguously in order to
  // merge or eliminate downstream Ops (off by default).
  Toggle scoped_allocator_optimization = 15;
  // Force small ops onto the CPU (default is OFF).
  Toggle pin_to_host_optimization = 18;
  // Enable the swap of kernel implementations based on the device placement
  // (default is ON).
  Toggle implementation_selector = 22;
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPU, which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
  // Optimize data types for MKL (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_mkl = 25;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;
  // Optimizers registered by plugin (default is ON)
  Toggle use_plugin_optimizers = 28;

  // Controls how many times we run the optimizers in the meta optimizer
  // (default is twice; see NumIterationsType above).
  NumIterationsType meta_optimizer_iterations = 12;

  // The minimum number of nodes in a graph for the optimizer to run. For
  // smaller graphs, optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;
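
  // Example (text-format sketch): never skip optimization, even for very
  // small graphs.
  //   min_graph_nodes: -1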

  // Disable optimizations that assume compressed tensors. Note that this flag
  // is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding quantization emulation ops such as FakeQuantWithMinMax* and
  // QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite converter)
  // have to extract quantization configs (e.g. min/max range, number of bits,
  // and per-channel) from the quantization emulation ops. Note that this flag
  // is experimental and may be removed in the future. See b/174138564 for more
  // details.
  bool experimental_disable_folding_quantization_emulation = 27;

  enum MemOptType {
    // The default setting (SCHEDULING_HEURISTICS and SWAPPING_HEURISTICS only).
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // Driven by heuristics. The behavior of these heuristics is subject to
    // change. Currently includes experimental recomputation and swapping
    // heuristics. Manual annotations are respected, but additional nodes are
    // selected automatically.

    // The swapping heuristic will move a tensor from the GPU to the CPU and
    // move it back when needed to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // The recomputation heuristic will recompute ops (such as Relu activation)
    // during backprop instead of storing them, reducing peak memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // Scheduling will split big ops such as AddN and try to enforce a schedule
    // of the new computations that decreases peak memory usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
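
  // Example (text-format sketch): opt into the combined swapping and
  // recomputation heuristics via the memory_optimization field below.
  //   memory_optimization: HEURISTICS
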
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the optimizers
  // field.
  MemOptType memory_optimization = 4;
  // A node name scope for node names which are valid outputs of recomputations.
  // Inputs to nodes that match this scope may be recomputed (subject either to
  // manual annotation of those input nodes or to manual annotation and
  // heuristics depending on memory_optimization), but the nodes themselves will
  // not be recomputed. This matches any sub-scopes as well, meaning the scope
  // can appear not just as a top-level scope. For example, if the value is
  // "gradients/" (the default), it will match node names "gradients/foo" and
  // "foo/gradients/bar", but not "foo_gradients/".
  string memory_optimizer_target_node_name_scope = 6;
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If less than or equal to 0 (the default value), the optimizer
  // will never time out.
  int64 meta_optimizer_timeout_ms = 20;

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any optimization pass failing will cause the MetaOptimizer to
  // stop with an error. By default (or when set to false), failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing the
  // meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration options
  // (the auto_parallel field) apply to manually requested optimization passes
  // ("autoparallel"). Memory optimization passes ("memory") invoked here are
  // not configurable (in contrast to memory optimization passes through the
  // meta-optimizer) and act only on manual op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run after, in the order in which they were specified.
  repeated string optimizers = 100;
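
  // Example (text-format sketch, using the pass names mentioned in the
  // comment above): replace the meta-optimizer with an explicit schedule.
  //   optimizers: "autoparallel"
  //   optimizers: "memory"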

  // Message to describe a custom graph optimizer and its parameters.
  message CustomGraphOptimizer {
    string name = 1;
    map<string, AttrValue> parameter_map = 2;
  }

  // List of CustomGraphOptimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;
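
  // Example (text-format sketch; "MyOptimizer" and its parameter are
  // hypothetical): parameters are AttrValue messages, so an integer argument
  // is passed through the i field of AttrValue.
  //   custom_optimizers {
  //     name: "MyOptimizer"
  //     parameter_map {
  //       key: "max_depth"
  //       value { i: 3 }
  //     }
  //   }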

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
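
  // Example (text-format sketch, assuming the structure_verifier field defined
  // in verifier_config.proto): check graph structure after every optimizer.
  //   inter_optimizer_verifier_config { structure_verifier: ON }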
}
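
// Example (text-format sketch; graph_options and rewrite_options are the
// field names under which RewriterConfig is embedded in ConfigProto, defined
// in tensorflow/core/protobuf/config.proto): a typical end-to-end setup.
//   graph_options {
//     rewrite_options {
//       constant_folding: OFF
//       memory_optimization: SWAPPING_HEURISTICS
//       min_graph_nodes: -1
//     }
//   }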