/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_TFRT_TRANSLATE_TFRT_COMPILE_OPTIONS_H_
#define TENSORFLOW_COMPILER_MLIR_TFRT_TRANSLATE_TFRT_COMPILE_OPTIONS_H_

#include <cstdint>
#include <iosfwd>
#include <string>
#include <vector>

namespace tensorflow {

enum class TfrtTpuInfraTarget {
  kNoTpu,           // No TPU support.
  kTpurt,           // Target TPURT dialect and kernels.
  kTfFallback,      // Target TPU kernels in TF Fallback.
  kBridgeFallback,  // TPU support, choosing kTpurt or kTfFallback depending on
                    // whether the graph has unsupported features in Bridge.
};

std::ostream& operator<<(std::ostream& os, TfrtTpuInfraTarget tpu_target);
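
// For example (a hypothetical snippet; the exact text printed depends on the
// operator's definition in the corresponding .cc file), a target can be
// logged for debugging:
//
//   std::cout << TfrtTpuInfraTarget::kNoTpu;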

struct TfrtCompileOptions {
  // TODO(tfrt-devs): Ideally, the compiler should decide where to place
  // variables.
  std::string variable_device = "/job:localhost/replica:0/task:0/device:CPU:0";
  std::string default_device = "/job:localhost/replica:0/task:0/device:CPU:0";

  // Enable compiler optimizations in the TFRT dialect.
  bool enable_optimizer = true;

  // If true, native ops will be used when they are implemented in TFRT. If
  // false, all ops use the fallback.
  //
  // This option is experimental. Native ops are still under development and
  // are likely to cause performance issues when enabled.
  bool enable_native_ops = false;

  // If true, run Grappler passes before compiling.
  bool enable_grappler = true;

  // Force the data format for all layout-sensitive operations, e.g. setting
  // it to "NHWC" changes all data formats in the graph to "NHWC" by inserting
  // or removing the relevant tf.Transpose ops. Currently the supported
  // formats are "NHWC" and "NCHW".
  //
  // TODO(tfrt-devs): Ideally, the compiler should figure out whether the
  // data format should be changed, instead of leaving it to users.
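  //
  // For illustration (a hypothetical snippet, assuming a TfrtCompileOptions
  // instance named `options`):
  //
  //   options.force_data_format = "NHWC";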
  std::string force_data_format;

  // The target TPU infrastructure to use. This triggers TPU-target-specific
  // compiler passes and runtime initialization.
  TfrtTpuInfraTarget tpu_target = TfrtTpuInfraTarget::kNoTpu;

  // If true, use the fused TPU compile_and_execute kernel, which performs all
  // TPU-inference-related operations, e.g. core selection, H2D/D2H transfers,
  // compilation, and execution.
  bool tpu_fuse_ops = false;

  // If true, resource gather ops in the device graph are moved to host graphs
  // in order to save TPU memory. This option is experimental.
  bool tpu_move_resource_gather_to_host = false;

  // The threshold in bytes that controls whether a resource gather op on TPU
  // should be moved to the host. A negative value means there is no
  // threshold. This option is experimental.
  int64_t tpu_gather_table_width_threshold_bytes = -1;

  // If true, fallback ExecuteOps that produce inputs to the TPU program will
  // use the TPU host allocator. This option is experimental.
  bool use_tpu_host_allocator_for_inputs = false;

  // If true, the compiler will try to hoist invariant ops (e.g., const ops
  // and their non-side-effecting consumers) to the loading phase, which
  // avoids their cost during later execution.
  // TODO(tfrt-devs): Set the default value to true after testing, as it is
  // supposed to be turned on by default.
  bool hoist_invariant_ops = false;

  // If true, tf.While's iterations will be parallelized on a best-effort
  // basis. This is currently experimental.
  bool enable_while_parallel_iterations = false;

  // A set of flags to control auto-fusion: automatic clustering of TensorFlow
  // operations and compilation of the outlined regions using the MLIR-based
  // compilation stack.
  //
  // WARNING: These flags are experimental and are intended for manual testing
  // of different auto-fusion strategies. They will be removed in the future.

  // A list of TensorFlow operations that are supported by auto-fusion
  // clustering and compilation (e.g. tf.Tanh).
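  //
  // For illustration (a hypothetical snippet; which ops are actually
  // supported depends on the compilation stack):
  //
  //   options.auto_fusion_oplist = {"tf.Tanh"};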
  std::vector<std::string> auto_fusion_oplist;

  // Minimum size of the cluster to be compiled at runtime.
  int auto_fusion_min_cluster_size = 2;

  // The cost threshold that decides whether a sequence of operations is cheap
  // and can therefore be executed inline. If the cost is smaller than the
  // threshold, the operations are considered cheap. Since costs are positive
  // integers, setting the threshold to 1 makes all operations expensive.
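  //
  // Illustrative example (the costs here are hypothetical): with
  // cost_threshold = 10, a sequence with total cost 7 is considered cheap and
  // may be executed inline, while a sequence with cost 12 is not.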
  uint64_t cost_threshold = 1;

  // The threshold to decide whether an inline execution sequence is too
  // large, even if the operations form a sequential data dependency, as it
  // may occupy the CPU core for too long. In that case, the sequence is
  // broken into multiple smaller ones. The default is -1, which means no
  // limit.
  int64_t upper_cost_threshold = -1;

  // If true, streams with data dependencies between them will preferably be
  // merged for inline execution.
  bool merge_inter_dependent_streams = false;

  // Whether to enable the DecomposeResourceOpsPass.
  bool decompose_resource_ops = true;

  // Whether to compile to the sync TFRT dialect.
  bool compile_to_sync_tfrt_dialect = false;
};
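
// A minimal usage sketch (hypothetical; the particular values are assumptions
// chosen for illustration, not recommendations):
//
//   TfrtCompileOptions options;
//   options.tpu_target = TfrtTpuInfraTarget::kNoTpu;
//   options.force_data_format = "NHWC";
//   options.hoist_invariant_ops = true;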

}  // namespace tensorflow

#endif  // TENSORFLOW_COMPILER_MLIR_TFRT_TRANSLATE_TFRT_COMPILE_OPTIONS_H_