• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // Contains utilities for clustering compilable graph nodes via XLA.
17 
18 #ifndef TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_
19 #define TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_
20 
21 #include "absl/types/optional.h"
22 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h"
23 #include "tensorflow/compiler/xla/statusor.h"
24 #include "tensorflow/core/common_runtime/optimization_registry.h"
25 #include "tensorflow/core/graph/algorithm.h"
26 #include "tensorflow/stream_executor/lib/statusor.h"
27 
28 namespace tensorflow {
29 
30 // The attribute that marks nodes to be grouped into functions by the
31 // encapsulate subgraphs pass.
32 extern const char* const kXlaClusterAttr;
33 
34 // The attribute that marks nodes in a cluster to be placed outside the xla
35 // compilation by the encapsulate subgraphs pass.
36 extern const char* const kXlaOutsideCompilationAttr;
37 
38 // The attribute that marks certain inputs to a Node as required to be a
39 // constant at compile time.  If this attribute is present then the
40 // CompileTimeConstantInput information in the corresponding XlaOpKernel is
41 // ignored.
42 //
43 // The value for this attribute, if present, has to be a list of strings naming
44 // the inputs to the node that must be constant.
45 extern const char* const kXlaCompileTimeConstantInputsAttr;
46 
47 using OrderedNodeSet = std::set<Node*, NodeComparatorID>;
48 
49 // Returns the DeviceType corresponding to 'device'.
50 Status DeviceToDeviceType(const string& device, DeviceType* device_type);
51 
52 // Returns true if `node` has a ref tensor input that it forwards to its output.
53 bool HasForwardedRefInput(const Node& node);
54 
55 // Creates a graph representation to enable cycle detection when clustering.
56 // This representation handles loops in the graph by disconnecting each loop
57 // from the enclosing graph.
58 //
59 // Returns true for success and false for valid graphs that we can't handle yet
60 // (b/127521408).
61 xla::StatusOr<bool> CreateCycleDetectionGraph(const Graph* graph,
62                                               GraphCycles* cycles);
63 
64 // Returns the XLA cluster in which `node` is placed if it is in an XLA cluster,
65 // otherwise returns nullopt.
66 absl::optional<absl::string_view> GetXlaClusterForNode(const Node& node);
67 
68 // Removes `node_def` from its XLA cluster (by clearing its _XlaCluster
69 // attribute).
void RemoveFromXlaCluster(NodeDef* node_def);
70 
71 // Removes `node` from its XLA cluster (by clearing its _XlaCluster attribute).
72 void RemoveFromXlaCluster(Node* node);
73 
74 // Returns true if `node` has a DT_RESOURCE typed input or output.
75 bool HasResourceInputOrOutput(const Node& node);
76 
77 // Adds edges to `cycles` to prevent clustering resource operations that cannot
78 // be legally clustered.
79 Status AdjustCycleDetectionGraphForResourceOps(
80     const Graph* graph, const FunctionLibraryDefinition* flib_def,
81     const std::function<Status(const Node&, bool*)>& resource_ops_to_ignore,
82     GraphCycles* cycles);
83 
84 // Picks the device for which XLA should compile a cluster that contains
85 // operations placed in devices in `device_names`.  For instance a cluster that
86 // contains operations solely placed on the CPU will be compiled into a CPU
87 // executable by XLA, whereas a cluster that contains operations placed on the
88 // CPU and also operations placed on the GPU will be compiled into a GPU
89 // executable.
90 //
91 // Returns a non-OK Status if no unambiguous choice of device exists.
92 //
93 // We choose the device using the following rules:
94 //
95 //  - It is an error for `device_names` to contain more than one device of the
96 //    same type.
97 //  - GPU is preferred over CPU.
98 //  - If `allow_mixing_unknown_and_cpu` is true then unknown devices are
99 //    preferred over CPU.
100 //  - XLA devices count as "unknown devices".
101 //
102 // This set of rules implicitly assumes that XLA:GPU can compile all
103 // operations in the cluster that XLA:CPU can compile, and if
104 // `allow_mixing_unknown_and_cpu` then the unknown device can also compile
105 // all operations in the cluster that XLA:CPU can compile.
106 //
107 // We provide the `allow_mixing_unknown_and_cpu` knob so that we can do both of
108 // the following things:
109 //
110 // - Let MarkForCompilationPass not inject CPU-placed operations into clusters
111 //   that will run on unknown devices (because the unknown XLA backend may not
112 //   support every operation supported by CPU).
113 // - Let BuildXlaOpsPass successfully infer a compilation device for a cluster
114 //   that contains nodes placed on both the CPU and on unknown devices.  In this
115 //   case it is the responsibility of the optimization pass that injected the
116 //   CPU nodes into the cluster to ensure that these nodes can be compiled by
117 //   the unknown XLA backend.
118 Status PickDeviceForXla(absl::Span<const string> device_names,
119                         bool allow_mixing_unknown_and_cpu,
120                         string* out_device_picked);
121 
122 // This is like `PickDeviceForXla` except that it returns false (instead of a
123 // non-OK Status) in `out_can_pick_device` if no unambiguous choice of device
124 // exists.
125 Status CanPickDeviceForXla(absl::Span<const string> device_names,
126                            bool allow_mixing_unknown_and_cpu,
127                            bool* out_can_pick_device);
128 
129 // Determines the global jit level, which is ON if either the
130 // GraphOptimizationPassOptions has the jit ON, or if the --tf_xla_auto_jit flag
131 // is true.
132 OptimizerOptions::GlobalJitLevel GetGlobalJitLevel(
133     const GraphOptimizationPassOptions& options);
134 
135 }  // namespace tensorflow
136 
137 #endif  // TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_
138