1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // Contains utilities for clustering compilable graph nodes via XLA. 17 18 #ifndef TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ 19 #define TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ 20 21 #include "absl/types/optional.h" 22 #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" 23 #include "tensorflow/compiler/xla/statusor.h" 24 #include "tensorflow/core/common_runtime/optimization_registry.h" 25 #include "tensorflow/core/graph/algorithm.h" 26 #include "tensorflow/stream_executor/lib/statusor.h" 27 28 namespace tensorflow { 29 30 // The attribute that marks nodes to be grouped into functions by the 31 // encapsulate subgraphs pass. 32 extern const char* const kXlaClusterAttr; 33 34 // The attribute that marks nodes in a cluster to be placed outside the xla 35 // compilation by the encapsulate subgraphs pass. 36 extern const char* const kXlaOutsideCompilationAttr; 37 38 // The attribute that marks certain inputs to a Node as required to be a 39 // constant at compile time. If this attribute is present then the 40 // CompileTimeConstantInput information in the corresponding XlaOpKernel is 41 // ignored. 42 // 43 // The value for this attribute, if present, has to be a list of strings naming 44 // the inputs to the node that must be constant. 45 extern const char* const kXlaCompileTimeConstantInputsAttr; 46 47 using OrderedNodeSet = std::set<Node*, NodeComparatorID>; 48 49 // Returns the DeviceType corresponding to 'device'. 50 Status DeviceToDeviceType(const string& device, DeviceType* device_type); 51 52 // Returns true if `node` has a ref tensor input that it forwards to its output. 53 bool HasForwardedRefInput(const Node& node); 54 55 // Creates a graph representation to enable cycle detection when clustering. 56 // This representation handles loops in graph by disconnecting each loop from 57 // the enclosing graph. 58 // 59 // Returns true for success and false for valid graphs that we can't handle yet 60 // (b/127521408). 61 xla::StatusOr<bool> CreateCycleDetectionGraph(const Graph* graph, 62 GraphCycles* cycles); 63 64 // Returns the XLA cluster in which `node` is placed if it is in an XLA cluster, 65 // otherwise returns nullopt. 66 absl::optional<absl::string_view> GetXlaClusterForNode(const Node& node); 67 68 // Removes `node_def` its XLA cluster (by clearing its _XlaCluster attribute). 69 void RemoveFromXlaCluster(NodeDef* node_def); 70 71 // Removes `node` its XLA cluster (by clearing its _XlaCluster attribute). 72 void RemoveFromXlaCluster(Node* node); 73 74 // Returns true if `node` has a DT_RESOURCE typed input or output. 75 bool HasResourceInputOrOutput(const Node& node); 76 77 // Adds edges to `cycles` to prevent clustering resource operations that cannot 78 // be legally clustered. 79 Status AdjustCycleDetectionGraphForResourceOps( 80 const Graph* graph, const FunctionLibraryDefinition* flib_def, 81 const std::function<Status(const Node&, bool*)>& resource_ops_to_ignore, 82 GraphCycles* cycles); 83 84 // Picks the device for which XLA should compile a cluster that contains 85 // operations placed in devices in `device_names`. For instance a cluster that 86 // contains operations solely placed on the CPU will be compiled into a CPU 87 // executable by XLA, whereas a cluster that contains operations placed on the 88 // CPU and also operations placed on the GPU will be compiled into a GPU 89 // executable. 90 // 91 // Returns a non-OK Status if no unambiguous choice of device exists. 92 // 93 // We choose the device using the following rules: 94 // 95 // - It is an error for `device_names` to contain more than one device of the 96 // same type. 97 // - GPU is preferred over CPU. 98 // - If `allow_mixing_unknown_and_cpu` is true then unknown devices are 99 // preferred over CPU. 100 // - XLA devices count as "unrecognized devices". 101 // 102 // This set of rules above implicitly assume that XLA:GPU can compile all 103 // operations in the cluster that XLA:CPU can compile, and if 104 // `allow_mixing_unknown_and_cpu` then the unrecognized device can also compile 105 // all operations in the cluster that XLA:CPU can compile. 106 // 107 // We provide the `allow_mixing_unknown_and_cpu` knob so that we can do both of 108 // the following things: 109 // 110 // - Let MarkForCompilationPass not inject CPU-placed operations into clusters 111 // that will run on unknown devices (because the unknown XLA backend may not 112 // support every operation supported by CPU). 113 // - Let BuildXlaOpsPass successfully infer a compilation device for a cluster 114 // that contains nodes placed on both the CPU and on unknown devices. In this 115 // case it is the responsibility of the optimization pass that injected the 116 // CPU nodes into the cluster to ensure that these nodes can be compiled by 117 // the unknown XLA backend. 118 Status PickDeviceForXla(absl::Span<const string> device_names, 119 bool allow_mixing_unknown_and_cpu, 120 string* out_device_picked); 121 122 // This is like `PickDeviceForXla` except that it returns false (instead of a 123 // non-OK Status) in `out_can_pick_device` if no unambiguous choice of device 124 // exists. 125 Status CanPickDeviceForXla(absl::Span<const string> device_names, 126 bool allow_mixing_unknown_and_cpu, 127 bool* out_can_pick_device); 128 129 // Determine the global jit level which is ON if either the 130 // GraphOptimizationPassOptions has the jit ON, or if the --tf_xla_auto_jit flag 131 // is true. 132 OptimizerOptions::GlobalJitLevel GetGlobalJitLevel( 133 const GraphOptimizationPassOptions& options); 134 135 } // namespace tensorflow 136 137 #endif // TENSORFLOW_COMPILER_JIT_XLA_CLUSTER_UTIL_H_ 138