1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 18 19 #include <memory> 20 #include <string> 21 #include <utility> 22 23 #include "absl/container/flat_hash_map.h" 24 #include "tensorflow/compiler/xla/service/call_graph.h" 25 #include "tensorflow/compiler/xla/service/hlo.pb.h" 26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" 27 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 28 #include "tensorflow/compiler/xla/service/hlo_module.h" 29 #include "tensorflow/compiler/xla/service/hlo_reachability.h" 30 #include "tensorflow/compiler/xla/service/hlo_schedule.h" 31 #include "tensorflow/compiler/xla/service/hlo_value.h" 32 #include "tensorflow/compiler/xla/types.h" 33 34 namespace xla { 35 36 // Base class for describing a partial ordering of HLO instructions. Used to 37 // determine live range overlap of HLO instruction output buffers. 38 class HloOrdering { 39 public: HloOrdering(const HloModule * module)40 explicit HloOrdering(const HloModule* module) 41 : module_(module), call_graph_(CallGraph::Build(module)) {} 42 virtual ~HloOrdering() = default; 43 44 // Specify the ordering constraints between a pair of instructions a and b. 45 enum class ExecutionConstraint { 46 // Indicate a and b are the same instruction; 47 kIsSame, 48 // Indicate a runs before b; 49 kRunBefore, 50 // Only one of a or b runs each time their common ancestor is evaluated, 51 // and a is in an earlier branch than b. 52 kRunExclusiveBefore, 53 // Only one of a or b runs each time, and a is in a later branch than b. 54 kRunExclusiveAfter, 55 // Indicate a runs after b 56 kRunAfter, 57 // An order cannot be detrermined as a and b do not have a common ancestor. 58 kUnordered, 59 }; 60 // Return the execution constraint between a and b. 61 HloOrdering::ExecutionConstraint GetExecutionConstraint( 62 const HloInstruction* a, const HloInstruction* b) const; 63 64 // Returns true if instruction 'a' executes before instruction 'b'. This is 65 // not reflexive, that is, an instruction does not execute before itself. 66 bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const; 67 68 // Returns whether the value 'a' is defined before the value 'b' under the 69 // given ordering. 70 bool IsDefinedBefore(const HloValue& a, const HloValue& b) const; 71 72 // Returns whether the given use is before the given value definition under 73 // the given ordering. 74 bool UsesBeforeValueDefinition(absl::Span<const HloUse* const> uses, 75 const HloValue& value, 76 const HloDataflowAnalysis& dataflow) const; 77 // Returns whether the given values interfere. Two values interfere if they 78 // may both be simultaneously live. 79 bool MayInterfere(const HloValue& a, const HloValue& b, 80 const HloDataflowAnalysis& dataflow) const; 81 82 // Returns true if the live range of the given value 'a' is strictly before 83 // the live range of value 'b' using the given HLO ordering. 84 bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b, 85 const HloDataflowAnalysis& dataflow) const; 86 87 // Returns the sequential instruction order for the given computation, or 88 // nullptr if the computation does not have a sequential ordering. 89 virtual const HloInstructionSequence* SequentialOrder( 90 const HloComputation& computation) const = 0; 91 92 // Return the call graph of the module used to compute ordering. call_graph()93 const CallGraph& call_graph() const { return *call_graph_; } 94 95 virtual string ToString() const = 0; 96 97 protected: 98 // Returns true if instruction 'a' executes before instruction 'b'. 99 // Precondition: 'a' and 'b' are in the same computation. 100 // 101 // Derived classes should implement this method for determining order of 102 // instructions in the same computation. ExecutesBefore() analyzes the 103 // callgraph and uses this method to determine ordering of instructions in 104 // different computations. 105 virtual bool ExecutesBeforeInSameComputation( 106 const HloInstruction* a, const HloInstruction* b) const = 0; 107 108 const HloModule* module_; 109 110 std::unique_ptr<CallGraph> call_graph_; 111 }; 112 113 // Base class for partial orderings implemented by a map of predecessors for 114 // each instruction. Subclasses should fill in predecessors_. 115 class PredecessorHloOrdering : public HloOrdering { 116 public: 117 ~PredecessorHloOrdering() override = default; 118 119 // Returns nullptr indicating the computation does not have a sequential 120 // ordering. SequentialOrder(const HloComputation & computation)121 const HloInstructionSequence* SequentialOrder( 122 const HloComputation& computation) const override { 123 return nullptr; 124 } 125 reachability_map(const HloComputation * computation)126 HloReachabilityMap& reachability_map(const HloComputation* computation) { 127 return *predecessors_.at(computation); 128 } reachability_map(const HloComputation * computation)129 const HloReachabilityMap& reachability_map( 130 const HloComputation* computation) const { 131 return *predecessors_.at(computation); 132 } 133 134 protected: 135 explicit PredecessorHloOrdering(const HloModule* module); 136 string ToStringHelper(const string& name) const; 137 138 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 139 const HloInstruction* b) const override; 140 141 // For each computation in the module, this is the set of the instruction's 142 // predecessors. An instruction is an element of its own predecessor set. 143 // 144 // Subclasses should fill this in to define the desired ordering. 145 absl::flat_hash_map<const HloComputation*, 146 std::unique_ptr<HloReachabilityMap>> 147 predecessors_; 148 }; 149 150 // An HLO ordering based on data dependencies in the HLO graph. In this partial 151 // order, instruction A executes before instruction B only if there is a path 152 // from A to B in the HLO graph. For example, given the following graph: 153 /* 154 param 155 / \ 156 negate exp 157 \ / 158 add 159 */ 160 // DependencyHloOrdering gives the following executes-before relations: 161 // param executes before negate, exp, and add 162 // negate executes before add 163 // exp executes before add 164 // add executes before nothing 165 // negate and exp are not ordered because the dependencies allow either to 166 // execute before the other (or in parallel). DependencyHloOrdering ordering 167 // allows maximum parallelism and enables any execution order which satisfies 168 // data dependencies. This requires pessimistic assumptions about buffer live 169 // ranges and can result in more memory used than more constrained orderings. 170 class DependencyHloOrdering : public PredecessorHloOrdering { 171 public: 172 explicit DependencyHloOrdering(const HloModule* module); 173 ~DependencyHloOrdering() override = default; 174 175 string ToString() const override; 176 }; 177 178 // An HLO ordering based on a total order of instructions in each computation. 179 // The computation total order is a sequencing of all of its instructions in 180 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded 181 // execution. For example, given the following HLO graph: 182 /* 183 param 184 / \ 185 negate exp 186 \ / 187 add 188 */ 189 // and the following sequence: 190 // 191 // {param, negate, exp, add} 192 // 193 // SequentialHloOrdering gives the following executes-before relations: 194 // param executes before negate, exp, and add 195 // negate executes before exp and add 196 // exp executes before add 197 // add executes before nothing 198 // This is more constrained than DependencyHloOrdering in this example because 199 // negate and exp are ordered (negate before exp). This enables param to share 200 // the same buffer as exp (param buffer is dead after exp). Generally, this 201 // ordering enables more buffer sharing (reduced memory usage) because buffer 202 // interference is reduced relative to DependencyHloOrdering. 203 class SequentialHloOrdering : public HloOrdering { 204 public: 205 explicit SequentialHloOrdering(const HloSchedule& schedule); 206 explicit SequentialHloOrdering(HloSchedule&& schedule); 207 ~SequentialHloOrdering() override = default; 208 209 // Returns the sequential instruction order for the given computation. 210 const HloInstructionSequence* SequentialOrder( 211 const HloComputation& computation) const override; 212 213 string ToString() const override; 214 215 protected: 216 void Initialize(); 217 218 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 219 const HloInstruction* b) const override; 220 221 const HloSchedule schedule_; 222 223 // The position of every instruction in the HLO module in its respective 224 // computation sequence (a value of zero indicates the instruction is first in 225 // the sequence, etc). Instructions from all computations are contained in 226 // this map so more than one instruction may have the same position 227 // value. This is not a problem because ExecutesBefore also verifies 228 // instructions are in the same computation. 229 absl::flat_hash_map<const HloInstruction*, int> order_position_; 230 }; 231 232 } // namespace xla 233 234 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 235