1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 18 19 #include <memory> 20 #include <string> 21 #include <utility> 22 23 #include "absl/container/flat_hash_map.h" 24 #include "tensorflow/compiler/xla/service/call_graph.h" 25 #include "tensorflow/compiler/xla/service/hlo.pb.h" 26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" 27 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 28 #include "tensorflow/compiler/xla/service/hlo_module.h" 29 #include "tensorflow/compiler/xla/service/hlo_reachability.h" 30 #include "tensorflow/compiler/xla/service/hlo_schedule.h" 31 #include "tensorflow/compiler/xla/service/hlo_value.h" 32 #include "tensorflow/compiler/xla/types.h" 33 34 namespace xla { 35 36 // Base class for describing a partial ordering of HLO instructions. Used to 37 // determine live range overlap of HLO instruction output buffers. 38 class HloOrdering { 39 public: HloOrdering(const HloModule * module)40 explicit HloOrdering(const HloModule* module) 41 : module_(module), call_graph_(CallGraph::Build(module)) {} 42 virtual ~HloOrdering() = default; 43 44 // Specify the ordering constraints between a pair of instructions a and b. 45 enum class ExecutionConstraint { 46 // Indicate a and b are the same instruction; 47 kIsSame, 48 // Indicate a runs before b starts; 49 kRunBeforeStart, 50 // Indicate a runs before b ends but after b starts, e.g., when b is a 51 // conditional or while loop; 52 kRunBeforeEnd, 53 // Only one of a or b runs each time their common ancestor is evaluated, 54 // and a is in an earlier branch than b. 55 kRunExclusiveBefore, 56 // Only one of a or b runs each time, and a is in a later branch than b. 57 kRunExclusiveAfter, 58 // Indicate a runs after b ends. 59 kRunAfter, 60 // An order cannot be detrermined as a and b do not have a common ancestor. 61 kUnordered, 62 }; 63 // Return the execution constraint between a and b. 64 HloOrdering::ExecutionConstraint GetExecutionConstraint( 65 const HloInstruction* a, const HloInstruction* b) const; 66 67 // Returns true if instruction 'a' executes before instruction 'b'. This is 68 // not reflexive, that is, an instruction does not execute before itself. 69 bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const; 70 71 // Returns whether the value 'a' is defined before the value 'b' under the 72 // given ordering. 73 bool IsDefinedBefore(const HloValue& a, const HloValue& b) const; 74 75 // Returns whether the given use is before the given value definition under 76 // the given ordering. Set use_is_always_before_def_in_same_instr to false if 77 // you want the analysis to always consider a use at an instruction's operand 78 // to be strictly before that instructions definition. The configuration needs 79 // to be false when result will be used to remove unnecessary copy 80 // instructions, due to additional buffer sharing constraints. 81 bool UsesBeforeValueDefinition( 82 absl::Span<const HloUse* const> uses, const HloValue& value, 83 const HloDataflowAnalysis& dataflow, 84 bool use_is_always_before_def_in_same_instr = false) const; 85 // Returns whether the given values interfere. Two values interfere if they 86 // may both be simultaneously live. 87 bool MayInterfere(const HloValue& a, const HloValue& b, 88 const HloDataflowAnalysis& dataflow) const; 89 90 // Returns true if the live range of the given value 'a' is strictly before 91 // the live range of value 'b' using the given HLO ordering. 92 bool LiveRangeStrictlyBefore( 93 const HloValue& a, const HloValue& b, const HloDataflowAnalysis& dataflow, 94 bool use_is_always_before_def_in_same_instr = false) const; 95 96 // Returns the sequential instruction order for the given computation, or 97 // nullptr if the computation does not have a sequential ordering. 98 virtual const HloInstructionSequence* SequentialOrder( 99 const HloComputation& computation) const = 0; 100 101 // Return the call graph of the module used to compute ordering. call_graph()102 const CallGraph& call_graph() const { return *call_graph_; } 103 104 virtual std::string ToString() const = 0; 105 106 protected: 107 // Returns true if instruction 'a' executes before instruction 'b'. 108 // Precondition: 'a' and 'b' are in the same computation. 109 // 110 // Derived classes should implement this method for determining order of 111 // instructions in the same computation. ExecutesBefore() analyzes the 112 // callgraph and uses this method to determine ordering of instructions in 113 // different computations. 114 virtual bool ExecutesBeforeInSameComputation( 115 const HloInstruction* a, const HloInstruction* b) const = 0; 116 117 const HloModule* module_; 118 119 std::unique_ptr<CallGraph> call_graph_; 120 }; 121 122 // Base class for partial orderings implemented by a map of predecessors for 123 // each instruction. Subclasses should fill in predecessors_. 124 class PredecessorHloOrdering : public HloOrdering { 125 public: 126 ~PredecessorHloOrdering() override = default; 127 128 // Returns nullptr indicating the computation does not have a sequential 129 // ordering. SequentialOrder(const HloComputation & computation)130 const HloInstructionSequence* SequentialOrder( 131 const HloComputation& computation) const override { 132 return nullptr; 133 } 134 reachability_map(const HloComputation * computation)135 HloReachabilityMap& reachability_map(const HloComputation* computation) { 136 return *predecessors_.at(computation); 137 } reachability_map(const HloComputation * computation)138 const HloReachabilityMap& reachability_map( 139 const HloComputation* computation) const { 140 return *predecessors_.at(computation); 141 } 142 143 protected: 144 explicit PredecessorHloOrdering(const HloModule* module); 145 std::string ToStringHelper(const std::string& name) const; 146 147 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 148 const HloInstruction* b) const override; 149 150 // For each computation in the module, this is the set of the instruction's 151 // predecessors. An instruction is an element of its own predecessor set. 152 // 153 // Subclasses should fill this in to define the desired ordering. 154 absl::flat_hash_map<const HloComputation*, 155 std::unique_ptr<HloReachabilityMap>> 156 predecessors_; 157 }; 158 159 // An HLO ordering based on data dependencies in the HLO graph. In this partial 160 // order, instruction A executes before instruction B only if there is a path 161 // from A to B in the HLO graph. For example, given the following graph: 162 /* 163 param 164 / \ 165 negate exp 166 \ / 167 add 168 */ 169 // DependencyHloOrdering gives the following executes-before relations: 170 // param executes before negate, exp, and add 171 // negate executes before add 172 // exp executes before add 173 // add executes before nothing 174 // negate and exp are not ordered because the dependencies allow either to 175 // execute before the other (or in parallel). DependencyHloOrdering ordering 176 // allows maximum parallelism and enables any execution order which satisfies 177 // data dependencies. This requires pessimistic assumptions about buffer live 178 // ranges and can result in more memory used than more constrained orderings. 179 class DependencyHloOrdering : public PredecessorHloOrdering { 180 public: 181 explicit DependencyHloOrdering(const HloModule* module); 182 ~DependencyHloOrdering() override = default; 183 184 std::string ToString() const override; 185 }; 186 187 // An HLO ordering based on a total order of instructions in each computation. 188 // The computation total order is a sequencing of all of its instructions in 189 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded 190 // execution. For example, given the following HLO graph: 191 /* 192 param 193 / \ 194 negate exp 195 \ / 196 add 197 */ 198 // and the following sequence: 199 // 200 // {param, negate, exp, add} 201 // 202 // SequentialHloOrdering gives the following executes-before relations: 203 // param executes before negate, exp, and add 204 // negate executes before exp and add 205 // exp executes before add 206 // add executes before nothing 207 // This is more constrained than DependencyHloOrdering in this example because 208 // negate and exp are ordered (negate before exp). This enables param to share 209 // the same buffer as exp (param buffer is dead after exp). Generally, this 210 // ordering enables more buffer sharing (reduced memory usage) because buffer 211 // interference is reduced relative to DependencyHloOrdering. 212 class SequentialHloOrdering : public HloOrdering { 213 public: 214 explicit SequentialHloOrdering(const HloSchedule& schedule); 215 explicit SequentialHloOrdering(HloSchedule&& schedule); 216 ~SequentialHloOrdering() override = default; 217 218 // Returns the sequential instruction order for the given computation. 219 const HloInstructionSequence* SequentialOrder( 220 const HloComputation& computation) const override; 221 222 std::string ToString() const override; 223 224 protected: 225 void Initialize(); 226 227 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 228 const HloInstruction* b) const override; 229 230 const HloSchedule schedule_; 231 232 // The position of every instruction in the HLO module in its respective 233 // computation sequence (a value of zero indicates the instruction is first in 234 // the sequence, etc). Instructions from all computations are contained in 235 // this map so more than one instruction may have the same position 236 // value. This is not a problem because ExecutesBefore also verifies 237 // instructions are in the same computation. 238 absl::flat_hash_map<const HloInstruction*, int> order_position_; 239 }; 240 241 } // namespace xla 242 243 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 244