1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 18 19 #include <memory> 20 #include <string> 21 #include <utility> 22 23 #include "absl/container/flat_hash_map.h" 24 #include "tensorflow/compiler/xla/service/call_graph.h" 25 #include "tensorflow/compiler/xla/service/hlo.pb.h" 26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" 27 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 28 #include "tensorflow/compiler/xla/service/hlo_module.h" 29 #include "tensorflow/compiler/xla/service/hlo_reachability.h" 30 #include "tensorflow/compiler/xla/service/hlo_schedule.h" 31 #include "tensorflow/compiler/xla/service/hlo_value.h" 32 #include "tensorflow/compiler/xla/types.h" 33 34 namespace xla { 35 36 // Base class for describing a partial ordering of HLO instructions. Used to 37 // determine live range overlap of HLO instruction output buffers. 38 class HloOrdering { 39 public: HloOrdering(const HloModule * module)40 HloOrdering(const HloModule* module) 41 : module_(module), call_graph_(CallGraph::Build(module)) {} 42 virtual ~HloOrdering() = default; 43 44 // Returns true if instruction 'a' executes before instruction 'b'. This is 45 // not reflexive, that is, an instruction does not execute before itself. 46 bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const; 47 48 // Returns whether the value 'a' is defined before the value 'b' under the 49 // given ordering. 50 bool IsDefinedBefore(const HloValue& a, const HloValue& b) const; 51 52 // Returns whether the given use is before the given value definition under 53 // the given ordering. 54 bool UseIsBeforeValueDefinition(const HloUse& use, const HloValue& value, 55 const HloDataflowAnalysis& dataflow) const; 56 // Returns whether the given values interfere. Two values interfere if they 57 // may both be simultaneously live. 58 bool MayInterfere(const HloValue& a, const HloValue& b, 59 const HloDataflowAnalysis& dataflow) const; 60 61 // Returns true if the live range of the given value 'a' is strictly before 62 // the live range of value 'b' using the given HLO ordering. 63 bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b, 64 const HloDataflowAnalysis& dataflow) const; 65 66 // Returns the sequential instruction order for the given computation, or 67 // nullptr if the computation does not have a sequential ordering. 68 virtual const HloInstructionSequence* SequentialOrder( 69 const HloComputation& computation) const = 0; 70 71 // Return the call graph of the module used to compute ordering. call_graph()72 const CallGraph& call_graph() const { return *call_graph_; } 73 74 virtual string ToString() const = 0; 75 76 protected: 77 // Returns true if instruction 'a' executes before instruction 'b'. 78 // Precondition: 'a' and 'b' are in the same computation. 79 // 80 // Derived classes should implement this method for determining order of 81 // instructions in the same computation. ExecutesBefore() analyzes the 82 // callgraph and uses this method to determine ordering of instructions in 83 // different computations. 84 virtual bool ExecutesBeforeInSameComputation( 85 const HloInstruction* a, const HloInstruction* b) const = 0; 86 87 const HloModule* module_; 88 89 std::unique_ptr<CallGraph> call_graph_; 90 }; 91 92 // Base class for partial orderings implemented by a map of predecessors for 93 // each instruction. Subclasses should fill in predecessors_. 94 class PredecessorHloOrdering : public HloOrdering { 95 public: 96 ~PredecessorHloOrdering() override = default; 97 98 // Returns nullptr indicating the computation does not have a sequential 99 // ordering. SequentialOrder(const HloComputation & computation)100 const HloInstructionSequence* SequentialOrder( 101 const HloComputation& computation) const override { 102 return nullptr; 103 } 104 reachability_map(const HloComputation * computation)105 HloReachabilityMap& reachability_map(const HloComputation* computation) { 106 return *predecessors_.at(computation); 107 } reachability_map(const HloComputation * computation)108 const HloReachabilityMap& reachability_map( 109 const HloComputation* computation) const { 110 return *predecessors_.at(computation); 111 } 112 113 protected: 114 explicit PredecessorHloOrdering(const HloModule* module); 115 string ToStringHelper(const string& name) const; 116 117 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 118 const HloInstruction* b) const override; 119 120 // For each computation in the module, this is the set of the instruction's 121 // predecessors. An instruction is an element of its own predecessor set. 122 // 123 // Subclasses should fill this in to define the desired ordering. 124 absl::flat_hash_map<const HloComputation*, 125 std::unique_ptr<HloReachabilityMap>> 126 predecessors_; 127 }; 128 129 // An HLO ordering based on data dependencies in the HLO graph. In this partial 130 // order, instruction A executes before instruction B only if there is a path 131 // from A to B in the HLO graph. For example, given the following graph: 132 /* 133 param 134 / \ 135 negate exp 136 \ / 137 add 138 */ 139 // DependencyHloOrdering gives the following executes-before relations: 140 // param executes before negate, exp, and add 141 // negate executes before add 142 // exp executes before add 143 // add executes before nothing 144 // negate and exp are not ordered because the dependencies allow either to 145 // execute before the other (or in parallel). DependencyHloOrdering ordering 146 // allows maximum parallelism and enables any execution order which satisfies 147 // data dependencies. This requires pessimistic assumptions about buffer live 148 // ranges and can result in more memory used than more constrained orderings. 149 class DependencyHloOrdering : public PredecessorHloOrdering { 150 public: 151 explicit DependencyHloOrdering(const HloModule* module); 152 ~DependencyHloOrdering() override = default; 153 154 string ToString() const override; 155 }; 156 157 // An HLO ordering based on a total order of instructions in each computation. 158 // The computation total order is a sequencing of all of its instructions in 159 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded 160 // execution. For example, given the following HLO graph: 161 /* 162 param 163 / \ 164 negate exp 165 \ / 166 add 167 */ 168 // and the following sequence: 169 // 170 // {param, negate, exp, add} 171 // 172 // SequentialHloOrdering gives the following executes-before relations: 173 // param executes before negate, exp, and add 174 // negate executes before exp and add 175 // exp executes before add 176 // add executes before nothing 177 // This is more constrained than DependencyHloOrdering in this example because 178 // negate and exp are ordered (negate before exp). This enables param to share 179 // the same buffer as exp (param buffer is dead after exp). Generally, this 180 // ordering enables more buffer sharing (reduced memory usage) because buffer 181 // interference is reduced relative to DependencyHloOrdering. 182 class SequentialHloOrdering : public HloOrdering { 183 public: 184 SequentialHloOrdering(const HloSchedule& schedule); 185 SequentialHloOrdering(HloSchedule&& schedule); 186 ~SequentialHloOrdering() override = default; 187 188 // Returns the sequential instruction order for the given computation. 189 const HloInstructionSequence* SequentialOrder( 190 const HloComputation& computation) const override; 191 192 string ToString() const override; 193 194 protected: 195 void Initialize(); 196 197 bool ExecutesBeforeInSameComputation(const HloInstruction* a, 198 const HloInstruction* b) const override; 199 200 const HloSchedule schedule_; 201 202 // The position of every instruction in the HLO module in its respective 203 // computation sequence (a value of zero indicates the instruction is first in 204 // the sequence, etc). Instructions from all computations are contained in 205 // this map so more than one instruction may have the same position 206 // value. This is not a problem because ExecutesBefore also verifies 207 // instructions are in the same computation. 208 absl::flat_hash_map<const HloInstruction*, int> order_position_; 209 }; 210 211 } // namespace xla 212 213 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_ 214