1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 18 19 #include <memory> 20 #include <string> 21 22 #include "absl/strings/str_cat.h" 23 #include "absl/strings/string_view.h" 24 #include "absl/types/span.h" 25 #include "llvm/IR/BasicBlock.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/Value.h" 28 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" 29 #include "tensorflow/compiler/xla/types.h" 30 #include "tensorflow/compiler/xla/xla_data.pb.h" 31 #include "tensorflow/core/platform/macros.h" 32 #include "tensorflow/core/platform/types.h" 33 34 namespace xla { 35 namespace llvm_ir { 36 37 enum class UnrollMode { 38 kDefaultUnroll, 39 kFullyUnroll, 40 kNoUnroll, 41 }; 42 43 // A class for constructing a for-loop in LLVM IR. 44 class ForLoop { 45 public: 46 ForLoop(const ForLoop&) = delete; 47 ForLoop& operator=(const ForLoop&) = delete; 48 49 // Emit a for-loop at the current insert point of the given IRBuilder. 50 // 51 // start_index and end_index are the loop bounds (end_index is not inclusive). 52 // `step` is the increment of the loop index after each iteration. 53 // 54 // The current insert basic block of the builder is the preheader to the loop 55 // (see below for definition of basic block names). All instructions (if any) 56 // at or after the insert point in the insert basic block are moved to a newly 57 // created exit basic block. Instructions before the insert point remain in 58 // the insert BB: 59 // 60 // +--------------+ +----------------+ 61 // | insert BB | | insert BB | 62 // | ... | | (preheader BB) | 63 // | %foo = ... | | ... | 64 // insert point ->| %bar = ... | ===> | %foo = ... | 65 // | ... | +----------------+ 66 // +--------------+ | 67 // V 68 // [[ LOOP BBs ]] 69 // | 70 // V 71 // +--------------+ 72 // | exit BB | 73 // | %bar = ... | 74 // | ... | 75 // +--------------+ 76 // 77 // `prefix` is used to disambiguate variable and basic block names emitted in 78 // LLVM IR. If non-empty, it is prepended to the name of the induction 79 // variable value and each basic block created for the loop. 80 // 81 // `unroll_mode` specifies the desired LLVM unrolling behavior for generated 82 // loop. 83 static std::unique_ptr<ForLoop> EmitForLoop( 84 absl::string_view prefix, llvm::Value* start_index, 85 llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* b, 86 UnrollMode unroll_mode = llvm_ir::UnrollMode::kDefaultUnroll, 87 bool prevent_vectorization = false); 88 89 // The names of the blocks follow LLVM's conventions. Control flow amongst the 90 // blocks for the example C code looks like: 91 // 92 // for (int i = 0; i < n; ++i) { 93 // do_stuff(i); 94 // } 95 // 96 // +--------------+ 97 // | preheader BB | 98 // | i = 0 | 99 // +--------------+ 100 // | 101 // V 102 // +-------------+ 103 // | header BB |<-+ 104 // | if i < n: | | 105 // | goto body | | 106 // | else: | | 107 // | goto exit | | 108 // +-------------+ | 109 // | | | 110 // +--------+ | | 111 // | V | 112 // | +-------------+ | 113 // | | body BB | | 114 // | | dostuff(i) |--+ 115 // | | ++i | 116 // | +-------------+ 117 // | 118 // | +-------------+ 119 // +->| exit BB | 120 // +-------------+ 121 // 122 // Caller-emitted code to execute within the loop should be placed within the 123 // "body" basic block. 124 // 125 // Return pointers to various blocks in the loop. GetPreheaderBasicBlock()126 llvm::BasicBlock* GetPreheaderBasicBlock() const { return preheader_bb_; } GetHeaderBasicBlock()127 llvm::BasicBlock* GetHeaderBasicBlock() const { return header_bb_; } GetBodyBasicBlock()128 llvm::BasicBlock* GetBodyBasicBlock() const { return body_bb_; } GetExitBasicBlock()129 llvm::BasicBlock* GetExitBasicBlock() const { return exit_bb_; } 130 131 // Return the Value representing the induction variable in the body basic 132 // block of the loop. GetIndVarValue()133 llvm::Value* GetIndVarValue() const { return indvar_; } 134 135 private: 136 // Allow ForLoopNest to call this private constructor. 137 friend class ForLoopNest; 138 139 ForLoop(absl::string_view prefix, absl::string_view suffix, 140 llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, 141 UnrollMode unroll_mode, bool prevent_vectorization); 142 143 // Emit the loop at the insert point of the builder. 144 void Emit(llvm::IRBuilder<>* b); 145 146 llvm::BasicBlock* CreateLoopBB(absl::string_view name, llvm::IRBuilder<>* b); 147 148 // Creates a name for an LLVM construct, appending prefix_ and suffix_, if 149 // they are set. 150 string GetQualifiedName(absl::string_view name); 151 152 // Return a list of metadata nodes that should be associated with the 153 // llvm::Loop for this `ForLoop`. 154 std::vector<llvm::Metadata*> GetLoopMetadata(llvm::IRBuilder<>* b); 155 156 string prefix_; 157 string suffix_; 158 llvm::Value* start_index_; 159 llvm::Value* end_index_; 160 llvm::Value* step_; 161 162 // To improve readability of the IR, we want the basic blocks to appear 163 // consecutively in the following order: preheader, header, body, loop, 164 // exit. The member insert_before_bb_ points to where the next basic block 165 // should be created to ensure this ordering. 166 llvm::BasicBlock* insert_before_bb_; 167 168 llvm::BasicBlock* preheader_bb_; 169 llvm::BasicBlock* header_bb_; 170 llvm::BasicBlock* body_bb_; 171 llvm::BasicBlock* exit_bb_; 172 llvm::Value* indvar_; 173 UnrollMode unroll_mode_; 174 bool prevent_vectorization_; 175 }; 176 177 // A simple class for constructing nested for-loops. 178 class ForLoopNest { 179 public: 180 ForLoopNest(absl::string_view name, llvm::IRBuilder<>* b, 181 llvm::Type* index_ty = nullptr) name_(name)182 : name_(name), 183 outer_loop_preheader_bb_(nullptr), 184 outer_loop_exit_bb_(nullptr), 185 inner_loop_body_bb_(nullptr), 186 b_(b) { 187 SetIndexType(index_ty); 188 } 189 ForLoopNest(const ForLoopNest&) = delete; 190 ForLoopNest& operator=(const ForLoopNest&) = delete; 191 192 // Adds a loop to the nest. If no loop has been added yet then emit a loop at 193 // the current insert point of the given builder. If one or more loops have 194 // been added then emit loop inside the body of the last added loop. 195 // unroll_mode is used to emit metadata that controls LLVM unrolling. 196 std::unique_ptr<ForLoop> AddLoop( 197 absl::string_view suffix, llvm::Value* start_index, 198 llvm::Value* end_index, llvm::Value* stride, 199 UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, 200 bool prevent_vectorization = false); 201 202 // Like the above, except that it defaults to a stride of one. 203 std::unique_ptr<ForLoop> AddLoop( 204 absl::string_view suffix, llvm::Value* start_index, 205 llvm::Value* end_index, 206 UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, 207 bool prevent_vectorization = false); 208 209 // A convenient wrapper of the other flavor of AddLoop. The given start and 210 // end index are constant. 211 std::unique_ptr<ForLoop> AddLoop( 212 int64 start_index, int64 end_index, int64 stride, 213 absl::string_view suffix, 214 UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, 215 bool prevent_vectorization = false); 216 217 // Like the above, except that it defaults to a stride of one. 218 std::unique_ptr<ForLoop> AddLoop( 219 int64 start_index, int64 end_index, absl::string_view suffix, 220 UnrollMode unroll_mode = xla::llvm_ir::UnrollMode::kDefaultUnroll, 221 bool prevent_vectorization = false); 222 223 // Add loops to iterate through the indices within the specified 224 // shape. The returned index collects the induction variables of the 225 // loops so that it will iterate through all coordinates within the 226 // specified shape. 227 // 228 // E.g. if you pass in a 2x3 shape, you will get back an index with 229 // two entries that are induction variables of the two loops that 230 // will be added. That index will iterate through the 6 coordinates 231 // within the shape. One possible order for that sequence would be: 232 // 233 // (0,0), (0,1), (0,2), (1,0), (1,1), (1,2) 234 IrArray::Index AddLoopsForShape(const Shape& shape, absl::string_view suffix); 235 236 // Add a loop for each dimension in "dimensions". "suffix" is the 237 // name suffix of the indvar and basic blocks in this new loop nest. 238 // 239 // The return value is an index with the induction variables. The 240 // size equals the rank of shape and there is a null for each 241 // dimension that is not in "dimensions". 242 std::vector<llvm::Value*> AddLoopsForShapeOnDimensions( 243 const Shape& shape, absl::Span<const int64> dimensions, 244 absl::string_view suffix); 245 246 // Emits a series of nested loops for iterating over an operand array. Loops 247 // are constructed in major to minor dimension layout order. No loop is 248 // emitted for the given 'dimension_to_skip'. The function returns an IrArray 249 // index for the given operand_array containing the indvars of the loops. All 250 // dimensions of the index are filled except for 'dimension_to_skip'. 251 // name_suffix is the string to append to the names of LLVM constructs (eg, 252 // basic blocks) constructed by this method. 253 std::vector<llvm::Value*> EmitOperandArrayLoopNest( 254 const llvm_ir::IrArray& operand_array, int64 dimension_to_skip, 255 absl::string_view name_suffix); 256 257 // Convenience methods which return particular basic blocks of the outermost 258 // or innermost loops. These methods return nullptr if no loops have been 259 // added yet. GetOuterLoopPreheaderBasicBlock()260 llvm::BasicBlock* GetOuterLoopPreheaderBasicBlock() { 261 return outer_loop_preheader_bb_; 262 } GetOuterLoopExitBasicBlock()263 llvm::BasicBlock* GetOuterLoopExitBasicBlock() { return outer_loop_exit_bb_; } GetInnerLoopBodyBasicBlock()264 llvm::BasicBlock* GetInnerLoopBodyBasicBlock() { return inner_loop_body_bb_; } 265 266 private: SetIndexType(llvm::Type * index_ty)267 void SetIndexType(llvm::Type* index_ty) { 268 index_type_ = index_ty == nullptr ? b_->getInt64Ty() : index_ty; 269 } 270 GetConstantWithIndexType(int64 c)271 llvm::Constant* GetConstantWithIndexType(int64 c) const { 272 return llvm::ConstantInt::get(index_type_, c); 273 } 274 275 // Human-friendly name of the loop nest. 276 string name_; 277 278 // The preheader and exit basic block of the outermost loop, or nullptr if no 279 // loop has been added yet. 280 llvm::BasicBlock* outer_loop_preheader_bb_; 281 llvm::BasicBlock* outer_loop_exit_bb_; 282 283 // The body basic block of the most-recently added loop, or nullptr if no loop 284 // has been added yet. 285 llvm::BasicBlock* inner_loop_body_bb_; 286 287 llvm::IRBuilder<>* b_; 288 289 llvm::Type* index_type_; 290 }; 291 292 } // namespace llvm_ir 293 } // namespace xla 294 295 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 296