1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 18 19 #include <memory> 20 #include <string> 21 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/IRBuilder.h" 24 #include "llvm/IR/Value.h" 25 #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" 26 #include "tensorflow/compiler/xla/types.h" 27 #include "tensorflow/compiler/xla/xla_data.pb.h" 28 #include "tensorflow/core/lib/core/stringpiece.h" 29 #include "tensorflow/core/lib/gtl/array_slice.h" 30 #include "tensorflow/core/lib/strings/strcat.h" 31 #include "tensorflow/core/platform/macros.h" 32 #include "tensorflow/core/platform/types.h" 33 34 namespace xla { 35 namespace llvm_ir { 36 37 // A class for constructing a for-loop in LLVM IR. 38 class ForLoop { 39 public: 40 // Emit a for-loop at the current insert point of the given IRBuilder. 41 // 42 // start_index and end_index are the loop bounds (end_index is not inclusive). 43 // `step` is the increment of the loop index after each iteration. 44 // 45 // The current insert basic block of the builder is the preheader to the loop 46 // (see below for definition of basic block names). All instructions (if any) 47 // at or after the insert point in the insert basic block are moved to a newly 48 // created exit basic block. Instructions before the insert point remain in 49 // the insert BB: 50 // 51 // +--------------+ +----------------+ 52 // | insert BB | | insert BB | 53 // | ... | | (preheader BB) | 54 // | %foo = ... | | ... | 55 // insert point ->| %bar = ... | ===> | %foo = ... | 56 // | ... | +----------------+ 57 // +--------------+ | 58 // V 59 // [[ LOOP BBs ]] 60 // | 61 // V 62 // +--------------+ 63 // | exit BB | 64 // | %bar = ... | 65 // | ... | 66 // +--------------+ 67 // 68 // `prefix` is used to disambiguate variable and basic block names emitted in 69 // LLVM IR. If non-empty, it is prepended to the name of the induction 70 // variable value and each basic block created for the loop. 71 // 72 // If `prevent_unrolling` is true then emit metadata that directs LLVM to not 73 // unroll the generated loop. 74 static std::unique_ptr<ForLoop> EmitForLoop( 75 tensorflow::StringPiece prefix, llvm::Value* start_index, 76 llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder, 77 bool prevent_unrolling = false, bool prevent_vectorization = false); 78 79 // The names of the blocks follow LLVM's conventions. Control flow amongst the 80 // blocks for the example C code looks like: 81 // 82 // for (int i = 0; i < n; ++i) { 83 // do_stuff(i); 84 // } 85 // 86 // +--------------+ 87 // | preheader BB | 88 // | i = 0 | 89 // +--------------+ 90 // | 91 // V 92 // +-------------+ 93 // | header BB |<-+ 94 // | if i < n: | | 95 // | goto body | | 96 // | else: | | 97 // | goto exit | | 98 // +-------------+ | 99 // | | | 100 // +--------+ | | 101 // | V | 102 // | +-------------+ | 103 // | | body BB | | 104 // | | dostuff(i) |--+ 105 // | | ++i | 106 // | +-------------+ 107 // | 108 // | +-------------+ 109 // +->| exit BB | 110 // +-------------+ 111 // 112 // Caller-emitted code to execute within the loop should be placed within the 113 // "body" basic block. 114 // 115 // Return pointers to various blocks in the loop. GetPreheaderBasicBlock()116 llvm::BasicBlock* GetPreheaderBasicBlock() const { return preheader_bb_; } GetHeaderBasicBlock()117 llvm::BasicBlock* GetHeaderBasicBlock() const { return header_bb_; } GetBodyBasicBlock()118 llvm::BasicBlock* GetBodyBasicBlock() const { return body_bb_; } GetExitBasicBlock()119 llvm::BasicBlock* GetExitBasicBlock() const { return exit_bb_; } 120 121 // Return the Value representing the induction variable in the body basic 122 // block of the loop. GetIndVarValue()123 llvm::Value* GetIndVarValue() const { return indvar_; } 124 125 private: 126 // Allow ForLoopNest to call this private constructor. 127 friend class ForLoopNest; 128 129 ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix, 130 llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step, 131 bool prevent_unrolling, bool prevent_vectorization); 132 133 // Emit the loop at the insert point of the builder. 134 void Emit(llvm::IRBuilder<>* ir_builder); 135 136 llvm::BasicBlock* CreateLoopBB(tensorflow::StringPiece name, 137 llvm::IRBuilder<>* ir_builder); 138 139 // Creates a name for an LLVM construct, appending prefix_ and suffix_, if 140 // they are set. 141 string GetQualifiedName(tensorflow::StringPiece name); 142 143 // Return a list of metadata nodes that should be associated with the 144 // llvm::Loop for this `ForLoop`. 145 std::vector<llvm::Metadata*> GetLoopMetadata(llvm::IRBuilder<>* ir_builder); 146 147 string prefix_; 148 string suffix_; 149 llvm::Value* start_index_; 150 llvm::Value* end_index_; 151 llvm::Value* step_; 152 153 // To improve readability of the IR, we want the basic blocks to appear 154 // consecutively in the following order: preheader, header, body, loop, 155 // exit. The member insert_before_bb_ points to where the next basic block 156 // should be created to ensure this ordering. 157 llvm::BasicBlock* insert_before_bb_; 158 159 llvm::BasicBlock* preheader_bb_; 160 llvm::BasicBlock* header_bb_; 161 llvm::BasicBlock* body_bb_; 162 llvm::BasicBlock* exit_bb_; 163 llvm::Value* indvar_; 164 bool prevent_unrolling_; 165 bool prevent_vectorization_; 166 167 TF_DISALLOW_COPY_AND_ASSIGN(ForLoop); 168 }; 169 170 // A simple class for constructing nested for-loops. 171 class ForLoopNest { 172 public: ForLoopNest(llvm::IRBuilder<> * ir_builder)173 explicit ForLoopNest(llvm::IRBuilder<>* ir_builder) 174 : ForLoopNest(/*name=*/"", ir_builder) {} 175 ForLoopNest(tensorflow::StringPiece name,llvm::IRBuilder<> * ir_builder)176 ForLoopNest(tensorflow::StringPiece name, llvm::IRBuilder<>* ir_builder) 177 : name_(name.ToString()), 178 outer_loop_preheader_bb_(nullptr), 179 outer_loop_exit_bb_(nullptr), 180 inner_loop_body_bb_(nullptr), 181 ir_builder_(ir_builder) {} 182 183 // Adds a loop to the nest. If no loop has been added yet then emit a loop at 184 // the current insert point of the given builder. If one or more loops have 185 // been added then emit loop inside the body of the last added loop. If 186 // prevent_unrolling is true, then metadata is emitting directing LLVM to not 187 // unroll this loop. 188 std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix, 189 llvm::Value* start_index, 190 llvm::Value* end_index, llvm::Value* stride, 191 bool prevent_unrolling = false, 192 bool prevent_vectorization = false); 193 194 // Like the above, except that it defaults to a stride of one. 195 std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix, 196 llvm::Value* start_index, 197 llvm::Value* end_index, 198 bool prevent_unrolling = false, 199 bool prevent_vectorization = false); 200 201 // A convenient wrapper of the other flavor of AddLoop. The given start and 202 // end index are constant. 203 std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index, 204 int64 stride, tensorflow::StringPiece suffix, 205 bool prevent_unrolling = false, 206 bool prevent_vectorization = false); 207 208 // Like the above, except that it defaults to a stride of one. 209 std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index, 210 tensorflow::StringPiece suffix, 211 bool prevent_unrolling = false, 212 bool prevent_vectorization = false); 213 214 // Add loops to iterate through the indices within the specified 215 // shape. The returned index collects the induction variables of the 216 // loops so that it will iterate through all coordinates within the 217 // specified shape. 218 // 219 // E.g. if you pass in a 2x3 shape, you will get back an index with 220 // two entries that are induction variables of the two loops that 221 // will be added. That index will iterate through the 6 coordinates 222 // within the shape. One possible order for that sequence would be: 223 // 224 // (0,0), (0,1), (0,2), (1,0), (1,1), (1,2) 225 IrArray::Index AddLoopsForShape(const Shape& shape, 226 tensorflow::StringPiece suffix); 227 228 // Add a loop for each dimension in "dimensions". "suffix" is the 229 // name suffix of the indvar and basic blocks in this new loop nest. 230 // 231 // The return value is an index with the induction variables. The 232 // size equals the rank of shape and there is a null for each 233 // dimension that is not in "dimensions". 234 IrArray::Index AddLoopsForShapeOnDimensions( 235 const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions, 236 tensorflow::StringPiece suffix); 237 238 // Convenience methods which return particular basic blocks of the outermost 239 // or innermost loops. These methods return nullptr if no loops have been 240 // added yet. GetOuterLoopPreheaderBasicBlock()241 llvm::BasicBlock* GetOuterLoopPreheaderBasicBlock() { 242 return outer_loop_preheader_bb_; 243 } GetOuterLoopExitBasicBlock()244 llvm::BasicBlock* GetOuterLoopExitBasicBlock() { return outer_loop_exit_bb_; } GetInnerLoopBodyBasicBlock()245 llvm::BasicBlock* GetInnerLoopBodyBasicBlock() { return inner_loop_body_bb_; } 246 247 private: 248 // Human-friendly name of the loop nest. 249 string name_; 250 251 // The preheader and exit basic block of the outermost loop, or nullptr if no 252 // loop has been added yet. 253 llvm::BasicBlock* outer_loop_preheader_bb_; 254 llvm::BasicBlock* outer_loop_exit_bb_; 255 256 // The body basic block of the most-recently added loop, or nullptr if no loop 257 // has been added yet. 258 llvm::BasicBlock* inner_loop_body_bb_; 259 260 llvm::IRBuilder<>* ir_builder_; 261 262 TF_DISALLOW_COPY_AND_ASSIGN(ForLoopNest); 263 }; 264 265 } // namespace llvm_ir 266 } // namespace xla 267 268 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_ 269