/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
17 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
18 
#include <memory>
#include <string>
#include <vector>

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
33 
34 namespace xla {
35 namespace llvm_ir {
36 
37 // A class for constructing a for-loop in LLVM IR.
38 class ForLoop {
39  public:
40   // Emit a for-loop at the current insert point of the given IRBuilder.
41   //
42   // start_index and end_index are the loop bounds (end_index is not inclusive).
43   // `step` is the increment of the loop index after each iteration.
44   //
45   // The current insert basic block of the builder is the preheader to the loop
46   // (see below for definition of basic block names). All instructions (if any)
47   // at or after the insert point in the insert basic block are moved to a newly
48   // created exit basic block. Instructions before the insert point remain in
49   // the insert BB:
50   //
51   //                   +--------------+         +----------------+
52   //                   |  insert BB   |         |   insert BB    |
53   //                   |     ...      |         | (preheader BB) |
54   //                   | %foo = ...   |         |      ...       |
55   //    insert point ->| %bar = ...   |  ===>   | %foo = ...     |
56   //                   |     ...      |         +----------------+
57   //                   +--------------+                 |
58   //                                                    V
59   //                                              [[ LOOP BBs ]]
60   //                                                    |
61   //                                                    V
62   //                                             +--------------+
63   //                                             |   exit BB    |
64   //                                             | %bar = ...   |
65   //                                             |     ...      |
66   //                                             +--------------+
67   //
68   // `prefix` is used to disambiguate variable and basic block names emitted in
69   // LLVM IR. If non-empty, it is prepended to the name of the induction
70   // variable value and each basic block created for the loop.
71   //
72   // If `prevent_unrolling` is true then emit metadata that directs LLVM to not
73   // unroll the generated loop.
74   static std::unique_ptr<ForLoop> EmitForLoop(
75       tensorflow::StringPiece prefix, llvm::Value* start_index,
76       llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder,
77       bool prevent_unrolling = false, bool prevent_vectorization = false);
78 
79   // The names of the blocks follow LLVM's conventions. Control flow amongst the
80   // blocks for the example C code looks like:
81   //
82   //   for (int i = 0; i < n; ++i) {
83   //     do_stuff(i);
84   //   }
85   //
86   //      +--------------+
87   //      | preheader BB |
88   //      |     i = 0    |
89   //      +--------------+
90   //              |
91   //              V
92   //      +-------------+
93   //      |  header BB  |<-+
94   //      | if i < n:   |  |
95   //      |   goto body |  |
96   //      | else:       |  |
97   //      |   goto exit |  |
98   //      +-------------+  |
99   //            | |        |
100   //   +--------+ |        |
101   //   |          V        |
102   //   |  +-------------+  |
103   //   |  |   body BB   |  |
104   //   |  | dostuff(i)  |--+
105   //   |  | ++i         |
106   //   |  +-------------+
107   //   |
108   //   |  +-------------+
109   //   +->|   exit BB   |
110   //      +-------------+
111   //
112   // Caller-emitted code to execute within the loop should be placed within the
113   // "body" basic block.
114   //
115   // Return pointers to various blocks in the loop.
GetPreheaderBasicBlock()116   llvm::BasicBlock* GetPreheaderBasicBlock() const { return preheader_bb_; }
GetHeaderBasicBlock()117   llvm::BasicBlock* GetHeaderBasicBlock() const { return header_bb_; }
GetBodyBasicBlock()118   llvm::BasicBlock* GetBodyBasicBlock() const { return body_bb_; }
GetExitBasicBlock()119   llvm::BasicBlock* GetExitBasicBlock() const { return exit_bb_; }
120 
121   // Return the Value representing the induction variable in the body basic
122   // block of the loop.
GetIndVarValue()123   llvm::Value* GetIndVarValue() const { return indvar_; }
124 
125  private:
126   // Allow ForLoopNest to call this private constructor.
127   friend class ForLoopNest;
128 
129   ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix,
130           llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step,
131           bool prevent_unrolling, bool prevent_vectorization);
132 
133   // Emit the loop at the insert point of the builder.
134   void Emit(llvm::IRBuilder<>* ir_builder);
135 
136   llvm::BasicBlock* CreateLoopBB(tensorflow::StringPiece name,
137                                  llvm::IRBuilder<>* ir_builder);
138 
139   // Creates a name for an LLVM construct, appending prefix_ and suffix_, if
140   // they are set.
141   string GetQualifiedName(tensorflow::StringPiece name);
142 
143   // Return a list of metadata nodes that should be associated with the
144   // llvm::Loop for this `ForLoop`.
145   std::vector<llvm::Metadata*> GetLoopMetadata(llvm::IRBuilder<>* ir_builder);
146 
147   string prefix_;
148   string suffix_;
149   llvm::Value* start_index_;
150   llvm::Value* end_index_;
151   llvm::Value* step_;
152 
153   // To improve readability of the IR, we want the basic blocks to appear
154   // consecutively in the following order: preheader, header, body, loop,
155   // exit. The member insert_before_bb_ points to where the next basic block
156   // should be created to ensure this ordering.
157   llvm::BasicBlock* insert_before_bb_;
158 
159   llvm::BasicBlock* preheader_bb_;
160   llvm::BasicBlock* header_bb_;
161   llvm::BasicBlock* body_bb_;
162   llvm::BasicBlock* exit_bb_;
163   llvm::Value* indvar_;
164   bool prevent_unrolling_;
165   bool prevent_vectorization_;
166 
167   TF_DISALLOW_COPY_AND_ASSIGN(ForLoop);
168 };
169 
170 // A simple class for constructing nested for-loops.
171 class ForLoopNest {
172  public:
ForLoopNest(llvm::IRBuilder<> * ir_builder)173   explicit ForLoopNest(llvm::IRBuilder<>* ir_builder)
174       : ForLoopNest(/*name=*/"", ir_builder) {}
175 
ForLoopNest(tensorflow::StringPiece name,llvm::IRBuilder<> * ir_builder)176   ForLoopNest(tensorflow::StringPiece name, llvm::IRBuilder<>* ir_builder)
177       : name_(name.ToString()),
178         outer_loop_preheader_bb_(nullptr),
179         outer_loop_exit_bb_(nullptr),
180         inner_loop_body_bb_(nullptr),
181         ir_builder_(ir_builder) {}
182 
183   // Adds a loop to the nest. If no loop has been added yet then emit a loop at
184   // the current insert point of the given builder. If one or more loops have
185   // been added then emit loop inside the body of the last added loop.  If
186   // prevent_unrolling is true, then metadata is emitting directing LLVM to not
187   // unroll this loop.
188   std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix,
189                                    llvm::Value* start_index,
190                                    llvm::Value* end_index, llvm::Value* stride,
191                                    bool prevent_unrolling = false,
192                                    bool prevent_vectorization = false);
193 
194   // Like the above, except that it defaults to a stride of one.
195   std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix,
196                                    llvm::Value* start_index,
197                                    llvm::Value* end_index,
198                                    bool prevent_unrolling = false,
199                                    bool prevent_vectorization = false);
200 
201   // A convenient wrapper of the other flavor of AddLoop. The given start and
202   // end index are constant.
203   std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index,
204                                    int64 stride, tensorflow::StringPiece suffix,
205                                    bool prevent_unrolling = false,
206                                    bool prevent_vectorization = false);
207 
208   // Like the above, except that it defaults to a stride of one.
209   std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index,
210                                    tensorflow::StringPiece suffix,
211                                    bool prevent_unrolling = false,
212                                    bool prevent_vectorization = false);
213 
214   // Add loops to iterate through the indices within the specified
215   // shape. The returned index collects the induction variables of the
216   // loops so that it will iterate through all coordinates within the
217   // specified shape.
218   //
219   // E.g. if you pass in a 2x3 shape, you will get back an index with
220   // two entries that are induction variables of the two loops that
221   // will be added. That index will iterate through the 6 coordinates
222   // within the shape. One possible order for that sequence would be:
223   //
224   //   (0,0), (0,1), (0,2), (1,0), (1,1), (1,2)
225   IrArray::Index AddLoopsForShape(const Shape& shape,
226                                   tensorflow::StringPiece suffix);
227 
228   // Add a loop for each dimension in "dimensions". "suffix" is the
229   // name suffix of the indvar and basic blocks in this new loop nest.
230   //
231   // The return value is an index with the induction variables. The
232   // size equals the rank of shape and there is a null for each
233   // dimension that is not in "dimensions".
234   IrArray::Index AddLoopsForShapeOnDimensions(
235       const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions,
236       tensorflow::StringPiece suffix);
237 
238   // Convenience methods which return particular basic blocks of the outermost
239   // or innermost loops. These methods return nullptr if no loops have been
240   // added yet.
GetOuterLoopPreheaderBasicBlock()241   llvm::BasicBlock* GetOuterLoopPreheaderBasicBlock() {
242     return outer_loop_preheader_bb_;
243   }
GetOuterLoopExitBasicBlock()244   llvm::BasicBlock* GetOuterLoopExitBasicBlock() { return outer_loop_exit_bb_; }
GetInnerLoopBodyBasicBlock()245   llvm::BasicBlock* GetInnerLoopBodyBasicBlock() { return inner_loop_body_bb_; }
246 
247  private:
248   // Human-friendly name of the loop nest.
249   string name_;
250 
251   // The preheader and exit basic block of the outermost loop, or nullptr if no
252   // loop has been added yet.
253   llvm::BasicBlock* outer_loop_preheader_bb_;
254   llvm::BasicBlock* outer_loop_exit_bb_;
255 
256   // The body basic block of the most-recently added loop, or nullptr if no loop
257   // has been added yet.
258   llvm::BasicBlock* inner_loop_body_bb_;
259 
260   llvm::IRBuilder<>* ir_builder_;
261 
262   TF_DISALLOW_COPY_AND_ASSIGN(ForLoopNest);
263 };
264 
265 }  // namespace llvm_ir
266 }  // namespace xla
267 
268 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
269