• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef SOURCE_OPT_LOOP_PEELING_H_
16 #define SOURCE_OPT_LOOP_PEELING_H_
17 
18 #include <algorithm>
19 #include <limits>
20 #include <memory>
21 #include <tuple>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <utility>
25 #include <vector>
26 
27 #include "source/opt/ir_context.h"
28 #include "source/opt/loop_descriptor.h"
29 #include "source/opt/loop_utils.h"
30 #include "source/opt/pass.h"
31 #include "source/opt/scalar_analysis.h"
32 
33 namespace spvtools {
34 namespace opt {
35 
36 // Utility class to perform the peeling of a given loop.
37 // The loop peeling transformation make a certain amount of a loop iterations to
38 // be executed either before (peel before) or after (peel after) the transformed
39 // loop.
40 //
41 // For peeling cases the transformation does the following steps:
42 //   - It clones the loop and inserts the cloned loop before the original loop;
43 //   - It connects all iterating values of the cloned loop with the
44 //     corresponding original loop values so that the second loop starts with
45 //     the appropriate values.
46 //   - It inserts a new induction variable "i" is inserted into the cloned that
47 //     starts with the value 0 and increment by step of one.
48 //
49 // The last step is specific to each case:
50 //   - Peel before: the transformation is to peel the "N" first iterations.
51 //     The exit condition of the cloned loop is changed so that the loop
52 //     exits when "i < N" becomes false. The original loop is then protected to
53 //     only execute if there is any iteration left to do.
54 //   - Peel after: the transformation is to peel the "N" last iterations,
55 //     then the exit condition of the cloned loop is changed so that the loop
56 //     exits when "i + N < max_iteration" becomes false, where "max_iteration"
57 //     is the upper bound of the loop. The cloned loop is then protected to
58 //     only execute if there is any iteration left to do no covered by the
59 //     second.
60 //
61 // To be peelable:
62 //   - The loop must be in LCSSA form;
63 //   - The loop must not contain any breaks;
64 //   - The loop must not have any ambiguous iterators updates (see
65 //     "CanPeelLoop").
66 // The method "CanPeelLoop" checks that those constrained are met.
67 class LoopPeeling {
68  public:
69   // LoopPeeling constructor.
70   // |loop| is the loop to peel.
71   // |loop_iteration_count| is the instruction holding the |loop| iteration
72   // count, must be invariant for |loop| and must be of an int 32 type (signed
73   // or unsigned).
74   // |canonical_induction_variable| is an induction variable that can be used to
75   // count the number of iterations, must be of the same type as
76   // |loop_iteration_count| and start at 0 and increase by step of one at each
77   // iteration. The value nullptr is interpreted as no suitable variable exists
78   // and one will be created.
79   LoopPeeling(Loop* loop, Instruction* loop_iteration_count,
80               Instruction* canonical_induction_variable = nullptr)
81       : context_(loop->GetContext()),
82         loop_utils_(loop->GetContext(), loop),
83         loop_(loop),
84         loop_iteration_count_(!loop->IsInsideLoop(loop_iteration_count)
85                                   ? loop_iteration_count
86                                   : nullptr),
87         int_type_(nullptr),
88         original_loop_canonical_induction_variable_(
89             canonical_induction_variable),
90         canonical_induction_variable_(nullptr) {
91     if (loop_iteration_count_) {
92       int_type_ = context_->get_type_mgr()
93                       ->GetType(loop_iteration_count_->type_id())
94                       ->AsInteger();
95       if (canonical_induction_variable_) {
96         assert(canonical_induction_variable_->type_id() ==
97                    loop_iteration_count_->type_id() &&
98                "loop_iteration_count and canonical_induction_variable do not "
99                "have the same type");
100       }
101     }
102     GetIteratingExitValues();
103   }
104 
105   // Returns true if the loop can be peeled.
106   // To be peelable, all operation involved in the update of the loop iterators
107   // must not dominates the exit condition. This restriction is a work around to
108   // not miss compile code like:
109   //
110   //   for (int i = 0; i + 1 < N; i++) {}
111   //   for (int i = 0; ++i < N; i++) {}
112   //
113   // The increment will happen before the test on the exit condition leading to
114   // very look-a-like code.
115   //
116   // This restriction will not apply if a loop rotate is applied before (i.e.
117   // becomes a do-while loop).
CanPeelLoop()118   bool CanPeelLoop() const {
119     CFG& cfg = *context_->cfg();
120 
121     if (!loop_iteration_count_) {
122       return false;
123     }
124     if (!int_type_) {
125       return false;
126     }
127     if (int_type_->width() != 32) {
128       return false;
129     }
130     if (!loop_->IsLCSSA()) {
131       return false;
132     }
133     if (!loop_->GetMergeBlock()) {
134       return false;
135     }
136     if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
137       return false;
138     }
139     if (!IsConditionCheckSideEffectFree()) {
140       return false;
141     }
142 
143     return !std::any_of(exit_value_.cbegin(), exit_value_.cend(),
144                         [](std::pair<uint32_t, Instruction*> it) {
145                           return it.second == nullptr;
146                         });
147   }
148 
149   // Moves the execution of the |factor| first iterations of the loop into a
150   // dedicated loop.
151   void PeelBefore(uint32_t factor);
152 
153   // Moves the execution of the |factor| last iterations of the loop into a
154   // dedicated loop.
155   void PeelAfter(uint32_t factor);
156 
157   // Returns the cloned loop.
GetClonedLoop()158   Loop* GetClonedLoop() { return cloned_loop_; }
159   // Returns the original loop.
GetOriginalLoop()160   Loop* GetOriginalLoop() { return loop_; }
161 
162  private:
163   IRContext* context_;
164   LoopUtils loop_utils_;
165   // The original loop.
166   Loop* loop_;
167   // The initial |loop_| upper bound.
168   Instruction* loop_iteration_count_;
169   // The int type to use for the canonical_induction_variable_.
170   analysis::Integer* int_type_;
171   // The cloned loop.
172   Loop* cloned_loop_;
173   // This is set to true when the exit and back-edge branch instruction is the
174   // same.
175   bool do_while_form_;
176   // The canonical induction variable from the original loop if it exists.
177   Instruction* original_loop_canonical_induction_variable_;
178   // The canonical induction variable of the cloned loop. The induction variable
179   // is initialized to 0 and incremented by step of 1.
180   Instruction* canonical_induction_variable_;
181   // Map between loop iterators and exit values. Loop iterators
182   std::unordered_map<uint32_t, Instruction*> exit_value_;
183 
184   // Duplicate |loop_| and place the new loop before the cloned loop. Iterating
185   // values from the cloned loop are then connected to the original loop as
186   // initializer.
187   void DuplicateAndConnectLoop(LoopUtils::LoopCloningResult* clone_results);
188 
189   // Insert the canonical induction variable into the first loop as a simplified
190   // counter.
191   void InsertCanonicalInductionVariable(
192       LoopUtils::LoopCloningResult* clone_results);
193 
194   // Fixes the exit condition of the before loop. The function calls
195   // |condition_builder| to get the condition to use in the conditional branch
196   // of the loop exit. The loop will be exited if the condition evaluate to
197   // true. |condition_builder| takes an Instruction* that represent the
198   // insertion point.
199   void FixExitCondition(
200       const std::function<uint32_t(Instruction*)>& condition_builder);
201 
202   // Gathers all operations involved in the update of |iterator| into
203   // |operations|.
204   void GetIteratorUpdateOperations(
205       const Loop* loop, Instruction* iterator,
206       std::unordered_set<Instruction*>* operations);
207 
208   // Gathers exiting iterator values. The function builds a map between each
209   // iterating value in the loop (a phi instruction in the loop header) and its
210   // SSA value when it exit the loop. If no exit value can be accurately found,
211   // it is map to nullptr (see comment on CanPeelLoop).
212   void GetIteratingExitValues();
213 
214   // Returns true if a for-loop has no instruction with effects before the
215   // condition check.
216   bool IsConditionCheckSideEffectFree() const;
217 
218   // Creates a new basic block and insert it between |bb| and the predecessor of
219   // |bb|.
220   BasicBlock* CreateBlockBefore(BasicBlock* bb);
221 
222   // Inserts code to only execute |loop| only if the given |condition| is true.
223   // |if_merge| is a suitable basic block to be used by the if condition as
224   // merge block.
225   // The function returns the if block protecting the loop.
226   BasicBlock* ProtectLoop(Loop* loop, Instruction* condition,
227                           BasicBlock* if_merge);
228 };
229 
230 // Implements a loop peeling optimization.
231 // For each loop, the pass will try to peel it if there is conditions that
232 // are true for the "N" first or last iterations of the loop.
233 // To avoid code size explosion, too large loops will not be peeled.
234 class LoopPeelingPass : public Pass {
235  public:
236   // Describes the peeling direction.
237   enum class PeelDirection {
238     kNone,    // Cannot peel
239     kBefore,  // Can peel before
240     kAfter    // Can peel last
241   };
242 
243   // Holds some statistics about peeled function.
244   struct LoopPeelingStats {
245     std::vector<std::tuple<const Loop*, PeelDirection, uint32_t>> peeled_loops_;
246   };
247 
stats_(stats)248   LoopPeelingPass(LoopPeelingStats* stats = nullptr) : stats_(stats) {}
249 
250   // Sets the loop peeling growth threshold. If the code size increase is above
251   // |code_grow_threshold|, the loop will not be peeled. The code size is
252   // measured in terms of SPIR-V instructions.
SetLoopPeelingThreshold(size_t code_grow_threshold)253   static void SetLoopPeelingThreshold(size_t code_grow_threshold) {
254     code_grow_threshold_ = code_grow_threshold;
255   }
256 
257   // Returns the loop peeling code growth threshold.
GetLoopPeelingThreshold()258   static size_t GetLoopPeelingThreshold() { return code_grow_threshold_; }
259 
name()260   const char* name() const override { return "loop-peeling"; }
261 
262   // Processes the given |module|. Returns Status::Failure if errors occur when
263   // processing. Returns the corresponding Status::Success if processing is
264   // succesful to indicate whether changes have been made to the modue.
265   Pass::Status Process() override;
266 
267  private:
268   // Describes the peeling direction.
269   enum class CmpOperator {
270     kLT,  // less than
271     kGT,  // greater than
272     kLE,  // less than or equal
273     kGE,  // greater than or equal
274   };
275 
276   class LoopPeelingInfo {
277    public:
278     using Direction = std::pair<PeelDirection, uint32_t>;
279 
LoopPeelingInfo(Loop * loop,size_t loop_max_iterations,ScalarEvolutionAnalysis * scev_analysis)280     LoopPeelingInfo(Loop* loop, size_t loop_max_iterations,
281                     ScalarEvolutionAnalysis* scev_analysis)
282         : context_(loop->GetContext()),
283           loop_(loop),
284           scev_analysis_(scev_analysis),
285           loop_max_iterations_(loop_max_iterations) {}
286 
287     // Returns by how much and to which direction a loop should be peeled to
288     // make the conditional branch of the basic block |bb| an unconditional
289     // branch. If |bb|'s terminator is not a conditional branch or the condition
290     // is not workable then it returns PeelDirection::kNone and a 0 factor.
291     Direction GetPeelingInfo(BasicBlock* bb) const;
292 
293    private:
294     // Returns the id of the loop invariant operand of the conditional
295     // expression |condition|. It returns if no operand is invariant.
296     uint32_t GetFirstLoopInvariantOperand(Instruction* condition) const;
297     // Returns the id of the non loop invariant operand of the conditional
298     // expression |condition|. It returns if all operands are invariant.
299     uint32_t GetFirstNonLoopInvariantOperand(Instruction* condition) const;
300 
301     // Returns the value of |rec| at the first loop iteration.
302     SExpression GetValueAtFirstIteration(SERecurrentNode* rec) const;
303     // Returns the value of |rec| at the given |iteration|.
304     SExpression GetValueAtIteration(SERecurrentNode* rec,
305                                     int64_t iteration) const;
306     // Returns the value of |rec| at the last loop iteration.
307     SExpression GetValueAtLastIteration(SERecurrentNode* rec) const;
308 
309     bool EvalOperator(CmpOperator cmp_op, SExpression lhs, SExpression rhs,
310                       bool* result) const;
311 
312     Direction HandleEquality(SExpression lhs, SExpression rhs) const;
313     Direction HandleInequality(CmpOperator cmp_op, SExpression lhs,
314                                SERecurrentNode* rhs) const;
315 
GetNoneDirection()316     static Direction GetNoneDirection() {
317       return Direction{LoopPeelingPass::PeelDirection::kNone, 0};
318     }
319     IRContext* context_;
320     Loop* loop_;
321     ScalarEvolutionAnalysis* scev_analysis_;
322     size_t loop_max_iterations_;
323   };
324   // Peel profitable loops in |f|.
325   bool ProcessFunction(Function* f);
326   // Peel |loop| if profitable.
327   std::pair<bool, Loop*> ProcessLoop(Loop* loop, CodeMetrics* loop_size);
328 
329   static size_t code_grow_threshold_;
330   LoopPeelingStats* stats_;
331 };
332 
333 }  // namespace opt
334 }  // namespace spvtools
335 
336 #endif  // SOURCE_OPT_LOOP_PEELING_H_
337