1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <functional>
17 #include <memory>
18 #include <unordered_map>
19 #include <unordered_set>
20 #include <vector>
21 
22 #include "source/opt/ir_builder.h"
23 #include "source/opt/ir_context.h"
24 #include "source/opt/loop_descriptor.h"
25 #include "source/opt/loop_peeling.h"
26 #include "source/opt/loop_utils.h"
27 #include "source/opt/scalar_analysis.h"
28 #include "source/opt/scalar_analysis_nodes.h"
29 
30 namespace spvtools {
31 namespace opt {
32 namespace {
33 // Gather the set of blocks on every path from |entry| to |block|.
34 void GetBlocksInPath(uint32_t block, uint32_t entry,
35                      std::unordered_set<uint32_t>* blocks_in_path,
36                      const CFG& cfg) {
37   for (uint32_t pid : cfg.preds(block)) {
38     if (blocks_in_path->insert(pid).second) {
39       if (pid != entry) {
40         GetBlocksInPath(pid, entry, blocks_in_path, cfg);
41       }
42     }
43   }
44 }
45 }  // namespace
46 
47 size_t LoopPeelingPass::code_grow_threshold_ = 1000;
48 
49 void LoopPeeling::DuplicateAndConnectLoop(
50     LoopUtils::LoopCloningResult* clone_results) {
51   CFG& cfg = *context_->cfg();
52   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
53 
54   assert(CanPeelLoop() && "Cannot peel loop!");
55 
56   std::vector<BasicBlock*> ordered_loop_blocks;
57   // TODO(1841): Handle failure to create pre-header.
58   BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
59 
60   loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);
61 
62   cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);
63 
64   // Add the cloned basic blocks to the function.
65   Function::iterator it =
66       loop_utils_.GetFunction()->FindBlock(pre_header->id());
67   assert(it != loop_utils_.GetFunction()->end() &&
68          "Pre-header not found in the function.");
69   loop_utils_.GetFunction()->AddBasicBlocks(
70       clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);
71 
72   // Make |loop_|'s preheader the preheader of |cloned_loop_|.
73   BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
74   pre_header->ForEachSuccessorLabel(
75       [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });
76 
77   // Update cfg.
78   cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
79   cloned_loop_->SetPreHeaderBlock(pre_header);
80   loop_->SetPreHeaderBlock(nullptr);
81 
82   // When cloning the loop, we did not clone the merge block, so currently
83   // |cloned_loop_| shares its merge block with |loop_|.
84   // We mutate all branches from |cloned_loop_|'s blocks to |loop_|'s merge
85   // into a branch to |loop_|'s header (so the header will also act as the
86   // merge block of |cloned_loop_|).
87   uint32_t cloned_loop_exit = 0;
88   for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
89     if (loop_->IsInsideLoop(pred_id)) continue;
90     BasicBlock* bb = cfg.block(pred_id);
91     assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
92     cloned_loop_exit = bb->id();
93     bb->ForEachSuccessorLabel([this](uint32_t* succ) {
94       if (*succ == loop_->GetMergeBlock()->id())
95         *succ = loop_->GetHeaderBlock()->id();
96     });
97   }
98 
99   // Update cfg.
100   cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
101   cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());
102 
103   // Patch the phi of the original loop header:
104   //  - Set the loop entry branch to come from the cloned loop exit block;
105   //  - Set the initial value of the phi using the corresponding cloned loop
106   //    exit values.
107   //
108   // We patch the iterating value initializers of the original loop using the
109   // corresponding cloned loop exit values. This connects the cloned loop
110   // iterating values to the original loop and ensures that the second loop
111   // starts with the last value of the first loop.
112   //
113   // For example, loops like:
114   //
115   // int z = 0;
116   // for (int i = 0; i++ < M; i += cst1) {
117   //   if (cond)
118   //     z += cst2;
119   // }
120   //
121   // Will become:
122   //
123   // int z = 0;
124   // int i = 0;
125   // for (; i++ < M; i += cst1) {
126   //   if (cond)
127   //     z += cst2;
128   // }
129   // for (; i++ < M; i += cst1) {
130   //   if (cond)
131   //     z += cst2;
132   // }
133   loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
134                                            clone_results,
135                                            this](Instruction* phi) {
136     for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
137       if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
138         phi->SetInOperand(i,
139                           {clone_results->value_map_.at(
140                               exit_value_.at(phi->result_id())->result_id())});
141         phi->SetInOperand(i + 1, {cloned_loop_exit});
142         def_use_mgr->AnalyzeInstUse(phi);
143         return;
144       }
145     }
146   });
147 
148   // Force the creation of a new preheader for the original loop and set it as
149   // the merge block for the cloned loop.
150   // TODO(1841): Handle failure to create pre-header.
151   cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
152 }
153 
154 void LoopPeeling::InsertCanonicalInductionVariable(
155     LoopUtils::LoopCloningResult* clone_results) {
156   if (original_loop_canonical_induction_variable_) {
157     canonical_induction_variable_ =
158         context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
159             original_loop_canonical_induction_variable_->result_id()));
160     return;
161   }
162 
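  // New instructions must land before the latch block's merge instruction (if
  // any), not between it and the terminator.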
163   BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
164   if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
165     --insert_point;
166   }
167   InstructionBuilder builder(
168       context_, &*insert_point,
169       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
170   Instruction* uint_1_cst =
171       builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
172   // Create the increment.
173   // Note that we do "1 + 1" here: one of the operands should be the phi
174   // value, but we don't have it yet. The operand will be set later.
175   Instruction* iv_inc = builder.AddIAdd(
176       uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());
177 
178   builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());
179 
180   canonical_induction_variable_ = builder.AddPhi(
181       uint_1_cst->type_id(),
182       {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
183        GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
184        GetClonedLoop()->GetLatchBlock()->id()});
185   // Connect everything.
186   iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});
187 
188   // Update def/use manager.
189   context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);
190 
191   // If do-while form, use the incremented value.
192   if (do_while_form_) {
193     canonical_induction_variable_ = iv_inc;
194   }
195 }
196 
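// Collects in |operations| the instructions of |loop| that (transitively)
// contribute to the value of |iterator|.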
197 void LoopPeeling::GetIteratorUpdateOperations(
198     const Loop* loop, Instruction* iterator,
199     std::unordered_set<Instruction*>* operations) {
200   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
201   operations->insert(iterator);
202   iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
203     Instruction* insn = def_use_mgr->GetDef(*id);
204     if (insn->opcode() == spv::Op::OpLabel) {
205       return;
206     }
207     if (operations->count(insn)) {
208       return;
209     }
210     if (!loop->IsInsideLoop(insn)) {
211       return;
212     }
213     GetIteratorUpdateOperations(loop, insn, operations);
214   });
215 }
216 
217 bool LoopPeeling::IsConditionCheckSideEffectFree() const {
218   CFG& cfg = *context_->cfg();
219 
220   // The "do-while" form does not cause issues, the algorithm takes into account
221   // the first iteration.
222   if (!do_while_form_) {
223     uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
224 
225     std::unordered_set<uint32_t> blocks_in_path;
226 
227     blocks_in_path.insert(condition_block_id);
228     GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
229                     &blocks_in_path, cfg);
230 
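    // Each block on the path from the header to the condition block may only
    // contain branches, merge instructions and combinator (side-effect free)
    // instructions; otherwise the condition check is not side-effect free.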
231     for (uint32_t bb_id : blocks_in_path) {
232       BasicBlock* bb = cfg.block(bb_id);
233       if (!bb->WhileEachInst([this](Instruction* insn) {
234             if (insn->IsBranch()) return true;
235             switch (insn->opcode()) {
236               case spv::Op::OpLabel:
237               case spv::Op::OpSelectionMerge:
238               case spv::Op::OpLoopMerge:
239                 return true;
240               default:
241                 break;
242             }
243             return context_->IsCombinatorInstruction(insn);
244           })) {
245         return false;
246       }
247     }
248   }
249 
250   return true;
251 }
252 
253 void LoopPeeling::GetIteratingExitValues() {
254   CFG& cfg = *context_->cfg();
255 
256   loop_->GetHeaderBlock()->ForEachPhiInst(
257       [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });
258 
259   if (!loop_->GetMergeBlock()) {
260     return;
261   }
262   if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
263     return;
264   }
265   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
266 
267   uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
268 
269   auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
270   do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
271                              condition_block_id) != header_pred.end();
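  // The loop is in do-while form when the condition block is also a
  // predecessor of the header, i.e. the exit check runs after the body.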
272   if (do_while_form_) {
273     loop_->GetHeaderBlock()->ForEachPhiInst(
274         [condition_block_id, def_use_mgr, this](Instruction* phi) {
275           std::unordered_set<Instruction*> operations;
276 
277           for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
278             if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
279               exit_value_[phi->result_id()] =
280                   def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
281             }
282           }
283         });
284   } else {
285     DominatorTree* dom_tree =
286         &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
287              ->GetDomTree();
288     BasicBlock* condition_block = cfg.block(condition_block_id);
289 
290     loop_->GetHeaderBlock()->ForEachPhiInst(
291         [dom_tree, condition_block, this](Instruction* phi) {
292           std::unordered_set<Instruction*> operations;
293 
294           // This is not the back-edge value; check whether the phi itself is
295           // the only possible candidate for the exit value.
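          // If any instruction feeding the iterator dominates the condition
          // block, the value seen at the exit is not the phi itself, so leave
          // the exit value unset.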
296           GetIteratorUpdateOperations(loop_, phi, &operations);
297 
298           for (Instruction* insn : operations) {
299             if (insn == phi) {
300               continue;
301             }
302             if (dom_tree->Dominates(context_->get_instr_block(insn),
303                                     condition_block)) {
304               return;
305             }
306           }
307           exit_value_[phi->result_id()] = phi;
308         });
309   }
310 }
311 
312 void LoopPeeling::FixExitCondition(
313     const std::function<uint32_t(Instruction*)>& condition_builder) {
314   CFG& cfg = *context_->cfg();
315 
316   uint32_t condition_block_id = 0;
317   for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
318     if (GetClonedLoop()->IsInsideLoop(id)) {
319       condition_block_id = id;
320       break;
321     }
322   }
323   assert(condition_block_id != 0 && "2nd loop is improperly connected");
324 
325   BasicBlock* condition_block = cfg.block(condition_block_id);
326   Instruction* exit_condition = condition_block->terminator();
327   assert(exit_condition->opcode() == spv::Op::OpBranchConditional);
328   BasicBlock::iterator insert_point = condition_block->tail();
329   if (condition_block->GetMergeInst()) {
330     --insert_point;
331   }
332 
333   exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});
334 
335   uint32_t to_continue_block_idx =
336       GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
337           ? 1
338           : 2;
339   exit_condition->SetInOperand(
340       1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
341   exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});
342 
343   // Update def/use manager.
344   context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
345 }
346 
347 BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
348   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
349   CFG& cfg = *context_->cfg();
350   assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");
351 
352   // TODO(1841): Handle id overflow.
353   std::unique_ptr<BasicBlock> new_bb =
354       MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
355           context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})));
356   // Update the loop descriptor.
357   Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
358   if (in_loop) {
359     in_loop->AddBasicBlock(new_bb.get());
360     loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
361   }
362 
363   context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
364   def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());
365 
366   BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
367   bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
368     if (*id == bb->id()) {
369       *id = new_bb->id();
370     }
371   });
372   cfg.RemoveEdge(bb_pred->id(), bb->id());
373   cfg.AddEdge(bb_pred->id(), new_bb->id());
374   def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
375 
376   // Update the incoming branch.
377   bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
378     phi->SetInOperand(1, {new_bb->id()});
379     def_use_mgr->AnalyzeInstUse(phi);
380   });
381   InstructionBuilder(
382       context_, new_bb.get(),
383       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
384       .AddBranch(bb->id());
385   cfg.RegisterBlock(new_bb.get());
386 
387   // Add the basic block to the function.
388   Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
389   assert(it != loop_utils_.GetFunction()->end() &&
390          "Basic block not found in the function.");
391   BasicBlock* ret = new_bb.get();
392   loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
393   return ret;
394 }
395 
396 BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
397                                      BasicBlock* if_merge) {
398   // TODO(1841): Handle failure to create pre-header.
399   BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
400   // Will no longer be a pre-header because of the if.
401   loop->SetPreHeaderBlock(nullptr);
402   // Kill the branch to the header.
403   context_->KillInst(&*if_block->tail());
404 
405   InstructionBuilder builder(
406       context_, if_block,
407       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
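  // Branch to the loop header when |condition| holds; otherwise jump straight
  // to |if_merge|, which also serves as the selection merge block.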
408   builder.AddConditionalBranch(condition->result_id(),
409                                loop->GetHeaderBlock()->id(), if_merge->id(),
410                                if_merge->id());
411 
412   return if_block;
413 }
414 
415 void LoopPeeling::PeelBefore(uint32_t peel_factor) {
416   assert(CanPeelLoop() && "Cannot peel loop");
417   LoopUtils::LoopCloningResult clone_results;
418 
419   // Clone the loop and insert the cloned one before the loop.
420   DuplicateAndConnectLoop(&clone_results);
421 
422   // Add a canonical induction variable "canonical_induction_variable_".
423   InsertCanonicalInductionVariable(&clone_results);
424 
425   InstructionBuilder builder(
426       context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
427       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
428   Instruction* factor =
429       builder.GetIntConstant(peel_factor, int_type_->IsSigned());
430 
431   Instruction* has_remaining_iteration = builder.AddLessThan(
432       factor->result_id(), loop_iteration_count_->result_id());
433   Instruction* max_iteration = builder.AddSelect(
434       factor->type_id(), has_remaining_iteration->result_id(),
435       factor->result_id(), loop_iteration_count_->result_id());
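  // |max_iteration| is min(factor, loop_iteration_count_): the cloned loop
  // must not run more iterations than the original loop would have.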
436 
437   // Change the exit condition of the cloned loop (the loop exits when the
438   // condition becomes false):
439   //  "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
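  //
  // Illustratively (assuming a simple counted loop), with factor == 2,
  //   for (i = 0; i < N; i++) { body; }
  // conceptually becomes:
  //   for (i = 0; i < min(2, N); i++) { body; }  // cloned loop, runs first
  //   if (2 < N)
  //     for (; i < N; i++) { body; }             // original loop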
440   FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
441     return InstructionBuilder(context_, insert_before_point,
442                               IRContext::kAnalysisDefUse |
443                                   IRContext::kAnalysisInstrToBlockMapping)
444         .AddLessThan(canonical_induction_variable_->result_id(),
445                      max_iteration->result_id())
446         ->result_id();
447   });
448 
449   // "Protect" the second loop: the second loop can only be executed if
450   // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
451   BasicBlock* if_merge_block = loop_->GetMergeBlock();
452   loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
453   // Prevent the second loop from being executed if we already executed all the
454   // required iterations.
455   BasicBlock* if_block =
456       ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
457   // Patch the phi of the merge block.
458   if_merge_block->ForEachPhiInst(
459       [&clone_results, if_block, this](Instruction* phi) {
460         // if_merge_block previously had only one predecessor.
461         uint32_t incoming_value = phi->GetSingleWordInOperand(0);
462         auto def_in_loop = clone_results.value_map_.find(incoming_value);
463         if (def_in_loop != clone_results.value_map_.end())
464           incoming_value = def_in_loop->second;
465         phi->AddOperand(
466             {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
467         phi->AddOperand(
468             {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
469         context_->get_def_use_mgr()->AnalyzeInstUse(phi);
470       });
471 
472   context_->InvalidateAnalysesExceptFor(
473       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
474       IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
475 }
476 
477 void LoopPeeling::PeelAfter(uint32_t peel_factor) {
478   assert(CanPeelLoop() && "Cannot peel loop");
479   LoopUtils::LoopCloningResult clone_results;
480 
481   // Clone the loop and insert the cloned one before the loop.
482   DuplicateAndConnectLoop(&clone_results);
483 
484   // Add a canonical induction variable "canonical_induction_variable_".
485   InsertCanonicalInductionVariable(&clone_results);
486 
487   InstructionBuilder builder(
488       context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
489       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
490   Instruction* factor =
491       builder.GetIntConstant(peel_factor, int_type_->IsSigned());
492 
493   Instruction* has_remaining_iteration = builder.AddLessThan(
494       factor->result_id(), loop_iteration_count_->result_id());
495 
496   // Change the exit condition of the cloned loop (the loop exits when the
497   // condition becomes false):
498   //  "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
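  //
  // Illustratively (assuming a simple counted loop), with factor == 2,
  //   for (i = 0; i < N; i++) { body; }
  // conceptually becomes:
  //   if (2 < N)
  //     for (i = 0; i + 2 < N; i++) { body; }    // cloned loop, runs first
  //   for (; i < N; i++) { body; }               // original loop, remaining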
499   FixExitCondition([factor, this](Instruction* insert_before_point) {
500     InstructionBuilder cond_builder(
501         context_, insert_before_point,
502         IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
503     // Build the following check: canonical_induction_variable_ + factor <
504     // iteration_count
505     return cond_builder
506         .AddLessThan(cond_builder
507                          .AddIAdd(canonical_induction_variable_->type_id(),
508                                   canonical_induction_variable_->result_id(),
509                                   factor->result_id())
510                          ->result_id(),
511                      loop_iteration_count_->result_id())
512         ->result_id();
513   });
514 
515   // "Protect" the first loop: the first loop can only be executed if
516   // factor < loop_iteration_count_.
517 
518   // The original loop's pre-header was the cloned loop merge block.
519   GetClonedLoop()->SetMergeBlock(
520       CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
521   // Use the second loop's preheader as the if merge block.
522 
523   // Do not execute the first loop if only the peeled iterations are needed.
524   BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
525                                      GetOriginalLoop()->GetPreHeaderBlock());
526 
527   // Patch the phis of the header block.
528   // We added an if to enclose the first loop, and because the phi nodes are
529   // connected to the exit values of the first loop, those definitions no
530   // longer dominate the preheader.
531   // We add to the preheader (our if merge block) the required phi
532   // instructions and patch the header phis.
533   GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
534       [&clone_results, if_block, this](Instruction* phi) {
535         analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
536 
537         auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
538           uint32_t preheader_value_idx =
539               !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
540           return preheader_value_idx;
541         };
542 
543         Instruction* cloned_phi =
544             def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
545         uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
546             find_value_idx(cloned_phi, GetClonedLoop()));
547 
548         Instruction* new_phi =
549             InstructionBuilder(context_,
550                                &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
551                                IRContext::kAnalysisDefUse |
552                                    IRContext::kAnalysisInstrToBlockMapping)
553                 .AddPhi(phi->type_id(),
554                         {phi->GetSingleWordInOperand(
555                              find_value_idx(phi, GetOriginalLoop())),
556                          GetClonedLoop()->GetMergeBlock()->id(),
557                          cloned_preheader_value, if_block->id()});
558 
559         phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
560                           {new_phi->result_id()});
561         def_use_mgr->AnalyzeInstUse(phi);
562       });
563 
564   context_->InvalidateAnalysesExceptFor(
565       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
566       IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
567 }
568 
569 Pass::Status LoopPeelingPass::Process() {
570   bool modified = false;
571   Module* module = context()->module();
572 
573   // Process each function in the module
574   for (Function& f : *module) {
575     modified |= ProcessFunction(&f);
576   }
577 
578   return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
579 }
580 
581 bool LoopPeelingPass::ProcessFunction(Function* f) {
582   bool modified = false;
583   LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
584 
585   std::vector<Loop*> to_process_loop;
586   to_process_loop.reserve(loop_descriptor.NumLoops());
587   for (Loop& l : loop_descriptor) {
588     to_process_loop.push_back(&l);
589   }
590 
591   ScalarEvolutionAnalysis scev_analysis(context());
592 
593   for (Loop* loop : to_process_loop) {
594     CodeMetrics loop_size;
595     loop_size.Analyze(*loop);
596 
597     auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
598       if (!loop_to_peel->IsLCSSA()) {
599         LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
600       }
601 
602       bool peeled_loop;
603       Loop* still_peelable_loop;
604       std::tie(peeled_loop, still_peelable_loop) =
605           ProcessLoop(loop_to_peel, &loop_size);
606 
607       if (peeled_loop) {
608         modified = true;
609       }
610 
611       return still_peelable_loop;
612     };
613 
614     Loop* still_peelable_loop = try_peel(loop);
615     // The pass works out the maximum factor by which a loop can be peeled.
616     // If the loop can potentially be peeled again, only one direction remains
617     // possible, so a single extra call is enough.
618     if (still_peelable_loop) {
619       try_peel(loop);
620     }
621   }
622 
623   return modified;
624 }
625 
626 std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
627                                                     CodeMetrics* loop_size) {
628   ScalarEvolutionAnalysis* scev_analysis =
629       context()->GetScalarEvolutionAnalysis();
630   // Default values for bailing out.
631   std::pair<bool, Loop*> bail_out{false, nullptr};
632 
633   BasicBlock* exit_block = loop->FindConditionBlock();
634   if (!exit_block) {
635     return bail_out;
636   }
637 
638   Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
639   if (!exiting_iv) {
640     return bail_out;
641   }
642   size_t iterations = 0;
643   if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
644                                     &iterations)) {
645     return bail_out;
646   }
647   if (!iterations) {
648     return bail_out;
649   }
650 
651   Instruction* canonical_induction_variable = nullptr;
652 
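  // Look for an existing induction variable of the form {0, +1} (starts at 0,
  // steps by 1) with an integer type: it can be reused as the canonical
  // induction variable.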
653   loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
654                                             scev_analysis,
655                                             this](Instruction* insn) {
656     if (const SERecurrentNode* iv =
657             scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
658       const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
659       const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
660       if (offset && coeff && offset->FoldToSingleValue() == 0 &&
661           coeff->FoldToSingleValue() == 1) {
662         if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
663           canonical_induction_variable = insn;
664           return false;
665         }
666       }
667     }
668     return true;
669   });
670 
671   bool is_signed = canonical_induction_variable
672                        ? context()
673                              ->get_type_mgr()
674                              ->GetType(canonical_induction_variable->type_id())
675                              ->AsInteger()
676                              ->IsSigned()
677                        : false;
678 
679   LoopPeeling peeler(
680       loop,
681       InstructionBuilder(
682           context(), loop->GetHeaderBlock(),
683           IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
684           .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
685                                     is_signed),
686       canonical_induction_variable);
687 
688   if (!peeler.CanPeelLoop()) {
689     return bail_out;
690   }
691 
692   // For each basic block in the loop, check if it can be peeled. If it
693   // can, get the direction (before/after) and by which factor.
694   LoopPeelingInfo peel_info(loop, iterations, scev_analysis);
695 
696   uint32_t peel_before_factor = 0;
697   uint32_t peel_after_factor = 0;
698 
699   for (uint32_t block : loop->GetBlocks()) {
700     if (block == exit_block->id()) {
701       continue;
702     }
703     BasicBlock* bb = cfg()->block(block);
704     PeelDirection direction;
705     uint32_t factor;
706     std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);
707 
708     if (direction == PeelDirection::kNone) {
709       continue;
710     }
711     if (direction == PeelDirection::kBefore) {
712       peel_before_factor = std::max(peel_before_factor, factor);
713     } else {
714       assert(direction == PeelDirection::kAfter);
715       peel_after_factor = std::max(peel_after_factor, factor);
716     }
717   }
718   PeelDirection direction = PeelDirection::kNone;
719   uint32_t factor = 0;
720 
721   // Find which direction we should peel.
722   if (peel_before_factor) {
723     factor = peel_before_factor;
724     direction = PeelDirection::kBefore;
725   }
726   if (peel_after_factor) {
727     if (peel_before_factor < peel_after_factor) {
728       // Favor a peel after here and give the peel before another shot later.
729       factor = peel_after_factor;
730       direction = PeelDirection::kAfter;
731     }
732   }
733 
734   // Do the peel if we can.
735   if (direction == PeelDirection::kNone) return bail_out;
736 
737   // This does not take into account branch elimination opportunities or
738   // subsequent unrolling. It assumes the peeled loop will be unrolled as well.
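  // For example, with the default threshold of 1000, a loop whose region of
  // interest is 100 instructions will not be peeled by a factor greater
  // than 10.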
739   if (factor * loop_size->roi_size_ > code_grow_threshold_) {
740     return bail_out;
741   }
742   loop_size->roi_size_ *= factor;
743 
744   // Find if a loop should be peeled again.
745   Loop* extra_opportunity = nullptr;
746 
747   if (direction == PeelDirection::kBefore) {
748     peeler.PeelBefore(factor);
749     if (stats_) {
750       stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
751     }
752     if (peel_after_factor) {
753       // We could have peeled after, give it another try.
754       extra_opportunity = peeler.GetOriginalLoop();
755     }
756   } else {
757     peeler.PeelAfter(factor);
758     if (stats_) {
759       stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
760     }
761     if (peel_before_factor) {
762       // We could have peeled before, give it another try.
763       extra_opportunity = peeler.GetClonedLoop();
764     }
765   }
766 
767   return {true, extra_opportunity};
768 }
769 
770 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
771     Instruction* condition) const {
772   for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
773     BasicBlock* bb =
774         context_->get_instr_block(condition->GetSingleWordInOperand(i));
775     if (bb && loop_->IsInsideLoop(bb)) {
776       return condition->GetSingleWordInOperand(i);
777     }
778   }
779 
780   return 0;
781 }
782 
783 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
784     Instruction* condition) const {
785   for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
786     BasicBlock* bb =
787         context_->get_instr_block(condition->GetSingleWordInOperand(i));
788     if (!bb || !loop_->IsInsideLoop(bb)) {
789       return condition->GetSingleWordInOperand(i);
790     }
791   }
792 
793   return 0;
794 }
795 
796 static bool IsHandledCondition(spv::Op opcode) {
797   switch (opcode) {
798     case spv::Op::OpIEqual:
799     case spv::Op::OpINotEqual:
800     case spv::Op::OpUGreaterThan:
801     case spv::Op::OpSGreaterThan:
802     case spv::Op::OpUGreaterThanEqual:
803     case spv::Op::OpSGreaterThanEqual:
804     case spv::Op::OpULessThan:
805     case spv::Op::OpSLessThan:
806     case spv::Op::OpULessThanEqual:
807     case spv::Op::OpSLessThanEqual:
808       return true;
809     default:
810       return false;
811   }
812 }
813 
814 LoopPeelingPass::LoopPeelingInfo::Direction
815 LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
816   if (bb->terminator()->opcode() != spv::Op::OpBranchConditional) {
817     return GetNoneDirection();
818   }
819 
820   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
821 
822   Instruction* condition =
823       def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));
824 
825   if (!IsHandledCondition(condition->opcode())) {
826     return GetNoneDirection();
827   }
828 
829   if (!GetFirstLoopInvariantOperand(condition)) {
830     // No loop invariant operand; the loop cannot be peeled by this pass.
831     return GetNoneDirection();
832   }
833   if (!GetFirstNonLoopInvariantOperand(condition)) {
834     // Seems to be a job for the unswitch pass.
835     return GetNoneDirection();
836   }
837 
838   // Left hand-side.
839   SExpression lhs = scev_analysis_->AnalyzeInstruction(
840       def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
841   if (lhs->GetType() == SENode::CanNotCompute) {
842     // Can't make any conclusion.
843     return GetNoneDirection();
844   }
845 
846   // Right hand-side.
847   SExpression rhs = scev_analysis_->AnalyzeInstruction(
848       def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
849   if (rhs->GetType() == SENode::CanNotCompute) {
850     // Can't make any conclusion.
851     return GetNoneDirection();
852   }
853 
854   // Only take into account recurrent expressions over the current loop.
855   bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
856   bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);
857 
858   if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
859     return GetNoneDirection();
860   }
861 
862   if (is_lhs_rec) {
863     if (!lhs->AsSERecurrentNode() ||
864         lhs->AsSERecurrentNode()->GetLoop() != loop_) {
865       return GetNoneDirection();
866     }
867   }
868   if (is_rhs_rec) {
869     if (!rhs->AsSERecurrentNode() ||
870         rhs->AsSERecurrentNode()->GetLoop() != loop_) {
871       return GetNoneDirection();
872     }
873   }
874 
875   // If the opcode is == or !=, we try a peel before or after.
876   // If the opcode is not <, >, <= or >=, we bail out.
877   //
878   // For the remaining cases, we canonicalize the expression so that the
879   // constant expression is on the left hand side and the recurring expression
880   // is on the right hand side. If we swap the hand sides, then < becomes >,
881   // <= becomes >= etc.
882   // If the opcode is <=, then we add 1 to the right hand side and do the peel
883   // check on <.
884   // If the opcode is >=, then we add 1 to the left hand side and do the peel
885   // check on >.
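  //
  // For example, if |i| is the recurrent expression, "i > 10" is rewritten as
  // "10 < i" before being handed to HandleInequality.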
886 
887   CmpOperator cmp_operator;
888   switch (condition->opcode()) {
889     default:
890       return GetNoneDirection();
891     case spv::Op::OpIEqual:
892     case spv::Op::OpINotEqual:
893       return HandleEquality(lhs, rhs);
894     case spv::Op::OpUGreaterThan:
895     case spv::Op::OpSGreaterThan: {
896       cmp_operator = CmpOperator::kGT;
897       break;
898     }
899     case spv::Op::OpULessThan:
900     case spv::Op::OpSLessThan: {
901       cmp_operator = CmpOperator::kLT;
902       break;
903     }
904     // We add one to transform >= into > and <= into <.
905     case spv::Op::OpUGreaterThanEqual:
906     case spv::Op::OpSGreaterThanEqual: {
907       cmp_operator = CmpOperator::kGE;
908       break;
909     }
910     case spv::Op::OpULessThanEqual:
911     case spv::Op::OpSLessThanEqual: {
912       cmp_operator = CmpOperator::kLE;
913       break;
914     }
915   }
916 
917   // Force the left hand side to be the non recurring expression.
918   if (is_lhs_rec) {
919     std::swap(lhs, rhs);
920     switch (cmp_operator) {
921       case CmpOperator::kLT: {
922         cmp_operator = CmpOperator::kGT;
923         break;
924       }
925       case CmpOperator::kGT: {
926         cmp_operator = CmpOperator::kLT;
927         break;
928       }
929       case CmpOperator::kLE: {
930         cmp_operator = CmpOperator::kGE;
931         break;
932       }
933       case CmpOperator::kGE: {
934         cmp_operator = CmpOperator::kLE;
935         break;
936       }
937     }
938   }
939   return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
940 }
941 
942 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
943     SERecurrentNode* rec) const {
944   return rec->GetOffset();
945 }
946 
947 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
948     SERecurrentNode* rec, int64_t iteration) const {
949   SExpression coeff = rec->GetCoefficient();
950   SExpression offset = rec->GetOffset();
951 
952   return (coeff * iteration) + offset;
953 }
954 
955 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
956     SERecurrentNode* rec) const {
957   return GetValueAtIteration(rec, loop_max_iterations_ - 1);
958 }
959 
960 bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
961                                                     SExpression lhs,
962                                                     SExpression rhs,
963                                                     bool* result) const {
964   assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
965   assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
966   // We perform the test: 0 cmp_op rhs - lhs
967   // What is left is then to determine the sign of the expression.
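  // For example, "lhs < rhs" is known to hold exactly when the analysis can
  // prove that "rhs - lhs" is always strictly greater than zero.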
968   switch (cmp_op) {
969     case CmpOperator::kLT: {
970       return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
971     }
972     case CmpOperator::kGT: {
973       return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
974     }
975     case CmpOperator::kLE: {
976       return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
977     }
978     case CmpOperator::kGE: {
979       return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
980     }
981   }
982   return false;
983 }
984 
985 LoopPeelingPass::LoopPeelingInfo::Direction
986 LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
987                                                  SExpression rhs) const {
988   {
989     // Try peel before opportunity.
990     SExpression lhs_cst = lhs;
991     if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
992       lhs_cst = rec_node->GetOffset();
993     }
994     SExpression rhs_cst = rhs;
995     if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
996       rhs_cst = rec_node->GetOffset();
997     }
998 
999     if (lhs_cst == rhs_cst) {
1000       return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
1001     }
1002   }
1003 
1004   {
1005     // Try peel after opportunity.
1006     SExpression lhs_cst = lhs;
1007     if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
1008       // rec_node(x) = a * x + b
1009       // assign to lhs: a * (loop_max_iterations_ - 1) + b
1010       lhs_cst = GetValueAtLastIteration(rec_node);
1011     }
1012     SExpression rhs_cst = rhs;
1013     if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
1014       // rec_node(x) = a * x + b
1015       // assign to rhs: a * (loop_max_iterations_ - 1) + b
1016       rhs_cst = GetValueAtLastIteration(rec_node);
1017     }
1018 
1019     if (lhs_cst == rhs_cst) {
1020       return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
1021     }
1022   }
1023 
1024   return GetNoneDirection();
1025 }
1026 
1027 LoopPeelingPass::LoopPeelingInfo::Direction
1028 LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
1029                                                    SExpression lhs,
1030                                                    SERecurrentNode* rhs) const {
1031   SExpression offset = rhs->GetOffset();
1032   SExpression coefficient = rhs->GetCoefficient();
1033   // Compute (cst - B) / A.
1034   std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
1035   if (!flip_iteration.first->AsSEConstantNode()) {
1036     return GetNoneDirection();
1037   }
1038   // Note: !!flip_iteration.second normalizes to 0/1 (via bool cast).
1039   int64_t iteration =
1040       flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
1041       !!flip_iteration.second;
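  // That is, |iteration| is roughly ceil((lhs - B) / A) for the recurrence
  // rhs(x) = A * x + B, assuming a positive quotient.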
1042   if (iteration <= 0 ||
1043       loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
1044     // Always true or false within the loop bounds.
1045     return GetNoneDirection();
1046   }
1047   // If this is a <= or >= operator, make sure |iteration| is the one that
1048   // flips the condition.
1049   // If (cst - B) is not divisible by A, this is equivalent to a < or > check,
1050   // so we skip this test.
1051   if (!flip_iteration.second &&
1052       (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
1053     bool first_iteration;
1054     bool current_iteration;
1055     if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
1056         !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
1057                       &current_iteration)) {
1058       return GetNoneDirection();
1059     }
1060     // If the condition has not flipped yet, the next iteration will.
1061     if (first_iteration == current_iteration) {
1062       iteration++;
1063     }
1064   }
1065 
1066   uint32_t cast_iteration = 0;
1067   // Integrity check: can we fit |iteration| in a uint32_t?
1068   if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
1069     cast_iteration = static_cast<uint32_t>(iteration);
1070   }
1071 
1072   if (cast_iteration) {
1073     // Peel before if we are closer to the start, after if closer to the end.
1074     if (loop_max_iterations_ / 2 > cast_iteration) {
1075       return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
1076     } else {
1077       return Direction{
1078           LoopPeelingPass::PeelDirection::kAfter,
1079           static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
1080     }
1081   }
1082 
1083   return GetNoneDirection();
1084 }
1085 
1086 }  // namespace opt
1087 }  // namespace spvtools
1088