• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <functional>
17 #include <memory>
18 #include <unordered_map>
19 #include <unordered_set>
20 #include <vector>
21 
22 #include "source/opt/ir_builder.h"
23 #include "source/opt/ir_context.h"
24 #include "source/opt/loop_descriptor.h"
25 #include "source/opt/loop_peeling.h"
26 #include "source/opt/loop_utils.h"
27 #include "source/opt/scalar_analysis.h"
28 #include "source/opt/scalar_analysis_nodes.h"
29 
30 namespace spvtools {
31 namespace opt {
32 size_t LoopPeelingPass::code_grow_threshold_ = 1000;
33 
DuplicateAndConnectLoop(LoopUtils::LoopCloningResult * clone_results)34 void LoopPeeling::DuplicateAndConnectLoop(
35     LoopUtils::LoopCloningResult* clone_results) {
36   CFG& cfg = *context_->cfg();
37   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
38 
39   assert(CanPeelLoop() && "Cannot peel loop!");
40 
41   std::vector<BasicBlock*> ordered_loop_blocks;
42   // TODO(1841): Handle failure to create pre-header.
43   BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();
44 
45   loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);
46 
47   cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);
48 
49   // Add the basic block to the function.
50   Function::iterator it =
51       loop_utils_.GetFunction()->FindBlock(pre_header->id());
52   assert(it != loop_utils_.GetFunction()->end() &&
53          "Pre-header not found in the function.");
54   loop_utils_.GetFunction()->AddBasicBlocks(
55       clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);
56 
57   // Make the |loop_|'s preheader the |cloned_loop_| one.
58   BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
59   pre_header->ForEachSuccessorLabel(
60       [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });
61 
62   // Update cfg.
63   cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
64   cloned_loop_->SetPreHeaderBlock(pre_header);
65   loop_->SetPreHeaderBlock(nullptr);
66 
67   // When cloning the loop, we didn't cloned the merge block, so currently
68   // |cloned_loop_| shares the same block as |loop_|.
69   // We mutate all branches from |cloned_loop_| block to |loop_|'s merge into a
70   // branch to |loop_|'s header (so header will also be the merge of
71   // |cloned_loop_|).
72   uint32_t cloned_loop_exit = 0;
73   for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
74     if (loop_->IsInsideLoop(pred_id)) continue;
75     BasicBlock* bb = cfg.block(pred_id);
76     assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
77     cloned_loop_exit = bb->id();
78     bb->ForEachSuccessorLabel([this](uint32_t* succ) {
79       if (*succ == loop_->GetMergeBlock()->id())
80         *succ = loop_->GetHeaderBlock()->id();
81     });
82   }
83 
84   // Update cfg.
85   cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
86   cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());
87 
88   // Patch the phi of the original loop header:
89   //  - Set the loop entry branch to come from the cloned loop exit block;
90   //  - Set the initial value of the phi using the corresponding cloned loop
91   //    exit values.
92   //
93   // We patch the iterating value initializers of the original loop using the
94   // corresponding cloned loop exit values. Connects the cloned loop iterating
95   // values to the original loop. This make sure that the initial value of the
96   // second loop starts with the last value of the first loop.
97   //
98   // For example, loops like:
99   //
100   // int z = 0;
101   // for (int i = 0; i++ < M; i += cst1) {
102   //   if (cond)
103   //     z += cst2;
104   // }
105   //
106   // Will become:
107   //
108   // int z = 0;
109   // int i = 0;
110   // for (; i++ < M; i += cst1) {
111   //   if (cond)
112   //     z += cst2;
113   // }
114   // for (; i++ < M; i += cst1) {
115   //   if (cond)
116   //     z += cst2;
117   // }
118   loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
119                                            clone_results,
120                                            this](Instruction* phi) {
121     for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
122       if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
123         phi->SetInOperand(i,
124                           {clone_results->value_map_.at(
125                               exit_value_.at(phi->result_id())->result_id())});
126         phi->SetInOperand(i + 1, {cloned_loop_exit});
127         def_use_mgr->AnalyzeInstUse(phi);
128         return;
129       }
130     }
131   });
132 
133   // Force the creation of a new preheader for the original loop and set it as
134   // the merge block for the cloned loop.
135   // TODO(1841): Handle failure to create pre-header.
136   cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
137 }
138 
InsertCanonicalInductionVariable(LoopUtils::LoopCloningResult * clone_results)139 void LoopPeeling::InsertCanonicalInductionVariable(
140     LoopUtils::LoopCloningResult* clone_results) {
141   if (original_loop_canonical_induction_variable_) {
142     canonical_induction_variable_ =
143         context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
144             original_loop_canonical_induction_variable_->result_id()));
145     return;
146   }
147 
148   BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
149   if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
150     --insert_point;
151   }
152   InstructionBuilder builder(
153       context_, &*insert_point,
154       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
155   Instruction* uint_1_cst =
156       builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
157   // Create the increment.
158   // Note that we do "1 + 1" here, one of the operand should the phi
159   // value but we don't have it yet. The operand will be set latter.
160   Instruction* iv_inc = builder.AddIAdd(
161       uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());
162 
163   builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());
164 
165   canonical_induction_variable_ = builder.AddPhi(
166       uint_1_cst->type_id(),
167       {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
168        GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
169        GetClonedLoop()->GetLatchBlock()->id()});
170   // Connect everything.
171   iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});
172 
173   // Update def/use manager.
174   context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);
175 
176   // If do-while form, use the incremented value.
177   if (do_while_form_) {
178     canonical_induction_variable_ = iv_inc;
179   }
180 }
181 
GetIteratorUpdateOperations(const Loop * loop,Instruction * iterator,std::unordered_set<Instruction * > * operations)182 void LoopPeeling::GetIteratorUpdateOperations(
183     const Loop* loop, Instruction* iterator,
184     std::unordered_set<Instruction*>* operations) {
185   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
186   operations->insert(iterator);
187   iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
188     Instruction* insn = def_use_mgr->GetDef(*id);
189     if (insn->opcode() == SpvOpLabel) {
190       return;
191     }
192     if (operations->count(insn)) {
193       return;
194     }
195     if (!loop->IsInsideLoop(insn)) {
196       return;
197     }
198     GetIteratorUpdateOperations(loop, insn, operations);
199   });
200 }
201 
202 // Gather the set of blocks for all the path from |entry| to |root|.
GetBlocksInPath(uint32_t block,uint32_t entry,std::unordered_set<uint32_t> * blocks_in_path,const CFG & cfg)203 static void GetBlocksInPath(uint32_t block, uint32_t entry,
204                             std::unordered_set<uint32_t>* blocks_in_path,
205                             const CFG& cfg) {
206   for (uint32_t pid : cfg.preds(block)) {
207     if (blocks_in_path->insert(pid).second) {
208       if (pid != entry) {
209         GetBlocksInPath(pid, entry, blocks_in_path, cfg);
210       }
211     }
212   }
213 }
214 
IsConditionCheckSideEffectFree() const215 bool LoopPeeling::IsConditionCheckSideEffectFree() const {
216   CFG& cfg = *context_->cfg();
217 
218   // The "do-while" form does not cause issues, the algorithm takes into account
219   // the first iteration.
220   if (!do_while_form_) {
221     uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
222 
223     std::unordered_set<uint32_t> blocks_in_path;
224 
225     blocks_in_path.insert(condition_block_id);
226     GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
227                     &blocks_in_path, cfg);
228 
229     for (uint32_t bb_id : blocks_in_path) {
230       BasicBlock* bb = cfg.block(bb_id);
231       if (!bb->WhileEachInst([this](Instruction* insn) {
232             if (insn->IsBranch()) return true;
233             switch (insn->opcode()) {
234               case SpvOpLabel:
235               case SpvOpSelectionMerge:
236               case SpvOpLoopMerge:
237                 return true;
238               default:
239                 break;
240             }
241             return context_->IsCombinatorInstruction(insn);
242           })) {
243         return false;
244       }
245     }
246   }
247 
248   return true;
249 }
250 
GetIteratingExitValues()251 void LoopPeeling::GetIteratingExitValues() {
252   CFG& cfg = *context_->cfg();
253 
254   loop_->GetHeaderBlock()->ForEachPhiInst(
255       [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });
256 
257   if (!loop_->GetMergeBlock()) {
258     return;
259   }
260   if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
261     return;
262   }
263   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
264 
265   uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];
266 
267   auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
268   do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
269                              condition_block_id) != header_pred.end();
270   if (do_while_form_) {
271     loop_->GetHeaderBlock()->ForEachPhiInst(
272         [condition_block_id, def_use_mgr, this](Instruction* phi) {
273           std::unordered_set<Instruction*> operations;
274 
275           for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
276             if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
277               exit_value_[phi->result_id()] =
278                   def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
279             }
280           }
281         });
282   } else {
283     DominatorTree* dom_tree =
284         &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
285              ->GetDomTree();
286     BasicBlock* condition_block = cfg.block(condition_block_id);
287 
288     loop_->GetHeaderBlock()->ForEachPhiInst(
289         [dom_tree, condition_block, this](Instruction* phi) {
290           std::unordered_set<Instruction*> operations;
291 
292           // Not the back-edge value, check if the phi instruction is the only
293           // possible candidate.
294           GetIteratorUpdateOperations(loop_, phi, &operations);
295 
296           for (Instruction* insn : operations) {
297             if (insn == phi) {
298               continue;
299             }
300             if (dom_tree->Dominates(context_->get_instr_block(insn),
301                                     condition_block)) {
302               return;
303             }
304           }
305           exit_value_[phi->result_id()] = phi;
306         });
307   }
308 }
309 
FixExitCondition(const std::function<uint32_t (Instruction *)> & condition_builder)310 void LoopPeeling::FixExitCondition(
311     const std::function<uint32_t(Instruction*)>& condition_builder) {
312   CFG& cfg = *context_->cfg();
313 
314   uint32_t condition_block_id = 0;
315   for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
316     if (GetClonedLoop()->IsInsideLoop(id)) {
317       condition_block_id = id;
318       break;
319     }
320   }
321   assert(condition_block_id != 0 && "2nd loop in improperly connected");
322 
323   BasicBlock* condition_block = cfg.block(condition_block_id);
324   Instruction* exit_condition = condition_block->terminator();
325   assert(exit_condition->opcode() == SpvOpBranchConditional);
326   BasicBlock::iterator insert_point = condition_block->tail();
327   if (condition_block->GetMergeInst()) {
328     --insert_point;
329   }
330 
331   exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});
332 
333   uint32_t to_continue_block_idx =
334       GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
335           ? 1
336           : 2;
337   exit_condition->SetInOperand(
338       1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
339   exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});
340 
341   // Update def/use manager.
342   context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
343 }
344 
CreateBlockBefore(BasicBlock * bb)345 BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
346   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
347   CFG& cfg = *context_->cfg();
348   assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");
349 
350   // TODO(1841): Handle id overflow.
351   std::unique_ptr<BasicBlock> new_bb =
352       MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
353           context_, SpvOpLabel, 0, context_->TakeNextId(), {})));
354   // Update the loop descriptor.
355   Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
356   if (in_loop) {
357     in_loop->AddBasicBlock(new_bb.get());
358     loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
359   }
360 
361   context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
362   def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());
363 
364   BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
365   bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
366     if (*id == bb->id()) {
367       *id = new_bb->id();
368     }
369   });
370   cfg.RemoveEdge(bb_pred->id(), bb->id());
371   cfg.AddEdge(bb_pred->id(), new_bb->id());
372   def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
373 
374   // Update the incoming branch.
375   bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
376     phi->SetInOperand(1, {new_bb->id()});
377     def_use_mgr->AnalyzeInstUse(phi);
378   });
379   InstructionBuilder(
380       context_, new_bb.get(),
381       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
382       .AddBranch(bb->id());
383   cfg.RegisterBlock(new_bb.get());
384 
385   // Add the basic block to the function.
386   Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
387   assert(it != loop_utils_.GetFunction()->end() &&
388          "Basic block not found in the function.");
389   BasicBlock* ret = new_bb.get();
390   loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
391   return ret;
392 }
393 
ProtectLoop(Loop * loop,Instruction * condition,BasicBlock * if_merge)394 BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
395                                      BasicBlock* if_merge) {
396   // TODO(1841): Handle failure to create pre-header.
397   BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
398   // Will no longer be a pre-header because of the if.
399   loop->SetPreHeaderBlock(nullptr);
400   // Kill the branch to the header.
401   context_->KillInst(&*if_block->tail());
402 
403   InstructionBuilder builder(
404       context_, if_block,
405       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
406   builder.AddConditionalBranch(condition->result_id(),
407                                loop->GetHeaderBlock()->id(), if_merge->id(),
408                                if_merge->id());
409 
410   return if_block;
411 }
412 
PeelBefore(uint32_t peel_factor)413 void LoopPeeling::PeelBefore(uint32_t peel_factor) {
414   assert(CanPeelLoop() && "Cannot peel loop");
415   LoopUtils::LoopCloningResult clone_results;
416 
417   // Clone the loop and insert the cloned one before the loop.
418   DuplicateAndConnectLoop(&clone_results);
419 
420   // Add a canonical induction variable "canonical_induction_variable_".
421   InsertCanonicalInductionVariable(&clone_results);
422 
423   InstructionBuilder builder(
424       context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
425       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
426   Instruction* factor =
427       builder.GetIntConstant(peel_factor, int_type_->IsSigned());
428 
429   Instruction* has_remaining_iteration = builder.AddLessThan(
430       factor->result_id(), loop_iteration_count_->result_id());
431   Instruction* max_iteration = builder.AddSelect(
432       factor->type_id(), has_remaining_iteration->result_id(),
433       factor->result_id(), loop_iteration_count_->result_id());
434 
435   // Change the exit condition of the cloned loop to be (exit when become
436   // false):
437   //  "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
438   FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
439     return InstructionBuilder(context_, insert_before_point,
440                               IRContext::kAnalysisDefUse |
441                                   IRContext::kAnalysisInstrToBlockMapping)
442         .AddLessThan(canonical_induction_variable_->result_id(),
443                      max_iteration->result_id())
444         ->result_id();
445   });
446 
447   // "Protect" the second loop: the second loop can only be executed if
448   // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
449   BasicBlock* if_merge_block = loop_->GetMergeBlock();
450   loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
451   // Prevent the second loop from being executed if we already executed all the
452   // required iterations.
453   BasicBlock* if_block =
454       ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
455   // Patch the phi of the merge block.
456   if_merge_block->ForEachPhiInst(
457       [&clone_results, if_block, this](Instruction* phi) {
458         // if_merge_block had previously only 1 predecessor.
459         uint32_t incoming_value = phi->GetSingleWordInOperand(0);
460         auto def_in_loop = clone_results.value_map_.find(incoming_value);
461         if (def_in_loop != clone_results.value_map_.end())
462           incoming_value = def_in_loop->second;
463         phi->AddOperand(
464             {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
465         phi->AddOperand(
466             {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
467         context_->get_def_use_mgr()->AnalyzeInstUse(phi);
468       });
469 
470   context_->InvalidateAnalysesExceptFor(
471       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
472       IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
473 }
474 
PeelAfter(uint32_t peel_factor)475 void LoopPeeling::PeelAfter(uint32_t peel_factor) {
476   assert(CanPeelLoop() && "Cannot peel loop");
477   LoopUtils::LoopCloningResult clone_results;
478 
479   // Clone the loop and insert the cloned one before the loop.
480   DuplicateAndConnectLoop(&clone_results);
481 
482   // Add a canonical induction variable "canonical_induction_variable_".
483   InsertCanonicalInductionVariable(&clone_results);
484 
485   InstructionBuilder builder(
486       context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
487       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
488   Instruction* factor =
489       builder.GetIntConstant(peel_factor, int_type_->IsSigned());
490 
491   Instruction* has_remaining_iteration = builder.AddLessThan(
492       factor->result_id(), loop_iteration_count_->result_id());
493 
494   // Change the exit condition of the cloned loop to be (exit when become
495   // false):
496   //  "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
497   FixExitCondition([factor, this](Instruction* insert_before_point) {
498     InstructionBuilder cond_builder(
499         context_, insert_before_point,
500         IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
501     // Build the following check: canonical_induction_variable_ + factor <
502     // iteration_count
503     return cond_builder
504         .AddLessThan(cond_builder
505                          .AddIAdd(canonical_induction_variable_->type_id(),
506                                   canonical_induction_variable_->result_id(),
507                                   factor->result_id())
508                          ->result_id(),
509                      loop_iteration_count_->result_id())
510         ->result_id();
511   });
512 
513   // "Protect" the first loop: the first loop can only be executed if
514   // factor < loop_iteration_count_.
515 
516   // The original loop's pre-header was the cloned loop merge block.
517   GetClonedLoop()->SetMergeBlock(
518       CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
519   // Use the second loop preheader as if merge block.
520 
521   // Prevent the first loop if only the peeled loop needs it.
522   BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
523                                      GetOriginalLoop()->GetPreHeaderBlock());
524 
525   // Patch the phi of the header block.
526   // We added an if to enclose the first loop and because the phi node are
527   // connected to the exit value of the first loop, the definition no longer
528   // dominate the preheader.
529   // We had to the preheader (our if merge block) the required phi instruction
530   // and patch the header phi.
531   GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
532       [&clone_results, if_block, this](Instruction* phi) {
533         analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
534 
535         auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
536           uint32_t preheader_value_idx =
537               !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
538           return preheader_value_idx;
539         };
540 
541         Instruction* cloned_phi =
542             def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
543         uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
544             find_value_idx(cloned_phi, GetClonedLoop()));
545 
546         Instruction* new_phi =
547             InstructionBuilder(context_,
548                                &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
549                                IRContext::kAnalysisDefUse |
550                                    IRContext::kAnalysisInstrToBlockMapping)
551                 .AddPhi(phi->type_id(),
552                         {phi->GetSingleWordInOperand(
553                              find_value_idx(phi, GetOriginalLoop())),
554                          GetClonedLoop()->GetMergeBlock()->id(),
555                          cloned_preheader_value, if_block->id()});
556 
557         phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
558                           {new_phi->result_id()});
559         def_use_mgr->AnalyzeInstUse(phi);
560       });
561 
562   context_->InvalidateAnalysesExceptFor(
563       IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
564       IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
565 }
566 
Process()567 Pass::Status LoopPeelingPass::Process() {
568   bool modified = false;
569   Module* module = context()->module();
570 
571   // Process each function in the module
572   for (Function& f : *module) {
573     modified |= ProcessFunction(&f);
574   }
575 
576   return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
577 }
578 
ProcessFunction(Function * f)579 bool LoopPeelingPass::ProcessFunction(Function* f) {
580   bool modified = false;
581   LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
582 
583   std::vector<Loop*> to_process_loop;
584   to_process_loop.reserve(loop_descriptor.NumLoops());
585   for (Loop& l : loop_descriptor) {
586     to_process_loop.push_back(&l);
587   }
588 
589   ScalarEvolutionAnalysis scev_analysis(context());
590 
591   for (Loop* loop : to_process_loop) {
592     CodeMetrics loop_size;
593     loop_size.Analyze(*loop);
594 
595     auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
596       if (!loop_to_peel->IsLCSSA()) {
597         LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
598       }
599 
600       bool peeled_loop;
601       Loop* still_peelable_loop;
602       std::tie(peeled_loop, still_peelable_loop) =
603           ProcessLoop(loop_to_peel, &loop_size);
604 
605       if (peeled_loop) {
606         modified = true;
607       }
608 
609       return still_peelable_loop;
610     };
611 
612     Loop* still_peelable_loop = try_peel(loop);
613     // The pass is working out the maximum factor by which a loop can be peeled.
614     // If the loop can potentially be peeled again, then there is only one
615     // possible direction, so only one call is still needed.
616     if (still_peelable_loop) {
617       try_peel(loop);
618     }
619   }
620 
621   return modified;
622 }
623 
ProcessLoop(Loop * loop,CodeMetrics * loop_size)624 std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
625                                                     CodeMetrics* loop_size) {
626   ScalarEvolutionAnalysis* scev_analysis =
627       context()->GetScalarEvolutionAnalysis();
628   // Default values for bailing out.
629   std::pair<bool, Loop*> bail_out{false, nullptr};
630 
631   BasicBlock* exit_block = loop->FindConditionBlock();
632   if (!exit_block) {
633     return bail_out;
634   }
635 
636   Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
637   if (!exiting_iv) {
638     return bail_out;
639   }
640   size_t iterations = 0;
641   if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
642                                     &iterations)) {
643     return bail_out;
644   }
645   if (!iterations) {
646     return bail_out;
647   }
648 
649   Instruction* canonical_induction_variable = nullptr;
650 
651   loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
652                                             scev_analysis,
653                                             this](Instruction* insn) {
654     if (const SERecurrentNode* iv =
655             scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
656       const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
657       const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
658       if (offset && coeff && offset->FoldToSingleValue() == 0 &&
659           coeff->FoldToSingleValue() == 1) {
660         if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
661           canonical_induction_variable = insn;
662           return false;
663         }
664       }
665     }
666     return true;
667   });
668 
669   bool is_signed = canonical_induction_variable
670                        ? context()
671                              ->get_type_mgr()
672                              ->GetType(canonical_induction_variable->type_id())
673                              ->AsInteger()
674                              ->IsSigned()
675                        : false;
676 
677   LoopPeeling peeler(
678       loop,
679       InstructionBuilder(
680           context(), loop->GetHeaderBlock(),
681           IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
682           .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
683                                     is_signed),
684       canonical_induction_variable);
685 
686   if (!peeler.CanPeelLoop()) {
687     return bail_out;
688   }
689 
690   // For each basic block in the loop, check if it can be peeled. If it
691   // can, get the direction (before/after) and by which factor.
692   LoopPeelingInfo peel_info(loop, iterations, scev_analysis);
693 
694   uint32_t peel_before_factor = 0;
695   uint32_t peel_after_factor = 0;
696 
697   for (uint32_t block : loop->GetBlocks()) {
698     if (block == exit_block->id()) {
699       continue;
700     }
701     BasicBlock* bb = cfg()->block(block);
702     PeelDirection direction;
703     uint32_t factor;
704     std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);
705 
706     if (direction == PeelDirection::kNone) {
707       continue;
708     }
709     if (direction == PeelDirection::kBefore) {
710       peel_before_factor = std::max(peel_before_factor, factor);
711     } else {
712       assert(direction == PeelDirection::kAfter);
713       peel_after_factor = std::max(peel_after_factor, factor);
714     }
715   }
716   PeelDirection direction = PeelDirection::kNone;
717   uint32_t factor = 0;
718 
719   // Find which direction we should peel.
720   if (peel_before_factor) {
721     factor = peel_before_factor;
722     direction = PeelDirection::kBefore;
723   }
724   if (peel_after_factor) {
725     if (peel_before_factor < peel_after_factor) {
726       // Favor a peel after here and give the peel before another shot later.
727       factor = peel_after_factor;
728       direction = PeelDirection::kAfter;
729     }
730   }
731 
732   // Do the peel if we can.
733   if (direction == PeelDirection::kNone) return bail_out;
734 
735   // This does not take into account branch elimination opportunities and
736   // the unrolling. It assumes the peeled loop will be unrolled as well.
737   if (factor * loop_size->roi_size_ > code_grow_threshold_) {
738     return bail_out;
739   }
740   loop_size->roi_size_ *= factor;
741 
742   // Find if a loop should be peeled again.
743   Loop* extra_opportunity = nullptr;
744 
745   if (direction == PeelDirection::kBefore) {
746     peeler.PeelBefore(factor);
747     if (stats_) {
748       stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
749     }
750     if (peel_after_factor) {
751       // We could have peeled after, give it another try.
752       extra_opportunity = peeler.GetOriginalLoop();
753     }
754   } else {
755     peeler.PeelAfter(factor);
756     if (stats_) {
757       stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
758     }
759     if (peel_before_factor) {
760       // We could have peeled before, give it another try.
761       extra_opportunity = peeler.GetClonedLoop();
762     }
763   }
764 
765   return {true, extra_opportunity};
766 }
767 
GetFirstLoopInvariantOperand(Instruction * condition) const768 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
769     Instruction* condition) const {
770   for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
771     BasicBlock* bb =
772         context_->get_instr_block(condition->GetSingleWordInOperand(i));
773     if (bb && loop_->IsInsideLoop(bb)) {
774       return condition->GetSingleWordInOperand(i);
775     }
776   }
777 
778   return 0;
779 }
780 
GetFirstNonLoopInvariantOperand(Instruction * condition) const781 uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
782     Instruction* condition) const {
783   for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
784     BasicBlock* bb =
785         context_->get_instr_block(condition->GetSingleWordInOperand(i));
786     if (!bb || !loop_->IsInsideLoop(bb)) {
787       return condition->GetSingleWordInOperand(i);
788     }
789   }
790 
791   return 0;
792 }
793 
IsHandledCondition(SpvOp opcode)794 static bool IsHandledCondition(SpvOp opcode) {
795   switch (opcode) {
796     case SpvOpIEqual:
797     case SpvOpINotEqual:
798     case SpvOpUGreaterThan:
799     case SpvOpSGreaterThan:
800     case SpvOpUGreaterThanEqual:
801     case SpvOpSGreaterThanEqual:
802     case SpvOpULessThan:
803     case SpvOpSLessThan:
804     case SpvOpULessThanEqual:
805     case SpvOpSLessThanEqual:
806       return true;
807     default:
808       return false;
809   }
810 }
811 
812 LoopPeelingPass::LoopPeelingInfo::Direction
GetPeelingInfo(BasicBlock * bb) const813 LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
814   if (bb->terminator()->opcode() != SpvOpBranchConditional) {
815     return GetNoneDirection();
816   }
817 
818   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
819 
820   Instruction* condition =
821       def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));
822 
823   if (!IsHandledCondition(condition->opcode())) {
824     return GetNoneDirection();
825   }
826 
827   if (!GetFirstLoopInvariantOperand(condition)) {
828     // No loop invariant, it cannot be peeled by this pass.
829     return GetNoneDirection();
830   }
831   if (!GetFirstNonLoopInvariantOperand(condition)) {
832     // Seems to be a job for the unswitch pass.
833     return GetNoneDirection();
834   }
835 
836   // Left hand-side.
837   SExpression lhs = scev_analysis_->AnalyzeInstruction(
838       def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
839   if (lhs->GetType() == SENode::CanNotCompute) {
840     // Can't make any conclusion.
841     return GetNoneDirection();
842   }
843 
844   // Right hand-side.
845   SExpression rhs = scev_analysis_->AnalyzeInstruction(
846       def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
847   if (rhs->GetType() == SENode::CanNotCompute) {
848     // Can't make any conclusion.
849     return GetNoneDirection();
850   }
851 
852   // Only take into account recurrent expression over the current loop.
853   bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
854   bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);
855 
856   if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
857     return GetNoneDirection();
858   }
859 
860   if (is_lhs_rec) {
861     if (!lhs->AsSERecurrentNode() ||
862         lhs->AsSERecurrentNode()->GetLoop() != loop_) {
863       return GetNoneDirection();
864     }
865   }
866   if (is_rhs_rec) {
867     if (!rhs->AsSERecurrentNode() ||
868         rhs->AsSERecurrentNode()->GetLoop() != loop_) {
869       return GetNoneDirection();
870     }
871   }
872 
873   // If the op code is ==, then we try a peel before or after.
874   // If opcode is not <, >, <= or >=, we bail out.
875   //
876   // For the remaining cases, we canonicalize the expression so that the
877   // constant expression is on the left hand side and the recurring expression
878   // is on the right hand side. If we swap hand side, then < becomes >, <=
879   // becomes >= etc.
880   // If the opcode is <=, then we add 1 to the right hand side and do the peel
881   // check on <.
882   // If the opcode is >=, then we add 1 to the left hand side and do the peel
883   // check on >.
884 
885   CmpOperator cmp_operator;
886   switch (condition->opcode()) {
887     default:
888       return GetNoneDirection();
889     case SpvOpIEqual:
890     case SpvOpINotEqual:
891       return HandleEquality(lhs, rhs);
892     case SpvOpUGreaterThan:
893     case SpvOpSGreaterThan: {
894       cmp_operator = CmpOperator::kGT;
895       break;
896     }
897     case SpvOpULessThan:
898     case SpvOpSLessThan: {
899       cmp_operator = CmpOperator::kLT;
900       break;
901     }
902     // We add one to transform >= into > and <= into <.
903     case SpvOpUGreaterThanEqual:
904     case SpvOpSGreaterThanEqual: {
905       cmp_operator = CmpOperator::kGE;
906       break;
907     }
908     case SpvOpULessThanEqual:
909     case SpvOpSLessThanEqual: {
910       cmp_operator = CmpOperator::kLE;
911       break;
912     }
913   }
914 
915   // Force the left hand side to be the non recurring expression.
916   if (is_lhs_rec) {
917     std::swap(lhs, rhs);
918     switch (cmp_operator) {
919       case CmpOperator::kLT: {
920         cmp_operator = CmpOperator::kGT;
921         break;
922       }
923       case CmpOperator::kGT: {
924         cmp_operator = CmpOperator::kLT;
925         break;
926       }
927       case CmpOperator::kLE: {
928         cmp_operator = CmpOperator::kGE;
929         break;
930       }
931       case CmpOperator::kGE: {
932         cmp_operator = CmpOperator::kLE;
933         break;
934       }
935     }
936   }
937   return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
938 }
939 
GetValueAtFirstIteration(SERecurrentNode * rec) const940 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
941     SERecurrentNode* rec) const {
942   return rec->GetOffset();
943 }
944 
GetValueAtIteration(SERecurrentNode * rec,int64_t iteration) const945 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
946     SERecurrentNode* rec, int64_t iteration) const {
947   SExpression coeff = rec->GetCoefficient();
948   SExpression offset = rec->GetOffset();
949 
950   return (coeff * iteration) + offset;
951 }
952 
GetValueAtLastIteration(SERecurrentNode * rec) const953 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
954     SERecurrentNode* rec) const {
955   return GetValueAtIteration(rec, loop_max_iterations_ - 1);
956 }
957 
EvalOperator(CmpOperator cmp_op,SExpression lhs,SExpression rhs,bool * result) const958 bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
959                                                     SExpression lhs,
960                                                     SExpression rhs,
961                                                     bool* result) const {
962   assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
963   assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
964   // We perform the test: 0 cmp_op rhs - lhs
965   // What is left is then to determine the sign of the expression.
966   switch (cmp_op) {
967     case CmpOperator::kLT: {
968       return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
969     }
970     case CmpOperator::kGT: {
971       return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
972     }
973     case CmpOperator::kLE: {
974       return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
975     }
976     case CmpOperator::kGE: {
977       return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
978     }
979   }
980   return false;
981 }
982 
983 LoopPeelingPass::LoopPeelingInfo::Direction
HandleEquality(SExpression lhs,SExpression rhs) const984 LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
985                                                  SExpression rhs) const {
986   {
987     // Try peel before opportunity.
988     SExpression lhs_cst = lhs;
989     if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
990       lhs_cst = rec_node->GetOffset();
991     }
992     SExpression rhs_cst = rhs;
993     if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
994       rhs_cst = rec_node->GetOffset();
995     }
996 
997     if (lhs_cst == rhs_cst) {
998       return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
999     }
1000   }
1001 
1002   {
1003     // Try peel after opportunity.
1004     SExpression lhs_cst = lhs;
1005     if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
1006       // rec_node(x) = a * x + b
1007       // assign to lhs: a * (loop_max_iterations_ - 1) + b
1008       lhs_cst = GetValueAtLastIteration(rec_node);
1009     }
1010     SExpression rhs_cst = rhs;
1011     if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
1012       // rec_node(x) = a * x + b
1013       // assign to lhs: a * (loop_max_iterations_ - 1) + b
1014       rhs_cst = GetValueAtLastIteration(rec_node);
1015     }
1016 
1017     if (lhs_cst == rhs_cst) {
1018       return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
1019     }
1020   }
1021 
1022   return GetNoneDirection();
1023 }
1024 
1025 LoopPeelingPass::LoopPeelingInfo::Direction
HandleInequality(CmpOperator cmp_op,SExpression lhs,SERecurrentNode * rhs) const1026 LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
1027                                                    SExpression lhs,
1028                                                    SERecurrentNode* rhs) const {
1029   SExpression offset = rhs->GetOffset();
1030   SExpression coefficient = rhs->GetCoefficient();
1031   // Compute (cst - B) / A.
1032   std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
1033   if (!flip_iteration.first->AsSEConstantNode()) {
1034     return GetNoneDirection();
1035   }
1036   // note: !!flip_iteration.second normalize to 0/1 (via bool cast).
1037   int64_t iteration =
1038       flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
1039       !!flip_iteration.second;
1040   if (iteration <= 0 ||
1041       loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
1042     // Always true or false within the loop bounds.
1043     return GetNoneDirection();
1044   }
1045   // If this is a <= or >= operator and the iteration, make sure |iteration| is
1046   // the one flipping the condition.
1047   // If (cst - B) and A are not divisible, this equivalent to a < or > check, so
1048   // we skip this test.
1049   if (!flip_iteration.second &&
1050       (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
1051     bool first_iteration;
1052     bool current_iteration;
1053     if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
1054         !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
1055                       &current_iteration)) {
1056       return GetNoneDirection();
1057     }
1058     // If the condition did not flip the next will.
1059     if (first_iteration == current_iteration) {
1060       iteration++;
1061     }
1062   }
1063 
1064   uint32_t cast_iteration = 0;
1065   // Integrity check: can we fit |iteration| in a uint32_t ?
1066   if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
1067     cast_iteration = static_cast<uint32_t>(iteration);
1068   }
1069 
1070   if (cast_iteration) {
1071     // Peel before if we are closer to the start, after if closer to the end.
1072     if (loop_max_iterations_ / 2 > cast_iteration) {
1073       return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
1074     } else {
1075       return Direction{
1076           LoopPeelingPass::PeelDirection::kAfter,
1077           static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
1078     }
1079   }
1080 
1081   return GetNoneDirection();
1082 }
1083 
1084 }  // namespace opt
1085 }  // namespace spvtools
1086