• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "source/opt/loop_unroller.h"
16 
17 #include <limits>
18 #include <memory>
19 #include <unordered_map>
20 #include <utility>
21 #include <vector>
22 
23 #include "source/opt/ir_builder.h"
24 #include "source/opt/loop_utils.h"
25 
26 // Implements loop util unrolling functionality for fully and partially
27 // unrolling loops. Given a factor it will duplicate the loop that many times,
28 // appending each one to the end of the old loop and removing backedges, to
29 // create a new unrolled loop.
30 //
31 // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
32 // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
33 // validate that a given loop can be unrolled. That method (along with the
34 // constructor of loop) checks that the IR is in the expected canonicalised
35 // format.
36 //
37 // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
38 // perform the unrolling. This implements helper methods to copy the loop basic
39 // blocks and remap the ids of instructions used inside them.
40 //
41 // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
42 // actually performs the loop duplication. It does this by creating a
43 // LoopUnrollState object and then copying the loop as given by the factor
44 // parameter. The LoopUnrollState object retains the state of the unroller
45 // between the loop body copies as each iteration needs information on the last
46 // to adjust the phi induction variable, adjust the OpLoopMerge instruction in
47 // the main loop header, and change the previous continue block to point to the
48 // new header and the new continue block to the main loop header.
49 //
50 // 4 - If the loop is to be fully unrolled then it is simply closed after step
51 // 3, with the OpLoopMerge being deleted, the backedge removed, and the
52 // condition blocks folded.
53 //
54 // 5 - If it is being partially unrolled: if the unrolling factor leaves the
55 // loop with an even number of bodies with respect to the number of loop
56 // iterations then step 3 is all that is needed. If it is uneven then we need to
57 // duplicate the loop completely and unroll the duplicated loop to cover the
58 // residual part and adjust the first loop to cover only the "even" part. For
59 // instance if you request an unroll factor of 3 on a loop with 10 iterations
60 // then copying the body three times would leave you with three bodies in the
61 // loop
62 // where the loop still iterates over each 4 times. So we make two loops one
63 // iterating once then a second loop of three iterating 3 times.
64 
65 namespace spvtools {
66 namespace opt {
67 namespace {
68 
69 // Loop control constant value for DontUnroll flag.
70 constexpr uint32_t kLoopControlDontUnrollIndex = 2;
71 
72 // Operand index of the loop control parameter of the OpLoopMerge.
73 constexpr uint32_t kLoopControlIndex = 2;
74 
75 // This utility class encapsulates some of the state we need to maintain between
76 // loop unrolls. Specifically it maintains key blocks and the induction variable
77 // in the current loop duplication step and the blocks from the previous one.
78 // This is because each step of the unroll needs to use data from both the
79 // preceding step and the original loop.
80 struct LoopUnrollState {
LoopUnrollStatespvtools::opt::__anon1c3262630111::LoopUnrollState81   LoopUnrollState()
82       : previous_phi_(nullptr),
83         previous_latch_block_(nullptr),
84         previous_condition_block_(nullptr),
85         new_phi(nullptr),
86         new_continue_block(nullptr),
87         new_condition_block(nullptr),
88         new_header_block(nullptr) {}
89 
90   // Initialize from the loop descriptor class.
LoopUnrollStatespvtools::opt::__anon1c3262630111::LoopUnrollState91   LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
92                   BasicBlock* condition, std::vector<Instruction*>&& phis)
93       : previous_phi_(induction),
94         previous_latch_block_(latch_block),
95         previous_condition_block_(condition),
96         new_phi(nullptr),
97         new_continue_block(nullptr),
98         new_condition_block(nullptr),
99         new_header_block(nullptr) {
100     previous_phis_ = std::move(phis);
101   }
102 
103   // Swap the state so that the new nodes are now the previous nodes.
NextIterationStatespvtools::opt::__anon1c3262630111::LoopUnrollState104   void NextIterationState() {
105     previous_phi_ = new_phi;
106     previous_latch_block_ = new_latch_block;
107     previous_condition_block_ = new_condition_block;
108     previous_phis_ = std::move(new_phis_);
109 
110     // Clear new nodes.
111     new_phi = nullptr;
112     new_continue_block = nullptr;
113     new_condition_block = nullptr;
114     new_header_block = nullptr;
115     new_latch_block = nullptr;
116 
117     // Clear new block/instruction maps.
118     new_blocks.clear();
119     new_inst.clear();
120     ids_to_new_inst.clear();
121   }
122 
123   // The induction variable from the immediately preceding loop body.
124   Instruction* previous_phi_;
125 
126   // All the phi nodes from the previous loop iteration.
127   std::vector<Instruction*> previous_phis_;
128 
129   std::vector<Instruction*> new_phis_;
130 
131   // The previous latch block. The backedge will be removed from this and
132   // added to the new latch block.
133   BasicBlock* previous_latch_block_;
134 
135   // The previous condition block. This may be folded to flatten the loop.
136   BasicBlock* previous_condition_block_;
137 
138   // The new induction variable.
139   Instruction* new_phi;
140 
141   // The new continue block.
142   BasicBlock* new_continue_block;
143 
144   // The new condition block.
145   BasicBlock* new_condition_block;
146 
147   // The new header block.
148   BasicBlock* new_header_block;
149 
150   // The new latch block.
151   BasicBlock* new_latch_block;
152 
153   // A mapping of new block ids to the original blocks which they were copied
154   // from.
155   std::unordered_map<uint32_t, BasicBlock*> new_blocks;
156 
157   // A mapping of the original instruction ids to the instruction ids to their
158   // copies.
159   std::unordered_map<uint32_t, uint32_t> new_inst;
160 
161   std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
162 };
163 
164 // This class implements the actual unrolling. It uses a LoopUnrollState to
165 // maintain the state of the unrolling in between steps.
166 class LoopUnrollerUtilsImpl {
167  public:
168   using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
169 
LoopUnrollerUtilsImpl(IRContext * c,Function * function)170   LoopUnrollerUtilsImpl(IRContext* c, Function* function)
171       : context_(c),
172         function_(*function),
173         loop_condition_block_(nullptr),
174         loop_induction_variable_(nullptr),
175         number_of_loop_iterations_(0),
176         loop_step_value_(0),
177         loop_init_value_(0) {}
178 
179   // Unroll the |loop| by given |factor| by copying the whole body |factor|
180   // times. The resulting basicblock structure will remain a loop.
181   void PartiallyUnroll(Loop*, size_t factor);
182 
183   // If partially unrolling the |loop| would leave the loop with too many bodies
184   // for its number of iterations then this method should be used. This method
185   // will duplicate the |loop| completely, making the duplicated loop the
186   // successor of the original's merge block. The original loop will have its
187   // condition changed to loop over the residual part and the duplicate will be
188   // partially unrolled. The resulting structure will be two loops.
189   void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
190 
191   // Fully unroll the |loop| by copying the full body by the total number of
192   // loop iterations, folding all conditions, and removing the backedge from the
193   // continue block to the header.
194   void FullyUnroll(Loop* loop);
195 
196   // Get the ID of the variable in the |phi| paired with |label|.
197   uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
198 
199   // Close the loop by removing the OpLoopMerge from the |loop| header block and
200   // making the backedge point to the merge block.
201   void CloseUnrolledLoop(Loop* loop);
202 
203   // Remove the OpConditionalBranch instruction inside |conditional_block| used
204   // to branch to either exit or continue the loop and replace it with an
205   // unconditional OpBranch to block |new_target|.
206   void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
207 
208   // Add all blocks_to_add_ to function_ at the |insert_point|.
209   void AddBlocksToFunction(const BasicBlock* insert_point);
210 
211   // Duplicates the |old_loop|, cloning each body and remapping the ids without
212   // removing instructions or changing relative structure. Result will be stored
213   // in |new_loop|.
214   void DuplicateLoop(Loop* old_loop, Loop* new_loop);
215 
GetLoopIterationCount() const216   inline size_t GetLoopIterationCount() const {
217     return number_of_loop_iterations_;
218   }
219 
220   // Extracts the initial state information from the |loop|.
221   void Init(Loop* loop);
222 
223   // Replace the uses of each induction variable outside the loop with the final
224   // value of the induction variable before the loop exit. To reflect the proper
225   // state of a fully unrolled loop.
226   void ReplaceInductionUseWithFinalValue(Loop* loop);
227 
228   // Remove all the instructions in the invalidated_instructions_ vector.
229   void RemoveDeadInstructions();
230 
231   // Replace any use of induction variables outwith the loop with the final
232   // value of the induction variable in the unrolled loop.
233   void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
234 
235   // Set the LoopControl operand of the OpLoopMerge instruction to be
236   // DontUnroll.
237   void MarkLoopControlAsDontUnroll(Loop* loop) const;
238 
239  private:
240   // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
241   // to old
242   // ids. |loop| is used to identify special loop blocks (header, continue,
243   // etc).
244   void AssignNewResultIds(BasicBlock* basic_block);
245 
246   // Using the map built by AssignNewResultIds, replace the uses in |inst|
247   // by the id that the use maps to.
248   void RemapOperands(Instruction* inst);
249 
250   // Using the map built by AssignNewResultIds, for each instruction in
251   // |basic_block| use
252   // that map to substitute the IDs used by instructions (in the operands) with
253   // the new ids.
254   void RemapOperands(BasicBlock* basic_block);
255 
256   // Copy the whole body of the loop, all blocks dominated by the |loop| header
257   // and not dominated by the |loop| merge. The copied body will be linked to by
258   // the old |loop| continue block and the new body will link to the |loop|
259   // header via the new continue block. |eliminate_conditions| is used to decide
260   // whether or not to fold all the condition blocks other than the last one.
261   void CopyBody(Loop* loop, bool eliminate_conditions);
262 
263   // Copy a given |block_to_copy| in the |loop| and record the mapping of the
264   // old/new ids. |preserve_instructions| determines whether or not the method
265   // will modify (other than result_id) instructions which are copied.
266   void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
267                       bool preserve_instructions);
268 
269   // The actual implementation of the unroll step. Unrolls |loop| by given
270   // |factor| by copying the body by |factor| times. Also propagates the
271   // induction variable value throughout the copies.
272   void Unroll(Loop* loop, size_t factor);
273 
274   // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
275   // as computed by the method ComputeLoopOrderedBlocks.
276   void ComputeLoopOrderedBlocks(Loop* loop);
277 
278   // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
279   // the parent exists.
280   void AddBlocksToLoop(Loop* loop) const;
281 
282   // After the partially unroll step the phi instructions in the header block
283   // will be in an illegal format. This function makes the phis legal by making
284   // the edge from the latch block come from the new latch block and the value
285   // to be the actual value of the phi at that point.
286   void LinkLastPhisToStart(Loop* loop) const;
287 
288   // Kill all debug declaration instructions from |bb|.
289   void KillDebugDeclares(BasicBlock* bb);
290 
291   // A pointer to the IRContext. Used to add/remove instructions and for usedef
292   // chains.
293   IRContext* context_;
294 
295   // A reference the function the loop is within.
296   Function& function_;
297 
298   // A list of basic blocks to be added to the loop at the end of an unroll
299   // step.
300   BasicBlockListTy blocks_to_add_;
301 
302   // List of instructions which are now dead and can be removed.
303   std::vector<Instruction*> invalidated_instructions_;
304 
305   // Maintains the current state of the transform between calls to unroll.
306   LoopUnrollState state_;
307 
308   // An ordered list containing the loop basic blocks.
309   std::vector<BasicBlock*> loop_blocks_inorder_;
310 
311   // The block containing the condition check which contains a conditional
312   // branch to the merge and continue block.
313   BasicBlock* loop_condition_block_;
314 
315   // The induction variable of the loop.
316   Instruction* loop_induction_variable_;
317 
318   // Phis used in the loop need to be remapped to use the actual result values
319   // and then be remapped at the end.
320   std::vector<Instruction*> loop_phi_instructions_;
321 
322   // The number of loop iterations that the loop would perform pre-unroll.
323   size_t number_of_loop_iterations_;
324 
325   // The amount that the loop steps each iteration.
326   int64_t loop_step_value_;
327 
328   // The value the loop starts stepping from.
329   int64_t loop_init_value_;
330 };
331 
332 /*
333  * Static helper functions.
334  */
335 
336 // Retrieve the index of the OpPhi instruction |phi| which corresponds to the
337 // incoming |block| id.
GetPhiIndexFromLabel(const BasicBlock * block,const Instruction * phi)338 uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) {
339   for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
340     if (block->id() == phi->GetSingleWordInOperand(i)) {
341       return i;
342     }
343   }
344   assert(false && "Could not find operand in instruction.");
345   return 0;
346 }
347 
Init(Loop * loop)348 void LoopUnrollerUtilsImpl::Init(Loop* loop) {
349   loop_condition_block_ = loop->FindConditionBlock();
350 
351   // When we reinit the second loop during PartiallyUnrollResidualFactor we need
352   // to use the cached value from the duplicate step as the dominator tree
353   // basded solution, loop->FindConditionBlock, requires all the nodes to be
354   // connected up with the correct branches. They won't be at this point.
355   if (!loop_condition_block_) {
356     loop_condition_block_ = state_.new_condition_block;
357   }
358   assert(loop_condition_block_);
359 
360   loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
361   assert(loop_induction_variable_);
362 
363   bool found = loop->FindNumberOfIterations(
364       loop_induction_variable_, &*loop_condition_block_->ctail(),
365       &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
366   (void)found;  // To silence unused variable warning on release builds.
367   assert(found);
368 
369   // Blocks are stored in an unordered set of ids in the loop class, we need to
370   // create the dominator ordered list.
371   ComputeLoopOrderedBlocks(loop);
372 }
373 
374 // This function is used to partially unroll the loop when the factor provided
375 // would normally lead to an illegal optimization. Instead of just unrolling the
376 // loop it creates two loops and unrolls one and adjusts the condition on the
377 // other. The end result being that the new loop pair iterates over the correct
378 // number of bodies.
PartiallyUnrollResidualFactor(Loop * loop,size_t factor)379 void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
380                                                           size_t factor) {
381   // TODO(1841): Handle id overflow.
382   std::unique_ptr<Instruction> new_label{new Instruction(
383       context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})};
384   std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
385   new_exit_bb->SetParent(&function_);
386 
387   // Save the id of the block before we move it.
388   uint32_t new_merge_id = new_exit_bb->id();
389 
390   // Add the block the list of blocks to add, we want this merge block to be
391   // right at the start of the new blocks.
392   blocks_to_add_.push_back(std::move(new_exit_bb));
393   BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
394   Instruction& original_conditional_branch = *loop_condition_block_->tail();
395   // Duplicate the loop, providing access to the blocks of both loops.
396   // This is a naked new due to the VS2013 requirement of not having unique
397   // pointers in vectors, as it will be inserted into a vector with
398   // loop_descriptor.AddLoop.
399   std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
400 
401   // Clear the basic blocks of the new loop.
402   new_loop->ClearBlocks();
403 
404   DuplicateLoop(loop, new_loop.get());
405 
406   // Add the blocks to the function.
407   AddBlocksToFunction(loop->GetMergeBlock());
408   blocks_to_add_.clear();
409 
410   // Create a new merge block for the first loop.
411   InstructionBuilder builder{context_, new_exit_bb_raw};
412   // Make the first loop branch to the second.
413   builder.AddBranch(new_loop->GetHeaderBlock()->id());
414 
415   loop_condition_block_ = state_.new_condition_block;
416   loop_induction_variable_ = state_.new_phi;
417   // Unroll the new loop by the factor with the usual -1 to account for the
418   // existing block iteration.
419   Unroll(new_loop.get(), factor);
420 
421   LinkLastPhisToStart(new_loop.get());
422   AddBlocksToLoop(new_loop.get());
423 
424   // Add the new merge block to the back of the list of blocks to be added. It
425   // needs to be the last block added to maintain dominator order in the binary.
426   blocks_to_add_.push_back(
427       std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
428 
429   // Add the blocks to the function.
430   AddBlocksToFunction(loop->GetMergeBlock());
431 
432   // Reset the usedef analysis.
433   context_->InvalidateAnalysesExceptFor(
434       IRContext::Analysis::kAnalysisLoopAnalysis);
435   analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
436 
437   // The loop condition.
438   Instruction* condition_check = def_use_manager->GetDef(
439       original_conditional_branch.GetSingleWordOperand(0));
440 
441   // This should have been checked by the LoopUtils::CanPerformUnroll function
442   // before entering this.
443   assert(loop->IsSupportedCondition(condition_check->opcode()));
444 
445   // We need to account for the initial body when calculating the remainder.
446   int64_t remainder = Loop::GetResidualConditionValue(
447       condition_check->opcode(), loop_init_value_, loop_step_value_,
448       number_of_loop_iterations_, factor);
449 
450   assert(remainder > std::numeric_limits<int32_t>::min() &&
451          remainder < std::numeric_limits<int32_t>::max());
452 
453   Instruction* new_constant = nullptr;
454 
455   // If the remainder is negative then we add a signed constant, otherwise just
456   // add an unsigned constant.
457   if (remainder < 0) {
458     new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
459   } else {
460     new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
461   }
462 
463   uint32_t constant_id = new_constant->result_id();
464 
465   // Update the condition check.
466   condition_check->SetInOperand(1, {constant_id});
467 
468   // Update the next phi node. The phi will have a constant value coming in from
469   // the preheader block. For the duplicated loop we need to update the constant
470   // to be the amount of iterations covered by the first loop and the incoming
471   // block to be the first loops new merge block.
472   std::vector<Instruction*> new_inductions;
473   new_loop->GetInductionVariables(new_inductions);
474 
475   std::vector<Instruction*> old_inductions;
476   loop->GetInductionVariables(old_inductions);
477   for (size_t index = 0; index < new_inductions.size(); ++index) {
478     Instruction* new_induction = new_inductions[index];
479     Instruction* old_induction = old_inductions[index];
480     // Get the index of the loop initalizer, the value coming in from the
481     // preheader.
482     uint32_t initalizer_index =
483         GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
484 
485     // Replace the second loop initalizer with the phi from the first
486     new_induction->SetInOperand(initalizer_index - 1,
487                                 {old_induction->result_id()});
488     new_induction->SetInOperand(initalizer_index, {new_merge_id});
489 
490     // If the use of the first loop induction variable is outside of the loop
491     // then replace that use with the second loop induction variable.
492     uint32_t second_loop_induction = new_induction->result_id();
493     auto replace_use_outside_of_loop = [loop, second_loop_induction](
494                                            Instruction* user,
495                                            uint32_t operand_index) {
496       if (!loop->IsInsideLoop(user)) {
497         user->SetOperand(operand_index, {second_loop_induction});
498       }
499     };
500 
501     context_->get_def_use_mgr()->ForEachUse(old_induction,
502                                             replace_use_outside_of_loop);
503   }
504 
505   context_->InvalidateAnalysesExceptFor(
506       IRContext::Analysis::kAnalysisLoopAnalysis);
507 
508   context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
509 
510   LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
511 
512   loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
513 
514   RemoveDeadInstructions();
515 }
516 
517 // Mark this loop as DontUnroll as it will already be unrolled and it may not
518 // be safe to unroll a previously partially unrolled loop.
MarkLoopControlAsDontUnroll(Loop * loop) const519 void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
520   Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
521   assert(loop_merge_inst &&
522          "Loop merge instruction could not be found after entering unroller "
523          "(should have exited before this)");
524   loop_merge_inst->SetInOperand(kLoopControlIndex,
525                                 {kLoopControlDontUnrollIndex});
526 }
527 
528 // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
529 // backedge intact. This will leave the loop with |factor| number of bodies
530 // after accounting for the initial body.
Unroll(Loop * loop,size_t factor)531 void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
532   // If we unroll a loop partially it will not be safe to unroll it further.
533   // This is due to the current method of calculating the number of loop
534   // iterations.
535   MarkLoopControlAsDontUnroll(loop);
536 
537   std::vector<Instruction*> inductions;
538   loop->GetInductionVariables(inductions);
539   state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
540                            loop_condition_block_, std::move(inductions)};
541   for (size_t i = 0; i < factor - 1; ++i) {
542     CopyBody(loop, true);
543   }
544 }
545 
RemoveDeadInstructions()546 void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
547   // Remove the dead instructions.
548   for (Instruction* inst : invalidated_instructions_) {
549     context_->KillInst(inst);
550   }
551 }
552 
ReplaceInductionUseWithFinalValue(Loop * loop)553 void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
554   context_->InvalidateAnalysesExceptFor(
555       IRContext::Analysis::kAnalysisLoopAnalysis |
556       IRContext::Analysis::kAnalysisDefUse |
557       IRContext::Analysis::kAnalysisInstrToBlockMapping);
558 
559   std::vector<Instruction*> inductions;
560   loop->GetInductionVariables(inductions);
561 
562   for (size_t index = 0; index < inductions.size(); ++index) {
563     uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
564                                         state_.previous_latch_block_->id());
565     context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
566     invalidated_instructions_.push_back(inductions[index]);
567   }
568 }
569 
570 // Fully unroll the loop by partially unrolling it by the number of loop
571 // iterations minus one for the body already accounted for.
FullyUnroll(Loop * loop)572 void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
573   // We unroll the loop by number of iterations in the loop.
574   Unroll(loop, number_of_loop_iterations_);
575 
576   // The first condition block is preserved until now so it can be copied.
577   FoldConditionBlock(loop_condition_block_, 1);
578 
579   // Delete the OpLoopMerge and remove the backedge to the header.
580   CloseUnrolledLoop(loop);
581 
582   // Mark the loop for later deletion. This allows us to preserve the loop
583   // iterators but still disregard dead loops.
584   loop->MarkLoopForRemoval();
585 
586   // If the loop has a parent add the new blocks to the parent.
587   if (loop->GetParent()) {
588     AddBlocksToLoop(loop->GetParent());
589   }
590 
591   // Add the blocks to the function.
592   AddBlocksToFunction(loop->GetMergeBlock());
593 
594   ReplaceInductionUseWithFinalValue(loop);
595 
596   RemoveDeadInstructions();
597   // Invalidate all analyses.
598   context_->InvalidateAnalysesExceptFor(
599       IRContext::Analysis::kAnalysisLoopAnalysis |
600       IRContext::Analysis::kAnalysisDefUse);
601 }
602 
KillDebugDeclares(BasicBlock * bb)603 void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) {
604   // We cannot kill an instruction inside BasicBlock::ForEachInst()
605   // because it will generate dangling pointers. We use |to_be_killed|
606   // to kill them after the loop.
607   std::vector<Instruction*> to_be_killed;
608 
609   bb->ForEachInst([&to_be_killed, this](Instruction* inst) {
610     if (context_->get_debug_info_mgr()->IsDebugDeclare(inst)) {
611       to_be_killed.push_back(inst);
612     }
613   });
614   for (auto* inst : to_be_killed) context_->KillInst(inst);
615 }
616 
617 // Copy a given basic block, give it a new result_id, and store the new block
618 // and the id mapping in the state. |preserve_instructions| is used to determine
619 // whether or not this function should edit instructions other than the
620 // |result_id|.
CopyBasicBlock(Loop * loop,const BasicBlock * itr,bool preserve_instructions)621 void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
622                                            bool preserve_instructions) {
623   // Clone the block exactly, including the IDs.
624   BasicBlock* basic_block = itr->Clone(context_);
625   basic_block->SetParent(itr->GetParent());
626 
627   // We do not want to duplicate DebugDeclare.
628   KillDebugDeclares(basic_block);
629 
630   // Assign each result a new unique ID and keep a mapping of the old ids to
631   // the new ones.
632   AssignNewResultIds(basic_block);
633 
634   // If this is the continue block we are copying.
635   if (itr == loop->GetContinueBlock()) {
636     // Make the OpLoopMerge point to this block for the continue.
637     if (!preserve_instructions) {
638       Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
639       merge_inst->SetInOperand(1, {basic_block->id()});
640       context_->UpdateDefUse(merge_inst);
641     }
642 
643     state_.new_continue_block = basic_block;
644   }
645 
646   // If this is the header block we are copying.
647   if (itr == loop->GetHeaderBlock()) {
648     state_.new_header_block = basic_block;
649 
650     if (!preserve_instructions) {
651       // Remove the loop merge instruction if it exists.
652       Instruction* merge_inst = basic_block->GetLoopMergeInst();
653       if (merge_inst) invalidated_instructions_.push_back(merge_inst);
654     }
655   }
656 
657   // If this is the latch block being copied, record it in the state.
658   if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
659 
660   // If this is the condition block we are copying.
661   if (itr == loop_condition_block_) {
662     state_.new_condition_block = basic_block;
663   }
664 
665   // Add this block to the list of blocks to add to the function at the end of
666   // the unrolling process.
667   blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
668 
669   // Keep tracking the old block via a map.
670   state_.new_blocks[itr->id()] = basic_block;
671 }
672 
CopyBody(Loop * loop,bool eliminate_conditions)673 void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
674   // Copy each basic block in the loop, give them new ids, and save state
675   // information.
676   for (const BasicBlock* itr : loop_blocks_inorder_) {
677     CopyBasicBlock(loop, itr, false);
678   }
679 
680   // Set the previous latch block to point to the new header.
681   Instruction* latch_branch = state_.previous_latch_block_->terminator();
682   latch_branch->SetInOperand(0, {state_.new_header_block->id()});
683   context_->UpdateDefUse(latch_branch);
684 
685   // As the algorithm copies the original loop blocks exactly, the tail of the
686   // latch block on iterations after the first one will be a branch to the new
687   // header and not the actual loop header. The last continue block in the loop
688   // should always be a backedge to the global header.
689   Instruction* new_latch_branch = state_.new_latch_block->terminator();
690   new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
691   context_->AnalyzeUses(new_latch_branch);
692 
693   std::vector<Instruction*> inductions;
694   loop->GetInductionVariables(inductions);
695   for (size_t index = 0; index < inductions.size(); ++index) {
696     Instruction* primary_copy = inductions[index];
697 
698     assert(primary_copy->result_id() != 0);
699     Instruction* induction_clone =
700         state_.ids_to_new_inst[state_.new_inst[primary_copy->result_id()]];
701 
702     state_.new_phis_.push_back(induction_clone);
703     assert(induction_clone->result_id() != 0);
704 
705     if (!state_.previous_phis_.empty()) {
706       state_.new_inst[primary_copy->result_id()] = GetPhiDefID(
707           state_.previous_phis_[index], state_.previous_latch_block_->id());
708     } else {
709       // Do not replace the first phi block ids.
710       state_.new_inst[primary_copy->result_id()] = primary_copy->result_id();
711     }
712   }
713 
714   if (eliminate_conditions &&
715       state_.new_condition_block != loop_condition_block_) {
716     FoldConditionBlock(state_.new_condition_block, 1);
717   }
718 
719   // Only reference to the header block is the backedge in the latch block,
720   // don't change this.
721   state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
722 
723   for (auto& pair : state_.new_blocks) {
724     RemapOperands(pair.second);
725   }
726 
727   for (Instruction* dead_phi : state_.new_phis_)
728     invalidated_instructions_.push_back(dead_phi);
729 
730   // Swap the state so the new is now the previous.
731   state_.NextIterationState();
732 }
733 
GetPhiDefID(const Instruction * phi,uint32_t label) const734 uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
735                                             uint32_t label) const {
736   for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
737     if (phi->GetSingleWordOperand(operand) == label) {
738       return phi->GetSingleWordOperand(operand - 1);
739     }
740   }
741   assert(false && "Could not find a phi index matching the provided label");
742   return 0;
743 }
744 
FoldConditionBlock(BasicBlock * condition_block,uint32_t operand_label)745 void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
746                                                uint32_t operand_label) {
747   // Remove the old conditional branch to the merge and continue blocks.
748   Instruction& old_branch = *condition_block->tail();
749   uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
750 
751   DebugScope scope = old_branch.GetDebugScope();
752   const std::vector<Instruction> lines = old_branch.dbg_line_insts();
753 
754   context_->KillInst(&old_branch);
755   // Add the new unconditional branch to the merge block.
756   InstructionBuilder builder(
757       context_, condition_block,
758       IRContext::Analysis::kAnalysisDefUse |
759           IRContext::Analysis::kAnalysisInstrToBlockMapping);
760   Instruction* new_branch = builder.AddBranch(new_target);
761 
762   if (!lines.empty()) new_branch->AddDebugLine(&lines.back());
763   new_branch->SetDebugScope(scope);
764 }
765 
CloseUnrolledLoop(Loop * loop)766 void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
767   // Remove the OpLoopMerge instruction from the function.
768   Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
769   invalidated_instructions_.push_back(merge_inst);
770 
771   // Remove the final backedge to the header and make it point instead to the
772   // merge block.
773   Instruction* latch_instruction = state_.previous_latch_block_->terminator();
774   latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
775   context_->UpdateDefUse(latch_instruction);
776 
777   // Remove all induction variables as the phis will now be invalid. Replace all
778   // uses with the constant initializer value (all uses of phis will be in
779   // the first iteration with the subsequent phis already having been removed).
780   std::vector<Instruction*> inductions;
781   loop->GetInductionVariables(inductions);
782 
783   // We can use the state instruction mechanism to replace all internal loop
784   // values within the first loop trip (as the subsequent ones will be updated
785   // by the copy function) with the value coming in from the preheader and then
786   // use context ReplaceAllUsesWith for the uses outside the loop with the final
787   // trip phi value.
788   state_.new_inst.clear();
789   for (Instruction* induction : inductions) {
790     uint32_t initalizer_id =
791         GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
792 
793     state_.new_inst[induction->result_id()] = initalizer_id;
794   }
795 
796   for (BasicBlock* block : loop_blocks_inorder_) {
797     RemapOperands(block);
798   }
799   for (auto& block_itr : blocks_to_add_) {
800     RemapOperands(block_itr.get());
801   }
802 
803   // Rewrite the last phis, since they may still reference the original phi.
804   for (Instruction* last_phi : state_.previous_phis_) {
805     RemapOperands(last_phi);
806   }
807 }
808 
809 // Uses the first loop to create a copy of the loop with new IDs.
DuplicateLoop(Loop * old_loop,Loop * new_loop)810 void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
811   std::vector<BasicBlock*> new_block_order;
812 
813   // Copy every block in the old loop.
814   for (const BasicBlock* itr : loop_blocks_inorder_) {
815     CopyBasicBlock(old_loop, itr, true);
816     new_block_order.push_back(blocks_to_add_.back().get());
817   }
818 
819   // Clone the merge block, give it a new id and record it in the state.
820   BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
821   new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
822   AssignNewResultIds(new_merge);
823   state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
824 
825   // Remap the operands of every instruction in the loop to point to the new
826   // copies.
827   for (auto& pair : state_.new_blocks) {
828     RemapOperands(pair.second);
829   }
830 
831   loop_blocks_inorder_ = std::move(new_block_order);
832 
833   AddBlocksToLoop(new_loop);
834 
835   new_loop->SetHeaderBlock(state_.new_header_block);
836   new_loop->SetContinueBlock(state_.new_continue_block);
837   new_loop->SetLatchBlock(state_.new_latch_block);
838   new_loop->SetMergeBlock(new_merge);
839 }
840 
841 // Whenever the utility copies a block it stores it in a temporary buffer, this
842 // function adds the buffer into the Function. The blocks will be inserted
843 // after the block |insert_point|.
AddBlocksToFunction(const BasicBlock * insert_point)844 void LoopUnrollerUtilsImpl::AddBlocksToFunction(
845     const BasicBlock* insert_point) {
846   for (auto basic_block_iterator = function_.begin();
847        basic_block_iterator != function_.end(); ++basic_block_iterator) {
848     if (basic_block_iterator->id() == insert_point->id()) {
849       basic_block_iterator.InsertBefore(&blocks_to_add_);
850       return;
851     }
852   }
853 
854   assert(
855       false &&
856       "Could not add basic blocks to function as insert point was not found.");
857 }
858 
859 // Assign all result_ids in |basic_block| instructions to new IDs and preserve
860 // the mapping of new ids to old ones.
AssignNewResultIds(BasicBlock * basic_block)861 void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
862   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
863 
864   // Label instructions aren't covered by normal traversal of the
865   // instructions.
866   // TODO(1841): Handle id overflow.
867   uint32_t new_label_id = context_->TakeNextId();
868 
869   // Assign a new id to the label.
870   state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
871   basic_block->GetLabelInst()->SetResultId(new_label_id);
872   def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
873 
874   for (Instruction& inst : *basic_block) {
875     // Do def/use analysis on new lines
876     for (auto& line : inst.dbg_line_insts())
877       def_use_mgr->AnalyzeInstDefUse(&line);
878 
879     uint32_t old_id = inst.result_id();
880 
881     // Ignore stores etc.
882     if (old_id == 0) {
883       continue;
884     }
885 
886     // Give the instruction a new id.
887     // TODO(1841): Handle id overflow.
888     inst.SetResultId(context_->TakeNextId());
889     def_use_mgr->AnalyzeInstDef(&inst);
890 
891     // Save the mapping of old_id -> new_id.
892     state_.new_inst[old_id] = inst.result_id();
893     // Check if this instruction is the induction variable.
894     if (loop_induction_variable_->result_id() == old_id) {
895       // Save a pointer to the new copy of it.
896       state_.new_phi = &inst;
897     }
898     state_.ids_to_new_inst[inst.result_id()] = &inst;
899   }
900 }
901 
RemapOperands(Instruction * inst)902 void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
903   auto remap_operands_to_new_ids = [this](uint32_t* id) {
904     auto itr = state_.new_inst.find(*id);
905 
906     if (itr != state_.new_inst.end()) {
907       *id = itr->second;
908     }
909   };
910 
911   inst->ForEachInId(remap_operands_to_new_ids);
912   context_->AnalyzeUses(inst);
913 }
914 
RemapOperands(BasicBlock * basic_block)915 void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
916   for (Instruction& inst : *basic_block) {
917     RemapOperands(&inst);
918   }
919 }
920 
921 // Generate the ordered list of basic blocks in the |loop| and cache it for
922 // later use.
ComputeLoopOrderedBlocks(Loop * loop)923 void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
924   loop_blocks_inorder_.clear();
925   loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
926 }
927 
928 // Adds the blocks_to_add_ to both the loop and to the parent.
AddBlocksToLoop(Loop * loop) const929 void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
930   // Add the blocks to this loop.
931   for (auto& block_itr : blocks_to_add_) {
932     loop->AddBasicBlock(block_itr.get());
933   }
934 
935   // Add the blocks to the parent as well.
936   if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
937 }
938 
LinkLastPhisToStart(Loop * loop) const939 void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
940   std::vector<Instruction*> inductions;
941   loop->GetInductionVariables(inductions);
942 
943   for (size_t i = 0; i < inductions.size(); ++i) {
944     Instruction* last_phi_in_block = state_.previous_phis_[i];
945 
946     uint32_t phi_index =
947         GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
948     uint32_t phi_variable =
949         last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
950     uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
951 
952     Instruction* phi = inductions[i];
953     phi->SetInOperand(phi_index - 1, {phi_variable});
954     phi->SetInOperand(phi_index, {phi_label});
955   }
956 }
957 
958 // Duplicate the |loop| body |factor| number of times while keeping the loop
959 // backedge intact.
PartiallyUnroll(Loop * loop,size_t factor)960 void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
961   Unroll(loop, factor);
962   LinkLastPhisToStart(loop);
963   AddBlocksToLoop(loop);
964   AddBlocksToFunction(loop->GetMergeBlock());
965   RemoveDeadInstructions();
966 }
967 
968 /*
969  * End LoopUtilsImpl.
970  */
971 
972 }  // namespace
973 
974 /*
975  *
976  *  Begin Utils.
977  *
978  * */
979 
CanPerformUnroll()980 bool LoopUtils::CanPerformUnroll() {
981   // The loop is expected to be in structured order.
982   if (!loop_->GetHeaderBlock()->GetMergeInst()) {
983     return false;
984   }
985 
986   // Find check the loop has a condition we can find and evaluate.
987   const BasicBlock* condition = loop_->FindConditionBlock();
988   if (!condition) return false;
989 
990   // Check that we can find and process the induction variable.
991   const Instruction* induction = loop_->FindConditionVariable(condition);
992   if (!induction || induction->opcode() != spv::Op::OpPhi) return false;
993 
994   // Check that we can find the number of loop iterations.
995   if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
996     return false;
997 
998 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
999   // ClusterFuzz/OSS-Fuzz is likely to yield examples with very high loop
1000   // iteration counts. This can cause timeouts and memouts during fuzzing that
1001   // are not classed as bugs. To avoid this noise, loop unrolling is not applied
1002   // to loops with large iteration counts when fuzzing.
1003   constexpr size_t kFuzzerIterationLimit = 100;
1004   size_t num_iterations;
1005   loop_->FindNumberOfIterations(induction, &*condition->ctail(),
1006                                 &num_iterations);
1007   if (num_iterations > kFuzzerIterationLimit) {
1008     return false;
1009   }
1010 #endif
1011 
1012   // Make sure the latch block is a unconditional branch to the header
1013   // block.
1014   const Instruction& branch = *loop_->GetLatchBlock()->ctail();
1015   bool branching_assumption =
1016       branch.opcode() == spv::Op::OpBranch &&
1017       branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
1018   if (!branching_assumption) {
1019     return false;
1020   }
1021 
1022   std::vector<Instruction*> inductions;
1023   loop_->GetInductionVariables(inductions);
1024 
1025   // Ban breaks within the loop.
1026   const std::vector<uint32_t>& merge_block_preds =
1027       context_->cfg()->preds(loop_->GetMergeBlock()->id());
1028   if (merge_block_preds.size() != 1) {
1029     return false;
1030   }
1031 
1032   // Ban continues within the loop.
1033   const std::vector<uint32_t>& continue_block_preds =
1034       context_->cfg()->preds(loop_->GetContinueBlock()->id());
1035   if (continue_block_preds.size() != 1) {
1036     return false;
1037   }
1038 
1039   // Ban returns in the loop.
1040   // Iterate over all the blocks within the loop and check that none of them
1041   // exit the loop.
1042   for (uint32_t label_id : loop_->GetBlocks()) {
1043     const BasicBlock* block = context_->cfg()->block(label_id);
1044     if (block->ctail()->opcode() == spv::Op::OpKill ||
1045         block->ctail()->opcode() == spv::Op::OpReturn ||
1046         block->ctail()->opcode() == spv::Op::OpReturnValue ||
1047         block->ctail()->opcode() == spv::Op::OpTerminateInvocation) {
1048       return false;
1049     }
1050   }
1051   // Can only unroll inner loops.
1052   if (!loop_->AreAllChildrenMarkedForRemoval()) {
1053     return false;
1054   }
1055 
1056   return true;
1057 }
1058 
PartiallyUnroll(size_t factor)1059 bool LoopUtils::PartiallyUnroll(size_t factor) {
1060   if (factor == 1 || !CanPerformUnroll()) return false;
1061 
1062   // Create the unroller utility.
1063   LoopUnrollerUtilsImpl unroller{context_,
1064                                  loop_->GetHeaderBlock()->GetParent()};
1065   unroller.Init(loop_);
1066 
1067   // If the unrolling factor is larger than or the same size as the loop just
1068   // fully unroll the loop.
1069   if (factor >= unroller.GetLoopIterationCount()) {
1070     unroller.FullyUnroll(loop_);
1071     return true;
1072   }
1073 
1074   // If the loop unrolling factor is an residual number of iterations we need to
1075   // let run the loop for the residual part then let it branch into the unrolled
1076   // remaining part. We add one when calucating the remainder to take into
1077   // account the one iteration already in the loop.
1078   if (unroller.GetLoopIterationCount() % factor != 0) {
1079     unroller.PartiallyUnrollResidualFactor(loop_, factor);
1080   } else {
1081     unroller.PartiallyUnroll(loop_, factor);
1082   }
1083 
1084   return true;
1085 }
1086 
FullyUnroll()1087 bool LoopUtils::FullyUnroll() {
1088   if (!CanPerformUnroll()) return false;
1089 
1090   std::vector<Instruction*> inductions;
1091   loop_->GetInductionVariables(inductions);
1092 
1093   LoopUnrollerUtilsImpl unroller{context_,
1094                                  loop_->GetHeaderBlock()->GetParent()};
1095 
1096   unroller.Init(loop_);
1097   unroller.FullyUnroll(loop_);
1098 
1099   return true;
1100 }
1101 
Finalize()1102 void LoopUtils::Finalize() {
1103   // Clean up the loop descriptor to preserve the analysis.
1104 
1105   LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
1106   LD->PostModificationCleanup();
1107 }
1108 
1109 /*
1110  *
1111  * Begin Pass.
1112  *
1113  */
1114 
Process()1115 Pass::Status LoopUnroller::Process() {
1116   bool changed = false;
1117   for (Function& f : *context()->module()) {
1118     if (f.IsDeclaration()) {
1119       continue;
1120     }
1121 
1122     LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
1123     for (Loop& loop : *LD) {
1124       LoopUtils loop_utils{context(), &loop};
1125       if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
1126         continue;
1127       }
1128 
1129       if (fully_unroll_) {
1130         loop_utils.FullyUnroll();
1131       } else {
1132         loop_utils.PartiallyUnroll(unroll_factor_);
1133       }
1134       changed = true;
1135     }
1136     LD->PostModificationCleanup();
1137   }
1138 
1139   return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
1140 }
1141 
1142 }  // namespace opt
1143 }  // namespace spvtools
1144