1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <functional>
17 #include <memory>
18 #include <unordered_map>
19 #include <unordered_set>
20 #include <vector>
21
22 #include "source/opt/ir_builder.h"
23 #include "source/opt/ir_context.h"
24 #include "source/opt/loop_descriptor.h"
25 #include "source/opt/loop_peeling.h"
26 #include "source/opt/loop_utils.h"
27 #include "source/opt/scalar_analysis.h"
28 #include "source/opt/scalar_analysis_nodes.h"
29
30 namespace spvtools {
31 namespace opt {
32 namespace {
33 // Gather the set of blocks for all the path from |entry| to |root|.
GetBlocksInPath(uint32_t block,uint32_t entry,std::unordered_set<uint32_t> * blocks_in_path,const CFG & cfg)34 void GetBlocksInPath(uint32_t block, uint32_t entry,
35 std::unordered_set<uint32_t>* blocks_in_path,
36 const CFG& cfg) {
37 for (uint32_t pid : cfg.preds(block)) {
38 if (blocks_in_path->insert(pid).second) {
39 if (pid != entry) {
40 GetBlocksInPath(pid, entry, blocks_in_path, cfg);
41 }
42 }
43 }
44 }
45 } // namespace
46
// Code-growth budget: ProcessLoop rejects a peel when the loop's size times
// the peel factor would exceed this many instructions.
size_t LoopPeelingPass::code_grow_threshold_ = 1000;
48
// Duplicates |loop_| and connects the clone so that it executes before the
// original loop: the original pre-header now branches to the cloned header,
// the (unique) cloned-loop exit branches to the original header, and the
// original header's phis take their initial values from the corresponding
// cloned exit values. A new pre-header is created for the original loop and
// doubles as the cloned loop's merge block. |clone_results| receives the
// value/block mappings produced by the clone.
void LoopPeeling::DuplicateAndConnectLoop(
    LoopUtils::LoopCloningResult* clone_results) {
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  assert(CanPeelLoop() && "Cannot peel loop!");

  std::vector<BasicBlock*> ordered_loop_blocks;
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* pre_header = loop_->GetOrCreatePreHeaderBlock();

  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  cloned_loop_ = loop_utils_.CloneLoop(clone_results, ordered_loop_blocks);

  // Add the basic block to the function, right after the original pre-header.
  Function::iterator it =
      loop_utils_.GetFunction()->FindBlock(pre_header->id());
  assert(it != loop_utils_.GetFunction()->end() &&
         "Pre-header not found in the function.");
  loop_utils_.GetFunction()->AddBasicBlocks(
      clone_results->cloned_bb_.begin(), clone_results->cloned_bb_.end(), ++it);

  // Make the |loop_|'s preheader the |cloned_loop_| one.
  BasicBlock* cloned_header = cloned_loop_->GetHeaderBlock();
  pre_header->ForEachSuccessorLabel(
      [cloned_header](uint32_t* succ) { *succ = cloned_header->id(); });

  // Update cfg.
  cfg.RemoveEdge(pre_header->id(), loop_->GetHeaderBlock()->id());
  cloned_loop_->SetPreHeaderBlock(pre_header);
  loop_->SetPreHeaderBlock(nullptr);

  // When cloning the loop, we didn't clone the merge block, so currently
  // |cloned_loop_| shares the same block as |loop_|.
  // We mutate all branches from |cloned_loop_| block to |loop_|'s merge into a
  // branch to |loop_|'s header (so header will also be the merge of
  // |cloned_loop_|).
  uint32_t cloned_loop_exit = 0;
  for (uint32_t pred_id : cfg.preds(loop_->GetMergeBlock()->id())) {
    // Cloned blocks are not (yet) registered as members of |loop_|, so the
    // out-of-loop predecessor of the merge block is the cloned exit.
    if (loop_->IsInsideLoop(pred_id)) continue;
    BasicBlock* bb = cfg.block(pred_id);
    assert(cloned_loop_exit == 0 && "The loop has multiple exits.");
    cloned_loop_exit = bb->id();
    bb->ForEachSuccessorLabel([this](uint32_t* succ) {
      if (*succ == loop_->GetMergeBlock()->id())
        *succ = loop_->GetHeaderBlock()->id();
    });
  }

  // Update cfg.
  cfg.RemoveNonExistingEdges(loop_->GetMergeBlock()->id());
  cfg.AddEdge(cloned_loop_exit, loop_->GetHeaderBlock()->id());

  // Patch the phi of the original loop header:
  //  - Set the loop entry branch to come from the cloned loop exit block;
  //  - Set the initial value of the phi using the corresponding cloned loop
  //    exit values.
  //
  // We patch the iterating value initializers of the original loop using the
  // corresponding cloned loop exit values. Connects the cloned loop iterating
  // values to the original loop. This make sure that the initial value of the
  // second loop starts with the last value of the first loop.
  //
  // For example, loops like:
  //
  // int z = 0;
  // for (int i = 0; i++ < M; i += cst1) {
  //   if (cond)
  //     z += cst2;
  // }
  //
  // Will become:
  //
  // int z = 0;
  // int i = 0;
  // for (; i++ < M; i += cst1) {
  //   if (cond)
  //     z += cst2;
  // }
  // for (; i++ < M; i += cst1) {
  //   if (cond)
  //     z += cst2;
  // }
  loop_->GetHeaderBlock()->ForEachPhiInst([cloned_loop_exit, def_use_mgr,
                                           clone_results,
                                           this](Instruction* phi) {
    // Only one incoming edge comes from outside the loop: patch that pair of
    // (value, block) operands and stop.
    for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
      if (!loop_->IsInsideLoop(phi->GetSingleWordInOperand(i + 1))) {
        phi->SetInOperand(i,
                          {clone_results->value_map_.at(
                              exit_value_.at(phi->result_id())->result_id())});
        phi->SetInOperand(i + 1, {cloned_loop_exit});
        def_use_mgr->AnalyzeInstUse(phi);
        return;
      }
    }
  });

  // Force the creation of a new preheader for the original loop and set it as
  // the merge block for the cloned loop.
  // TODO(1841): Handle failure to create pre-header.
  cloned_loop_->SetMergeBlock(loop_->GetOrCreatePreHeaderBlock());
}
153
// Makes |canonical_induction_variable_| designate an induction variable of
// the cloned loop that starts at 0 and increments by 1 every iteration. If
// the original loop already had such a variable, its clone is reused;
// otherwise a fresh phi/increment pair is built in the cloned loop's
// header/latch. In do-while form the incremented value is used instead of
// the phi, since the exit check runs after the body.
void LoopPeeling::InsertCanonicalInductionVariable(
    LoopUtils::LoopCloningResult* clone_results) {
  if (original_loop_canonical_induction_variable_) {
    // Reuse the cloned counterpart of the original canonical variable.
    canonical_induction_variable_ =
        context_->get_def_use_mgr()->GetDef(clone_results->value_map_.at(
            original_loop_canonical_induction_variable_->result_id()));
    return;
  }

  // Insert the increment at the end of the latch block, before its merge
  // instruction if it has one.
  BasicBlock::iterator insert_point = GetClonedLoop()->GetLatchBlock()->tail();
  if (GetClonedLoop()->GetLatchBlock()->GetMergeInst()) {
    --insert_point;
  }
  InstructionBuilder builder(
      context_, &*insert_point,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* uint_1_cst =
      builder.GetIntConstant<uint32_t>(1, int_type_->IsSigned());
  // Create the increment.
  // Note that we do "1 + 1" here: one of the operands should be the phi
  // value but we don't have it yet. The operand will be set later.
  Instruction* iv_inc = builder.AddIAdd(
      uint_1_cst->type_id(), uint_1_cst->result_id(), uint_1_cst->result_id());

  builder.SetInsertPoint(&*GetClonedLoop()->GetHeaderBlock()->begin());

  // phi = [0 from the pre-header, iv_inc from the latch].
  canonical_induction_variable_ = builder.AddPhi(
      uint_1_cst->type_id(),
      {builder.GetIntConstant<uint32_t>(0, int_type_->IsSigned())->result_id(),
       GetClonedLoop()->GetPreHeaderBlock()->id(), iv_inc->result_id(),
       GetClonedLoop()->GetLatchBlock()->id()});
  // Connect everything.
  iv_inc->SetInOperand(0, {canonical_induction_variable_->result_id()});

  // Update def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(iv_inc);

  // If do-while form, use the incremented value.
  if (do_while_form_) {
    canonical_induction_variable_ = iv_inc;
  }
}
196
// Collects in |operations| the transitive set of in-loop instructions that
// |iterator| depends on, |iterator| itself included. The walk follows input
// operands recursively and stops at labels, at already-visited instructions
// and at definitions located outside |loop|.
void LoopPeeling::GetIteratorUpdateOperations(
    const Loop* loop, Instruction* iterator,
    std::unordered_set<Instruction*>* operations) {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
  operations->insert(iterator);
  iterator->ForEachInId([def_use_mgr, loop, operations, this](uint32_t* id) {
    Instruction* insn = def_use_mgr->GetDef(*id);
    if (insn->opcode() == spv::Op::OpLabel) {
      // Block operands (e.g. phi incoming blocks) are not data dependencies.
      return;
    }
    if (operations->count(insn)) {
      // Already visited.
      return;
    }
    if (!loop->IsInsideLoop(insn)) {
      // Defined outside the loop: no need to recurse.
      return;
    }
    GetIteratorUpdateOperations(loop, insn, operations);
  });
}
216
// Returns true if executing the code that leads to the loop's exit-condition
// check has no side effects. For the "while" form, every instruction in
// every block on a path from the loop header to the condition block must be
// a combinator (or a branch/label/merge instruction). The "do-while" form
// needs no check: the algorithm already accounts for the first iteration.
bool LoopPeeling::IsConditionCheckSideEffectFree() const {
  CFG& cfg = *context_->cfg();

  // The "do-while" form does not cause issues, the algorithm takes into account
  // the first iteration.
  if (!do_while_form_) {
    // The merge block's single predecessor is the condition block (loops with
    // several merge-block predecessors are rejected in GetIteratingExitValues).
    uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];

    std::unordered_set<uint32_t> blocks_in_path;

    blocks_in_path.insert(condition_block_id);
    GetBlocksInPath(condition_block_id, loop_->GetHeaderBlock()->id(),
                    &blocks_in_path, cfg);

    for (uint32_t bb_id : blocks_in_path) {
      BasicBlock* bb = cfg.block(bb_id);
      // WhileEachInst stops (returning false) at the first instruction that
      // is neither control flow nor a combinator.
      if (!bb->WhileEachInst([this](Instruction* insn) {
            if (insn->IsBranch()) return true;
            switch (insn->opcode()) {
              case spv::Op::OpLabel:
              case spv::Op::OpSelectionMerge:
              case spv::Op::OpLoopMerge:
                return true;
              default:
                break;
            }
            return context_->IsCombinatorInstruction(insn);
          })) {
        return false;
      }
    }
  }

  return true;
}
252
// For each phi of the loop header, records in |exit_value_| the value that
// phi holds when the loop exits (nullptr when it cannot be determined), and
// sets |do_while_form_|. Two shapes are handled:
//  - do-while (the condition block is a predecessor of the header): the exit
//    value is the phi's incoming value from the condition block;
//  - while: the phi itself is the exit value, provided no operation feeding
//    it dominates the condition block (i.e. the update happens after the
//    exit check).
// Loops with no merge block, or whose merge block has several predecessors
// (multiple exits), leave every value as nullptr.
void LoopPeeling::GetIteratingExitValues() {
  CFG& cfg = *context_->cfg();

  // Start pessimistic: every phi's exit value is unknown.
  loop_->GetHeaderBlock()->ForEachPhiInst(
      [this](Instruction* phi) { exit_value_[phi->result_id()] = nullptr; });

  if (!loop_->GetMergeBlock()) {
    return;
  }
  if (cfg.preds(loop_->GetMergeBlock()->id()).size() != 1) {
    return;
  }
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  uint32_t condition_block_id = cfg.preds(loop_->GetMergeBlock()->id())[0];

  // Do-while form: the block holding the exit condition branches back to the
  // header.
  auto& header_pred = cfg.preds(loop_->GetHeaderBlock()->id());
  do_while_form_ = std::find(header_pred.begin(), header_pred.end(),
                             condition_block_id) != header_pred.end();
  if (do_while_form_) {
    loop_->GetHeaderBlock()->ForEachPhiInst(
        [condition_block_id, def_use_mgr, this](Instruction* phi) {
          std::unordered_set<Instruction*> operations;

          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            if (condition_block_id == phi->GetSingleWordInOperand(i + 1)) {
              exit_value_[phi->result_id()] =
                  def_use_mgr->GetDef(phi->GetSingleWordInOperand(i));
            }
          }
        });
  } else {
    DominatorTree* dom_tree =
        &context_->GetDominatorAnalysis(loop_utils_.GetFunction())
             ->GetDomTree();
    BasicBlock* condition_block = cfg.block(condition_block_id);

    loop_->GetHeaderBlock()->ForEachPhiInst(
        [dom_tree, condition_block, this](Instruction* phi) {
          std::unordered_set<Instruction*> operations;

          // Not the back-edge value, check if the phi instruction is the only
          // possible candidate.
          GetIteratorUpdateOperations(loop_, phi, &operations);

          for (Instruction* insn : operations) {
            if (insn == phi) {
              continue;
            }
            // An update operation dominating the condition block means the
            // phi's value is modified before the exit check: bail out for
            // this phi (its exit value stays nullptr).
            if (dom_tree->Dominates(context_->get_instr_block(insn),
                                    condition_block)) {
              return;
            }
          }
          exit_value_[phi->result_id()] = phi;
        });
  }
}
311
// Replaces the cloned loop's exit condition with the id returned by
// |condition_builder|, which is handed the instruction new code must be
// inserted before (the conditional branch, or the merge instruction that
// precedes it). The branch targets are also normalized: "true" continues the
// loop, "false" goes to the cloned loop's merge block.
void LoopPeeling::FixExitCondition(
    const std::function<uint32_t(Instruction*)>& condition_builder) {
  CFG& cfg = *context_->cfg();

  // Find the in-loop block branching to the cloned loop's merge block: it
  // holds the exit condition.
  uint32_t condition_block_id = 0;
  for (uint32_t id : cfg.preds(GetClonedLoop()->GetMergeBlock()->id())) {
    if (GetClonedLoop()->IsInsideLoop(id)) {
      condition_block_id = id;
      break;
    }
  }
  assert(condition_block_id != 0 && "2nd loop in improperly connected");

  BasicBlock* condition_block = cfg.block(condition_block_id);
  Instruction* exit_condition = condition_block->terminator();
  assert(exit_condition->opcode() == spv::Op::OpBranchConditional);
  BasicBlock::iterator insert_point = condition_block->tail();
  if (condition_block->GetMergeInst()) {
    --insert_point;
  }

  // Operand 0 of OpBranchConditional is the condition id.
  exit_condition->SetInOperand(0, {condition_builder(&*insert_point)});

  // Keep the in-loop successor as the "true" target and route "false" to the
  // merge block.
  uint32_t to_continue_block_idx =
      GetClonedLoop()->IsInsideLoop(exit_condition->GetSingleWordInOperand(1))
          ? 1
          : 2;
  exit_condition->SetInOperand(
      1, {exit_condition->GetSingleWordInOperand(to_continue_block_idx)});
  exit_condition->SetInOperand(2, {GetClonedLoop()->GetMergeBlock()->id()});

  // Update def/use manager.
  context_->get_def_use_mgr()->AnalyzeInstUse(exit_condition);
}
346
CreateBlockBefore(BasicBlock * bb)347 BasicBlock* LoopPeeling::CreateBlockBefore(BasicBlock* bb) {
348 analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
349 CFG& cfg = *context_->cfg();
350 assert(cfg.preds(bb->id()).size() == 1 && "More than one predecessor");
351
352 // TODO(1841): Handle id overflow.
353 std::unique_ptr<BasicBlock> new_bb =
354 MakeUnique<BasicBlock>(std::unique_ptr<Instruction>(new Instruction(
355 context_, spv::Op::OpLabel, 0, context_->TakeNextId(), {})));
356 // Update the loop descriptor.
357 Loop* in_loop = (*loop_utils_.GetLoopDescriptor())[bb];
358 if (in_loop) {
359 in_loop->AddBasicBlock(new_bb.get());
360 loop_utils_.GetLoopDescriptor()->SetBasicBlockToLoop(new_bb->id(), in_loop);
361 }
362
363 context_->set_instr_block(new_bb->GetLabelInst(), new_bb.get());
364 def_use_mgr->AnalyzeInstDefUse(new_bb->GetLabelInst());
365
366 BasicBlock* bb_pred = cfg.block(cfg.preds(bb->id())[0]);
367 bb_pred->tail()->ForEachInId([bb, &new_bb](uint32_t* id) {
368 if (*id == bb->id()) {
369 *id = new_bb->id();
370 }
371 });
372 cfg.RemoveEdge(bb_pred->id(), bb->id());
373 cfg.AddEdge(bb_pred->id(), new_bb->id());
374 def_use_mgr->AnalyzeInstUse(&*bb_pred->tail());
375
376 // Update the incoming branch.
377 bb->ForEachPhiInst([&new_bb, def_use_mgr](Instruction* phi) {
378 phi->SetInOperand(1, {new_bb->id()});
379 def_use_mgr->AnalyzeInstUse(phi);
380 });
381 InstructionBuilder(
382 context_, new_bb.get(),
383 IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
384 .AddBranch(bb->id());
385 cfg.RegisterBlock(new_bb.get());
386
387 // Add the basic block to the function.
388 Function::iterator it = loop_utils_.GetFunction()->FindBlock(bb->id());
389 assert(it != loop_utils_.GetFunction()->end() &&
390 "Basic block not found in the function.");
391 BasicBlock* ret = new_bb.get();
392 loop_utils_.GetFunction()->AddBasicBlock(std::move(new_bb), it);
393 return ret;
394 }
395
// Encloses |loop| in a conditional: its pre-header is turned into an "if"
// block that branches to the loop header when |condition| holds and to
// |if_merge| otherwise (|if_merge| is passed both as the false target and as
// the selection merge). Returns the block now holding the conditional
// branch. |loop| is left without a pre-header.
BasicBlock* LoopPeeling::ProtectLoop(Loop* loop, Instruction* condition,
                                     BasicBlock* if_merge) {
  // TODO(1841): Handle failure to create pre-header.
  BasicBlock* if_block = loop->GetOrCreatePreHeaderBlock();
  // Will no longer be a pre-header because of the if.
  loop->SetPreHeaderBlock(nullptr);
  // Kill the branch to the header.
  context_->KillInst(&*if_block->tail());

  InstructionBuilder builder(
      context_, if_block,
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  builder.AddConditionalBranch(condition->result_id(),
                               loop->GetHeaderBlock()->id(), if_merge->id(),
                               if_merge->id());

  return if_block;
}
414
// Peels |peel_factor| iterations before the original loop: clones the loop
// in front of the original, gives the clone a 0-based +1 canonical induction
// variable, caps the clone at min(factor, loop_iteration_count_) iterations,
// and guards the original loop so it only runs when iterations remain
// (factor < loop_iteration_count_). The merge-block phis are patched for the
// new incoming edge from the guard.
void LoopPeeling::PeelBefore(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");
  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  // has_remaining_iteration = factor < loop_iteration_count_
  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());
  // max_iteration = min(factor, loop_iteration_count_), expressed as a
  // select on the comparison above.
  Instruction* max_iteration = builder.AddSelect(
      factor->type_id(), has_remaining_iteration->result_id(),
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop to be (exit when become
  // false):
  //  "canonical_induction_variable_" < min("factor", "loop_iteration_count_")
  FixExitCondition([max_iteration, this](Instruction* insert_before_point) {
    return InstructionBuilder(context_, insert_before_point,
                              IRContext::kAnalysisDefUse |
                                  IRContext::kAnalysisInstrToBlockMapping)
        .AddLessThan(canonical_induction_variable_->result_id(),
                     max_iteration->result_id())
        ->result_id();
  });

  // "Protect" the second loop: the second loop can only be executed if
  // |has_remaining_iteration| is true (i.e. factor < loop_iteration_count_).
  BasicBlock* if_merge_block = loop_->GetMergeBlock();
  loop_->SetMergeBlock(CreateBlockBefore(loop_->GetMergeBlock()));
  // Prevent the second loop from being executed if we already executed all the
  // required iterations.
  BasicBlock* if_block =
      ProtectLoop(loop_, has_remaining_iteration, if_merge_block);
  // Patch the phi of the merge block: the guard adds a second incoming edge
  // (from |if_block|), carrying the cloned definition when one exists.
  if_merge_block->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        // if_merge_block had previously only 1 predecessor.
        uint32_t incoming_value = phi->GetSingleWordInOperand(0);
        auto def_in_loop = clone_results.value_map_.find(incoming_value);
        if (def_in_loop != clone_results.value_map_.end())
          incoming_value = def_in_loop->second;
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {incoming_value}});
        phi->AddOperand(
            {spv_operand_type_t::SPV_OPERAND_TYPE_ID, {if_block->id()}});
        context_->get_def_use_mgr()->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}
476
// Peels |peel_factor| iterations after the original loop: the cloned loop
// (placed first) runs while induction + factor < loop_iteration_count_ and
// is guarded so it only executes when factor < loop_iteration_count_; the
// original loop then performs the remaining iterations. Header phis of the
// original loop are re-routed through new phis in its pre-header, because
// the guard means the cloned-loop exit values no longer dominate it.
void LoopPeeling::PeelAfter(uint32_t peel_factor) {
  assert(CanPeelLoop() && "Cannot peel loop");
  LoopUtils::LoopCloningResult clone_results;

  // Clone the loop and insert the cloned one before the loop.
  DuplicateAndConnectLoop(&clone_results);

  // Add a canonical induction variable "canonical_induction_variable_".
  InsertCanonicalInductionVariable(&clone_results);

  InstructionBuilder builder(
      context_, &*cloned_loop_->GetPreHeaderBlock()->tail(),
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  Instruction* factor =
      builder.GetIntConstant(peel_factor, int_type_->IsSigned());

  // has_remaining_iteration = factor < loop_iteration_count_
  Instruction* has_remaining_iteration = builder.AddLessThan(
      factor->result_id(), loop_iteration_count_->result_id());

  // Change the exit condition of the cloned loop to be (exit when become
  // false):
  //  "canonical_induction_variable_" + "factor" < "loop_iteration_count_"
  FixExitCondition([factor, this](Instruction* insert_before_point) {
    InstructionBuilder cond_builder(
        context_, insert_before_point,
        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
    // Build the following check: canonical_induction_variable_ + factor <
    // iteration_count
    return cond_builder
        .AddLessThan(cond_builder
                         .AddIAdd(canonical_induction_variable_->type_id(),
                                  canonical_induction_variable_->result_id(),
                                  factor->result_id())
                         ->result_id(),
                     loop_iteration_count_->result_id())
        ->result_id();
  });

  // "Protect" the first loop: the first loop can only be executed if
  // factor < loop_iteration_count_.

  // The original loop's pre-header was the cloned loop merge block.
  GetClonedLoop()->SetMergeBlock(
      CreateBlockBefore(GetOriginalLoop()->GetPreHeaderBlock()));
  // Use the second loop preheader as if merge block.

  // Prevent the first loop if only the peeled loop needs it.
  BasicBlock* if_block = ProtectLoop(cloned_loop_, has_remaining_iteration,
                                     GetOriginalLoop()->GetPreHeaderBlock());

  // Patch the phi of the header block.
  // We added an if to enclose the first loop and because the phi nodes are
  // connected to the exit value of the first loop, the definitions no longer
  // dominate the preheader.
  // We add to the preheader (our if merge block) the required phi
  // instructions and patch the header phis.
  GetOriginalLoop()->GetHeaderBlock()->ForEachPhiInst(
      [&clone_results, if_block, this](Instruction* phi) {
        analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

        // Index of the value incoming from outside the loop: with operand
        // pairs (value, block), it is 0 when the out-of-loop block comes
        // first and 2 otherwise.
        auto find_value_idx = [](Instruction* phi_inst, Loop* loop) {
          uint32_t preheader_value_idx =
              !loop->IsInsideLoop(phi_inst->GetSingleWordInOperand(1)) ? 0 : 2;
          return preheader_value_idx;
        };

        Instruction* cloned_phi =
            def_use_mgr->GetDef(clone_results.value_map_.at(phi->result_id()));
        uint32_t cloned_preheader_value = cloned_phi->GetSingleWordInOperand(
            find_value_idx(cloned_phi, GetClonedLoop()));

        // New pre-header phi merging the cloned-loop exit value with the
        // guard's bypass value.
        Instruction* new_phi =
            InstructionBuilder(context_,
                               &*GetOriginalLoop()->GetPreHeaderBlock()->tail(),
                               IRContext::kAnalysisDefUse |
                                   IRContext::kAnalysisInstrToBlockMapping)
                .AddPhi(phi->type_id(),
                        {phi->GetSingleWordInOperand(
                             find_value_idx(phi, GetOriginalLoop())),
                         GetClonedLoop()->GetMergeBlock()->id(),
                         cloned_preheader_value, if_block->id()});

        phi->SetInOperand(find_value_idx(phi, GetOriginalLoop()),
                          {new_phi->result_id()});
        def_use_mgr->AnalyzeInstUse(phi);
      });

  context_->InvalidateAnalysesExceptFor(
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping |
      IRContext::kAnalysisLoopAnalysis | IRContext::kAnalysisCFG);
}
568
Process()569 Pass::Status LoopPeelingPass::Process() {
570 bool modified = false;
571 Module* module = context()->module();
572
573 // Process each function in the module
574 for (Function& f : *module) {
575 modified |= ProcessFunction(&f);
576 }
577
578 return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
579 }
580
ProcessFunction(Function * f)581 bool LoopPeelingPass::ProcessFunction(Function* f) {
582 bool modified = false;
583 LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(f);
584
585 std::vector<Loop*> to_process_loop;
586 to_process_loop.reserve(loop_descriptor.NumLoops());
587 for (Loop& l : loop_descriptor) {
588 to_process_loop.push_back(&l);
589 }
590
591 ScalarEvolutionAnalysis scev_analysis(context());
592
593 for (Loop* loop : to_process_loop) {
594 CodeMetrics loop_size;
595 loop_size.Analyze(*loop);
596
597 auto try_peel = [&loop_size, &modified, this](Loop* loop_to_peel) -> Loop* {
598 if (!loop_to_peel->IsLCSSA()) {
599 LoopUtils(context(), loop_to_peel).MakeLoopClosedSSA();
600 }
601
602 bool peeled_loop;
603 Loop* still_peelable_loop;
604 std::tie(peeled_loop, still_peelable_loop) =
605 ProcessLoop(loop_to_peel, &loop_size);
606
607 if (peeled_loop) {
608 modified = true;
609 }
610
611 return still_peelable_loop;
612 };
613
614 Loop* still_peelable_loop = try_peel(loop);
615 // The pass is working out the maximum factor by which a loop can be peeled.
616 // If the loop can potentially be peeled again, then there is only one
617 // possible direction, so only one call is still needed.
618 if (still_peelable_loop) {
619 try_peel(loop);
620 }
621 }
622
623 return modified;
624 }
625
// Analyzes |loop| and peels it when profitable. Returns a pair
// {was peeled, loop to try peeling again or nullptr}. Bails out when the
// loop shape is unsupported (no single condition block, no condition
// variable, unknown or zero iteration count, CanPeelLoop fails), when no
// block yields a peel direction, or when peeling would grow the code past
// |code_grow_threshold_|.
std::pair<bool, Loop*> LoopPeelingPass::ProcessLoop(Loop* loop,
                                                    CodeMetrics* loop_size) {
  ScalarEvolutionAnalysis* scev_analysis =
      context()->GetScalarEvolutionAnalysis();
  // Default values for bailing out.
  std::pair<bool, Loop*> bail_out{false, nullptr};

  BasicBlock* exit_block = loop->FindConditionBlock();
  if (!exit_block) {
    return bail_out;
  }

  Instruction* exiting_iv = loop->FindConditionVariable(exit_block);
  if (!exiting_iv) {
    return bail_out;
  }
  size_t iterations = 0;
  if (!loop->FindNumberOfIterations(exiting_iv, &*exit_block->tail(),
                                    &iterations)) {
    return bail_out;
  }
  if (!iterations) {
    return bail_out;
  }

  // Look for an existing canonical induction variable among the header phis:
  // an integer recurrence with offset 0 and coefficient 1.
  Instruction* canonical_induction_variable = nullptr;

  loop->GetHeaderBlock()->WhileEachPhiInst([&canonical_induction_variable,
                                            scev_analysis,
                                            this](Instruction* insn) {
    if (const SERecurrentNode* iv =
            scev_analysis->AnalyzeInstruction(insn)->AsSERecurrentNode()) {
      const SEConstantNode* offset = iv->GetOffset()->AsSEConstantNode();
      const SEConstantNode* coeff = iv->GetCoefficient()->AsSEConstantNode();
      if (offset && coeff && offset->FoldToSingleValue() == 0 &&
          coeff->FoldToSingleValue() == 1) {
        if (context()->get_type_mgr()->GetType(insn->type_id())->AsInteger()) {
          canonical_induction_variable = insn;
          return false;
        }
      }
    }
    return true;
  });

  // Match the signedness of the found induction variable; default to
  // unsigned when none exists.
  bool is_signed = canonical_induction_variable
                       ? context()
                             ->get_type_mgr()
                             ->GetType(canonical_induction_variable->type_id())
                             ->AsInteger()
                             ->IsSigned()
                       : false;

  LoopPeeling peeler(
      loop,
      InstructionBuilder(
          context(), loop->GetHeaderBlock(),
          IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping)
          .GetIntConstant<uint32_t>(static_cast<uint32_t>(iterations),
                                    is_signed),
      canonical_induction_variable);

  if (!peeler.CanPeelLoop()) {
    return bail_out;
  }

  // For each basic block in the loop, check if it can be peeled. If it
  // can, get the direction (before/after) and by which factor.
  LoopPeelingInfo peel_info(loop, iterations, scev_analysis);

  uint32_t peel_before_factor = 0;
  uint32_t peel_after_factor = 0;

  for (uint32_t block : loop->GetBlocks()) {
    if (block == exit_block->id()) {
      continue;
    }
    BasicBlock* bb = cfg()->block(block);
    PeelDirection direction;
    uint32_t factor;
    std::tie(direction, factor) = peel_info.GetPeelingInfo(bb);

    if (direction == PeelDirection::kNone) {
      continue;
    }
    if (direction == PeelDirection::kBefore) {
      peel_before_factor = std::max(peel_before_factor, factor);
    } else {
      assert(direction == PeelDirection::kAfter);
      peel_after_factor = std::max(peel_after_factor, factor);
    }
  }
  PeelDirection direction = PeelDirection::kNone;
  uint32_t factor = 0;

  // Find which direction we should peel.
  if (peel_before_factor) {
    factor = peel_before_factor;
    direction = PeelDirection::kBefore;
  }
  if (peel_after_factor) {
    if (peel_before_factor < peel_after_factor) {
      // Favor a peel after here and give the peel before another shot later.
      factor = peel_after_factor;
      direction = PeelDirection::kAfter;
    }
  }

  // Do the peel if we can.
  if (direction == PeelDirection::kNone) return bail_out;

  // This does not take into account branch elimination opportunities and
  // the unrolling. It assumes the peeled loop will be unrolled as well.
  if (factor * loop_size->roi_size_ > code_grow_threshold_) {
    return bail_out;
  }
  loop_size->roi_size_ *= factor;

  // Find if a loop should be peeled again.
  Loop* extra_opportunity = nullptr;

  if (direction == PeelDirection::kBefore) {
    peeler.PeelBefore(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kBefore, factor);
    }
    if (peel_after_factor) {
      // We could have peeled after, give it another try.
      extra_opportunity = peeler.GetOriginalLoop();
    }
  } else {
    peeler.PeelAfter(factor);
    if (stats_) {
      stats_->peeled_loops_.emplace_back(loop, PeelDirection::kAfter, factor);
    }
    if (peel_before_factor) {
      // We could have peeled before, give it another try.
      extra_opportunity = peeler.GetClonedLoop();
    }
  }

  return {true, extra_opportunity};
}
769
// Returns the id of the first in-operand of |condition| whose defining
// instruction lives inside |loop_|, or 0 if there is none.
// NOTE(review): despite the name, the predicate matches operands defined
// *inside* the loop (the loop-variant ones). GetPeelingInfo only uses the
// result as an existence check, so together with its counterpart the net
// effect is "the condition mixes in-loop and out-of-loop operands" — confirm
// before renaming or changing the predicate.
uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    // Module-scope values (e.g. constants) have no basic block.
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (bb && loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}
782
// Returns the id of the first in-operand of |condition| defined outside
// |loop_| — including block-less values such as constants — or 0 if there is
// none.
// NOTE(review): the name appears inverted relative to the predicate (it
// matches the loop-invariant operands); callers only use the result as an
// existence check — confirm before renaming.
uint32_t LoopPeelingPass::LoopPeelingInfo::GetFirstNonLoopInvariantOperand(
    Instruction* condition) const {
  for (uint32_t i = 0; i < condition->NumInOperands(); i++) {
    // Module-scope values (e.g. constants) have no basic block.
    BasicBlock* bb =
        context_->get_instr_block(condition->GetSingleWordInOperand(i));
    if (!bb || !loop_->IsInsideLoop(bb)) {
      return condition->GetSingleWordInOperand(i);
    }
  }

  return 0;
}
795
IsHandledCondition(spv::Op opcode)796 static bool IsHandledCondition(spv::Op opcode) {
797 switch (opcode) {
798 case spv::Op::OpIEqual:
799 case spv::Op::OpINotEqual:
800 case spv::Op::OpUGreaterThan:
801 case spv::Op::OpSGreaterThan:
802 case spv::Op::OpUGreaterThanEqual:
803 case spv::Op::OpSGreaterThanEqual:
804 case spv::Op::OpULessThan:
805 case spv::Op::OpSLessThan:
806 case spv::Op::OpULessThanEqual:
807 case spv::Op::OpSLessThanEqual:
808 return true;
809 default:
810 return false;
811 }
812 }
813
// Works out whether the conditional branch terminating |bb| allows the loop
// to be peeled and, if so, in which direction and by which factor. Only
// handled integer comparisons mixing one operand defined inside the loop
// with one defined outside are considered, and both sides must be
// analyzable by scalar evolution. Equality tests are delegated to
// HandleEquality; ordering tests are canonicalized so the recurring
// (in-loop) expression sits on the right-hand side before calling
// HandleInequality.
LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::GetPeelingInfo(BasicBlock* bb) const {
  if (bb->terminator()->opcode() != spv::Op::OpBranchConditional) {
    return GetNoneDirection();
  }

  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  Instruction* condition =
      def_use_mgr->GetDef(bb->terminator()->GetSingleWordInOperand(0));

  if (!IsHandledCondition(condition->opcode())) {
    return GetNoneDirection();
  }

  if (!GetFirstLoopInvariantOperand(condition)) {
    // No loop invariant, it cannot be peeled by this pass.
    return GetNoneDirection();
  }
  if (!GetFirstNonLoopInvariantOperand(condition)) {
    // Seems to be a job for the unswitch pass.
    return GetNoneDirection();
  }

  // Left hand-side.
  SExpression lhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(0)));
  if (lhs->GetType() == SENode::CanNotCompute) {
    // Can't make any conclusion.
    return GetNoneDirection();
  }

  // Right hand-side.
  SExpression rhs = scev_analysis_->AnalyzeInstruction(
      def_use_mgr->GetDef(condition->GetSingleWordInOperand(1)));
  if (rhs->GetType() == SENode::CanNotCompute) {
    // Can't make any conclusion.
    return GetNoneDirection();
  }

  // Only take into account recurrent expression over the current loop.
  bool is_lhs_rec = !scev_analysis_->IsLoopInvariant(loop_, lhs);
  bool is_rhs_rec = !scev_analysis_->IsLoopInvariant(loop_, rhs);

  // Exactly one side must recur with the loop.
  if ((is_lhs_rec && is_rhs_rec) || (!is_lhs_rec && !is_rhs_rec)) {
    return GetNoneDirection();
  }

  // The recurring side must be a recurrence over *this* loop.
  if (is_lhs_rec) {
    if (!lhs->AsSERecurrentNode() ||
        lhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }
  if (is_rhs_rec) {
    if (!rhs->AsSERecurrentNode() ||
        rhs->AsSERecurrentNode()->GetLoop() != loop_) {
      return GetNoneDirection();
    }
  }

  // If the op code is ==, then we try a peel before or after.
  // If opcode is not <, >, <= or >=, we bail out.
  //
  // For the remaining cases, we canonicalize the expression so that the
  // constant expression is on the left hand side and the recurring expression
  // is on the right hand side. If we swap hand side, then < becomes >, <=
  // becomes >= etc.
  // If the opcode is <=, then we add 1 to the right hand side and do the peel
  // check on <.
  // If the opcode is >=, then we add 1 to the left hand side and do the peel
  // check on >.

  CmpOperator cmp_operator;
  switch (condition->opcode()) {
    default:
      return GetNoneDirection();
    case spv::Op::OpIEqual:
    case spv::Op::OpINotEqual:
      return HandleEquality(lhs, rhs);
    case spv::Op::OpUGreaterThan:
    case spv::Op::OpSGreaterThan: {
      cmp_operator = CmpOperator::kGT;
      break;
    }
    case spv::Op::OpULessThan:
    case spv::Op::OpSLessThan: {
      cmp_operator = CmpOperator::kLT;
      break;
    }
    // We add one to transform >= into > and <= into <.
    case spv::Op::OpUGreaterThanEqual:
    case spv::Op::OpSGreaterThanEqual: {
      cmp_operator = CmpOperator::kGE;
      break;
    }
    case spv::Op::OpULessThanEqual:
    case spv::Op::OpSLessThanEqual: {
      cmp_operator = CmpOperator::kLE;
      break;
    }
  }

  // Force the left hand side to be the non recurring expression.
  if (is_lhs_rec) {
    std::swap(lhs, rhs);
    // Mirror the comparison to preserve its meaning after the swap.
    switch (cmp_operator) {
      case CmpOperator::kLT: {
        cmp_operator = CmpOperator::kGT;
        break;
      }
      case CmpOperator::kGT: {
        cmp_operator = CmpOperator::kLT;
        break;
      }
      case CmpOperator::kLE: {
        cmp_operator = CmpOperator::kGE;
        break;
      }
      case CmpOperator::kGE: {
        cmp_operator = CmpOperator::kLE;
        break;
      }
    }
  }
  return HandleInequality(cmp_operator, lhs, rhs->AsSERecurrentNode());
}
941
GetValueAtFirstIteration(SERecurrentNode * rec) const942 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtFirstIteration(
943 SERecurrentNode* rec) const {
944 return rec->GetOffset();
945 }
946
GetValueAtIteration(SERecurrentNode * rec,int64_t iteration) const947 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtIteration(
948 SERecurrentNode* rec, int64_t iteration) const {
949 SExpression coeff = rec->GetCoefficient();
950 SExpression offset = rec->GetOffset();
951
952 return (coeff * iteration) + offset;
953 }
954
GetValueAtLastIteration(SERecurrentNode * rec) const955 SExpression LoopPeelingPass::LoopPeelingInfo::GetValueAtLastIteration(
956 SERecurrentNode* rec) const {
957 return GetValueAtIteration(rec, loop_max_iterations_ - 1);
958 }
959
EvalOperator(CmpOperator cmp_op,SExpression lhs,SExpression rhs,bool * result) const960 bool LoopPeelingPass::LoopPeelingInfo::EvalOperator(CmpOperator cmp_op,
961 SExpression lhs,
962 SExpression rhs,
963 bool* result) const {
964 assert(scev_analysis_->IsLoopInvariant(loop_, lhs));
965 assert(scev_analysis_->IsLoopInvariant(loop_, rhs));
966 // We perform the test: 0 cmp_op rhs - lhs
967 // What is left is then to determine the sign of the expression.
968 switch (cmp_op) {
969 case CmpOperator::kLT: {
970 return scev_analysis_->IsAlwaysGreaterThanZero(rhs - lhs, result);
971 }
972 case CmpOperator::kGT: {
973 return scev_analysis_->IsAlwaysGreaterThanZero(lhs - rhs, result);
974 }
975 case CmpOperator::kLE: {
976 return scev_analysis_->IsAlwaysGreaterOrEqualToZero(rhs - lhs, result);
977 }
978 case CmpOperator::kGE: {
979 return scev_analysis_->IsAlwaysGreaterOrEqualToZero(lhs - rhs, result);
980 }
981 }
982 return false;
983 }
984
985 LoopPeelingPass::LoopPeelingInfo::Direction
HandleEquality(SExpression lhs,SExpression rhs) const986 LoopPeelingPass::LoopPeelingInfo::HandleEquality(SExpression lhs,
987 SExpression rhs) const {
988 {
989 // Try peel before opportunity.
990 SExpression lhs_cst = lhs;
991 if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
992 lhs_cst = rec_node->GetOffset();
993 }
994 SExpression rhs_cst = rhs;
995 if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
996 rhs_cst = rec_node->GetOffset();
997 }
998
999 if (lhs_cst == rhs_cst) {
1000 return Direction{LoopPeelingPass::PeelDirection::kBefore, 1};
1001 }
1002 }
1003
1004 {
1005 // Try peel after opportunity.
1006 SExpression lhs_cst = lhs;
1007 if (SERecurrentNode* rec_node = lhs->AsSERecurrentNode()) {
1008 // rec_node(x) = a * x + b
1009 // assign to lhs: a * (loop_max_iterations_ - 1) + b
1010 lhs_cst = GetValueAtLastIteration(rec_node);
1011 }
1012 SExpression rhs_cst = rhs;
1013 if (SERecurrentNode* rec_node = rhs->AsSERecurrentNode()) {
1014 // rec_node(x) = a * x + b
1015 // assign to lhs: a * (loop_max_iterations_ - 1) + b
1016 rhs_cst = GetValueAtLastIteration(rec_node);
1017 }
1018
1019 if (lhs_cst == rhs_cst) {
1020 return Direction{LoopPeelingPass::PeelDirection::kAfter, 1};
1021 }
1022 }
1023
1024 return GetNoneDirection();
1025 }
1026
LoopPeelingPass::LoopPeelingInfo::Direction
LoopPeelingPass::LoopPeelingInfo::HandleInequality(CmpOperator cmp_op,
                                                   SExpression lhs,
                                                   SERecurrentNode* rhs) const {
  // |lhs| is loop invariant; |rhs| is a recurrence A * x + B over |loop_|.
  SExpression offset = rhs->GetOffset();
  SExpression coefficient = rhs->GetCoefficient();
  // Solve "lhs == A * x + B" for x, i.e. compute (cst - B) / A. The result is
  // a (quotient, remainder) pair.
  std::pair<SExpression, int64_t> flip_iteration = (lhs - offset) / coefficient;
  if (!flip_iteration.first->AsSEConstantNode()) {
    // The iteration at which the condition flips is not a compile-time
    // constant: we cannot peel.
    return GetNoneDirection();
  }
  // note: !!flip_iteration.second normalizes the remainder to 0/1 (via bool
  // cast), rounding the flip iteration up when the division is not exact.
  int64_t iteration =
      flip_iteration.first->AsSEConstantNode()->FoldToSingleValue() +
      !!flip_iteration.second;
  if (iteration <= 0 ||
      loop_max_iterations_ <= static_cast<uint64_t>(iteration)) {
    // The flip happens outside the loop bounds, so the condition is always
    // true or always false within the loop: peeling would not help.
    return GetNoneDirection();
  }
  // If this is a <= or >= operator, make sure |iteration| is the one flipping
  // the condition.
  // If (cst - B) and A are not divisible, this is equivalent to a < or > check,
  // so we skip this test.
  if (!flip_iteration.second &&
      (cmp_op == CmpOperator::kLE || cmp_op == CmpOperator::kGE)) {
    bool first_iteration;
    bool current_iteration;
    if (!EvalOperator(cmp_op, lhs, offset, &first_iteration) ||
        !EvalOperator(cmp_op, lhs, GetValueAtIteration(rhs, iteration),
                      &current_iteration)) {
      return GetNoneDirection();
    }
    // If the condition did not flip at |iteration|, the next one will.
    if (first_iteration == current_iteration) {
      iteration++;
    }
  }

  uint32_t cast_iteration = 0;
  // Integrity check: can we fit |iteration| in a uint32_t ?
  if (static_cast<uint64_t>(iteration) < std::numeric_limits<uint32_t>::max()) {
    cast_iteration = static_cast<uint32_t>(iteration);
  }

  if (cast_iteration) {
    // Peel before if we are closer to the start, after if closer to the end.
    if (loop_max_iterations_ / 2 > cast_iteration) {
      return Direction{LoopPeelingPass::PeelDirection::kBefore, cast_iteration};
    } else {
      return Direction{
          LoopPeelingPass::PeelDirection::kAfter,
          static_cast<uint32_t>(loop_max_iterations_ - cast_iteration)};
    }
  }

  return GetNoneDirection();
}
1085
1086 } // namespace opt
1087 } // namespace spvtools
1088