// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "source/cfa.h"
#include "source/opt/cfg.h"
#include "source/opt/ir_builder.h"
#include "source/opt/ir_context.h"
#include "source/opt/loop_descriptor.h"
#include "source/opt/loop_utils.h"

namespace spvtools {
namespace opt {

namespace {
// Return true if |bb| dominates at least one block in |exits|.
static inline bool DominatesAnExit(BasicBlock* bb,
                                   const std::unordered_set<BasicBlock*>& exits,
                                   const DominatorTree& dom_tree) {
  for (BasicBlock* e_bb : exits)
    if (dom_tree.Dominates(bb, e_bb)) return true;
  return false;
}

// Utility class to rewrite out-of-loop uses of an in-loop definition in terms
// of phi instructions to achieve LCSSA form.
// For a given definition, the class user registers phi instructions using that
// definition in all loop exit blocks through which the definition escapes.
// Then, when rewriting a use of the definition, the rewriter walks the paths
// from the use to the loop exits. At each step, it inserts a phi instruction
// to merge the incoming values according to the exit blocks' definitions.
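//
// Illustrative sketch (hypothetical SPIR-V ids, not taken from a real
// module): given a definition %def inside a loop and a use outside of it,
//
//   %body = OpLabel               ; in-loop block defining %def
//   %def  = OpIAdd %int %a %b
//   ...
//   %exit = OpLabel               ; loop exit block
//   ...
//   %out  = OpLabel               ; out-of-loop block using %def
//   %use  = OpIMul %int %def %c
//
// the rewriter creates "%phi = OpPhi %int %def %body" in %exit and rewrites
// %use to consume %phi, so no in-loop definition is used past the exits.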
class LCSSARewriter {
 public:
  LCSSARewriter(IRContext* context, const DominatorTree& dom_tree,
                const std::unordered_set<BasicBlock*>& exit_bb,
                BasicBlock* merge_block)
      : context_(context),
        cfg_(context_->cfg()),
        dom_tree_(dom_tree),
        exit_bb_(exit_bb),
        merge_block_id_(merge_block ? merge_block->id() : 0) {}

  struct UseRewriter {
    explicit UseRewriter(LCSSARewriter* base, const Instruction& def_insn)
        : base_(base), def_insn_(def_insn) {}

    // Rewrites the use of |def_insn_| by the instruction |user| at the index
    // |operand_index| in terms of phi instructions. This recursively builds
    // new phi instructions from |user| to the loop exit blocks' phis. The use
    // of |def_insn_| in |user| is replaced by the relevant phi instruction at
    // the end of the operation.
    // It is assumed that |user| does not dominate any of the loop exit basic
    // blocks. This operation does not update the def/use manager; instead it
    // records what needs to be updated. The actual update is performed by
    // UpdateManagers.
    void RewriteUse(BasicBlock* bb, Instruction* user, uint32_t operand_index) {
      assert(
          (user->opcode() != SpvOpPhi || bb != GetParent(user)) &&
          "The root basic block must be the incoming edge if |user| is a phi "
          "instruction");
      assert((user->opcode() == SpvOpPhi || bb == GetParent(user)) &&
             "The root basic block must be the instruction parent if |user| "
             "is not a phi instruction");

      Instruction* new_def = GetOrBuildIncoming(bb->id());

      user->SetOperand(operand_index, {new_def->result_id()});
      rewritten_.insert(user);
    }

    // In-place update of some managers (avoid full invalidation).
    inline void UpdateManagers() {
      analysis::DefUseManager* def_use_mgr = base_->context_->get_def_use_mgr();
      // Register all new definitions.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstDef(insn);
      }
      // Register all new uses.
      for (Instruction* insn : rewritten_) {
        def_use_mgr->AnalyzeInstUse(insn);
      }
    }

   private:
    // Return the basic block that |instr| belongs to.
    BasicBlock* GetParent(Instruction* instr) {
      return base_->context_->get_instr_block(instr);
    }

    // Builds a phi instruction for the basic block |bb|. The function assumes
    // that |defining_blocks| contains the list of basic blocks that define the
    // usable value for each predecessor of |bb|.
    inline Instruction* CreatePhiInstruction(
        BasicBlock* bb, const std::vector<uint32_t>& defining_blocks) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      assert(bb_preds.size() == defining_blocks.size());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(
            GetOrBuildIncoming(defining_blocks[i])->result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Builds a phi instruction for the basic block |bb|; all incoming values
    // will be |value|.
    inline Instruction* CreatePhiInstruction(BasicBlock* bb,
                                             const Instruction& value) {
      std::vector<uint32_t> incomings;
      const std::vector<uint32_t>& bb_preds = base_->cfg_->preds(bb->id());
      for (size_t i = 0; i < bb_preds.size(); i++) {
        incomings.push_back(value.result_id());
        incomings.push_back(bb_preds[i]);
      }
      InstructionBuilder builder(base_->context_, &*bb->begin(),
                                 IRContext::kAnalysisInstrToBlockMapping);
      Instruction* incoming_phi =
          builder.AddPhi(def_insn_.type_id(), incomings);

      rewritten_.insert(incoming_phi);
      return incoming_phi;
    }

    // Return the new def to use for the basic block |bb_id|.
    // If |bb_id| does not have a suitable def to use then we:
    //   - return the common def used by all predecessors;
    //   - if there is no common def, then we build a new phi instruction at
    //     the beginning of |bb_id| and return this new instruction.
    Instruction* GetOrBuildIncoming(uint32_t bb_id) {
      assert(base_->cfg_->block(bb_id) != nullptr && "Unknown basic block");

      Instruction*& incoming_phi = bb_to_phi_[bb_id];
      if (incoming_phi) {
        return incoming_phi;
      }

      BasicBlock* bb = &*base_->cfg_->block(bb_id);
      // If this is an exit basic block, check whether there is already an
      // eligible phi instruction. An eligible phi has |def_insn_| as all of
      // its incoming values.
      if (base_->exit_bb_.count(bb)) {
        // Look for an eligible phi in this block.
        if (!bb->WhileEachPhiInst([&incoming_phi, this](Instruction* phi) {
              for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
                if (phi->GetSingleWordInOperand(i) != def_insn_.result_id())
                  return true;
              }
              incoming_phi = phi;
              rewritten_.insert(incoming_phi);
              return false;
            })) {
          return incoming_phi;
        }
        incoming_phi = CreatePhiInstruction(bb, def_insn_);
        return incoming_phi;
      }

      // Get the block that defines the value to use for each predecessor.
      // If the vector has a single element, then this block does not need to
      // build a phi instruction unless |bb_id| is the loop merge block.
      const std::vector<uint32_t>& defining_blocks =
          base_->GetDefiningBlocks(bb_id);

      // Special case for structured loops: the merge block might not be in
      // the exit block set. To maintain the structured properties, later
      // transformations are easier if the merge block also holds a phi
      // instruction like the exit blocks do.
      if (defining_blocks.size() > 1 || bb_id == base_->merge_block_id_) {
        if (defining_blocks.size() > 1) {
          incoming_phi = CreatePhiInstruction(bb, defining_blocks);
        } else {
          assert(bb_id == base_->merge_block_id_);
          incoming_phi =
              CreatePhiInstruction(bb, *GetOrBuildIncoming(defining_blocks[0]));
        }
      } else {
        incoming_phi = GetOrBuildIncoming(defining_blocks[0]);
      }

      return incoming_phi;
    }

    LCSSARewriter* base_;
    const Instruction& def_insn_;
    std::unordered_map<uint32_t, Instruction*> bb_to_phi_;
    std::unordered_set<Instruction*> rewritten_;
  };

 private:
  // Return the set of blocks holding the definition to use for the basic
  // block |bb_id|:
  //   - if a loop exit block dominates |bb_id|, that exit block is the single
  //     defining block;
  //   - otherwise, if all predecessors resolve to the same defining block,
  //     that common block is returned alone;
  //   - otherwise, one defining block per predecessor is returned and a phi
  //     instruction will be required to merge them.
  const std::vector<uint32_t>& GetDefiningBlocks(uint32_t bb_id) {
    assert(cfg_->block(bb_id) != nullptr && "Unknown basic block");
    std::vector<uint32_t>& defining_blocks = bb_to_defining_blocks_[bb_id];

    if (defining_blocks.size()) return defining_blocks;

    // Check if one of the loop exit basic blocks dominates |bb_id|.
    for (const BasicBlock* e_bb : exit_bb_) {
      if (dom_tree_.Dominates(e_bb->id(), bb_id)) {
        defining_blocks.push_back(e_bb->id());
        return defining_blocks;
      }
    }

    // Process the predecessors: each returns its suitable defining blocks.
    // If they are all the same, this basic block is dominated by a common
    // block, so we won't need to build a phi instruction.
    for (uint32_t pred_id : cfg_->preds(bb_id)) {
      const std::vector<uint32_t>& pred_blocks = GetDefiningBlocks(pred_id);
      if (pred_blocks.size() == 1)
        defining_blocks.push_back(pred_blocks[0]);
      else
        defining_blocks.push_back(pred_id);
    }
    assert(defining_blocks.size());
    if (std::all_of(defining_blocks.begin(), defining_blocks.end(),
                    [&defining_blocks](uint32_t id) {
                      return id == defining_blocks[0];
                    })) {
      // No need for a phi.
      defining_blocks.resize(1);
    }

    return defining_blocks;
  }

  IRContext* context_;
  CFG* cfg_;
  const DominatorTree& dom_tree_;
  const std::unordered_set<BasicBlock*>& exit_bb_;
  uint32_t merge_block_id_;
  // This map represents the set of known paths. For each key, the vector
  // holds the set of blocks providing the definition to be used to build the
  // phi instruction.
  // If the vector is empty, then the path is not known yet and must be built.
  // If the vector has a single element, then the value defined by that basic
  // block should be used.
  // If the vector has more than one element, then a phi node must be created;
  // the basic block ordering is the same as the predecessor ordering.
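  //
  // For example (illustrative only, hypothetical block ids): for a block 12
  // with predecessors {10, 11},
  //   bb_to_defining_blocks_[12] == {}       -> path not computed yet;
  //   bb_to_defining_blocks_[12] == {5}      -> use the def from block 5;
  //   bb_to_defining_blocks_[12] == {10, 11} -> build, in block 12,
  //     "%phi = OpPhi %type %def_from_10 %10 %def_from_11 %11".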
  std::unordered_map<uint32_t, std::vector<uint32_t>> bb_to_defining_blocks_;
};

// Make the set |blocks| closed SSA. The set is closed SSA if all the uses
// outside the set are phi instructions in the exit basic block set (held by
// |lcssa_rewriter|).
inline void MakeSetClosedSSA(IRContext* context, Function* function,
                             const std::unordered_set<uint32_t>& blocks,
                             const std::unordered_set<BasicBlock*>& exit_bb,
                             LCSSARewriter* lcssa_rewriter) {
  CFG& cfg = *context->cfg();
  DominatorTree& dom_tree =
      context->GetDominatorAnalysis(function)->GetDomTree();
  analysis::DefUseManager* def_use_manager = context->get_def_use_mgr();

  for (uint32_t bb_id : blocks) {
    BasicBlock* bb = cfg.block(bb_id);
    // If |bb| does not dominate an exit block, then it cannot have escaping
    // defs.
    if (!DominatesAnExit(bb, exit_bb, dom_tree)) continue;
    for (Instruction& inst : *bb) {
      LCSSARewriter::UseRewriter rewriter(lcssa_rewriter, inst);
      def_use_manager->ForEachUse(
          &inst, [&blocks, &rewriter, &exit_bb, context](
                     Instruction* use, uint32_t operand_index) {
            BasicBlock* use_parent = context->get_instr_block(use);
            assert(use_parent);
            if (blocks.count(use_parent->id())) return;

            if (use->opcode() == SpvOpPhi) {
              // If the use is a phi instruction inside an exit block, then
              // it already conforms to LCSSA form.
              if (exit_bb.count(use_parent)) {
                return;
              } else {
                // This is not an exit block, but the user is a phi
                // instruction. Consider the incoming branch only.
                use_parent = context->get_instr_block(
                    use->GetSingleWordOperand(operand_index + 1));
              }
            }
            // Rewrite the use. Note that this call does not invalidate the
            // def/use manager. So this operation is safe.
            rewriter.RewriteUse(use_parent, use, operand_index);
          });
      rewriter.UpdateManagers();
    }
  }
}

}  // namespace

void LoopUtils::CreateLoopDedicatedExits() {
  Function* function = loop_->GetHeaderBlock()->GetParent();
  LoopDescriptor& loop_desc = *context_->GetLoopDescriptor(function);
  CFG& cfg = *context_->cfg();
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  const IRContext::Analysis PreservedAnalyses =
      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping;

  // Gathers the set of basic blocks that are not in this loop but have at
  // least one predecessor in the loop.
  std::unordered_set<uint32_t> exit_bb_set;
  loop_->GetExitBlocks(&exit_bb_set);

  std::unordered_set<BasicBlock*> new_loop_exits;
  bool made_change = false;
  // For each exit block that also has out-of-loop predecessors, we create a
  // new block that gathers all the branches from the loop and falls through
  // into the original block, as sketched below.
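  //
  // Sketch (hypothetical block ids): if %a (in loop) and %b (out of loop)
  // both branch to the exit block %e, a dedicated exit %d is created so that
  // the loop only exits through %d:
  //
  //   before:  %a -> %e <- %b
  //   after:   %a -> %d -> %e <- %b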
  for (uint32_t non_dedicate_id : exit_bb_set) {
    BasicBlock* non_dedicate = cfg.block(non_dedicate_id);
    const std::vector<uint32_t>& bb_pred = cfg.preds(non_dedicate_id);
    // Ignore the block if all its predecessors are in the loop.
    if (std::all_of(bb_pred.begin(), bb_pred.end(),
                    [this](uint32_t id) { return loop_->IsInsideLoop(id); })) {
      new_loop_exits.insert(non_dedicate);
      continue;
    }

    made_change = true;
    Function::iterator insert_pt = function->begin();
    for (; insert_pt != function->end() && &*insert_pt != non_dedicate;
         ++insert_pt) {
    }
    assert(insert_pt != function->end() && "Basic Block not found");

    // Create the dedicated exit basic block.
    // TODO(1841): Handle id overflow.
    BasicBlock& exit = *insert_pt.InsertBefore(std::unique_ptr<BasicBlock>(
        new BasicBlock(std::unique_ptr<Instruction>(new Instruction(
            context_, SpvOpLabel, 0, context_->TakeNextId(), {})))));
    exit.SetParent(function);

    // Redirect in-loop predecessors to the |exit| block.
    for (uint32_t exit_pred_id : bb_pred) {
      if (loop_->IsInsideLoop(exit_pred_id)) {
        BasicBlock* pred_block = cfg.block(exit_pred_id);
        pred_block->ForEachSuccessorLabel([non_dedicate, &exit](uint32_t* id) {
          if (*id == non_dedicate->id()) *id = exit.id();
        });
        // Update the CFG.
        // |non_dedicate|'s predecessor list will be updated at the end of
        // the loop.
        cfg.RegisterBlock(pred_block);
      }
    }

    // Register the label with the def/use manager; this is required for the
    // phi patching below.
    def_use_mgr->AnalyzeInstDefUse(exit.GetLabelInst());
    context_->set_instr_block(exit.GetLabelInst(), &exit);

    InstructionBuilder builder(context_, &exit, PreservedAnalyses);
    // Now jump from our dedicated basic block to the old exit.
    // We also reset the insert point so all instructions are inserted before
    // the branch.
    builder.SetInsertPoint(builder.AddBranch(non_dedicate->id()));
    non_dedicate->ForEachPhiInst(
        [&builder, &exit, def_use_mgr, this](Instruction* phi) {
          // New phi operands for this instruction.
          std::vector<uint32_t> new_phi_op;
          // Phi operands for the dedicated exit block.
          std::vector<uint32_t> exit_phi_op;
          for (uint32_t i = 0; i < phi->NumInOperands(); i += 2) {
            uint32_t def_id = phi->GetSingleWordInOperand(i);
            uint32_t incoming_id = phi->GetSingleWordInOperand(i + 1);
            if (loop_->IsInsideLoop(incoming_id)) {
              exit_phi_op.push_back(def_id);
              exit_phi_op.push_back(incoming_id);
            } else {
              new_phi_op.push_back(def_id);
              new_phi_op.push_back(incoming_id);
            }
          }

          // Build the new phi instruction in the dedicated exit block.
          Instruction* exit_phi = builder.AddPhi(phi->type_id(), exit_phi_op);
          // Build the new incoming branch.
          new_phi_op.push_back(exit_phi->result_id());
          new_phi_op.push_back(exit.id());
          // Rewrite operands.
          uint32_t idx = 0;
          for (; idx < new_phi_op.size(); idx++)
            phi->SetInOperand(idx, {new_phi_op[idx]});
          // Remove extra operands, from last to first (more efficient).
          for (uint32_t j = phi->NumInOperands() - 1; j >= idx; j--)
            phi->RemoveInOperand(j);
          // Update the def/use manager for this |phi|.
          def_use_mgr->AnalyzeInstUse(phi);
        });
    // Update the CFG.
    cfg.RegisterBlock(&exit);
    cfg.RemoveNonExistingEdges(non_dedicate->id());
    new_loop_exits.insert(&exit);
    // If |non_dedicate| is in a loop, add the new dedicated exit to that
    // loop.
    if (Loop* parent_loop = loop_desc[non_dedicate])
      parent_loop->AddBasicBlock(&exit);
  }

  if (new_loop_exits.size() == 1) {
    loop_->SetMergeBlock(*new_loop_exits.begin());
  }

  if (made_change) {
    context_->InvalidateAnalysesExceptFor(
        PreservedAnalyses | IRContext::kAnalysisCFG |
        IRContext::Analysis::kAnalysisLoopAnalysis);
  }
}

void LoopUtils::MakeLoopClosedSSA() {
  CreateLoopDedicatedExits();

  Function* function = loop_->GetHeaderBlock()->GetParent();
  CFG& cfg = *context_->cfg();
  DominatorTree& dom_tree =
      context_->GetDominatorAnalysis(function)->GetDomTree();

  std::unordered_set<BasicBlock*> exit_bb;
  {
    std::unordered_set<uint32_t> exit_bb_id;
    loop_->GetExitBlocks(&exit_bb_id);
    for (uint32_t bb_id : exit_bb_id) {
      exit_bb.insert(cfg.block(bb_id));
    }
  }

  LCSSARewriter lcssa_rewriter(context_, dom_tree, exit_bb,
                               loop_->GetMergeBlock());
  MakeSetClosedSSA(context_, function, loop_->GetBlocks(), exit_bb,
                   &lcssa_rewriter);

  // Make sure all defs post-dominated by the merge block have their last use
  // no further than the merge block.
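  //
  // Sketch (hypothetical structure): a value defined on a break path, e.g.
  //
  //   header -> ... -> break_bb -> merge -> later_use
  //
  // where break_bb is a merging block, must reach |later_use| through a phi
  // instruction in the merge block; the second rewrite below handles this.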
  if (loop_->GetMergeBlock()) {
    std::unordered_set<uint32_t> merging_bb_id;
    loop_->GetMergingBlocks(&merging_bb_id);
    merging_bb_id.erase(loop_->GetMergeBlock()->id());
    // Reset the exit set; now only the merge block is the exit.
    exit_bb.clear();
    exit_bb.insert(loop_->GetMergeBlock());
    // LCSSARewriter is reusable here only because it forces the creation of a
    // phi instruction in the merge block.
    MakeSetClosedSSA(context_, function, merging_bb_id, exit_bb,
                     &lcssa_rewriter);
  }

  context_->InvalidateAnalysesExceptFor(
      IRContext::Analysis::kAnalysisCFG |
      IRContext::Analysis::kAnalysisDominatorAnalysis |
      IRContext::Analysis::kAnalysisLoopAnalysis);
}

Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
  // Compute the structured order of the loop basic blocks and store it in the
  // vector |ordered_loop_blocks|.
  std::vector<BasicBlock*> ordered_loop_blocks;
  loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks);

  // Clone the loop.
  return CloneLoop(cloning_result, ordered_loop_blocks);
}

Loop* LoopUtils::CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result) {
  // Clone the loop.
  Loop* new_loop = CloneLoop(cloning_result);

  // Create a new exit block/label for the new loop.
  // TODO(1841): Handle id overflow.
  std::unique_ptr<Instruction> new_label{new Instruction(
      context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
  std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
  new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());

  // Create an unconditional branch to the header block.
  InstructionBuilder builder{context_, new_exit_bb.get()};
  builder.AddBranch(loop_->GetHeaderBlock()->id());

  // Save the ids of the new and old merge blocks.
  const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
  const uint32_t new_merge_block = new_exit_bb->id();

  // Replace the uses of the old merge block in the new loop with the new
  // merge block.
  for (std::unique_ptr<BasicBlock>& basic_block : cloning_result->cloned_bb_) {
    for (Instruction& inst : *basic_block) {
      // For each operand in each instruction, check if it is using the old
      // merge block and change it to be the new merge block.
      auto replace_merge_use = [old_merge_block,
                                new_merge_block](uint32_t* id) {
        if (*id == old_merge_block) *id = new_merge_block;
      };
      inst.ForEachInOperand(replace_merge_use);
    }
  }

  const uint32_t old_header = loop_->GetHeaderBlock()->id();
  const uint32_t new_header = new_loop->GetHeaderBlock()->id();
  analysis::DefUseManager* def_use = context_->get_def_use_mgr();

  def_use->ForEachUse(old_header,
                      [new_header, this](Instruction* inst, uint32_t operand) {
                        if (!this->loop_->IsInsideLoop(inst))
                          inst->SetOperand(operand, {new_header});
                      });

  // TODO(1841): Handle failure to create pre-header.
  def_use->ForEachUse(
      loop_->GetOrCreatePreHeaderBlock()->id(),
      [new_merge_block, this](Instruction* inst, uint32_t operand) {
        if (this->loop_->IsInsideLoop(inst))
          inst->SetOperand(operand, {new_merge_block});
      });
  new_loop->SetMergeBlock(new_exit_bb.get());

  new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());

  // Add the new block into the cloned instructions.
  cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));

  return new_loop;
}

Loop* LoopUtils::CloneLoop(
    LoopCloningResult* cloning_result,
    const std::vector<BasicBlock*>& ordered_loop_blocks) const {
  analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();

  std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(context_);

  CFG& cfg = *context_->cfg();

  // Clone and place blocks in a SPIR-V compliant order (dominators first).
  for (BasicBlock* old_bb : ordered_loop_blocks) {
    // For each basic block in the loop, we clone it and register the mapping
    // between old and new ids.
    BasicBlock* new_bb = old_bb->Clone(context_);
    new_bb->SetParent(&function_);
    // TODO(1841): Handle id overflow.
    new_bb->GetLabelInst()->SetResultId(context_->TakeNextId());
    def_use_mgr->AnalyzeInstDef(new_bb->GetLabelInst());
    context_->set_instr_block(new_bb->GetLabelInst(), new_bb);
    cloning_result->cloned_bb_.emplace_back(new_bb);

    cloning_result->old_to_new_bb_[old_bb->id()] = new_bb;
    cloning_result->new_to_old_bb_[new_bb->id()] = old_bb;
    cloning_result->value_map_[old_bb->id()] = new_bb->id();

    if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);

    for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
         new_inst != new_bb->end(); ++new_inst, ++old_inst) {
      cloning_result->ptr_map_[&*new_inst] = &*old_inst;
      if (new_inst->HasResultId()) {
        // TODO(1841): Handle id overflow.
        new_inst->SetResultId(context_->TakeNextId());
        cloning_result->value_map_[old_inst->result_id()] =
            new_inst->result_id();

        // Only look at the defs for now; uses are not updated yet.
        def_use_mgr->AnalyzeInstDef(&*new_inst);
      }
    }
  }

  // All instructions (including all labels) have been cloned, so remap the
  // instruction operand ids to the new ones.
  for (std::unique_ptr<BasicBlock>& bb_ref : cloning_result->cloned_bb_) {
    BasicBlock* bb = bb_ref.get();

    for (Instruction& insn : *bb) {
      insn.ForEachInId([cloning_result](uint32_t* old_id) {
        // If the operand is defined in the loop, remap the id.
        auto id_it = cloning_result->value_map_.find(*old_id);
        if (id_it != cloning_result->value_map_.end()) {
          *old_id = id_it->second;
        }
      });
      // Only look at what the instruction uses. All defs are registered, so
      // everything should be fine now.
      def_use_mgr->AnalyzeInstUse(&insn);
      context_->set_instr_block(&insn, bb);
    }
    cfg.RegisterBlock(bb);
  }

  PopulateLoopNest(new_loop.get(), *cloning_result);

  return new_loop.release();
}

void LoopUtils::PopulateLoopNest(
    Loop* new_loop, const LoopCloningResult& cloning_result) const {
  std::unordered_map<Loop*, Loop*> loop_mapping;
  loop_mapping[loop_] = new_loop;

  if (loop_->HasParent()) loop_->GetParent()->AddNestedLoop(new_loop);
  PopulateLoopDesc(new_loop, loop_, cloning_result);

  for (Loop& sub_loop :
       make_range(++TreeDFIterator<Loop>(loop_), TreeDFIterator<Loop>())) {
    Loop* cloned = new Loop(context_);
    if (Loop* parent = loop_mapping[sub_loop.GetParent()])
      parent->AddNestedLoop(cloned);
    loop_mapping[&sub_loop] = cloned;
    PopulateLoopDesc(cloned, &sub_loop, cloning_result);
  }

  loop_desc_->AddLoopNest(std::unique_ptr<Loop>(new_loop));
}

// Populates the descriptor of |new_loop| according to |old_loop|'s.
void LoopUtils::PopulateLoopDesc(
    Loop* new_loop, Loop* old_loop,
    const LoopCloningResult& cloning_result) const {
  for (uint32_t bb_id : old_loop->GetBlocks()) {
    BasicBlock* bb = cloning_result.old_to_new_bb_.at(bb_id);
    new_loop->AddBasicBlock(bb);
  }
  new_loop->SetHeaderBlock(
      cloning_result.old_to_new_bb_.at(old_loop->GetHeaderBlock()->id()));
  if (old_loop->GetLatchBlock())
    new_loop->SetLatchBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetLatchBlock()->id()));
  if (old_loop->GetContinueBlock())
    new_loop->SetContinueBlock(
        cloning_result.old_to_new_bb_.at(old_loop->GetContinueBlock()->id()));
  if (old_loop->GetMergeBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetMergeBlock()->id());
    BasicBlock* bb = it != cloning_result.old_to_new_bb_.end()
                         ? it->second
                         : old_loop->GetMergeBlock();
    new_loop->SetMergeBlock(bb);
  }
  if (old_loop->GetPreHeaderBlock()) {
    auto it =
        cloning_result.old_to_new_bb_.find(old_loop->GetPreHeaderBlock()->id());
    if (it != cloning_result.old_to_new_bb_.end()) {
      new_loop->SetPreHeaderBlock(it->second);
    }
  }
}

// Gathers some metrics about the region of interest (here, |loop|): the
// total instruction count and the per-block instruction counts, ignoring
// labels, nops, and phi instructions.
void CodeMetrics::Analyze(const Loop& loop) {
  CFG& cfg = *loop.GetContext()->cfg();

  roi_size_ = 0;
  block_sizes_.clear();

  for (uint32_t id : loop.GetBlocks()) {
    const BasicBlock* bb = cfg.block(id);
    size_t bb_size = 0;
    bb->ForEachInst([&bb_size](const Instruction* insn) {
      if (insn->opcode() == SpvOpLabel) return;
      if (insn->IsNop()) return;
      if (insn->opcode() == SpvOpPhi) return;
      bb_size++;
    });
    block_sizes_[bb->id()] = bb_size;
    roi_size_ += bb_size;
  }
}

}  // namespace opt
}  // namespace spvtools