1 //===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This implements the Relooper algorithm. This implementation includes
12 /// optimizations added since the original academic paper [1] was published.
13 ///
14 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
15 /// Proceedings of the ACM international conference companion on Object
16 /// oriented programming systems languages and applications companion
17 /// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
18 /// http://doi.acm.org/10.1145/2048147.2048224
19 ///
20 //===-------------------------------------------------------------------===//
21
22 #include "Relooper.h"
23 #include "WebAssembly.h"
24
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
32
33 #include <cstring>
34 #include <cstdlib>
35 #include <functional>
36 #include <list>
37 #include <stack>
38 #include <string>
39
40 #define DEBUG_TYPE "relooper"
41
42 using namespace llvm;
43 using namespace Relooper;
44
45 static cl::opt<int> RelooperSplittingFactor(
46 "relooper-splitting-factor",
47 cl::desc(
48 "How much to discount code size when deciding whether to split a node"),
49 cl::init(5));
50
51 static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
52 "relooper-multiple-switch-threshold",
53 cl::desc(
54 "How many entries to allow in a multiple before we use a switch"),
55 cl::init(10));
56
57 static cl::opt<unsigned> RelooperNestingLimit(
58 "relooper-nesting-limit",
59 cl::desc(
60 "How much nesting is acceptable"),
61 cl::init(20));
62
63
64 namespace {
65 ///
66 /// Implements the relooper algorithm for a function's blocks.
67 ///
68 /// Implementation details: The Relooper instance has
69 /// ownership of the blocks and shapes, and frees them when done.
70 ///
71 struct RelooperAlgorithm {
72 std::deque<Block *> Blocks;
73 std::deque<Shape *> Shapes;
74 Shape *Root;
75 bool MinSize;
76 int BlockIdCounter;
77 int ShapeIdCounter;
78
79 RelooperAlgorithm();
80 ~RelooperAlgorithm();
81
82 void AddBlock(Block *New, int Id = -1);
83
84 // Calculates the shapes
85 void Calculate(Block *Entry);
86
87 // Sets us to try to minimize size
SetMinSize__anon38d310420111::RelooperAlgorithm88 void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
89 };
90
91 struct RelooperAnalysis final : public FunctionPass {
92 static char ID;
RelooperAnalysis__anon38d310420111::RelooperAnalysis93 RelooperAnalysis() : FunctionPass(ID) {}
getPassName__anon38d310420111::RelooperAnalysis94 const char *getPassName() const override { return "relooper"; }
getAnalysisUsage__anon38d310420111::RelooperAnalysis95 void getAnalysisUsage(AnalysisUsage &AU) const override {
96 AU.setPreservesAll();
97 }
98 bool runOnFunction(Function &F) override;
99 };
100 }
101
102 // RelooperAnalysis
103
104 char RelooperAnalysis::ID = 0;
createWebAssemblyRelooper()105 FunctionPass *llvm::createWebAssemblyRelooper() {
106 return new RelooperAnalysis();
107 }
108
runOnFunction(Function & F)109 bool RelooperAnalysis::runOnFunction(Function &F) {
110 DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
111 RelooperAlgorithm R;
112 // FIXME: remove duplication between relooper's and LLVM's BBs.
113 std::map<const BasicBlock *, Block *> BB2B;
114 std::map<const Block *, const BasicBlock *> B2BB;
115 for (const BasicBlock &BB : F) {
116 // FIXME: getName is wrong here, Code is meant to represent amount of code.
117 // FIXME: use BranchVarInit for switch.
118 Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
119 R.AddBlock(B);
120 assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
121 assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
122 BB2B[&BB] = B;
123 B2BB[B] = &BB;
124 }
125 for (Block *B : R.Blocks) {
126 const BasicBlock *BB = B2BB[B];
127 for (const BasicBlock *Successor : successors(BB))
128 // FIXME: add branch's Condition and Code below.
129 B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
130 }
131 R.Calculate(BB2B[&F.getEntryBlock()]);
132 return false; // Analysis passes don't modify anything.
133 }
134
135 // Helpers
136
137 typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
138 typedef std::list<Block *> BlockList;
139
140 template <class T, class U>
contains(const T & container,const U & contained)141 static bool contains(const T &container, const U &contained) {
142 return container.count(contained);
143 }
144
145
146 // Branch
147
Branch(const char * ConditionInit,const char * CodeInit)148 Branch::Branch(const char *ConditionInit, const char *CodeInit)
149 : Ancestor(nullptr), Labeled(true) {
150 // FIXME: move from char* to LLVM data structures
151 Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
152 Code = CodeInit ? strdup(CodeInit) : nullptr;
153 }
154
~Branch()155 Branch::~Branch() {
156 // FIXME: move from char* to LLVM data structures
157 free(static_cast<void *>(const_cast<char *>(Condition)));
158 free(static_cast<void *>(const_cast<char *>(Code)));
159 }
160
161 // Block
162
Block(const char * CodeInit,const char * BranchVarInit)163 Block::Block(const char *CodeInit, const char *BranchVarInit)
164 : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
165 // FIXME: move from char* to LLVM data structures
166 Code = strdup(CodeInit);
167 BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
168 }
169
~Block()170 Block::~Block() {
171 // FIXME: move from char* to LLVM data structures
172 free(static_cast<void *>(const_cast<char *>(Code)));
173 free(static_cast<void *>(const_cast<char *>(BranchVar)));
174 }
175
AddBranchTo(Block * Target,const char * Condition,const char * Code)176 void Block::AddBranchTo(Block *Target, const char *Condition,
177 const char *Code) {
178 assert(!contains(BranchesOut, Target) &&
179 "cannot add more than one branch to the same target");
180 BranchesOut[Target] = make_unique<Branch>(Condition, Code);
181 }
182
183 // Relooper
184
RelooperAlgorithm()185 RelooperAlgorithm::RelooperAlgorithm()
186 : Root(nullptr), MinSize(false), BlockIdCounter(1),
187 ShapeIdCounter(0) { // block ID 0 is reserved for clearings
188 }
189
~RelooperAlgorithm()190 RelooperAlgorithm::~RelooperAlgorithm() {
191 for (auto Curr : Blocks)
192 delete Curr;
193 for (auto Curr : Shapes)
194 delete Curr;
195 }
196
AddBlock(Block * New,int Id)197 void RelooperAlgorithm::AddBlock(Block *New, int Id) {
198 New->Id = Id == -1 ? BlockIdCounter++ : Id;
199 Blocks.push_back(New);
200 }
201
202 struct RelooperRecursor {
203 RelooperAlgorithm *Parent;
RelooperRecursorRelooperRecursor204 RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
205 };
206
Calculate(Block * Entry)207 void RelooperAlgorithm::Calculate(Block *Entry) {
208 // Scan and optimize the input
209 struct PreOptimizer : public RelooperRecursor {
210 PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
211 BlockSet Live;
212
213 void FindLive(Block *Root) {
214 BlockList ToInvestigate;
215 ToInvestigate.push_back(Root);
216 while (!ToInvestigate.empty()) {
217 Block *Curr = ToInvestigate.front();
218 ToInvestigate.pop_front();
219 if (contains(Live, Curr))
220 continue;
221 Live.insert(Curr);
222 for (const auto &iter : Curr->BranchesOut)
223 ToInvestigate.push_back(iter.first);
224 }
225 }
226
227 // If a block has multiple entries but no exits, and it is small enough, it
228 // is useful to split it. A common example is a C++ function where
229 // everything ends up at a final exit block and does some RAII cleanup.
230 // Without splitting, we will be forced to introduce labelled loops to
231 // allow reaching the final block
232 void SplitDeadEnds() {
233 unsigned TotalCodeSize = 0;
234 for (const auto &Curr : Live) {
235 TotalCodeSize += strlen(Curr->Code);
236 }
237 BlockSet Splits;
238 BlockSet Removed;
239 for (const auto &Original : Live) {
240 if (Original->BranchesIn.size() <= 1 ||
241 !Original->BranchesOut.empty())
242 continue; // only dead ends, for now
243 if (contains(Original->BranchesOut, Original))
244 continue; // cannot split a looping node
245 if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
246 TotalCodeSize / RelooperSplittingFactor)
247 continue; // if splitting increases raw code size by a significant
248 // amount, abort
249 // Split the node (for simplicity, we replace all the blocks, even
250 // though we could have reused the original)
251 DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
252 for (const auto &Prior : Original->BranchesIn) {
253 Block *Split = new Block(Original->Code, Original->BranchVar);
254 Parent->AddBlock(Split, Original->Id);
255 Split->BranchesIn.insert(Prior);
256 std::unique_ptr<Branch> Details;
257 Details.swap(Prior->BranchesOut[Original]);
258 Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
259 Details->Code);
260 for (const auto &iter : Original->BranchesOut) {
261 Block *Post = iter.first;
262 Branch *Details = iter.second.get();
263 Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
264 Details->Code);
265 Post->BranchesIn.insert(Split);
266 }
267 Splits.insert(Split);
268 Removed.insert(Original);
269 }
270 for (const auto &iter : Original->BranchesOut) {
271 Block *Post = iter.first;
272 Post->BranchesIn.remove(Original);
273 }
274 }
275 for (const auto &iter : Splits)
276 Live.insert(iter);
277 for (const auto &iter : Removed)
278 Live.remove(iter);
279 }
280 };
281 PreOptimizer Pre(this);
282 Pre.FindLive(Entry);
283
284 // Add incoming branches from live blocks, ignoring dead code
285 for (unsigned i = 0; i < Blocks.size(); i++) {
286 Block *Curr = Blocks[i];
287 if (!contains(Pre.Live, Curr))
288 continue;
289 for (const auto &iter : Curr->BranchesOut)
290 iter.first->BranchesIn.insert(Curr);
291 }
292
293 if (!MinSize)
294 Pre.SplitDeadEnds();
295
296 // Recursively process the graph
297
298 struct Analyzer : public RelooperRecursor {
299 Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
300
301 // Add a shape to the list of shapes in this Relooper calculation
302 void Notice(Shape *New) {
303 New->Id = Parent->ShapeIdCounter++;
304 Parent->Shapes.push_back(New);
305 }
306
307 // Create a list of entries from a block. If LimitTo is provided, only
308 // results in that set will appear
309 void GetBlocksOut(Block *Source, BlockSet &Entries,
310 BlockSet *LimitTo = nullptr) {
311 for (const auto &iter : Source->BranchesOut)
312 if (!LimitTo || contains(*LimitTo, iter.first))
313 Entries.insert(iter.first);
314 }
315
316 // Converts/processes all branchings to a specific target
317 void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
318 BlockSet &From) {
319 DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
320 << "\n");
321 for (auto iter = Target->BranchesIn.begin();
322 iter != Target->BranchesIn.end();) {
323 Block *Prior = *iter;
324 if (!contains(From, Prior)) {
325 iter++;
326 continue;
327 }
328 std::unique_ptr<Branch> PriorOut;
329 PriorOut.swap(Prior->BranchesOut[Target]);
330 PriorOut->Ancestor = Ancestor;
331 PriorOut->Type = Type;
332 if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
333 Multiple->Breaks++; // We are breaking out of this Multiple, so need a
334 // loop
335 iter++; // carefully increment iter before erasing
336 Target->BranchesIn.remove(Prior);
337 Target->ProcessedBranchesIn.insert(Prior);
338 Prior->ProcessedBranchesOut[Target].swap(PriorOut);
339 }
340 }
341
342 Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
343 DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
344 SimpleShape *Simple = new SimpleShape;
345 Notice(Simple);
346 Simple->Inner = Inner;
347 Inner->Parent = Simple;
348 if (Blocks.size() > 1) {
349 Blocks.remove(Inner);
350 GetBlocksOut(Inner, NextEntries, &Blocks);
351 BlockSet JustInner;
352 JustInner.insert(Inner);
353 for (const auto &iter : NextEntries)
354 Solipsize(iter, Branch::Direct, Simple, JustInner);
355 }
356 return Simple;
357 }
358
359 Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
360 BlockSet &NextEntries) {
361 // Find the inner blocks in this loop. Proceed backwards from the entries
362 // until
363 // you reach a seen block, collecting as you go.
364 BlockSet InnerBlocks;
365 BlockSet Queue = Entries;
366 while (!Queue.empty()) {
367 Block *Curr = *(Queue.begin());
368 Queue.remove(*Queue.begin());
369 if (!contains(InnerBlocks, Curr)) {
370 // This element is new, mark it as inner and remove from outer
371 InnerBlocks.insert(Curr);
372 Blocks.remove(Curr);
373 // Add the elements prior to it
374 for (const auto &iter : Curr->BranchesIn)
375 Queue.insert(iter);
376 }
377 }
378 assert(!InnerBlocks.empty());
379
380 for (const auto &Curr : InnerBlocks) {
381 for (const auto &iter : Curr->BranchesOut) {
382 Block *Possible = iter.first;
383 if (!contains(InnerBlocks, Possible))
384 NextEntries.insert(Possible);
385 }
386 }
387
388 LoopShape *Loop = new LoopShape();
389 Notice(Loop);
390
391 // Solipsize the loop, replacing with break/continue and marking branches
392 // as Processed (will not affect later calculations)
393 // A. Branches to the loop entries become a continue to this shape
394 for (const auto &iter : Entries)
395 Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
396 // B. Branches to outside the loop (a next entry) become breaks on this
397 // shape
398 for (const auto &iter : NextEntries)
399 Solipsize(iter, Branch::Break, Loop, InnerBlocks);
400 // Finish up
401 Shape *Inner = Process(InnerBlocks, Entries, nullptr);
402 Loop->Inner = Inner;
403 return Loop;
404 }
405
406 // For each entry, find the independent group reachable by it. The
407 // independent group is the entry itself, plus all the blocks it can
408 // reach that cannot be directly reached by another entry. Note that we
409 // ignore directly reaching the entry itself by another entry.
410 // @param Ignore - previous blocks that are irrelevant
411 void FindIndependentGroups(BlockSet &Entries,
412 BlockBlockSetMap &IndependentGroups,
413 BlockSet *Ignore = nullptr) {
414 typedef std::map<Block *, Block *> BlockBlockMap;
415
416 struct HelperClass {
417 BlockBlockSetMap &IndependentGroups;
418 BlockBlockMap Ownership; // For each block, which entry it belongs to.
419 // We have reached it from there.
420
421 HelperClass(BlockBlockSetMap &IndependentGroupsInit)
422 : IndependentGroups(IndependentGroupsInit) {}
423 void InvalidateWithChildren(Block *New) {
424 // Being in the list means you need to be invalidated
425 BlockList ToInvalidate;
426 ToInvalidate.push_back(New);
427 while (!ToInvalidate.empty()) {
428 Block *Invalidatee = ToInvalidate.front();
429 ToInvalidate.pop_front();
430 Block *Owner = Ownership[Invalidatee];
431 // Owner may have been invalidated, do not add to
432 // IndependentGroups!
433 if (contains(IndependentGroups, Owner))
434 IndependentGroups[Owner].remove(Invalidatee);
435 if (Ownership[Invalidatee]) { // may have been seen before and
436 // invalidated already
437 Ownership[Invalidatee] = nullptr;
438 for (const auto &iter : Invalidatee->BranchesOut) {
439 Block *Target = iter.first;
440 BlockBlockMap::iterator Known = Ownership.find(Target);
441 if (Known != Ownership.end()) {
442 Block *TargetOwner = Known->second;
443 if (TargetOwner)
444 ToInvalidate.push_back(Target);
445 }
446 }
447 }
448 }
449 }
450 };
451 HelperClass Helper(IndependentGroups);
452
453 // We flow out from each of the entries, simultaneously.
454 // When we reach a new block, we add it as belonging to the one we got to
455 // it from.
456 // If we reach a new block that is already marked as belonging to someone,
457 // it is reachable by two entries and is not valid for any of them.
458 // Remove it and all it can reach that have been visited.
459
460 // Being in the queue means we just added this item, and
461 // we need to add its children
462 BlockList Queue;
463 for (const auto &Entry : Entries) {
464 Helper.Ownership[Entry] = Entry;
465 IndependentGroups[Entry].insert(Entry);
466 Queue.push_back(Entry);
467 }
468 while (!Queue.empty()) {
469 Block *Curr = Queue.front();
470 Queue.pop_front();
471 Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
472 // map if we are in the queue
473 if (!Owner)
474 continue; // we have been invalidated meanwhile after being reached
475 // from two entries
476 // Add all children
477 for (const auto &iter : Curr->BranchesOut) {
478 Block *New = iter.first;
479 BlockBlockMap::iterator Known = Helper.Ownership.find(New);
480 if (Known == Helper.Ownership.end()) {
481 // New node. Add it, and put it in the queue
482 Helper.Ownership[New] = Owner;
483 IndependentGroups[Owner].insert(New);
484 Queue.push_back(New);
485 continue;
486 }
487 Block *NewOwner = Known->second;
488 if (!NewOwner)
489 continue; // We reached an invalidated node
490 if (NewOwner != Owner)
491 // Invalidate this and all reachable that we have seen - we reached
492 // this from two locations
493 Helper.InvalidateWithChildren(New);
494 // otherwise, we have the same owner, so do nothing
495 }
496 }
497
498 // Having processed all the interesting blocks, we remain with just one
499 // potential issue:
500 // If a->b, and a was invalidated, but then b was later reached by
501 // someone else, we must invalidate b. To check for this, we go over all
502 // elements in the independent groups, if an element has a parent which
503 // does *not* have the same owner, we/ must remove it and all its
504 // children.
505
506 for (const auto &iter : Entries) {
507 BlockSet &CurrGroup = IndependentGroups[iter];
508 BlockList ToInvalidate;
509 for (const auto &iter : CurrGroup) {
510 Block *Child = iter;
511 for (const auto &iter : Child->BranchesIn) {
512 Block *Parent = iter;
513 if (Ignore && contains(*Ignore, Parent))
514 continue;
515 if (Helper.Ownership[Parent] != Helper.Ownership[Child])
516 ToInvalidate.push_back(Child);
517 }
518 }
519 while (!ToInvalidate.empty()) {
520 Block *Invalidatee = ToInvalidate.front();
521 ToInvalidate.pop_front();
522 Helper.InvalidateWithChildren(Invalidatee);
523 }
524 }
525
526 // Remove empty groups
527 for (const auto &iter : Entries)
528 if (IndependentGroups[iter].empty())
529 IndependentGroups.erase(iter);
530 }
531
532 Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
533 BlockBlockSetMap &IndependentGroups, Shape *Prev,
534 BlockSet &NextEntries) {
535 bool Fused = isa<SimpleShape>(Prev);
536 MultipleShape *Multiple = new MultipleShape();
537 Notice(Multiple);
538 BlockSet CurrEntries;
539 for (auto &iter : IndependentGroups) {
540 Block *CurrEntry = iter.first;
541 BlockSet &CurrBlocks = iter.second;
542 // Create inner block
543 CurrEntries.clear();
544 CurrEntries.insert(CurrEntry);
545 for (const auto &CurrInner : CurrBlocks) {
546 // Remove the block from the remaining blocks
547 Blocks.remove(CurrInner);
548 // Find new next entries and fix branches to them
549 for (auto iter = CurrInner->BranchesOut.begin();
550 iter != CurrInner->BranchesOut.end();) {
551 Block *CurrTarget = iter->first;
552 auto Next = iter;
553 Next++;
554 if (!contains(CurrBlocks, CurrTarget)) {
555 NextEntries.insert(CurrTarget);
556 Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
557 }
558 iter = Next; // increment carefully because Solipsize can remove us
559 }
560 }
561 Multiple->InnerMap[CurrEntry->Id] =
562 Process(CurrBlocks, CurrEntries, nullptr);
563 // If we are not fused, then our entries will actually be checked
564 if (!Fused)
565 CurrEntry->IsCheckedMultipleEntry = true;
566 }
567 // Add entries not handled as next entries, they are deferred
568 for (const auto &Entry : Entries)
569 if (!contains(IndependentGroups, Entry))
570 NextEntries.insert(Entry);
571 // The multiple has been created, we can decide how to implement it
572 if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
573 Multiple->UseSwitch = true;
574 Multiple->Breaks++; // switch captures breaks
575 }
576 return Multiple;
577 }
578
579 // Main function.
580 // Process a set of blocks with specified entries, returns a shape
581 // The Make* functions receive a NextEntries. If they fill it with data,
582 // those are the entries for the ->Next block on them, and the blocks
583 // are what remains in Blocks (which Make* modify). In this way
584 // we avoid recursing on Next (imagine a long chain of Simples, if we
585 // recursed we could blow the stack).
586 Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
587 BlockSet *Entries = &InitialEntries;
588 BlockSet TempEntries[2];
589 int CurrTempIndex = 0;
590 BlockSet *NextEntries;
591 Shape *Ret = nullptr;
592
593 auto Make = [&](Shape *Temp) {
594 if (Prev)
595 Prev->Next = Temp;
596 if (!Ret)
597 Ret = Temp;
598 Prev = Temp;
599 Entries = NextEntries;
600 };
601
602 while (1) {
603 CurrTempIndex = 1 - CurrTempIndex;
604 NextEntries = &TempEntries[CurrTempIndex];
605 NextEntries->clear();
606
607 if (Entries->empty())
608 return Ret;
609 if (Entries->size() == 1) {
610 Block *Curr = *(Entries->begin());
611 if (Curr->BranchesIn.empty()) {
612 // One entry, no looping ==> Simple
613 Make(MakeSimple(Blocks, Curr, *NextEntries));
614 if (NextEntries->empty())
615 return Ret;
616 continue;
617 }
618 // One entry, looping ==> Loop
619 Make(MakeLoop(Blocks, *Entries, *NextEntries));
620 if (NextEntries->empty())
621 return Ret;
622 continue;
623 }
624
625 // More than one entry, try to eliminate through a Multiple groups of
626 // independent blocks from an entry/ies. It is important to remove
627 // through multiples as opposed to looping since the former is more
628 // performant.
629 BlockBlockSetMap IndependentGroups;
630 FindIndependentGroups(*Entries, IndependentGroups);
631
632 if (!IndependentGroups.empty()) {
633 // We can handle a group in a multiple if its entry cannot be reached
634 // by another group.
635 // Note that it might be reachable by itself - a loop. But that is
636 // fine, we will create a loop inside the multiple block (which
637 // is the performant order to do it).
638 for (auto iter = IndependentGroups.begin();
639 iter != IndependentGroups.end();) {
640 Block *Entry = iter->first;
641 BlockSet &Group = iter->second;
642 auto curr = iter++; // iterate carefully, we may delete
643 for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
644 iterBranch != Entry->BranchesIn.end(); iterBranch++) {
645 Block *Origin = *iterBranch;
646 if (!contains(Group, Origin)) {
647 // Reached from outside the group, so we cannot handle this
648 IndependentGroups.erase(curr);
649 break;
650 }
651 }
652 }
653
654 // As an optimization, if we have 2 independent groups, and one is a
655 // small dead end, we can handle only that dead end.
656 // The other then becomes a Next - without nesting in the code and
657 // recursion in the analysis.
658 // TODO: if the larger is the only dead end, handle that too
659 // TODO: handle >2 groups
660 // TODO: handle not just dead ends, but also that do not branch to the
661 // NextEntries. However, must be careful there since we create a
662 // Next, and that Next can prevent eliminating a break (since we no
663 // longer naturally reach the same place), which may necessitate a
664 // one-time loop, which makes the unnesting pointless.
665 if (IndependentGroups.size() == 2) {
666 // Find the smaller one
667 auto iter = IndependentGroups.begin();
668 Block *SmallEntry = iter->first;
669 auto SmallSize = iter->second.size();
670 iter++;
671 Block *LargeEntry = iter->first;
672 auto LargeSize = iter->second.size();
673 if (SmallSize != LargeSize) { // ignore the case where they are
674 // identical - keep things symmetrical
675 // there
676 if (SmallSize > LargeSize) {
677 Block *Temp = SmallEntry;
678 SmallEntry = LargeEntry;
679 LargeEntry = Temp; // Note: we did not flip the Sizes too, they
680 // are now invalid. TODO: use the smaller
681 // size as a limit?
682 }
683 // Check if dead end
684 bool DeadEnd = true;
685 BlockSet &SmallGroup = IndependentGroups[SmallEntry];
686 for (const auto &Curr : SmallGroup) {
687 for (const auto &iter : Curr->BranchesOut) {
688 Block *Target = iter.first;
689 if (!contains(SmallGroup, Target)) {
690 DeadEnd = false;
691 break;
692 }
693 }
694 if (!DeadEnd)
695 break;
696 }
697 if (DeadEnd)
698 IndependentGroups.erase(LargeEntry);
699 }
700 }
701
702 if (!IndependentGroups.empty())
703 // Some groups removable ==> Multiple
704 Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
705 *NextEntries));
706 if (NextEntries->empty())
707 return Ret;
708 continue;
709 }
710 // No independent groups, must be loopable ==> Loop
711 Make(MakeLoop(Blocks, *Entries, *NextEntries));
712 if (NextEntries->empty())
713 return Ret;
714 continue;
715 }
716 }
717 };
718
719 // Main
720
721 BlockSet AllBlocks;
722 for (const auto &Curr : Pre.Live) {
723 AllBlocks.insert(Curr);
724 }
725
726 BlockSet Entries;
727 Entries.insert(Entry);
728 Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
729 assert(Root);
730
731 ///
732 /// Relooper post-optimizer
733 ///
734 struct PostOptimizer {
735 RelooperAlgorithm *Parent;
736 std::stack<Shape *> LoopStack;
737
738 PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
739
740 void ShapeSwitch(Shape* var,
741 std::function<void (SimpleShape*)> simple,
742 std::function<void (MultipleShape*)> multiple,
743 std::function<void (LoopShape*)> loop) {
744 switch (var->getKind()) {
745 case Shape::SK_Simple: {
746 simple(cast<SimpleShape>(var));
747 break;
748 }
749 case Shape::SK_Multiple: {
750 multiple(cast<MultipleShape>(var));
751 break;
752 }
753 case Shape::SK_Loop: {
754 loop(cast<LoopShape>(var));
755 break;
756 }
757 }
758 }
759
760 // Find the blocks that natural control flow can get us directly to, or
761 // through a multiple that we ignore
762 void FollowNaturalFlow(Shape *S, BlockSet &Out) {
763 ShapeSwitch(S, [&](SimpleShape* Simple) {
764 Out.insert(Simple->Inner);
765 }, [&](MultipleShape* Multiple) {
766 for (const auto &iter : Multiple->InnerMap) {
767 FollowNaturalFlow(iter.second, Out);
768 }
769 FollowNaturalFlow(Multiple->Next, Out);
770 }, [&](LoopShape* Loop) {
771 FollowNaturalFlow(Loop->Inner, Out);
772 });
773 }
774
775 void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
776 if (Root->Next) {
777 Root->Natural = Root->Next;
778 FindNaturals(Root->Next, Otherwise);
779 } else {
780 Root->Natural = Otherwise;
781 }
782
783 ShapeSwitch(Root, [](SimpleShape* Simple) {
784 }, [&](MultipleShape* Multiple) {
785 for (const auto &iter : Multiple->InnerMap) {
786 FindNaturals(iter.second, Root->Natural);
787 }
788 }, [&](LoopShape* Loop){
789 FindNaturals(Loop->Inner, Loop->Inner);
790 });
791 }
792
793 // Remove unneeded breaks and continues.
794 // A flow operation is trivially unneeded if the shape we naturally get to
795 // by normal code execution is the same as the flow forces us to.
796 void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
797 LoopShape *LastLoop = nullptr,
798 unsigned Depth = 0) {
799 BlockSet NaturalBlocks;
800 FollowNaturalFlow(Natural, NaturalBlocks);
801 Shape *Next = Root;
802 while (Next) {
803 Root = Next;
804 Next = nullptr;
805 ShapeSwitch(
806 Root,
807 [&](SimpleShape* Simple) {
808 if (Simple->Inner->BranchVar)
809 LastLoop =
810 nullptr; // a switch clears out the loop (TODO: only for
811 // breaks, not continue)
812
813 if (Simple->Next) {
814 if (!Simple->Inner->BranchVar &&
815 Simple->Inner->ProcessedBranchesOut.size() == 2 &&
816 Depth < RelooperNestingLimit) {
817 // If there is a next block, we already know at Simple
818 // creation time to make direct branches, and we can do
819 // nothing more in general. But, we try to optimize the
820 // case of a break and a direct: This would normally be
821 // if (break?) { break; } ..
822 // but if we make sure to nest the else, we can save the
823 // break,
824 // if (!break?) { .. }
825 // This is also better because the more canonical nested
826 // form is easier to further optimize later. The
827 // downside is more nesting, which adds to size in builds with
828 // whitespace.
829 // Note that we avoid switches, as it complicates control flow
830 // and is not relevant for the common case we optimize here.
831 bool Found = false;
832 bool Abort = false;
833 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
834 Block *Target = iter.first;
835 Branch *Details = iter.second.get();
836 if (Details->Type == Branch::Break) {
837 Found = true;
838 if (!contains(NaturalBlocks, Target))
839 Abort = true;
840 } else if (Details->Type != Branch::Direct)
841 Abort = true;
842 }
843 if (Found && !Abort) {
844 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
845 Branch *Details = iter.second.get();
846 if (Details->Type == Branch::Break) {
847 Details->Type = Branch::Direct;
848 if (MultipleShape *Multiple =
849 dyn_cast<MultipleShape>(Details->Ancestor))
850 Multiple->Breaks--;
851 } else {
852 assert(Details->Type == Branch::Direct);
853 Details->Type = Branch::Nested;
854 }
855 }
856 }
857 Depth++; // this optimization increases depth, for us and all
858 // our next chain (i.e., until this call returns)
859 }
860 Next = Simple->Next;
861 } else {
862 // If there is no next then Natural is where we will
863 // go to by doing nothing, so we can potentially optimize some
864 // branches to direct.
865 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
866 Block *Target = iter.first;
867 Branch *Details = iter.second.get();
868 if (Details->Type != Branch::Direct &&
869 contains(NaturalBlocks,
870 Target)) { // note: cannot handle split blocks
871 Details->Type = Branch::Direct;
872 if (MultipleShape *Multiple =
873 dyn_cast<MultipleShape>(Details->Ancestor))
874 Multiple->Breaks--;
875 } else if (Details->Type == Branch::Break && LastLoop &&
876 LastLoop->Natural == Details->Ancestor->Natural) {
877 // it is important to simplify breaks, as simpler breaks
878 // enable other optimizations
879 Details->Labeled = false;
880 if (MultipleShape *Multiple =
881 dyn_cast<MultipleShape>(Details->Ancestor))
882 Multiple->Breaks--;
883 }
884 }
885 }
886 }, [&](MultipleShape* Multiple)
887 {
888 for (const auto &iter : Multiple->InnerMap) {
889 RemoveUnneededFlows(iter.second, Multiple->Next,
890 Multiple->Breaks ? nullptr : LastLoop,
891 Depth + 1);
892 }
893 Next = Multiple->Next;
894 }, [&](LoopShape* Loop)
895 {
896 RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
897 Next = Loop->Next;
898 });
899 }
900 }
901
902 // After we know which loops exist, we can calculate which need to be
903 // labeled
904 void FindLabeledLoops(Shape *Root) {
905 Shape *Next = Root;
906 while (Next) {
907 Root = Next;
908 Next = nullptr;
909
910 ShapeSwitch(
911 Root,
912 [&](SimpleShape *Simple) {
913 MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
914 // If we are fusing a Multiple with a loop into this Simple, then
915 // visit it now
916 if (Fused && Fused->Breaks)
917 LoopStack.push(Fused);
918 if (Simple->Inner->BranchVar)
919 LoopStack.push(nullptr); // a switch means breaks are now useless,
920 // push a dummy
921 if (Fused) {
922 if (Fused->UseSwitch)
923 LoopStack.push(nullptr); // a switch means breaks are now
924 // useless, push a dummy
925 for (const auto &iter : Fused->InnerMap) {
926 FindLabeledLoops(iter.second);
927 }
928 }
929 for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
930 Branch *Details = iter.second.get();
931 if (Details->Type == Branch::Break ||
932 Details->Type == Branch::Continue) {
933 assert(!LoopStack.empty());
934 if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
935 if (MultipleShape *Multiple =
936 dyn_cast<MultipleShape>(Details->Ancestor)) {
937 Multiple->Labeled = true;
938 } else {
939 LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
940 Loop->Labeled = true;
941 }
942 } else {
943 Details->Labeled = false;
944 }
945 }
946 if (Fused && Fused->UseSwitch)
947 LoopStack.pop();
948 if (Simple->Inner->BranchVar)
949 LoopStack.pop();
950 if (Fused && Fused->Breaks)
951 LoopStack.pop();
952 if (Fused)
953 Next = Fused->Next;
954 else
955 Next = Root->Next;
956 }
957 }
958 , [&](MultipleShape* Multiple) {
959 if (Multiple->Breaks)
960 LoopStack.push(Multiple);
961 for (const auto &iter : Multiple->InnerMap)
962 FindLabeledLoops(iter.second);
963 if (Multiple->Breaks)
964 LoopStack.pop();
965 Next = Root->Next;
966 }
967 , [&](LoopShape* Loop) {
968 LoopStack.push(Loop);
969 FindLabeledLoops(Loop->Inner);
970 LoopStack.pop();
971 Next = Root->Next;
972 });
973 }
974 }
975
976 void Process(Shape * Root) {
977 FindNaturals(Root);
978 RemoveUnneededFlows(Root);
979 FindLabeledLoops(Root);
980 }
981 };
982
983 PostOptimizer(this).Process(Root);
984 }
985