• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a Union-find algorithm to compute Minimum Spanning Tree
10 // for a given CFG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
15 #define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
16 
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/BranchProbabilityInfo.h"
21 #include "llvm/Analysis/CFG.h"
22 #include "llvm/Support/BranchProbability.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
26 #include <utility>
27 #include <vector>
28 
29 #define DEBUG_TYPE "cfgmst"
30 
31 namespace llvm {
32 
33 /// A union-find based Minimum Spanning Tree for CFG
34 ///
35 /// Implements a Union-find algorithm to compute Minimum Spanning Tree
36 /// for a given CFG.
37 template <class Edge, class BBInfo> class CFGMST {
38 public:
39   Function &F;
40 
41   // Store all the edges in CFG. It may contain some stale edges
42   // when Removed is set.
43   std::vector<std::unique_ptr<Edge>> AllEdges;
44 
45   // This map records the auxiliary information for each BB.
46   DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos;
47 
48   // Whether the function has an exit block with no successors.
49   // (For function with an infinite loop, this block may be absent)
50   bool ExitBlockFound = false;
51 
52   // Find the root group of the G and compress the path from G to the root.
findAndCompressGroup(BBInfo * G)53   BBInfo *findAndCompressGroup(BBInfo *G) {
54     if (G->Group != G)
55       G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group));
56     return static_cast<BBInfo *>(G->Group);
57   }
58 
59   // Union BB1 and BB2 into the same group and return true.
60   // Returns false if BB1 and BB2 are already in the same group.
unionGroups(const BasicBlock * BB1,const BasicBlock * BB2)61   bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
62     BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
63     BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
64 
65     if (BB1G == BB2G)
66       return false;
67 
68     // Make the smaller rank tree a direct child or the root of high rank tree.
69     if (BB1G->Rank < BB2G->Rank)
70       BB1G->Group = BB2G;
71     else {
72       BB2G->Group = BB1G;
73       // If the ranks are the same, increment root of one tree by one.
74       if (BB1G->Rank == BB2G->Rank)
75         BB1G->Rank++;
76     }
77     return true;
78   }
79 
80   // Give BB, return the auxiliary information.
getBBInfo(const BasicBlock * BB)81   BBInfo &getBBInfo(const BasicBlock *BB) const {
82     auto It = BBInfos.find(BB);
83     assert(It->second.get() != nullptr);
84     return *It->second.get();
85   }
86 
87   // Give BB, return the auxiliary information if it's available.
findBBInfo(const BasicBlock * BB)88   BBInfo *findBBInfo(const BasicBlock *BB) const {
89     auto It = BBInfos.find(BB);
90     if (It == BBInfos.end())
91       return nullptr;
92     return It->second.get();
93   }
94 
95   // Traverse the CFG using a stack. Find all the edges and assign the weight.
96   // Edges with large weight will be put into MST first so they are less likely
97   // to be instrumented.
buildEdges()98   void buildEdges() {
99     LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
100 
101     const BasicBlock *Entry = &(F.getEntryBlock());
102     uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
103     Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr,
104         *ExitOutgoing = nullptr, *ExitIncoming = nullptr;
105     uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0;
106 
107     // Add a fake edge to the entry.
108     EntryIncoming = &addEdge(nullptr, Entry, EntryWeight);
109     LLVM_DEBUG(dbgs() << "  Edge: from fake node to " << Entry->getName()
110                       << " w = " << EntryWeight << "\n");
111 
112     // Special handling for single BB functions.
113     if (succ_empty(Entry)) {
114       addEdge(Entry, nullptr, EntryWeight);
115       return;
116     }
117 
118     static const uint32_t CriticalEdgeMultiplier = 1000;
119 
120     for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
121       Instruction *TI = BB->getTerminator();
122       uint64_t BBWeight =
123           (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
124       uint64_t Weight = 2;
125       if (int successors = TI->getNumSuccessors()) {
126         for (int i = 0; i != successors; ++i) {
127           BasicBlock *TargetBB = TI->getSuccessor(i);
128           bool Critical = isCriticalEdge(TI, i);
129           uint64_t scaleFactor = BBWeight;
130           if (Critical) {
131             if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
132               scaleFactor *= CriticalEdgeMultiplier;
133             else
134               scaleFactor = UINT64_MAX;
135           }
136           if (BPI != nullptr)
137             Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
138           auto *E = &addEdge(&*BB, TargetBB, Weight);
139           E->IsCritical = Critical;
140           LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to "
141                             << TargetBB->getName() << "  w=" << Weight << "\n");
142 
143           // Keep track of entry/exit edges:
144           if (&*BB == Entry) {
145             if (Weight > MaxEntryOutWeight) {
146               MaxEntryOutWeight = Weight;
147               EntryOutgoing = E;
148             }
149           }
150 
151           auto *TargetTI = TargetBB->getTerminator();
152           if (TargetTI && !TargetTI->getNumSuccessors()) {
153             if (Weight > MaxExitInWeight) {
154               MaxExitInWeight = Weight;
155               ExitIncoming = E;
156             }
157           }
158         }
159       } else {
160         ExitBlockFound = true;
161         Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight);
162         if (BBWeight > MaxExitOutWeight) {
163           MaxExitOutWeight = BBWeight;
164           ExitOutgoing = ExitO;
165         }
166         LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to fake exit"
167                           << " w = " << BBWeight << "\n");
168       }
169     }
170 
171     // Entry/exit edge adjustment heurisitic:
172     // prefer instrumenting entry edge over exit edge
173     // if possible. Those exit edges may never have a chance to be
174     // executed (for instance the program is an event handling loop)
175     // before the profile is asynchronously dumped.
176     //
177     // If EntryIncoming and ExitOutgoing has similar weight, make sure
178     // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing
179     // and ExitIncoming has similar weight, make sure ExitIncoming becomes
180     // the min-edge.
181     uint64_t EntryInWeight = EntryWeight;
182 
183     if (EntryInWeight >= MaxExitOutWeight &&
184         EntryInWeight * 2 < MaxExitOutWeight * 3) {
185       EntryIncoming->Weight = MaxExitOutWeight;
186       ExitOutgoing->Weight = EntryInWeight + 1;
187     }
188 
189     if (MaxEntryOutWeight >= MaxExitInWeight &&
190         MaxEntryOutWeight * 2 < MaxExitInWeight * 3) {
191       EntryOutgoing->Weight = MaxExitInWeight;
192       ExitIncoming->Weight = MaxEntryOutWeight + 1;
193     }
194   }
195 
196   // Sort CFG edges based on its weight.
sortEdgesByWeight()197   void sortEdgesByWeight() {
198     llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
199                                    const std::unique_ptr<Edge> &Edge2) {
200       return Edge1->Weight > Edge2->Weight;
201     });
202   }
203 
204   // Traverse all the edges and compute the Minimum Weight Spanning Tree
205   // using union-find algorithm.
computeMinimumSpanningTree()206   void computeMinimumSpanningTree() {
207     // First, put all the critical edge with landing-pad as the Dest to MST.
208     // This works around the insufficient support of critical edges split
209     // when destination BB is a landing pad.
210     for (auto &Ei : AllEdges) {
211       if (Ei->Removed)
212         continue;
213       if (Ei->IsCritical) {
214         if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
215           if (unionGroups(Ei->SrcBB, Ei->DestBB))
216             Ei->InMST = true;
217         }
218       }
219     }
220 
221     for (auto &Ei : AllEdges) {
222       if (Ei->Removed)
223         continue;
224       // If we detect infinite loops, force
225       // instrumenting the entry edge:
226       if (!ExitBlockFound && Ei->SrcBB == nullptr)
227         continue;
228       if (unionGroups(Ei->SrcBB, Ei->DestBB))
229         Ei->InMST = true;
230     }
231   }
232 
233   // Dump the Debug information about the instrumentation.
dumpEdges(raw_ostream & OS,const Twine & Message)234   void dumpEdges(raw_ostream &OS, const Twine &Message) const {
235     if (!Message.str().empty())
236       OS << Message << "\n";
237     OS << "  Number of Basic Blocks: " << BBInfos.size() << "\n";
238     for (auto &BI : BBInfos) {
239       const BasicBlock *BB = BI.first;
240       OS << "  BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << "  "
241          << BI.second->infoString() << "\n";
242     }
243 
244     OS << "  Number of Edges: " << AllEdges.size()
245        << " (*: Instrument, C: CriticalEdge, -: Removed)\n";
246     uint32_t Count = 0;
247     for (auto &EI : AllEdges)
248       OS << "  Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->"
249          << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n";
250   }
251 
252   // Add an edge to AllEdges with weight W.
addEdge(const BasicBlock * Src,const BasicBlock * Dest,uint64_t W)253   Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
254     uint32_t Index = BBInfos.size();
255     auto Iter = BBInfos.end();
256     bool Inserted;
257     std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
258     if (Inserted) {
259       // Newly inserted, update the real info.
260       Iter->second = std::move(std::make_unique<BBInfo>(Index));
261       Index++;
262     }
263     std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
264     if (Inserted)
265       // Newly inserted, update the real info.
266       Iter->second = std::move(std::make_unique<BBInfo>(Index));
267     AllEdges.emplace_back(new Edge(Src, Dest, W));
268     return *AllEdges.back();
269   }
270 
271   BranchProbabilityInfo *BPI;
272   BlockFrequencyInfo *BFI;
273 
274 public:
275   CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr,
276          BlockFrequencyInfo *BFI_ = nullptr)
F(Func)277       : F(Func), BPI(BPI_), BFI(BFI_) {
278     buildEdges();
279     sortEdgesByWeight();
280     computeMinimumSpanningTree();
281   }
282 };
283 
284 } // end namespace llvm
285 
286 #undef DEBUG_TYPE // "cfgmst"
287 
288 #endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
289