1 //===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements a Union-find algorithm to compute Minimum Spanning Tree 11 // for a given CFG. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 16 #define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 17 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/BranchProbabilityInfo.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/Support/BranchProbability.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27 #include <utility> 28 #include <vector> 29 30 #define DEBUG_TYPE "cfgmst" 31 32 namespace llvm { 33 34 /// An union-find based Minimum Spanning Tree for CFG 35 /// 36 /// Implements a Union-find algorithm to compute Minimum Spanning Tree 37 /// for a given CFG. 38 template <class Edge, class BBInfo> class CFGMST { 39 public: 40 Function &F; 41 42 // Store all the edges in CFG. It may contain some stale edges 43 // when Removed is set. 44 std::vector<std::unique_ptr<Edge>> AllEdges; 45 46 // This map records the auxiliary information for each BB. 47 DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; 48 49 // Whehter the function has an exit block with no successors. 50 // (For function with an infinite loop, this block may be absent) 51 bool ExitBlockFound = false; 52 53 // Find the root group of the G and compress the path from G to the root. findAndCompressGroup(BBInfo * G)54 BBInfo *findAndCompressGroup(BBInfo *G) { 55 if (G->Group != G) 56 G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group)); 57 return static_cast<BBInfo *>(G->Group); 58 } 59 60 // Union BB1 and BB2 into the same group and return true. 61 // Returns false if BB1 and BB2 are already in the same group. unionGroups(const BasicBlock * BB1,const BasicBlock * BB2)62 bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { 63 BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); 64 BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); 65 66 if (BB1G == BB2G) 67 return false; 68 69 // Make the smaller rank tree a direct child or the root of high rank tree. 70 if (BB1G->Rank < BB2G->Rank) 71 BB1G->Group = BB2G; 72 else { 73 BB2G->Group = BB1G; 74 // If the ranks are the same, increment root of one tree by one. 75 if (BB1G->Rank == BB2G->Rank) 76 BB1G->Rank++; 77 } 78 return true; 79 } 80 81 // Give BB, return the auxiliary information. getBBInfo(const BasicBlock * BB)82 BBInfo &getBBInfo(const BasicBlock *BB) const { 83 auto It = BBInfos.find(BB); 84 assert(It->second.get() != nullptr); 85 return *It->second.get(); 86 } 87 88 // Give BB, return the auxiliary information if it's available. findBBInfo(const BasicBlock * BB)89 BBInfo *findBBInfo(const BasicBlock *BB) const { 90 auto It = BBInfos.find(BB); 91 if (It == BBInfos.end()) 92 return nullptr; 93 return It->second.get(); 94 } 95 96 // Traverse the CFG using a stack. Find all the edges and assign the weight. 97 // Edges with large weight will be put into MST first so they are less likely 98 // to be instrumented. buildEdges()99 void buildEdges() { 100 LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); 101 102 const BasicBlock *Entry = &(F.getEntryBlock()); 103 uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); 104 Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, 105 *ExitOutgoing = nullptr, *ExitIncoming = nullptr; 106 uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; 107 108 // Add a fake edge to the entry. 109 EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); 110 LLVM_DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName() 111 << " w = " << EntryWeight << "\n"); 112 113 // Special handling for single BB functions. 114 if (succ_empty(Entry)) { 115 addEdge(Entry, nullptr, EntryWeight); 116 return; 117 } 118 119 static const uint32_t CriticalEdgeMultiplier = 1000; 120 121 for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 122 TerminatorInst *TI = BB->getTerminator(); 123 uint64_t BBWeight = 124 (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); 125 uint64_t Weight = 2; 126 if (int successors = TI->getNumSuccessors()) { 127 for (int i = 0; i != successors; ++i) { 128 BasicBlock *TargetBB = TI->getSuccessor(i); 129 bool Critical = isCriticalEdge(TI, i); 130 uint64_t scaleFactor = BBWeight; 131 if (Critical) { 132 if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) 133 scaleFactor *= CriticalEdgeMultiplier; 134 else 135 scaleFactor = UINT64_MAX; 136 } 137 if (BPI != nullptr) 138 Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); 139 auto *E = &addEdge(&*BB, TargetBB, Weight); 140 E->IsCritical = Critical; 141 LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " 142 << TargetBB->getName() << " w=" << Weight << "\n"); 143 144 // Keep track of entry/exit edges: 145 if (&*BB == Entry) { 146 if (Weight > MaxEntryOutWeight) { 147 MaxEntryOutWeight = Weight; 148 EntryOutgoing = E; 149 } 150 } 151 152 auto *TargetTI = TargetBB->getTerminator(); 153 if (TargetTI && !TargetTI->getNumSuccessors()) { 154 if (Weight > MaxExitInWeight) { 155 MaxExitInWeight = Weight; 156 ExitIncoming = E; 157 } 158 } 159 } 160 } else { 161 ExitBlockFound = true; 162 Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); 163 if (BBWeight > MaxExitOutWeight) { 164 MaxExitOutWeight = BBWeight; 165 ExitOutgoing = ExitO; 166 } 167 LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to fake exit" 168 << " w = " << BBWeight << "\n"); 169 } 170 } 171 172 // Entry/exit edge adjustment heurisitic: 173 // prefer instrumenting entry edge over exit edge 174 // if possible. Those exit edges may never have a chance to be 175 // executed (for instance the program is an event handling loop) 176 // before the profile is asynchronously dumped. 177 // 178 // If EntryIncoming and ExitOutgoing has similar weight, make sure 179 // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing 180 // and ExitIncoming has similar weight, make sure ExitIncoming becomes 181 // the min-edge. 182 uint64_t EntryInWeight = EntryWeight; 183 184 if (EntryInWeight >= MaxExitOutWeight && 185 EntryInWeight * 2 < MaxExitOutWeight * 3) { 186 EntryIncoming->Weight = MaxExitOutWeight; 187 ExitOutgoing->Weight = EntryInWeight + 1; 188 } 189 190 if (MaxEntryOutWeight >= MaxExitInWeight && 191 MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { 192 EntryOutgoing->Weight = MaxExitInWeight; 193 ExitIncoming->Weight = MaxEntryOutWeight + 1; 194 } 195 } 196 197 // Sort CFG edges based on its weight. sortEdgesByWeight()198 void sortEdgesByWeight() { 199 std::stable_sort(AllEdges.begin(), AllEdges.end(), 200 [](const std::unique_ptr<Edge> &Edge1, 201 const std::unique_ptr<Edge> &Edge2) { 202 return Edge1->Weight > Edge2->Weight; 203 }); 204 } 205 206 // Traverse all the edges and compute the Minimum Weight Spanning Tree 207 // using union-find algorithm. computeMinimumSpanningTree()208 void computeMinimumSpanningTree() { 209 // First, put all the critical edge with landing-pad as the Dest to MST. 210 // This works around the insufficient support of critical edges split 211 // when destination BB is a landing pad. 212 for (auto &Ei : AllEdges) { 213 if (Ei->Removed) 214 continue; 215 if (Ei->IsCritical) { 216 if (Ei->DestBB && Ei->DestBB->isLandingPad()) { 217 if (unionGroups(Ei->SrcBB, Ei->DestBB)) 218 Ei->InMST = true; 219 } 220 } 221 } 222 223 for (auto &Ei : AllEdges) { 224 if (Ei->Removed) 225 continue; 226 // If we detect infinite loops, force 227 // instrumenting the entry edge: 228 if (!ExitBlockFound && Ei->SrcBB == nullptr) 229 continue; 230 if (unionGroups(Ei->SrcBB, Ei->DestBB)) 231 Ei->InMST = true; 232 } 233 } 234 235 // Dump the Debug information about the instrumentation. dumpEdges(raw_ostream & OS,const Twine & Message)236 void dumpEdges(raw_ostream &OS, const Twine &Message) const { 237 if (!Message.str().empty()) 238 OS << Message << "\n"; 239 OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; 240 for (auto &BI : BBInfos) { 241 const BasicBlock *BB = BI.first; 242 OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " 243 << BI.second->infoString() << "\n"; 244 } 245 246 OS << " Number of Edges: " << AllEdges.size() 247 << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; 248 uint32_t Count = 0; 249 for (auto &EI : AllEdges) 250 OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" 251 << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; 252 } 253 254 // Add an edge to AllEdges with weight W. addEdge(const BasicBlock * Src,const BasicBlock * Dest,uint64_t W)255 Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { 256 uint32_t Index = BBInfos.size(); 257 auto Iter = BBInfos.end(); 258 bool Inserted; 259 std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); 260 if (Inserted) { 261 // Newly inserted, update the real info. 262 Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); 263 Index++; 264 } 265 std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); 266 if (Inserted) 267 // Newly inserted, update the real info. 268 Iter->second = std::move(llvm::make_unique<BBInfo>(Index)); 269 AllEdges.emplace_back(new Edge(Src, Dest, W)); 270 return *AllEdges.back(); 271 } 272 273 BranchProbabilityInfo *BPI; 274 BlockFrequencyInfo *BFI; 275 276 public: 277 CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, 278 BlockFrequencyInfo *BFI_ = nullptr) F(Func)279 : F(Func), BPI(BPI_), BFI(BFI_) { 280 buildEdges(); 281 sortEdgesByWeight(); 282 computeMinimumSpanningTree(); 283 } 284 }; 285 286 } // end namespace llvm 287 288 #undef DEBUG_TYPE // "cfgmst" 289 290 #endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 291