1 //===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This header provides classes for managing passes over SCCs of the call 11 /// graph. These passes form an important component of LLVM's interprocedural 12 /// optimizations. Because they operate on the SCCs of the call graph, and they 13 /// traverse the graph in post-order, they can effectively do pair-wise 14 /// interprocedural optimizations for all call edges in the program while 15 /// incrementally refining it and improving the context of these pair-wise 16 /// optimizations. At each call site edge, the callee has already been 17 /// optimized as much as is possible. This in turn allows very accurate 18 /// analysis of it for IPO. 19 /// 20 /// A secondary more general goal is to be able to isolate optimization on 21 /// unrelated parts of the IR module. This is useful to ensure our 22 /// optimizations are principled and don't miss oportunities where refinement 23 /// of one part of the module influence transformations in another part of the 24 /// module. But this is also useful if we want to parallelize the optimizations 25 /// across common large module graph shapes which tend to be very wide and have 26 /// large regions of unrelated cliques. 27 /// 28 /// To satisfy these goals, we use the LazyCallGraph which provides two graphs 29 /// nested inside each other (and built lazily from the bottom-up): the call 30 /// graph proper, and a reference graph. The reference graph is super set of 31 /// the call graph and is a conservative approximation of what could through 32 /// scalar or CGSCC transforms *become* the call graph. Using this allows us to 33 /// ensure we optimize functions prior to them being introduced into the call 34 /// graph by devirtualization or other technique, and thus ensures that 35 /// subsequent pair-wise interprocedural optimizations observe the optimized 36 /// form of these functions. The (potentially transitive) reference 37 /// reachability used by the reference graph is a conservative approximation 38 /// that still allows us to have independent regions of the graph. 39 /// 40 /// FIXME: There is one major drawback of the reference graph: in its naive 41 /// form it is quadratic because it contains a distinct edge for each 42 /// (potentially indirect) reference, even if are all through some common 43 /// global table of function pointers. This can be fixed in a number of ways 44 /// that essentially preserve enough of the normalization. While it isn't 45 /// expected to completely preclude the usability of this, it will need to be 46 /// addressed. 47 /// 48 /// 49 /// All of these issues are made substantially more complex in the face of 50 /// mutations to the call graph while optimization passes are being run. When 51 /// mutations to the call graph occur we want to achieve two different things: 52 /// 53 /// - We need to update the call graph in-flight and invalidate analyses 54 /// cached on entities in the graph. Because of the cache-based analysis 55 /// design of the pass manager, it is essential to have stable identities for 56 /// the elements of the IR that passes traverse, and to invalidate any 57 /// analyses cached on these elements as the mutations take place. 58 /// 59 /// - We want to preserve the incremental and post-order traversal of the 60 /// graph even as it is refined and mutated. This means we want optimization 61 /// to observe the most refined form of the call graph and to do so in 62 /// post-order. 63 /// 64 /// To address this, the CGSCC manager uses both worklists that can be expanded 65 /// by passes which transform the IR, and provides invalidation tests to skip 66 /// entries that become dead. This extra data is provided to every SCC pass so 67 /// that it can carefully update the manager's traversal as the call graph 68 /// mutates. 69 /// 70 /// We also provide support for running function passes within the CGSCC walk, 71 /// and there we provide automatic update of the call graph including of the 72 /// pass manager to reflect call graph changes that fall out naturally as part 73 /// of scalar transformations. 74 /// 75 /// The patterns used to ensure the goals of post-order visitation of the fully 76 /// refined graph: 77 /// 78 /// 1) Sink toward the "bottom" as the graph is refined. This means that any 79 /// iteration continues in some valid post-order sequence after the mutation 80 /// has altered the structure. 81 /// 82 /// 2) Enqueue in post-order, including the current entity. If the current 83 /// entity's shape changes, it and everything after it in post-order needs 84 /// to be visited to observe that shape. 85 /// 86 //===----------------------------------------------------------------------===// 87 88 #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H 89 #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H 90 91 #include "llvm/ADT/DenseMap.h" 92 #include "llvm/ADT/DenseSet.h" 93 #include "llvm/ADT/PriorityWorklist.h" 94 #include "llvm/ADT/STLExtras.h" 95 #include "llvm/ADT/SmallPtrSet.h" 96 #include "llvm/ADT/SmallVector.h" 97 #include "llvm/Analysis/LazyCallGraph.h" 98 #include "llvm/IR/CallSite.h" 99 #include "llvm/IR/Function.h" 100 #include "llvm/IR/InstIterator.h" 101 #include "llvm/IR/PassManager.h" 102 #include "llvm/IR/ValueHandle.h" 103 #include "llvm/Support/Debug.h" 104 #include "llvm/Support/raw_ostream.h" 105 #include <algorithm> 106 #include <cassert> 107 #include <utility> 108 109 namespace llvm { 110 111 struct CGSCCUpdateResult; 112 class Module; 113 114 // Allow debug logging in this inline function. 115 #define DEBUG_TYPE "cgscc" 116 117 /// Extern template declaration for the analysis set for this IR unit. 118 extern template class AllAnalysesOn<LazyCallGraph::SCC>; 119 120 extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; 121 122 /// The CGSCC analysis manager. 123 /// 124 /// See the documentation for the AnalysisManager template for detail 125 /// documentation. This type serves as a convenient way to refer to this 126 /// construct in the adaptors and proxies used to integrate this into the larger 127 /// pass manager infrastructure. 128 using CGSCCAnalysisManager = 129 AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; 130 131 // Explicit specialization and instantiation declarations for the pass manager. 132 // See the comments on the definition of the specialization for details on how 133 // it differs from the primary template. 134 template <> 135 PreservedAnalyses 136 PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, 137 CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC, 138 CGSCCAnalysisManager &AM, 139 LazyCallGraph &G, CGSCCUpdateResult &UR); 140 extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, 141 LazyCallGraph &, CGSCCUpdateResult &>; 142 143 /// The CGSCC pass manager. 144 /// 145 /// See the documentation for the PassManager template for details. It runs 146 /// a sequence of SCC passes over each SCC that the manager is run over. This 147 /// type serves as a convenient way to refer to this construct. 148 using CGSCCPassManager = 149 PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, 150 CGSCCUpdateResult &>; 151 152 /// An explicit specialization of the require analysis template pass. 153 template <typename AnalysisT> 154 struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager, 155 LazyCallGraph &, CGSCCUpdateResult &> 156 : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, 157 CGSCCAnalysisManager, LazyCallGraph &, 158 CGSCCUpdateResult &>> { 159 PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, 160 LazyCallGraph &CG, CGSCCUpdateResult &) { 161 (void)AM.template getResult<AnalysisT>(C, CG); 162 return PreservedAnalyses::all(); 163 } 164 }; 165 166 /// A proxy from a \c CGSCCAnalysisManager to a \c Module. 167 using CGSCCAnalysisManagerModuleProxy = 168 InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; 169 170 /// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so 171 /// it can have access to the call graph in order to walk all the SCCs when 172 /// invalidating things. 173 template <> class CGSCCAnalysisManagerModuleProxy::Result { 174 public: 175 explicit Result(CGSCCAnalysisManager &InnerAM, LazyCallGraph &G) 176 : InnerAM(&InnerAM), G(&G) {} 177 178 /// Accessor for the analysis manager. 179 CGSCCAnalysisManager &getManager() { return *InnerAM; } 180 181 /// Handler for invalidation of the Module. 182 /// 183 /// If the proxy analysis itself is preserved, then we assume that the set of 184 /// SCCs in the Module hasn't changed. Thus any pointers to SCCs in the 185 /// CGSCCAnalysisManager are still valid, and we don't need to call \c clear 186 /// on the CGSCCAnalysisManager. 187 /// 188 /// Regardless of whether this analysis is marked as preserved, all of the 189 /// analyses in the \c CGSCCAnalysisManager are potentially invalidated based 190 /// on the set of preserved analyses. 191 bool invalidate(Module &M, const PreservedAnalyses &PA, 192 ModuleAnalysisManager::Invalidator &Inv); 193 194 private: 195 CGSCCAnalysisManager *InnerAM; 196 LazyCallGraph *G; 197 }; 198 199 /// Provide a specialized run method for the \c CGSCCAnalysisManagerModuleProxy 200 /// so it can pass the lazy call graph to the result. 201 template <> 202 CGSCCAnalysisManagerModuleProxy::Result 203 CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM); 204 205 // Ensure the \c CGSCCAnalysisManagerModuleProxy is provided as an extern 206 // template. 207 extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; 208 209 extern template class OuterAnalysisManagerProxy< 210 ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>; 211 212 /// A proxy from a \c ModuleAnalysisManager to an \c SCC. 213 using ModuleAnalysisManagerCGSCCProxy = 214 OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC, 215 LazyCallGraph &>; 216 217 /// Support structure for SCC passes to communicate updates the call graph back 218 /// to the CGSCC pass manager infrsatructure. 219 /// 220 /// The CGSCC pass manager runs SCC passes which are allowed to update the call 221 /// graph and SCC structures. This means the structure the pass manager works 222 /// on is mutating underneath it. In order to support that, there needs to be 223 /// careful communication about the precise nature and ramifications of these 224 /// updates to the pass management infrastructure. 225 /// 226 /// All SCC passes will have to accept a reference to the management layer's 227 /// update result struct and use it to reflect the results of any CG updates 228 /// performed. 229 /// 230 /// Passes which do not change the call graph structure in any way can just 231 /// ignore this argument to their run method. 232 struct CGSCCUpdateResult { 233 /// Worklist of the RefSCCs queued for processing. 234 /// 235 /// When a pass refines the graph and creates new RefSCCs or causes them to 236 /// have a different shape or set of component SCCs it should add the RefSCCs 237 /// to this worklist so that we visit them in the refined form. 238 /// 239 /// This worklist is in reverse post-order, as we pop off the back in order 240 /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add 241 /// them in reverse post-order. 242 SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist; 243 244 /// Worklist of the SCCs queued for processing. 245 /// 246 /// When a pass refines the graph and creates new SCCs or causes them to have 247 /// a different shape or set of component functions it should add the SCCs to 248 /// this worklist so that we visit them in the refined form. 249 /// 250 /// Note that if the SCCs are part of a RefSCC that is added to the \c 251 /// RCWorklist, they don't need to be added here as visiting the RefSCC will 252 /// be sufficient to re-visit the SCCs within it. 253 /// 254 /// This worklist is in reverse post-order, as we pop off the back in order 255 /// to observe SCCs in post-order. When adding SCCs, clients should add them 256 /// in reverse post-order. 257 SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist; 258 259 /// The set of invalidated RefSCCs which should be skipped if they are found 260 /// in \c RCWorklist. 261 /// 262 /// This is used to quickly prune out RefSCCs when they get deleted and 263 /// happen to already be on the worklist. We use this primarily to avoid 264 /// scanning the list and removing entries from it. 265 SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs; 266 267 /// The set of invalidated SCCs which should be skipped if they are found 268 /// in \c CWorklist. 269 /// 270 /// This is used to quickly prune out SCCs when they get deleted and happen 271 /// to already be on the worklist. We use this primarily to avoid scanning 272 /// the list and removing entries from it. 273 SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs; 274 275 /// If non-null, the updated current \c RefSCC being processed. 276 /// 277 /// This is set when a graph refinement takes place an the "current" point in 278 /// the graph moves "down" or earlier in the post-order walk. This will often 279 /// cause the "current" RefSCC to be a newly created RefSCC object and the 280 /// old one to be added to the above worklist. When that happens, this 281 /// pointer is non-null and can be used to continue processing the "top" of 282 /// the post-order walk. 283 LazyCallGraph::RefSCC *UpdatedRC; 284 285 /// If non-null, the updated current \c SCC being processed. 286 /// 287 /// This is set when a graph refinement takes place an the "current" point in 288 /// the graph moves "down" or earlier in the post-order walk. This will often 289 /// cause the "current" SCC to be a newly created SCC object and the old one 290 /// to be added to the above worklist. When that happens, this pointer is 291 /// non-null and can be used to continue processing the "top" of the 292 /// post-order walk. 293 LazyCallGraph::SCC *UpdatedC; 294 295 /// Preserved analyses across SCCs. 296 /// 297 /// We specifically want to allow CGSCC passes to mutate ancestor IR 298 /// (changing both the CG structure and the function IR itself). However, 299 /// this means we need to take special care to correctly mark what analyses 300 /// are preserved *across* SCCs. We have to track this out-of-band here 301 /// because within the main `PassManeger` infrastructure we need to mark 302 /// everything within an SCC as preserved in order to avoid repeatedly 303 /// invalidating the same analyses as we unnest pass managers and adaptors. 304 /// So we track the cross-SCC version of the preserved analyses here from any 305 /// code that does direct invalidation of SCC analyses, and then use it 306 /// whenever we move forward in the post-order walk of SCCs before running 307 /// passes over the new SCC. 308 PreservedAnalyses CrossSCCPA; 309 310 /// A hacky area where the inliner can retain history about inlining 311 /// decisions that mutated the call graph's SCC structure in order to avoid 312 /// infinite inlining. See the comments in the inliner's CG update logic. 313 /// 314 /// FIXME: Keeping this here seems like a big layering issue, we should look 315 /// for a better technique. 316 SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> 317 &InlinedInternalEdges; 318 }; 319 320 /// The core module pass which does a post-order walk of the SCCs and 321 /// runs a CGSCC pass over each one. 322 /// 323 /// Designed to allow composition of a CGSCCPass(Manager) and 324 /// a ModulePassManager. Note that this pass must be run with a module analysis 325 /// manager as it uses the LazyCallGraph analysis. It will also run the 326 /// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC 327 /// pass over the module to enable a \c FunctionAnalysisManager to be used 328 /// within this run safely. 329 template <typename CGSCCPassT> 330 class ModuleToPostOrderCGSCCPassAdaptor 331 : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { 332 public: 333 explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) 334 : Pass(std::move(Pass)) {} 335 336 // We have to explicitly define all the special member functions because MSVC 337 // refuses to generate them. 338 ModuleToPostOrderCGSCCPassAdaptor( 339 const ModuleToPostOrderCGSCCPassAdaptor &Arg) 340 : Pass(Arg.Pass) {} 341 342 ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) 343 : Pass(std::move(Arg.Pass)) {} 344 345 friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS, 346 ModuleToPostOrderCGSCCPassAdaptor &RHS) { 347 std::swap(LHS.Pass, RHS.Pass); 348 } 349 350 ModuleToPostOrderCGSCCPassAdaptor & 351 operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) { 352 swap(*this, RHS); 353 return *this; 354 } 355 356 /// Runs the CGSCC pass across every SCC in the module. 357 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 358 359 private: 360 CGSCCPassT Pass; 361 }; 362 363 /// A function to deduce a function pass type and wrap it in the 364 /// templated adaptor. 365 template <typename CGSCCPassT> 366 ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> 367 createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { 368 return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); 369 } 370 371 /// A proxy from a \c FunctionAnalysisManager to an \c SCC. 372 /// 373 /// When a module pass runs and triggers invalidation, both the CGSCC and 374 /// Function analysis manager proxies on the module get an invalidation event. 375 /// We don't want to fully duplicate responsibility for most of the 376 /// invalidation logic. Instead, this layer is only responsible for SCC-local 377 /// invalidation events. We work with the module's FunctionAnalysisManager to 378 /// invalidate function analyses. 379 class FunctionAnalysisManagerCGSCCProxy 380 : public AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy> { 381 public: 382 class Result { 383 public: 384 explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {} 385 386 /// Accessor for the analysis manager. 387 FunctionAnalysisManager &getManager() { return *FAM; } 388 389 bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA, 390 CGSCCAnalysisManager::Invalidator &Inv); 391 392 private: 393 FunctionAnalysisManager *FAM; 394 }; 395 396 /// Computes the \c FunctionAnalysisManager and stores it in the result proxy. 397 Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &); 398 399 private: 400 friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>; 401 402 static AnalysisKey Key; 403 }; 404 405 extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; 406 407 /// A proxy from a \c CGSCCAnalysisManager to a \c Function. 408 using CGSCCAnalysisManagerFunctionProxy = 409 OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; 410 411 /// Helper to update the call graph after running a function pass. 412 /// 413 /// Function passes can only mutate the call graph in specific ways. This 414 /// routine provides a helper that updates the call graph in those ways 415 /// including returning whether any changes were made and populating a CG 416 /// update result struct for the overall CGSCC walk. 417 LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass( 418 LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, 419 CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR); 420 421 /// Adaptor that maps from a SCC to its functions. 422 /// 423 /// Designed to allow composition of a FunctionPass(Manager) and 424 /// a CGSCCPassManager. Note that if this pass is constructed with a pointer 425 /// to a \c CGSCCAnalysisManager it will run the 426 /// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function 427 /// pass over the SCC to enable a \c FunctionAnalysisManager to be used 428 /// within this run safely. 429 template <typename FunctionPassT> 430 class CGSCCToFunctionPassAdaptor 431 : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { 432 public: 433 explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) 434 : Pass(std::move(Pass)) {} 435 436 // We have to explicitly define all the special member functions because MSVC 437 // refuses to generate them. 438 CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) 439 : Pass(Arg.Pass) {} 440 441 CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) 442 : Pass(std::move(Arg.Pass)) {} 443 444 friend void swap(CGSCCToFunctionPassAdaptor &LHS, 445 CGSCCToFunctionPassAdaptor &RHS) { 446 std::swap(LHS.Pass, RHS.Pass); 447 } 448 449 CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) { 450 swap(*this, RHS); 451 return *this; 452 } 453 454 /// Runs the function pass across every function in the module. 455 PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, 456 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 457 // Setup the function analysis manager from its proxy. 458 FunctionAnalysisManager &FAM = 459 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 460 461 SmallVector<LazyCallGraph::Node *, 4> Nodes; 462 for (LazyCallGraph::Node &N : C) 463 Nodes.push_back(&N); 464 465 // The SCC may get split while we are optimizing functions due to deleting 466 // edges. If this happens, the current SCC can shift, so keep track of 467 // a pointer we can overwrite. 468 LazyCallGraph::SCC *CurrentC = &C; 469 470 LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C 471 << "\n"); 472 473 PreservedAnalyses PA = PreservedAnalyses::all(); 474 for (LazyCallGraph::Node *N : Nodes) { 475 // Skip nodes from other SCCs. These may have been split out during 476 // processing. We'll eventually visit those SCCs and pick up the nodes 477 // there. 478 if (CG.lookupSCC(*N) != CurrentC) 479 continue; 480 481 Function &F = N->getFunction(); 482 483 PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); 484 if (!PI.runBeforePass<Function>(Pass, F)) 485 continue; 486 487 PreservedAnalyses PassPA = Pass.run(F, FAM); 488 489 PI.runAfterPass<Function>(Pass, F); 490 491 // We know that the function pass couldn't have invalidated any other 492 // function's analyses (that's the contract of a function pass), so 493 // directly handle the function analysis manager's invalidation here. 494 FAM.invalidate(F, PassPA); 495 496 // Then intersect the preserved set so that invalidation of module 497 // analyses will eventually occur when the module pass completes. 498 PA.intersect(std::move(PassPA)); 499 500 // If the call graph hasn't been preserved, update it based on this 501 // function pass. This may also update the current SCC to point to 502 // a smaller, more refined SCC. 503 auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); 504 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { 505 CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, 506 AM, UR); 507 assert( 508 CG.lookupSCC(*N) == CurrentC && 509 "Current SCC not updated to the SCC containing the current node!"); 510 } 511 } 512 513 // By definition we preserve the proxy. And we preserve all analyses on 514 // Functions. This precludes *any* invalidation of function analyses by the 515 // proxy, but that's OK because we've taken care to invalidate analyses in 516 // the function analysis manager incrementally above. 517 PA.preserveSet<AllAnalysesOn<Function>>(); 518 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 519 520 // We've also ensured that we updated the call graph along the way. 521 PA.preserve<LazyCallGraphAnalysis>(); 522 523 return PA; 524 } 525 526 private: 527 FunctionPassT Pass; 528 }; 529 530 /// A function to deduce a function pass type and wrap it in the 531 /// templated adaptor. 532 template <typename FunctionPassT> 533 CGSCCToFunctionPassAdaptor<FunctionPassT> 534 createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { 535 return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); 536 } 537 538 /// A helper that repeats an SCC pass each time an indirect call is refined to 539 /// a direct call by that pass. 540 /// 541 /// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they 542 /// change shape, we may also want to repeat an SCC pass if it simply refines 543 /// an indirect call to a direct call, even if doing so does not alter the 544 /// shape of the graph. Note that this only pertains to direct calls to 545 /// functions where IPO across the SCC may be able to compute more precise 546 /// results. For intrinsics, we assume scalar optimizations already can fully 547 /// reason about them. 548 /// 549 /// This repetition has the potential to be very large however, as each one 550 /// might refine a single call site. As a consequence, in practice we use an 551 /// upper bound on the number of repetitions to limit things. 552 template <typename PassT> 553 class DevirtSCCRepeatedPass 554 : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> { 555 public: 556 explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations) 557 : Pass(std::move(Pass)), MaxIterations(MaxIterations) {} 558 559 /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating 560 /// whenever an indirect call is refined. 561 PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, 562 LazyCallGraph &CG, CGSCCUpdateResult &UR) { 563 PreservedAnalyses PA = PreservedAnalyses::all(); 564 PassInstrumentation PI = 565 AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); 566 567 // The SCC may be refined while we are running passes over it, so set up 568 // a pointer that we can update. 569 LazyCallGraph::SCC *C = &InitialC; 570 571 // Collect value handles for all of the indirect call sites. 572 SmallVector<WeakTrackingVH, 8> CallHandles; 573 574 // Struct to track the counts of direct and indirect calls in each function 575 // of the SCC. 576 struct CallCount { 577 int Direct; 578 int Indirect; 579 }; 580 581 // Put value handles on all of the indirect calls and return the number of 582 // direct calls for each function in the SCC. 583 auto ScanSCC = [](LazyCallGraph::SCC &C, 584 SmallVectorImpl<WeakTrackingVH> &CallHandles) { 585 assert(CallHandles.empty() && "Must start with a clear set of handles."); 586 587 SmallDenseMap<Function *, CallCount> CallCounts; 588 CallCount CountLocal = {0, 0}; 589 for (LazyCallGraph::Node &N : C) { 590 CallCount &Count = 591 CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) 592 .first->second; 593 for (Instruction &I : instructions(N.getFunction())) 594 if (auto CS = CallSite(&I)) { 595 if (CS.getCalledFunction()) { 596 ++Count.Direct; 597 } else { 598 ++Count.Indirect; 599 CallHandles.push_back(WeakTrackingVH(&I)); 600 } 601 } 602 } 603 604 return CallCounts; 605 }; 606 607 // Populate the initial call handles and get the initial call counts. 608 auto CallCounts = ScanSCC(*C, CallHandles); 609 610 for (int Iteration = 0;; ++Iteration) { 611 612 if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) 613 continue; 614 615 PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); 616 617 if (UR.InvalidatedSCCs.count(C)) 618 PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); 619 else 620 PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); 621 622 // If the SCC structure has changed, bail immediately and let the outer 623 // CGSCC layer handle any iteration to reflect the refined structure. 624 if (UR.UpdatedC && UR.UpdatedC != C) { 625 PA.intersect(std::move(PassPA)); 626 break; 627 } 628 629 // Check that we didn't miss any update scenario. 630 assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); 631 assert(C->begin() != C->end() && "Cannot have an empty SCC!"); 632 633 // Check whether any of the handles were devirtualized. 634 auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { 635 if (!CallH) 636 return false; 637 auto CS = CallSite(CallH); 638 if (!CS) 639 return false; 640 641 // If the call is still indirect, leave it alone. 642 Function *F = CS.getCalledFunction(); 643 if (!F) 644 return false; 645 646 LLVM_DEBUG(dbgs() << "Found devirtualized call from " 647 << CS.getParent()->getParent()->getName() << " to " 648 << F->getName() << "\n"); 649 650 // We now have a direct call where previously we had an indirect call, 651 // so iterate to process this devirtualization site. 652 return true; 653 }; 654 bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); 655 656 // Rescan to build up a new set of handles and count how many direct 657 // calls remain. If we decide to iterate, this also sets up the input to 658 // the next iteration. 659 CallHandles.clear(); 660 auto NewCallCounts = ScanSCC(*C, CallHandles); 661 662 // If we haven't found an explicit devirtualization already see if we 663 // have decreased the number of indirect calls and increased the number 664 // of direct calls for any function in the SCC. This can be fooled by all 665 // manner of transformations such as DCE and other things, but seems to 666 // work well in practice. 667 if (!Devirt) 668 // Iterate over the keys in NewCallCounts, if Function also exists in 669 // CallCounts, make the check below. 670 for (auto &Pair : NewCallCounts) { 671 auto &CallCountNew = Pair.second; 672 auto CountIt = CallCounts.find(Pair.first); 673 if (CountIt != CallCounts.end()) { 674 const auto &CallCountOld = CountIt->second; 675 if (CallCountOld.Indirect > CallCountNew.Indirect && 676 CallCountOld.Direct < CallCountNew.Direct) { 677 Devirt = true; 678 break; 679 } 680 } 681 } 682 683 if (!Devirt) { 684 PA.intersect(std::move(PassPA)); 685 break; 686 } 687 688 // Otherwise, if we've already hit our max, we're done. 689 if (Iteration >= MaxIterations) { 690 LLVM_DEBUG( 691 dbgs() << "Found another devirtualization after hitting the max " 692 "number of repetitions (" 693 << MaxIterations << ") on SCC: " << *C << "\n"); 694 PA.intersect(std::move(PassPA)); 695 break; 696 } 697 698 LLVM_DEBUG( 699 dbgs() 700 << "Repeating an SCC pass after finding a devirtualization in: " << *C 701 << "\n"); 702 703 // Move over the new call counts in preparation for iterating. 704 CallCounts = std::move(NewCallCounts); 705 706 // Update the analysis manager with each run and intersect the total set 707 // of preserved analyses so we're ready to iterate. 708 AM.invalidate(*C, PassPA); 709 PA.intersect(std::move(PassPA)); 710 } 711 712 // Note that we don't add any preserved entries here unlike a more normal 713 // "pass manager" because we only handle invalidation *between* iterations, 714 // not after the last iteration. 715 return PA; 716 } 717 718 private: 719 PassT Pass; 720 int MaxIterations; 721 }; 722 723 /// A function to deduce a function pass type and wrap it in the 724 /// templated adaptor. 725 template <typename PassT> 726 DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass, 727 int MaxIterations) { 728 return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations); 729 } 730 731 // Out-of-line implementation details for templates below this point. 732 733 template <typename CGSCCPassT> 734 PreservedAnalyses 735 ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M, 736 ModuleAnalysisManager &AM) { 737 // Setup the CGSCC analysis manager from its proxy. 738 CGSCCAnalysisManager &CGAM = 739 AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); 740 741 // Get the call graph for this module. 742 LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); 743 744 // We keep worklists to allow us to push more work onto the pass manager as 745 // the passes are run. 746 SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; 747 SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; 748 749 // Keep sets for invalidated SCCs and RefSCCs that should be skipped when 750 // iterating off the worklists. 751 SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; 752 SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; 753 754 SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> 755 InlinedInternalEdges; 756 757 CGSCCUpdateResult UR = { 758 RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, 759 nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges}; 760 761 // Request PassInstrumentation from analysis manager, will use it to run 762 // instrumenting callbacks for the passes later. 763 PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); 764 765 PreservedAnalyses PA = PreservedAnalyses::all(); 766 CG.buildRefSCCs(); 767 for (auto RCI = CG.postorder_ref_scc_begin(), 768 RCE = CG.postorder_ref_scc_end(); 769 RCI != RCE;) { 770 assert(RCWorklist.empty() && 771 "Should always start with an empty RefSCC worklist"); 772 // The postorder_ref_sccs range we are walking is lazily constructed, so 773 // we only push the first one onto the worklist. The worklist allows us 774 // to capture *new* RefSCCs created during transformations. 775 // 776 // We really want to form RefSCCs lazily because that makes them cheaper 777 // to update as the program is simplified and allows us to have greater 778 // cache locality as forming a RefSCC touches all the parts of all the 779 // functions within that RefSCC. 780 // 781 // We also eagerly increment the iterator to the next position because 782 // the CGSCC passes below may delete the current RefSCC. 783 RCWorklist.insert(&*RCI++); 784 785 do { 786 LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); 787 if (InvalidRefSCCSet.count(RC)) { 788 LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); 789 continue; 790 } 791 792 assert(CWorklist.empty() && 793 "Should always start with an empty SCC worklist"); 794 795 LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC 796 << "\n"); 797 798 // Push the initial SCCs in reverse post-order as we'll pop off the 799 // back and so see this in post-order. 800 for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) 801 CWorklist.insert(&C); 802 803 do { 804 LazyCallGraph::SCC *C = CWorklist.pop_back_val(); 805 // Due to call graph mutations, we may have invalid SCCs or SCCs from 806 // other RefSCCs in the worklist. The invalid ones are dead and the 807 // other RefSCCs should be queued above, so we just need to skip both 808 // scenarios here. 809 if (InvalidSCCSet.count(C)) { 810 LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); 811 continue; 812 } 813 if (&C->getOuterRefSCC() != RC) { 814 LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " 815 "RefSCC...\n"); 816 continue; 817 } 818 819 // Ensure we can proxy analysis updates from from the CGSCC analysis 820 // manager into the Function analysis manager by getting a proxy here. 821 // FIXME: This seems like a bit of a hack. We should find a cleaner 822 // or more costructive way to ensure this happens. 823 (void)CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG); 824 825 // Each time we visit a new SCC pulled off the worklist, 826 // a transformation of a child SCC may have also modified this parent 827 // and invalidated analyses. So we invalidate using the update record's 828 // cross-SCC preserved set. This preserved set is intersected by any 829 // CGSCC pass that handles invalidation (primarily pass managers) prior 830 // to marking its SCC as preserved. That lets us track everything that 831 // might need invalidation across SCCs without excessive invalidations 832 // on a single SCC. 833 // 834 // This essentially allows SCC passes to freely invalidate analyses 835 // of any ancestor SCC. If this becomes detrimental to successfully 836 // caching analyses, we could force each SCC pass to manually 837 // invalidate the analyses for any SCCs other than themselves which 838 // are mutated. However, that seems to lose the robustness of the 839 // pass-manager driven invalidation scheme. 840 // 841 // FIXME: This is redundant in one case -- the top of the worklist may 842 // *also* be the same SCC we just ran over (and invalidated for). In 843 // that case, we'll end up doing a redundant invalidation here as 844 // a consequence. 845 CGAM.invalidate(*C, UR.CrossSCCPA); 846 847 do { 848 // Check that we didn't miss any update scenario. 849 assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); 850 assert(C->begin() != C->end() && "Cannot have an empty SCC!"); 851 assert(&C->getOuterRefSCC() == RC && 852 "Processing an SCC in a different RefSCC!"); 853 854 UR.UpdatedRC = nullptr; 855 UR.UpdatedC = nullptr; 856 857 // Check the PassInstrumentation's BeforePass callbacks before 858 // running the pass, skip its execution completely if asked to 859 // (callback returns false). 860 if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) 861 continue; 862 863 PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR); 864 865 if (UR.InvalidatedSCCs.count(C)) 866 PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); 867 else 868 PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); 869 870 // Update the SCC and RefSCC if necessary. 871 C = UR.UpdatedC ? UR.UpdatedC : C; 872 RC = UR.UpdatedRC ? UR.UpdatedRC : RC; 873 874 // If the CGSCC pass wasn't able to provide a valid updated SCC, 875 // the current SCC may simply need to be skipped if invalid. 876 if (UR.InvalidatedSCCs.count(C)) { 877 LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); 878 break; 879 } 880 // Check that we didn't miss any update scenario. 881 assert(C->begin() != C->end() && "Cannot have an empty SCC!"); 882 883 // We handle invalidating the CGSCC analysis manager's information 884 // for the (potentially updated) SCC here. Note that any other SCCs 885 // whose structure has changed should have been invalidated by 886 // whatever was updating the call graph. This SCC gets invalidated 887 // late as it contains the nodes that were actively being 888 // processed. 889 CGAM.invalidate(*C, PassPA); 890 891 // Then intersect the preserved set so that invalidation of module 892 // analyses will eventually occur when the module pass completes. 893 // Also intersect with the cross-SCC preserved set to capture any 894 // cross-SCC invalidation. 895 UR.CrossSCCPA.intersect(PassPA); 896 PA.intersect(std::move(PassPA)); 897 898 // The pass may have restructured the call graph and refined the 899 // current SCC and/or RefSCC. We need to update our current SCC and 900 // RefSCC pointers to follow these. Also, when the current SCC is 901 // refined, re-run the SCC pass over the newly refined SCC in order 902 // to observe the most precise SCC model available. This inherently 903 // cannot cycle excessively as it only happens when we split SCCs 904 // apart, at most converging on a DAG of single nodes. 905 // FIXME: If we ever start having RefSCC passes, we'll want to 906 // iterate there too. 907 if (UR.UpdatedC) 908 LLVM_DEBUG(dbgs() 909 << "Re-running SCC passes after a refinement of the " 910 "current SCC: " 911 << *UR.UpdatedC << "\n"); 912 913 // Note that both `C` and `RC` may at this point refer to deleted, 914 // invalid SCC and RefSCCs respectively. But we will short circuit 915 // the processing when we check them in the loop above. 916 } while (UR.UpdatedC); 917 } while (!CWorklist.empty()); 918 919 // We only need to keep internal inlined edge information within 920 // a RefSCC, clear it to save on space and let the next time we visit 921 // any of these functions have a fresh start. 922 InlinedInternalEdges.clear(); 923 } while (!RCWorklist.empty()); 924 } 925 926 // By definition we preserve the call garph, all SCC analyses, and the 927 // analysis proxies by handling them above and in any nested pass managers. 928 PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); 929 PA.preserve<LazyCallGraphAnalysis>(); 930 PA.preserve<CGSCCAnalysisManagerModuleProxy>(); 931 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 932 return PA; 933 } 934 935 // Clear out the debug logging macro. 936 #undef DEBUG_TYPE 937 938 } // end namespace llvm 939 940 #endif // LLVM_ANALYSIS_CGSCCPASSMANAGER_H 941