1 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides an interface for customizing the standard MachineScheduler 10 // pass. Note that the entire pass may be replaced as follows: 11 // 12 // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) { 13 // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID); 14 // ...} 15 // 16 // The MachineScheduler pass is only responsible for choosing the regions to be 17 // scheduled. Targets can override the DAG builder and scheduler without 18 // replacing the pass as follows: 19 // 20 // ScheduleDAGInstrs *<Target>PassConfig:: 21 // createMachineScheduler(MachineSchedContext *C) { 22 // return new CustomMachineScheduler(C); 23 // } 24 // 25 // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list 26 // scheduling while updating the instruction stream, register pressure, and live 27 // intervals. Most targets don't need to override the DAG builder and list 28 // scheduler, but subtargets that require custom scheduling heuristics may 29 // plugin an alternate MachineSchedStrategy. The strategy is responsible for 30 // selecting the highest priority node from the list: 31 // 32 // ScheduleDAGInstrs *<Target>PassConfig:: 33 // createMachineScheduler(MachineSchedContext *C) { 34 // return new ScheduleDAGMILive(C, CustomStrategy(C)); 35 // } 36 // 37 // The DAG builder can also be customized in a sense by adding DAG mutations 38 // that will run after DAG building and before list scheduling. 
DAG mutations 39 // can adjust dependencies based on target-specific knowledge or add weak edges 40 // to aid heuristics: 41 // 42 // ScheduleDAGInstrs *<Target>PassConfig:: 43 // createMachineScheduler(MachineSchedContext *C) { 44 // ScheduleDAGMI *DAG = createGenericSchedLive(C); 45 // DAG->addMutation(new CustomDAGMutation(...)); 46 // return DAG; 47 // } 48 // 49 // A target that supports alternative schedulers can use the 50 // MachineSchedRegistry to allow command line selection. This can be done by 51 // implementing the following boilerplate: 52 // 53 // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { 54 // return new CustomMachineScheduler(C); 55 // } 56 // static MachineSchedRegistry 57 // SchedCustomRegistry("custom", "Run my target's custom scheduler", 58 // createCustomMachineSched); 59 // 60 // 61 // Finally, subtargets that don't need to implement custom heuristics but would 62 // like to configure the GenericScheduler's policy for a given scheduler region, 63 // including scheduling direction and register pressure tracking policy, can do 64 // this: 65 // 66 // void <SubTarget>Subtarget:: 67 // overrideSchedPolicy(MachineSchedPolicy &Policy, 68 // unsigned NumRegionInstrs) const { 69 // Policy.<Flag> = true; 70 // } 71 // 72 //===----------------------------------------------------------------------===// 73 74 #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H 75 #define LLVM_CODEGEN_MACHINESCHEDULER_H 76 77 #include "llvm/ADT/ArrayRef.h" 78 #include "llvm/ADT/BitVector.h" 79 #include "llvm/ADT/STLExtras.h" 80 #include "llvm/ADT/SmallVector.h" 81 #include "llvm/ADT/StringRef.h" 82 #include "llvm/ADT/Twine.h" 83 #include "llvm/Analysis/AliasAnalysis.h" 84 #include "llvm/CodeGen/MachineBasicBlock.h" 85 #include "llvm/CodeGen/MachinePassRegistry.h" 86 #include "llvm/CodeGen/RegisterPressure.h" 87 #include "llvm/CodeGen/ScheduleDAG.h" 88 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 89 #include "llvm/CodeGen/ScheduleDAGMutation.h" 90 
#include "llvm/CodeGen/TargetSchedule.h" 91 #include "llvm/Support/CommandLine.h" 92 #include "llvm/Support/ErrorHandling.h" 93 #include <algorithm> 94 #include <cassert> 95 #include <memory> 96 #include <string> 97 #include <vector> 98 99 namespace llvm { 100 101 extern cl::opt<bool> ForceTopDown; 102 extern cl::opt<bool> ForceBottomUp; 103 extern cl::opt<bool> VerifyScheduling; 104 105 class LiveIntervals; 106 class MachineDominatorTree; 107 class MachineFunction; 108 class MachineInstr; 109 class MachineLoopInfo; 110 class RegisterClassInfo; 111 class SchedDFSResult; 112 class ScheduleHazardRecognizer; 113 class TargetInstrInfo; 114 class TargetPassConfig; 115 class TargetRegisterInfo; 116 117 /// MachineSchedContext provides enough context from the MachineScheduler pass 118 /// for the target to instantiate a scheduler. 119 struct MachineSchedContext { 120 MachineFunction *MF = nullptr; 121 const MachineLoopInfo *MLI = nullptr; 122 const MachineDominatorTree *MDT = nullptr; 123 const TargetPassConfig *PassConfig = nullptr; 124 AliasAnalysis *AA = nullptr; 125 LiveIntervals *LIS = nullptr; 126 127 RegisterClassInfo *RegClassInfo; 128 129 MachineSchedContext(); 130 virtual ~MachineSchedContext(); 131 }; 132 133 /// MachineSchedRegistry provides a selection of available machine instruction 134 /// schedulers. 135 class MachineSchedRegistry 136 : public MachinePassRegistryNode< 137 ScheduleDAGInstrs *(*)(MachineSchedContext *)> { 138 public: 139 using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); 140 141 // RegisterPassParser requires a (misnamed) FunctionPassCtor type. 
142 using FunctionPassCtor = ScheduleDAGCtor; 143 144 static MachinePassRegistry<ScheduleDAGCtor> Registry; 145 MachineSchedRegistry(const char * N,const char * D,ScheduleDAGCtor C)146 MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) 147 : MachinePassRegistryNode(N, D, C) { 148 Registry.Add(this); 149 } 150 ~MachineSchedRegistry()151 ~MachineSchedRegistry() { Registry.Remove(this); } 152 153 // Accessors. 154 // getNext()155 MachineSchedRegistry *getNext() const { 156 return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); 157 } 158 getList()159 static MachineSchedRegistry *getList() { 160 return (MachineSchedRegistry *)Registry.getList(); 161 } 162 setListener(MachinePassRegistryListener<FunctionPassCtor> * L)163 static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) { 164 Registry.setListener(L); 165 } 166 }; 167 168 class ScheduleDAGMI; 169 170 /// Define a generic scheduling policy for targets that don't provide their own 171 /// MachineSchedStrategy. This can be overriden for each scheduling region 172 /// before building the DAG. 173 struct MachineSchedPolicy { 174 // Allow the scheduler to disable register pressure tracking. 175 bool ShouldTrackPressure = false; 176 /// Track LaneMasks to allow reordering of independent subregister writes 177 /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() 178 bool ShouldTrackLaneMasks = false; 179 180 // Allow the scheduler to force top-down or bottom-up scheduling. If neither 181 // is true, the scheduler runs in both directions and converges. 182 bool OnlyTopDown = false; 183 bool OnlyBottomUp = false; 184 185 // Disable heuristic that tries to fetch nodes from long dependency chains 186 // first. 187 bool DisableLatencyHeuristic = false; 188 189 MachineSchedPolicy() = default; 190 }; 191 192 /// MachineSchedStrategy - Interface to the scheduling algorithm used by 193 /// ScheduleDAGMI. 
194 /// 195 /// Initialization sequence: 196 /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots 197 class MachineSchedStrategy { 198 virtual void anchor(); 199 200 public: 201 virtual ~MachineSchedStrategy() = default; 202 203 /// Optionally override the per-region scheduling policy. initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)204 virtual void initPolicy(MachineBasicBlock::iterator Begin, 205 MachineBasicBlock::iterator End, 206 unsigned NumRegionInstrs) {} 207 dumpPolicy()208 virtual void dumpPolicy() const {} 209 210 /// Check if pressure tracking is needed before building the DAG and 211 /// initializing this strategy. Called after initPolicy. shouldTrackPressure()212 virtual bool shouldTrackPressure() const { return true; } 213 214 /// Returns true if lanemasks should be tracked. LaneMask tracking is 215 /// necessary to reorder independent subregister defs for the same vreg. 216 /// This has to be enabled in combination with shouldTrackPressure(). shouldTrackLaneMasks()217 virtual bool shouldTrackLaneMasks() const { return false; } 218 219 // If this method returns true, handling of the scheduling regions 220 // themselves (in case of a scheduling boundary in MBB) will be done 221 // beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()222 virtual bool doMBBSchedRegionsTopDown() const { return false; } 223 224 /// Initialize the strategy after building the DAG for a new region. 225 virtual void initialize(ScheduleDAGMI *DAG) = 0; 226 227 /// Tell the strategy that MBB is about to be processed. enterMBB(MachineBasicBlock * MBB)228 virtual void enterMBB(MachineBasicBlock *MBB) {}; 229 230 /// Tell the strategy that current MBB is done. leaveMBB()231 virtual void leaveMBB() {}; 232 233 /// Notify this strategy that all roots have been released (including those 234 /// that depend on EntrySU or ExitSU). 
registerRoots()235 virtual void registerRoots() {} 236 237 /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to 238 /// schedule the node at the top of the unscheduled region. Otherwise it will 239 /// be scheduled at the bottom. 240 virtual SUnit *pickNode(bool &IsTopNode) = 0; 241 242 /// Scheduler callback to notify that a new subtree is scheduled. scheduleTree(unsigned SubtreeID)243 virtual void scheduleTree(unsigned SubtreeID) {} 244 245 /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an 246 /// instruction and updated scheduled/remaining flags in the DAG nodes. 247 virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; 248 249 /// When all predecessor dependencies have been resolved, free this node for 250 /// top-down scheduling. 251 virtual void releaseTopNode(SUnit *SU) = 0; 252 253 /// When all successor dependencies have been resolved, free this node for 254 /// bottom-up scheduling. 255 virtual void releaseBottomNode(SUnit *SU) = 0; 256 }; 257 258 /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply 259 /// schedules machine instructions according to the given MachineSchedStrategy 260 /// without much extra book-keeping. This is the common functionality between 261 /// PreRA and PostRA MachineScheduler. 262 class ScheduleDAGMI : public ScheduleDAGInstrs { 263 protected: 264 AliasAnalysis *AA; 265 LiveIntervals *LIS; 266 std::unique_ptr<MachineSchedStrategy> SchedImpl; 267 268 /// Ordered list of DAG postprocessing steps. 269 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 270 271 /// The top of the unscheduled zone. 272 MachineBasicBlock::iterator CurrentTop; 273 274 /// The bottom of the unscheduled zone. 275 MachineBasicBlock::iterator CurrentBottom; 276 277 /// Record the next node in a scheduled cluster. 278 const SUnit *NextClusterPred = nullptr; 279 const SUnit *NextClusterSucc = nullptr; 280 281 #ifndef NDEBUG 282 /// The number of instructions scheduled so far. 
Used to cut off the 283 /// scheduler at the point determined by misched-cutoff. 284 unsigned NumInstrsScheduled = 0; 285 #endif 286 287 public: ScheduleDAGMI(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S,bool RemoveKillFlags)288 ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, 289 bool RemoveKillFlags) 290 : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), 291 LIS(C->LIS), SchedImpl(std::move(S)) {} 292 293 // Provide a vtable anchor 294 ~ScheduleDAGMI() override; 295 296 /// If this method returns true, handling of the scheduling regions 297 /// themselves (in case of a scheduling boundary in MBB) will be done 298 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()299 bool doMBBSchedRegionsTopDown() const override { 300 return SchedImpl->doMBBSchedRegionsTopDown(); 301 } 302 303 // Returns LiveIntervals instance for use in DAG mutators and such. getLIS()304 LiveIntervals *getLIS() const { return LIS; } 305 306 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()307 virtual bool hasVRegLiveness() const { return false; } 308 309 /// Add a postprocessing step to the DAG builder. 310 /// Mutations are applied in the order that they are added after normal DAG 311 /// building and before MachineSchedStrategy initialization. 312 /// 313 /// ScheduleDAGMI takes ownership of the Mutation object. addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)314 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 315 if (Mutation) 316 Mutations.push_back(std::move(Mutation)); 317 } 318 top()319 MachineBasicBlock::iterator top() const { return CurrentTop; } bottom()320 MachineBasicBlock::iterator bottom() const { return CurrentBottom; } 321 322 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 323 /// region. This covers all instructions in a block, while schedule() may only 324 /// cover a subset. 
325 void enterRegion(MachineBasicBlock *bb, 326 MachineBasicBlock::iterator begin, 327 MachineBasicBlock::iterator end, 328 unsigned regioninstrs) override; 329 330 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 331 /// reorderable instructions. 332 void schedule() override; 333 334 void startBlock(MachineBasicBlock *bb) override; 335 void finishBlock() override; 336 337 /// Change the position of an instruction within the basic block and update 338 /// live ranges and region boundary iterators. 339 void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); 340 getNextClusterPred()341 const SUnit *getNextClusterPred() const { return NextClusterPred; } 342 getNextClusterSucc()343 const SUnit *getNextClusterSucc() const { return NextClusterSucc; } 344 345 void viewGraph(const Twine &Name, const Twine &Title) override; 346 void viewGraph() override; 347 348 protected: 349 // Top-Level entry points for the schedule() driver... 350 351 /// Apply each ScheduleDAGMutation step in order. This allows different 352 /// instances of ScheduleDAGMI to perform custom DAG postprocessing. 353 void postprocessDAG(); 354 355 /// Release ExitSU predecessors and setup scheduler queues. 356 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 357 358 /// Update scheduler DAG and queues after scheduling an instruction. 359 void updateQueues(SUnit *SU, bool IsTopNode); 360 361 /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. 362 void placeDebugValues(); 363 364 /// dump the scheduled Sequence. 365 void dumpSchedule() const; 366 367 // Lesser helpers... 
368 bool checkSchedLimit(); 369 370 void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 371 SmallVectorImpl<SUnit*> &BotRoots); 372 373 void releaseSucc(SUnit *SU, SDep *SuccEdge); 374 void releaseSuccessors(SUnit *SU); 375 void releasePred(SUnit *SU, SDep *PredEdge); 376 void releasePredecessors(SUnit *SU); 377 }; 378 379 /// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules 380 /// machine instructions while updating LiveIntervals and tracking regpressure. 381 class ScheduleDAGMILive : public ScheduleDAGMI { 382 protected: 383 RegisterClassInfo *RegClassInfo; 384 385 /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees 386 /// will be empty. 387 SchedDFSResult *DFSResult = nullptr; 388 BitVector ScheduledTrees; 389 390 MachineBasicBlock::iterator LiveRegionEnd; 391 392 /// Maps vregs to the SUnits of their uses in the current scheduling region. 393 VReg2SUnitMultiMap VRegUses; 394 395 // Map each SU to its summary of pressure changes. This array is updated for 396 // liveness during bottom-up scheduling. Top-down scheduling may proceed but 397 // has no affect on the pressure diffs. 398 PressureDiffs SUPressureDiffs; 399 400 /// Register pressure in this region computed by initRegPressure. 401 bool ShouldTrackPressure = false; 402 bool ShouldTrackLaneMasks = false; 403 IntervalPressure RegPressure; 404 RegPressureTracker RPTracker; 405 406 /// List of pressure sets that exceed the target's pressure limit before 407 /// scheduling, listed in increasing set ID order. Each pressure set is paired 408 /// with its max pressure in the currently scheduled regions. 409 std::vector<PressureChange> RegionCriticalPSets; 410 411 /// The top of the unscheduled zone. 412 IntervalPressure TopPressure; 413 RegPressureTracker TopRPTracker; 414 415 /// The bottom of the unscheduled zone. 
416 IntervalPressure BotPressure; 417 RegPressureTracker BotRPTracker; 418 419 /// True if disconnected subregister components are already renamed. 420 /// The renaming is only done on demand if lane masks are tracked. 421 bool DisconnectedComponentsRenamed = false; 422 423 public: ScheduleDAGMILive(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S)424 ScheduleDAGMILive(MachineSchedContext *C, 425 std::unique_ptr<MachineSchedStrategy> S) 426 : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), 427 RegClassInfo(C->RegClassInfo), RPTracker(RegPressure), 428 TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} 429 430 ~ScheduleDAGMILive() override; 431 432 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()433 bool hasVRegLiveness() const override { return true; } 434 435 /// Return true if register pressure tracking is enabled. isTrackingPressure()436 bool isTrackingPressure() const { return ShouldTrackPressure; } 437 438 /// Get current register pressure for the top scheduled instructions. getTopPressure()439 const IntervalPressure &getTopPressure() const { return TopPressure; } getTopRPTracker()440 const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } 441 442 /// Get current register pressure for the bottom scheduled instructions. getBotPressure()443 const IntervalPressure &getBotPressure() const { return BotPressure; } getBotRPTracker()444 const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } 445 446 /// Get register pressure for the entire scheduling region before scheduling. 
getRegPressure()447 const IntervalPressure &getRegPressure() const { return RegPressure; } 448 getRegionCriticalPSets()449 const std::vector<PressureChange> &getRegionCriticalPSets() const { 450 return RegionCriticalPSets; 451 } 452 getPressureDiff(const SUnit * SU)453 PressureDiff &getPressureDiff(const SUnit *SU) { 454 return SUPressureDiffs[SU->NodeNum]; 455 } getPressureDiff(const SUnit * SU)456 const PressureDiff &getPressureDiff(const SUnit *SU) const { 457 return SUPressureDiffs[SU->NodeNum]; 458 } 459 460 /// Compute a DFSResult after DAG building is complete, and before any 461 /// queue comparisons. 462 void computeDFSResult(); 463 464 /// Return a non-null DFS result if the scheduling strategy initialized it. getDFSResult()465 const SchedDFSResult *getDFSResult() const { return DFSResult; } 466 getScheduledTrees()467 BitVector &getScheduledTrees() { return ScheduledTrees; } 468 469 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 470 /// region. This covers all instructions in a block, while schedule() may only 471 /// cover a subset. 472 void enterRegion(MachineBasicBlock *bb, 473 MachineBasicBlock::iterator begin, 474 MachineBasicBlock::iterator end, 475 unsigned regioninstrs) override; 476 477 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 478 /// reorderable instructions. 479 void schedule() override; 480 481 /// Compute the cyclic critical path through the DAG. 482 unsigned computeCyclicCriticalPath(); 483 484 void dump() const override; 485 486 protected: 487 // Top-Level entry points for the schedule() driver... 488 489 /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking 490 /// enabled. This sets up three trackers. RPTracker will cover the entire DAG 491 /// region, TopTracker and BottomTracker will be initialized to the top and 492 /// bottom of the DAG region without covereing any unscheduled instruction. 
493 void buildDAGWithRegPressure(); 494 495 /// Release ExitSU predecessors and setup scheduler queues. Re-position 496 /// the Top RP tracker in case the region beginning has changed. 497 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 498 499 /// Move an instruction and update register pressure. 500 void scheduleMI(SUnit *SU, bool IsTopNode); 501 502 // Lesser helpers... 503 504 void initRegPressure(); 505 506 void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses); 507 508 void updateScheduledPressure(const SUnit *SU, 509 const std::vector<unsigned> &NewMaxPressure); 510 511 void collectVRegUses(SUnit &SU); 512 }; 513 514 //===----------------------------------------------------------------------===// 515 /// 516 /// Helpers for implementing custom MachineSchedStrategy classes. These take 517 /// care of the book-keeping associated with list scheduling heuristics. 518 /// 519 //===----------------------------------------------------------------------===// 520 521 /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience 522 /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified 523 /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. 524 /// 525 /// This is a convenience class that may be used by implementations of 526 /// MachineSchedStrategy. 527 class ReadyQueue { 528 unsigned ID; 529 std::string Name; 530 std::vector<SUnit*> Queue; 531 532 public: ReadyQueue(unsigned id,const Twine & name)533 ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} 534 getID()535 unsigned getID() const { return ID; } 536 getName()537 StringRef getName() const { return Name; } 538 539 // SU is in this queue if it's NodeQueueID is a superset of this ID. 
isInQueue(SUnit * SU)540 bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } 541 empty()542 bool empty() const { return Queue.empty(); } 543 clear()544 void clear() { Queue.clear(); } 545 size()546 unsigned size() const { return Queue.size(); } 547 548 using iterator = std::vector<SUnit*>::iterator; 549 begin()550 iterator begin() { return Queue.begin(); } 551 end()552 iterator end() { return Queue.end(); } 553 elements()554 ArrayRef<SUnit*> elements() { return Queue; } 555 find(SUnit * SU)556 iterator find(SUnit *SU) { return llvm::find(Queue, SU); } 557 push(SUnit * SU)558 void push(SUnit *SU) { 559 Queue.push_back(SU); 560 SU->NodeQueueId |= ID; 561 } 562 remove(iterator I)563 iterator remove(iterator I) { 564 (*I)->NodeQueueId &= ~ID; 565 *I = Queue.back(); 566 unsigned idx = I - Queue.begin(); 567 Queue.pop_back(); 568 return Queue.begin() + idx; 569 } 570 571 void dump() const; 572 }; 573 574 /// Summarize the unscheduled region. 575 struct SchedRemainder { 576 // Critical path through the DAG in expected latency. 577 unsigned CriticalPath; 578 unsigned CyclicCritPath; 579 580 // Scaled count of micro-ops left to schedule. 581 unsigned RemIssueCount; 582 583 bool IsAcyclicLatencyLimited; 584 585 // Unscheduled resources 586 SmallVector<unsigned, 16> RemainingCounts; 587 SchedRemainderSchedRemainder588 SchedRemainder() { reset(); } 589 resetSchedRemainder590 void reset() { 591 CriticalPath = 0; 592 CyclicCritPath = 0; 593 RemIssueCount = 0; 594 IsAcyclicLatencyLimited = false; 595 RemainingCounts.clear(); 596 } 597 598 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 599 }; 600 601 /// Each Scheduling boundary is associated with ready queues. It tracks the 602 /// current cycle in the direction of movement, and maintains the state 603 /// of "hazards" and other interlocks at the current cycle. 
604 class SchedBoundary { 605 public: 606 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 607 enum { 608 TopQID = 1, 609 BotQID = 2, 610 LogMaxQID = 2 611 }; 612 613 ScheduleDAGMI *DAG = nullptr; 614 const TargetSchedModel *SchedModel = nullptr; 615 SchedRemainder *Rem = nullptr; 616 617 ReadyQueue Available; 618 ReadyQueue Pending; 619 620 ScheduleHazardRecognizer *HazardRec = nullptr; 621 622 private: 623 /// True if the pending Q should be checked/updated before scheduling another 624 /// instruction. 625 bool CheckPending; 626 627 /// Number of cycles it takes to issue the instructions scheduled in this 628 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. 629 /// See getStalls(). 630 unsigned CurrCycle; 631 632 /// Micro-ops issued in the current cycle 633 unsigned CurrMOps; 634 635 /// MinReadyCycle - Cycle of the soonest available instruction. 636 unsigned MinReadyCycle; 637 638 // The expected latency of the critical path in this scheduled zone. 639 unsigned ExpectedLatency; 640 641 // The latency of dependence chains leading into this zone. 642 // For each node scheduled bottom-up: DLat = max DLat, N.Depth. 643 // For each cycle scheduled: DLat -= 1. 644 unsigned DependentLatency; 645 646 /// Count the scheduled (issued) micro-ops that can be retired by 647 /// time=CurrCycle assuming the first scheduled instr is retired at time=0. 648 unsigned RetiredMOps; 649 650 // Count scheduled resources that have been executed. Resources are 651 // considered executed if they become ready in the time that it takes to 652 // saturate any resource including the one in question. Counts are scaled 653 // for direct comparison with other resources. Counts can be compared with 654 // MOps * getMicroOpFactor and Latency * getLatencyFactor. 655 SmallVector<unsigned, 16> ExecutedResCounts; 656 657 /// Cache the max count for a single resource. 
658 unsigned MaxExecutedResCount; 659 660 // Cache the critical resources ID in this scheduled zone. 661 unsigned ZoneCritResIdx; 662 663 // Is the scheduled region resource limited vs. latency limited. 664 bool IsResourceLimited; 665 666 // Record the highest cycle at which each resource has been reserved by a 667 // scheduled instruction. 668 SmallVector<unsigned, 16> ReservedCycles; 669 670 // For each PIdx, stores first index into ReservedCycles that corresponds to 671 // it. 672 SmallVector<unsigned, 16> ReservedCyclesIndex; 673 674 #ifndef NDEBUG 675 // Remember the greatest possible stall as an upper bound on the number of 676 // times we should retry the pending queue because of a hazard. 677 unsigned MaxObservedStall; 678 #endif 679 680 public: 681 /// Pending queues extend the ready queues with the same ID and the 682 /// PendingFlag set. SchedBoundary(unsigned ID,const Twine & Name)683 SchedBoundary(unsigned ID, const Twine &Name): 684 Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") { 685 reset(); 686 } 687 688 ~SchedBoundary(); 689 690 void reset(); 691 692 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 693 SchedRemainder *rem); 694 isTop()695 bool isTop() const { 696 return Available.getID() == TopQID; 697 } 698 699 /// Number of cycles to issue the instructions scheduled in this zone. getCurrCycle()700 unsigned getCurrCycle() const { return CurrCycle; } 701 702 /// Micro-ops issued in the current cycle getCurrMOps()703 unsigned getCurrMOps() const { return CurrMOps; } 704 705 // The latency of dependence chains leading into this zone. getDependentLatency()706 unsigned getDependentLatency() const { return DependentLatency; } 707 708 /// Get the number of latency cycles "covered" by the scheduled 709 /// instructions. This is the larger of the critical path within the zone 710 /// and the number of cycles required to issue the instructions. 
getScheduledLatency()711 unsigned getScheduledLatency() const { 712 return std::max(ExpectedLatency, CurrCycle); 713 } 714 getUnscheduledLatency(SUnit * SU)715 unsigned getUnscheduledLatency(SUnit *SU) const { 716 return isTop() ? SU->getHeight() : SU->getDepth(); 717 } 718 getResourceCount(unsigned ResIdx)719 unsigned getResourceCount(unsigned ResIdx) const { 720 return ExecutedResCounts[ResIdx]; 721 } 722 723 /// Get the scaled count of scheduled micro-ops and resources, including 724 /// executed resources. getCriticalCount()725 unsigned getCriticalCount() const { 726 if (!ZoneCritResIdx) 727 return RetiredMOps * SchedModel->getMicroOpFactor(); 728 return getResourceCount(ZoneCritResIdx); 729 } 730 731 /// Get a scaled count for the minimum execution time of the scheduled 732 /// micro-ops that are ready to execute by getExecutedCount. Notice the 733 /// feedback loop. getExecutedCount()734 unsigned getExecutedCount() const { 735 return std::max(CurrCycle * SchedModel->getLatencyFactor(), 736 MaxExecutedResCount); 737 } 738 getZoneCritResIdx()739 unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } 740 741 // Is the scheduled region resource limited vs. latency limited. isResourceLimited()742 bool isResourceLimited() const { return IsResourceLimited; } 743 744 /// Get the difference between the given SUnit's ready time and the current 745 /// cycle. 746 unsigned getLatencyStallCycles(SUnit *SU); 747 748 unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, 749 unsigned Cycles); 750 751 std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx, 752 unsigned Cycles); 753 754 bool checkHazard(SUnit *SU); 755 756 unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); 757 758 unsigned getOtherResourceCount(unsigned &OtherCritIdx); 759 760 /// Release SU to make it ready. If it's not in hazard, remove it from 761 /// pending queue (if already in) and push into available queue. 762 /// Otherwise, push the SU into pending queue. 
  ///
  /// @param SU The unit to be released.
  /// @param ReadyCycle Until which cycle the unit is ready.
  /// @param InPQueue Whether SU is already in pending queue.
  /// @param Idx Position offset in pending queue (if in it).
  void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                   unsigned Idx = 0);

  /// Advance the boundary's current cycle to NextCycle.
  void bumpCycle(unsigned NextCycle);

  /// Add Count units to the executed total of processor resource PIdx.
  void incExecutedResources(unsigned PIdx, unsigned Count);

  unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);

  /// Account for the scheduling of SU at this boundary (cycle/resource
  /// bookkeeping).
  void bumpNode(SUnit *SU);

  /// Move units from the Pending queue to Available once they become ready.
  void releasePending();

  /// Remove SU from whichever ready queue currently holds it.
  void removeReady(SUnit *SU);

  /// Call this before applying any other heuristics to the Available queue.
  /// Updates the Available/Pending Q's if necessary and returns the single
  /// available instruction, or NULL if there are multiple candidates.
  SUnit *pickOnlyChoice();

  // Debug-only dump of the boundary's cycle/resource state.
  void dumpScheduledState() const;
};

/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason : uint8_t {
    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  /// Human-readable name for a CandReason, for tracing/debugging.
  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency = false;
    unsigned ReduceResIdx = 0;
    unsigned DemandResIdx = 0;

    CandPolicy() = default;

    bool operator==(const CandPolicy &RHS) const {
      return ReduceLatency == RHS.ReduceLatency &&
             ReduceResIdx == RHS.ReduceResIdx &&
             DemandResIdx == RHS.DemandResIdx;
    }
    bool operator!=(const CandPolicy &RHS) const {
      return !(*this == RHS);
    }
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources = 0;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources = 0;

    SchedResourceDelta() = default;

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
          && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Whether this candidate should be scheduled at top/bottom.
    bool AtTop;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate() { reset(CandPolicy()); }
    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }

    /// Reinitialize with NewPolicy; clears the candidate (SU == nullptr).
    void reset(const CandPolicy &NewPolicy) {
      Policy = NewPolicy;
      SU = nullptr;
      Reason = NoCand;
      AtTop = false;
      RPDelta = RegPressureDelta();
      ResDelta = SchedResourceDelta();
    }

    // A candidate is valid once an SUnit has been chosen for it.
    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      AtTop = Best.AtTop;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    /// Compute ResDelta for this candidate's SU against the region's
    /// critical resources.
    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

protected:
  const MachineSchedContext *Context;
  const TargetSchedModel *SchedModel = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  SchedRemainder Rem;

  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}

  /// Set the CandPolicy given a scheduling zone given the current resources
  /// and latencies inside and outside the zone.
  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                 SchedBoundary *OtherZone);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif

private:
  bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                           bool ComputeRemLatency, unsigned &RemLatency) const;
};

// Utility functions used by heuristics in tryCandidate().
/// Prefer TryCand when TryVal is less than CandVal; on a win or loss the
/// winning candidate's Reason is recorded. Returns true if the comparison
/// was decisive.
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason);
/// Prefer TryCand when TryVal is greater than CandVal; mirror of tryLess.
bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason);
/// Break a tie by comparing latency-related heights/depths relative to Zone.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone);
/// Compare register-pressure changes of the two candidates for Reason.
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF);
unsigned getWeakLeft(const SUnit *SU, bool isTop);
/// Bias for scheduling SU near physical-register defs/uses at the
/// region boundary.
int biasPhysReg(const SUnit *SU, bool isTop);

/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
public:
  GenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
    Bot(SchedBoundary::BotQID, "BotQ") {}

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  void dumpPolicy() const override;

  bool shouldTrackPressure() const override {
    return RegionPolicy.ShouldTrackPressure;
  }

  bool shouldTrackLaneMasks() const override {
    return RegionPolicy.ShouldTrackLaneMasks;
  }

  void initialize(ScheduleDAGMI *dag) override;

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Top.releaseNode(SU, SU->TopReadyCycle, false);
    // Invalidate the cached top candidate; the queue contents changed.
    TopCand.SU = nullptr;
  }

  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    // Invalidate the cached bottom candidate; the queue contents changed.
    BotCand.SU = nullptr;
  }

  void registerRoots() override;

protected:
  ScheduleDAGMILive *DAG = nullptr;

  MachineSchedPolicy RegionPolicy;

  // State of the top and bottom scheduled instruction boundaries.
  SchedBoundary Top;
  SchedBoundary Bot;

  /// Candidate last picked from Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from Bot boundary.
  SchedCandidate BotCand;

  void checkAcyclicLatency();

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     RegPressureTracker &TempTracker);

  /// Apply heuristics; virtual so subtargets can override the ordering.
  virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                            SchedBoundary *Zone) const;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

  void reschedulePhysReg(SUnit *SU, bool isTop);
};

/// PostGenericScheduler - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Callbacks from ScheduleDAGMI:
/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
  ScheduleDAGMI *DAG = nullptr;
  // PostRA scheduling is top-down only: a single Top boundary, no Bot.
  SchedBoundary Top;
  SmallVector<SUnit*, 8> BotRoots;

public:
  PostGenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}

  ~PostGenericScheduler() override = default;

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override {
    /* no configurable policy */
  }

  /// PostRA scheduling does not track pressure.
  bool shouldTrackPressure() const override { return false; }

  void initialize(ScheduleDAGMI *Dag) override;

  void registerRoots() override;

  SUnit *pickNode(bool &IsTopNode) override;

  void scheduleTree(unsigned SubtreeID) override {
    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
  }

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;
    Top.releaseNode(SU, SU->TopReadyCycle, false);
  }

  // Only called for roots.
  void releaseBottomNode(SUnit *SU) override {
    BotRoots.push_back(SU);
  }

protected:
  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);

  void pickNodeFromQueue(SchedCandidate &Cand);
};

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
/// Adds default DAG mutations.
ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);

/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);

/// DAG mutation that clusters adjacent loads to improve memory locality.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI);

/// DAG mutation that clusters adjacent stores.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI);

/// DAG mutation that adds weak edges to constrain COPY elimination.
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);

} // end namespace llvm

#endif // LLVM_CODEGEN_MACHINESCHEDULER_H