1 //===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// SI Machine Scheduler interface 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 17 18 #include "SIInstrInfo.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineScheduler.h" 21 #include "llvm/CodeGen/RegisterPressure.h" 22 #include "llvm/CodeGen/ScheduleDAG.h" 23 #include <cassert> 24 #include <cstdint> 25 #include <map> 26 #include <memory> 27 #include <set> 28 #include <vector> 29 30 namespace llvm { 31 32 enum SIScheduleCandReason { 33 NoCand, 34 RegUsage, 35 Latency, 36 Successor, 37 Depth, 38 NodeOrder 39 }; 40 41 struct SISchedulerCandidate { 42 // The reason for this candidate. 43 SIScheduleCandReason Reason = NoCand; 44 45 // Set of reasons that apply to multiple candidates. 46 uint32_t RepeatReasonSet = 0; 47 48 SISchedulerCandidate() = default; 49 isRepeatSISchedulerCandidate50 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } setRepeatSISchedulerCandidate51 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } 52 }; 53 54 class SIScheduleDAGMI; 55 class SIScheduleBlockCreator; 56 57 enum SIScheduleBlockLinkKind { 58 NoData, 59 Data 60 }; 61 62 class SIScheduleBlock { 63 SIScheduleDAGMI *DAG; 64 SIScheduleBlockCreator *BC; 65 66 std::vector<SUnit*> SUnits; 67 std::map<unsigned, unsigned> NodeNum2Index; 68 std::vector<SUnit*> TopReadySUs; 69 std::vector<SUnit*> ScheduledSUnits; 70 71 /// The top of the unscheduled zone. 72 IntervalPressure TopPressure; 73 RegPressureTracker TopRPTracker; 74 75 // Pressure: number of said class of registers needed to 76 // store the live virtual and real registers. 77 // We do care only of SGPR32 and VGPR32 and do track only virtual registers. 78 // Pressure of additional registers required inside the block. 79 std::vector<unsigned> InternalAdditionnalPressure; 80 // Pressure of input and output registers 81 std::vector<unsigned> LiveInPressure; 82 std::vector<unsigned> LiveOutPressure; 83 // Registers required by the block, and outputs. 84 // We do track only virtual registers. 85 // Note that some registers are not 32 bits, 86 // and thus the pressure is not equal 87 // to the number of live registers. 88 std::set<unsigned> LiveInRegs; 89 std::set<unsigned> LiveOutRegs; 90 91 bool Scheduled = false; 92 bool HighLatencyBlock = false; 93 94 std::vector<unsigned> HasLowLatencyNonWaitedParent; 95 96 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table. 97 unsigned ID; 98 99 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors. 100 // All blocks successors, and the kind of link 101 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs; 102 unsigned NumHighLatencySuccessors = 0; 103 104 public: SIScheduleBlock(SIScheduleDAGMI * DAG,SIScheduleBlockCreator * BC,unsigned ID)105 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, 106 unsigned ID): 107 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {} 108 109 ~SIScheduleBlock() = default; 110 getID()111 unsigned getID() const { return ID; } 112 113 /// Functions for Block construction. 114 void addUnit(SUnit *SU); 115 116 // When all SUs have been added. 117 void finalizeUnits(); 118 119 // Add block pred, which has instruction predecessor of SU. 120 void addPred(SIScheduleBlock *Pred); 121 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind); 122 getPreds()123 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; } 124 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> getSuccs()125 getSuccs() const { return Succs; } 126 127 unsigned Height; // Maximum topdown path length to block without outputs 128 unsigned Depth; // Maximum bottomup path length to block without inputs 129 getNumHighLatencySuccessors()130 unsigned getNumHighLatencySuccessors() const { 131 return NumHighLatencySuccessors; 132 } 133 isHighLatencyBlock()134 bool isHighLatencyBlock() { return HighLatencyBlock; } 135 136 // This is approximative. 137 // Ideally should take into accounts some instructions (rcp, etc) 138 // are 4 times slower. getCost()139 int getCost() { return SUnits.size(); } 140 141 // The block Predecessors and Successors must be all registered 142 // before fastSchedule(). 143 // Fast schedule with no particular requirement. 144 void fastSchedule(); 145 getScheduledUnits()146 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; } 147 148 // Complete schedule that will try to minimize reg pressure and 149 // low latencies, and will fill liveins and liveouts. 150 // Needs all MIs to be grouped between BeginBlock and EndBlock. 151 // The MIs can be moved after the scheduling, 152 // it is just used to allow correct track of live registers. 153 void schedule(MachineBasicBlock::iterator BeginBlock, 154 MachineBasicBlock::iterator EndBlock); 155 isScheduled()156 bool isScheduled() { return Scheduled; } 157 158 // Needs the block to be scheduled inside 159 // TODO: find a way to compute it. getInternalAdditionnalRegUsage()160 std::vector<unsigned> &getInternalAdditionnalRegUsage() { 161 return InternalAdditionnalPressure; 162 } 163 getInRegs()164 std::set<unsigned> &getInRegs() { return LiveInRegs; } getOutRegs()165 std::set<unsigned> &getOutRegs() { return LiveOutRegs; } 166 167 void printDebug(bool Full); 168 169 private: 170 struct SISchedCandidate : SISchedulerCandidate { 171 // The best SUnit candidate. 172 SUnit *SU = nullptr; 173 174 unsigned SGPRUsage; 175 unsigned VGPRUsage; 176 bool IsLowLatency; 177 unsigned LowLatencyOffset; 178 bool HasLowLatencyNonWaitedParent; 179 180 SISchedCandidate() = default; 181 isValidSISchedCandidate182 bool isValid() const { return SU; } 183 184 // Copy the status of another candidate without changing policy. setBestSISchedCandidate185 void setBest(SISchedCandidate &Best) { 186 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 187 SU = Best.SU; 188 Reason = Best.Reason; 189 SGPRUsage = Best.SGPRUsage; 190 VGPRUsage = Best.VGPRUsage; 191 IsLowLatency = Best.IsLowLatency; 192 LowLatencyOffset = Best.LowLatencyOffset; 193 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; 194 } 195 }; 196 197 void undoSchedule(); 198 199 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); 200 void releaseSucc(SUnit *SU, SDep *SuccEdge); 201 // InOrOutBlock: restrict to links pointing inside the block (true), 202 // or restrict to links pointing outside the block (false). 203 void releaseSuccessors(SUnit *SU, bool InOrOutBlock); 204 205 void nodeScheduled(SUnit *SU); 206 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); 207 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); 208 SUnit* pickNode(); 209 void traceCandidate(const SISchedCandidate &Cand); 210 void initRegPressure(MachineBasicBlock::iterator BeginBlock, 211 MachineBasicBlock::iterator EndBlock); 212 }; 213 214 struct SIScheduleBlocks { 215 std::vector<SIScheduleBlock*> Blocks; 216 std::vector<int> TopDownIndex2Block; 217 std::vector<int> TopDownBlock2Index; 218 }; 219 220 enum SISchedulerBlockCreatorVariant { 221 LatenciesAlone, 222 LatenciesGrouped, 223 LatenciesAlonePlusConsecutive 224 }; 225 226 class SIScheduleBlockCreator { 227 SIScheduleDAGMI *DAG; 228 // unique_ptr handles freeing memory for us. 229 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; 230 std::map<SISchedulerBlockCreatorVariant, 231 SIScheduleBlocks> Blocks; 232 std::vector<SIScheduleBlock*> CurrentBlocks; 233 std::vector<int> Node2CurrentBlock; 234 235 // Topological sort 236 // Maps topological index to the node number. 237 std::vector<int> TopDownIndex2Block; 238 std::vector<int> TopDownBlock2Index; 239 std::vector<int> BottomUpIndex2Block; 240 241 // 0 -> Color not given. 242 // 1 to SUnits.size() -> Reserved group (you should only add elements to them). 243 // Above -> Other groups. 244 int NextReservedID; 245 int NextNonReservedID; 246 std::vector<int> CurrentColoring; 247 std::vector<int> CurrentTopDownReservedDependencyColoring; 248 std::vector<int> CurrentBottomUpReservedDependencyColoring; 249 250 public: 251 SIScheduleBlockCreator(SIScheduleDAGMI *DAG); 252 ~SIScheduleBlockCreator(); 253 254 SIScheduleBlocks 255 getBlocks(SISchedulerBlockCreatorVariant BlockVariant); 256 257 bool isSUInBlock(SUnit *SU, unsigned ID); 258 259 private: 260 // Give a Reserved color to every high latency. 261 void colorHighLatenciesAlone(); 262 263 // Create groups of high latencies with a Reserved color. 264 void colorHighLatenciesGroups(); 265 266 // Compute coloring for topdown and bottom traversals with 267 // different colors depending on dependencies on Reserved colors. 268 void colorComputeReservedDependencies(); 269 270 // Give color to all non-colored SUs according to Reserved groups dependencies. 271 void colorAccordingToReservedDependencies(); 272 273 // Divides Blocks having no bottom up or top down dependencies on Reserved groups. 274 // The new colors are computed according to the dependencies on the other blocks 275 // formed with colorAccordingToReservedDependencies. 276 void colorEndsAccordingToDependencies(); 277 278 // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 279 void colorForceConsecutiveOrderInGroup(); 280 281 // Merge Constant loads that have all their users into another group to the group. 282 // (TODO: else if all their users depend on the same group, put them there) 283 void colorMergeConstantLoadsNextGroup(); 284 285 // Merge SUs that have all their users into another group to the group 286 void colorMergeIfPossibleNextGroup(); 287 288 // Merge SUs that have all their users into another group to the group, 289 // but only for Reserved groups. 290 void colorMergeIfPossibleNextGroupOnlyForReserved(); 291 292 // Merge SUs that have all their users into another group to the group, 293 // but only if the group is no more than a few SUs. 294 void colorMergeIfPossibleSmallGroupsToNextGroup(); 295 296 // Divides Blocks with important size. 297 // Idea of implementation: attribute new colors depending on topdown and 298 // bottom up links to other blocks. 299 void cutHugeBlocks(); 300 301 // Put in one group all instructions with no users in this scheduling region 302 // (we'd want these groups be at the end). 303 void regroupNoUserInstructions(); 304 305 // Give Reserved color to export instructions 306 void colorExports(); 307 308 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); 309 310 void topologicalSort(); 311 312 void scheduleInsideBlocks(); 313 314 void fillStats(); 315 }; 316 317 enum SISchedulerBlockSchedulerVariant { 318 BlockLatencyRegUsage, 319 BlockRegUsageLatency, 320 BlockRegUsage 321 }; 322 323 class SIScheduleBlockScheduler { 324 SIScheduleDAGMI *DAG; 325 SISchedulerBlockSchedulerVariant Variant; 326 std::vector<SIScheduleBlock*> Blocks; 327 328 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages; 329 std::set<unsigned> LiveRegs; 330 // Num of schedulable unscheduled blocks reading the register. 331 std::map<unsigned, unsigned> LiveRegsConsumers; 332 333 std::vector<unsigned> LastPosHighLatencyParentScheduled; 334 int LastPosWaitedHighLatency; 335 336 std::vector<SIScheduleBlock*> BlocksScheduled; 337 unsigned NumBlockScheduled; 338 std::vector<SIScheduleBlock*> ReadyBlocks; 339 340 unsigned VregCurrentUsage; 341 unsigned SregCurrentUsage; 342 343 // Currently is only approximation. 344 unsigned maxVregUsage; 345 unsigned maxSregUsage; 346 347 std::vector<unsigned> BlockNumPredsLeft; 348 std::vector<unsigned> BlockNumSuccsLeft; 349 350 public: 351 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 352 SISchedulerBlockSchedulerVariant Variant, 353 SIScheduleBlocks BlocksStruct); 354 ~SIScheduleBlockScheduler() = default; 355 getBlocks()356 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; } 357 getVGPRUsage()358 unsigned getVGPRUsage() { return maxVregUsage; } getSGPRUsage()359 unsigned getSGPRUsage() { return maxSregUsage; } 360 361 private: 362 struct SIBlockSchedCandidate : SISchedulerCandidate { 363 // The best Block candidate. 364 SIScheduleBlock *Block = nullptr; 365 366 bool IsHighLatency; 367 int VGPRUsageDiff; 368 unsigned NumSuccessors; 369 unsigned NumHighLatencySuccessors; 370 unsigned LastPosHighLatParentScheduled; 371 unsigned Height; 372 373 SIBlockSchedCandidate() = default; 374 isValidSIBlockSchedCandidate375 bool isValid() const { return Block; } 376 377 // Copy the status of another candidate without changing policy. setBestSIBlockSchedCandidate378 void setBest(SIBlockSchedCandidate &Best) { 379 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 380 Block = Best.Block; 381 Reason = Best.Reason; 382 IsHighLatency = Best.IsHighLatency; 383 VGPRUsageDiff = Best.VGPRUsageDiff; 384 NumSuccessors = Best.NumSuccessors; 385 NumHighLatencySuccessors = Best.NumHighLatencySuccessors; 386 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; 387 Height = Best.Height; 388 } 389 }; 390 391 bool tryCandidateLatency(SIBlockSchedCandidate &Cand, 392 SIBlockSchedCandidate &TryCand); 393 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 394 SIBlockSchedCandidate &TryCand); 395 SIScheduleBlock *pickBlock(); 396 397 void addLiveRegs(std::set<unsigned> &Regs); 398 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); 399 void releaseBlockSuccs(SIScheduleBlock *Parent); 400 void blockScheduled(SIScheduleBlock *Block); 401 402 // Check register pressure change 403 // by scheduling a block with these LiveIn and LiveOut. 404 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, 405 std::set<unsigned> &OutRegs); 406 407 void schedule(); 408 }; 409 410 struct SIScheduleBlockResult { 411 std::vector<unsigned> SUs; 412 unsigned MaxSGPRUsage; 413 unsigned MaxVGPRUsage; 414 }; 415 416 class SIScheduler { 417 SIScheduleDAGMI *DAG; 418 SIScheduleBlockCreator BlockCreator; 419 420 public: SIScheduler(SIScheduleDAGMI * DAG)421 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {} 422 423 ~SIScheduler() = default; 424 425 struct SIScheduleBlockResult 426 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 427 SISchedulerBlockSchedulerVariant ScheduleVariant); 428 }; 429 430 class SIScheduleDAGMI final : public ScheduleDAGMILive { 431 const SIInstrInfo *SITII; 432 const SIRegisterInfo *SITRI; 433 434 std::vector<SUnit> SUnitsLinksBackup; 435 436 // For moveLowLatencies. After all Scheduling variants are tested. 437 std::vector<unsigned> ScheduledSUnits; 438 std::vector<unsigned> ScheduledSUnitsInv; 439 440 unsigned VGPRSetID; 441 unsigned SGPRSetID; 442 443 public: 444 SIScheduleDAGMI(MachineSchedContext *C); 445 446 ~SIScheduleDAGMI() override; 447 448 // Entry point for the schedule. 449 void schedule() override; 450 451 // To init Block's RPTracker. initRPTracker(RegPressureTracker & RPTracker)452 void initRPTracker(RegPressureTracker &RPTracker) { 453 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); 454 } 455 getBB()456 MachineBasicBlock *getBB() { return BB; } getCurrentTop()457 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; } getCurrentBottom()458 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; } getLIS()459 LiveIntervals *getLIS() { return LIS; } getMRI()460 MachineRegisterInfo *getMRI() { return &MRI; } getTRI()461 const TargetRegisterInfo *getTRI() { return TRI; } GetTopo()462 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; } getEntrySU()463 SUnit& getEntrySU() { return EntrySU; } getExitSU()464 SUnit& getExitSU() { return ExitSU; } 465 466 void restoreSULinksLeft(); 467 468 template<typename _Iterator> void fillVgprSgprCost(_Iterator First, 469 _Iterator End, 470 unsigned &VgprUsage, 471 unsigned &SgprUsage); 472 getInRegs()473 std::set<unsigned> getInRegs() { 474 std::set<unsigned> InRegs; 475 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 476 InRegs.insert(RegMaskPair.RegUnit); 477 } 478 return InRegs; 479 } 480 getOutRegs()481 std::set<unsigned> getOutRegs() { 482 std::set<unsigned> OutRegs; 483 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { 484 OutRegs.insert(RegMaskPair.RegUnit); 485 } 486 return OutRegs; 487 }; 488 getVGPRSetID()489 unsigned getVGPRSetID() const { return VGPRSetID; } getSGPRSetID()490 unsigned getSGPRSetID() const { return SGPRSetID; } 491 492 private: 493 void topologicalSort(); 494 // After scheduling is done, improve low latency placements. 495 void moveLowLatencies(); 496 497 public: 498 // Some stats for scheduling inside blocks. 499 std::vector<unsigned> IsLowLatencySU; 500 std::vector<unsigned> LowLatencyOffset; 501 std::vector<unsigned> IsHighLatencySU; 502 // Topological sort 503 // Maps topological index to the node number. 504 std::vector<int> TopDownIndex2SU; 505 std::vector<int> BottomUpIndex2SU; 506 }; 507 508 } // end namespace llvm 509 510 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 511