1 //===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI Machine Scheduler interface 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 16 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H 17 18 #include "SIInstrInfo.h" 19 #include "llvm/CodeGen/MachineScheduler.h" 20 #include "llvm/CodeGen/RegisterPressure.h" 21 22 using namespace llvm; 23 24 namespace llvm { 25 26 enum SIScheduleCandReason { 27 NoCand, 28 RegUsage, 29 Latency, 30 Successor, 31 Depth, 32 NodeOrder 33 }; 34 35 struct SISchedulerCandidate { 36 // The reason for this candidate. 37 SIScheduleCandReason Reason; 38 39 // Set of reasons that apply to multiple candidates. 40 uint32_t RepeatReasonSet; 41 SISchedulerCandidateSISchedulerCandidate42 SISchedulerCandidate() 43 : Reason(NoCand), RepeatReasonSet(0) {} 44 isRepeatSISchedulerCandidate45 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); } setRepeatSISchedulerCandidate46 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); } 47 }; 48 49 class SIScheduleDAGMI; 50 class SIScheduleBlockCreator; 51 52 class SIScheduleBlock { 53 SIScheduleDAGMI *DAG; 54 SIScheduleBlockCreator *BC; 55 56 std::vector<SUnit*> SUnits; 57 std::map<unsigned, unsigned> NodeNum2Index; 58 std::vector<SUnit*> TopReadySUs; 59 std::vector<SUnit*> ScheduledSUnits; 60 61 /// The top of the unscheduled zone. 62 IntervalPressure TopPressure; 63 RegPressureTracker TopRPTracker; 64 65 // Pressure: number of said class of registers needed to 66 // store the live virtual and real registers. 67 // We do care only of SGPR32 and VGPR32 and do track only virtual registers. 68 // Pressure of additional registers required inside the block. 69 std::vector<unsigned> InternalAdditionnalPressure; 70 // Pressure of input and output registers 71 std::vector<unsigned> LiveInPressure; 72 std::vector<unsigned> LiveOutPressure; 73 // Registers required by the block, and outputs. 74 // We do track only virtual registers. 75 // Note that some registers are not 32 bits, 76 // and thus the pressure is not equal 77 // to the number of live registers. 78 std::set<unsigned> LiveInRegs; 79 std::set<unsigned> LiveOutRegs; 80 81 bool Scheduled; 82 bool HighLatencyBlock; 83 84 std::vector<unsigned> HasLowLatencyNonWaitedParent; 85 86 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table. 87 unsigned ID; 88 89 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors. 90 std::vector<SIScheduleBlock*> Succs; // All blocks successors. 91 unsigned NumHighLatencySuccessors; 92 93 public: SIScheduleBlock(SIScheduleDAGMI * DAG,SIScheduleBlockCreator * BC,unsigned ID)94 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC, 95 unsigned ID): 96 DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(), 97 TopRPTracker(TopPressure), Scheduled(false), 98 HighLatencyBlock(false), ID(ID), 99 Preds(), Succs(), NumHighLatencySuccessors(0) {}; 100 ~SIScheduleBlock()101 ~SIScheduleBlock() {}; 102 getID()103 unsigned getID() const { return ID; } 104 105 /// Functions for Block construction. 106 void addUnit(SUnit *SU); 107 108 // When all SUs have been added. 109 void finalizeUnits(); 110 111 // Add block pred, which has instruction predecessor of SU. 112 void addPred(SIScheduleBlock *Pred); 113 void addSucc(SIScheduleBlock *Succ); 114 getPreds()115 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; } getSuccs()116 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; } 117 118 unsigned Height; // Maximum topdown path length to block without outputs 119 unsigned Depth; // Maximum bottomup path length to block without inputs 120 getNumHighLatencySuccessors()121 unsigned getNumHighLatencySuccessors() const { 122 return NumHighLatencySuccessors; 123 } 124 isHighLatencyBlock()125 bool isHighLatencyBlock() { return HighLatencyBlock; } 126 127 // This is approximative. 128 // Ideally should take into accounts some instructions (rcp, etc) 129 // are 4 times slower. getCost()130 int getCost() { return SUnits.size(); } 131 132 // The block Predecessors and Successors must be all registered 133 // before fastSchedule(). 134 // Fast schedule with no particular requirement. 135 void fastSchedule(); 136 getScheduledUnits()137 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; } 138 139 // Complete schedule that will try to minimize reg pressure and 140 // low latencies, and will fill liveins and liveouts. 141 // Needs all MIs to be grouped between BeginBlock and EndBlock. 142 // The MIs can be moved after the scheduling, 143 // it is just used to allow correct track of live registers. 144 void schedule(MachineBasicBlock::iterator BeginBlock, 145 MachineBasicBlock::iterator EndBlock); 146 isScheduled()147 bool isScheduled() { return Scheduled; } 148 149 150 // Needs the block to be scheduled inside 151 // TODO: find a way to compute it. getInternalAdditionnalRegUsage()152 std::vector<unsigned> &getInternalAdditionnalRegUsage() { 153 return InternalAdditionnalPressure; 154 } 155 getInRegs()156 std::set<unsigned> &getInRegs() { return LiveInRegs; } getOutRegs()157 std::set<unsigned> &getOutRegs() { return LiveOutRegs; } 158 159 void printDebug(bool Full); 160 161 private: 162 struct SISchedCandidate : SISchedulerCandidate { 163 // The best SUnit candidate. 164 SUnit *SU; 165 166 unsigned SGPRUsage; 167 unsigned VGPRUsage; 168 bool IsLowLatency; 169 unsigned LowLatencyOffset; 170 bool HasLowLatencyNonWaitedParent; 171 SISchedCandidateSISchedCandidate172 SISchedCandidate() 173 : SU(nullptr) {} 174 isValidSISchedCandidate175 bool isValid() const { return SU; } 176 177 // Copy the status of another candidate without changing policy. setBestSISchedCandidate178 void setBest(SISchedCandidate &Best) { 179 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 180 SU = Best.SU; 181 Reason = Best.Reason; 182 SGPRUsage = Best.SGPRUsage; 183 VGPRUsage = Best.VGPRUsage; 184 IsLowLatency = Best.IsLowLatency; 185 LowLatencyOffset = Best.LowLatencyOffset; 186 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent; 187 } 188 }; 189 190 void undoSchedule(); 191 192 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge); 193 void releaseSucc(SUnit *SU, SDep *SuccEdge); 194 // InOrOutBlock: restrict to links pointing inside the block (true), 195 // or restrict to links pointing outside the block (false). 196 void releaseSuccessors(SUnit *SU, bool InOrOutBlock); 197 198 void nodeScheduled(SUnit *SU); 199 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand); 200 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand); 201 SUnit* pickNode(); 202 void traceCandidate(const SISchedCandidate &Cand); 203 void initRegPressure(MachineBasicBlock::iterator BeginBlock, 204 MachineBasicBlock::iterator EndBlock); 205 }; 206 207 struct SIScheduleBlocks { 208 std::vector<SIScheduleBlock*> Blocks; 209 std::vector<int> TopDownIndex2Block; 210 std::vector<int> TopDownBlock2Index; 211 }; 212 213 enum SISchedulerBlockCreatorVariant { 214 LatenciesAlone, 215 LatenciesGrouped, 216 LatenciesAlonePlusConsecutive 217 }; 218 219 class SIScheduleBlockCreator { 220 SIScheduleDAGMI *DAG; 221 // unique_ptr handles freeing memory for us. 222 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs; 223 std::map<SISchedulerBlockCreatorVariant, 224 SIScheduleBlocks> Blocks; 225 std::vector<SIScheduleBlock*> CurrentBlocks; 226 std::vector<int> Node2CurrentBlock; 227 228 // Topological sort 229 // Maps topological index to the node number. 230 std::vector<int> TopDownIndex2Block; 231 std::vector<int> TopDownBlock2Index; 232 std::vector<int> BottomUpIndex2Block; 233 234 // 0 -> Color not given. 235 // 1 to SUnits.size() -> Reserved group (you should only add elements to them). 236 // Above -> Other groups. 237 int NextReservedID; 238 int NextNonReservedID; 239 std::vector<int> CurrentColoring; 240 std::vector<int> CurrentTopDownReservedDependencyColoring; 241 std::vector<int> CurrentBottomUpReservedDependencyColoring; 242 243 public: 244 SIScheduleBlockCreator(SIScheduleDAGMI *DAG); 245 ~SIScheduleBlockCreator(); 246 247 SIScheduleBlocks 248 getBlocks(SISchedulerBlockCreatorVariant BlockVariant); 249 250 bool isSUInBlock(SUnit *SU, unsigned ID); 251 252 private: 253 // Give a Reserved color to every high latency. 254 void colorHighLatenciesAlone(); 255 256 // Create groups of high latencies with a Reserved color. 257 void colorHighLatenciesGroups(); 258 259 // Compute coloring for topdown and bottom traversals with 260 // different colors depending on dependencies on Reserved colors. 261 void colorComputeReservedDependencies(); 262 263 // Give color to all non-colored SUs according to Reserved groups dependencies. 264 void colorAccordingToReservedDependencies(); 265 266 // Divides Blocks having no bottom up or top down dependencies on Reserved groups. 267 // The new colors are computed according to the dependencies on the other blocks 268 // formed with colorAccordingToReservedDependencies. 269 void colorEndsAccordingToDependencies(); 270 271 // Cut groups into groups with SUs in consecutive order (except for Reserved groups). 272 void colorForceConsecutiveOrderInGroup(); 273 274 // Merge Constant loads that have all their users into another group to the group. 275 // (TODO: else if all their users depend on the same group, put them there) 276 void colorMergeConstantLoadsNextGroup(); 277 278 // Merge SUs that have all their users into another group to the group 279 void colorMergeIfPossibleNextGroup(); 280 281 // Merge SUs that have all their users into another group to the group, 282 // but only for Reserved groups. 283 void colorMergeIfPossibleNextGroupOnlyForReserved(); 284 285 // Merge SUs that have all their users into another group to the group, 286 // but only if the group is no more than a few SUs. 287 void colorMergeIfPossibleSmallGroupsToNextGroup(); 288 289 // Divides Blocks with important size. 290 // Idea of implementation: attribute new colors depending on topdown and 291 // bottom up links to other blocks. 292 void cutHugeBlocks(); 293 294 // Put in one group all instructions with no users in this scheduling region 295 // (we'd want these groups be at the end). 296 void regroupNoUserInstructions(); 297 298 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant); 299 300 void topologicalSort(); 301 302 void scheduleInsideBlocks(); 303 304 void fillStats(); 305 }; 306 307 enum SISchedulerBlockSchedulerVariant { 308 BlockLatencyRegUsage, 309 BlockRegUsageLatency, 310 BlockRegUsage 311 }; 312 313 class SIScheduleBlockScheduler { 314 SIScheduleDAGMI *DAG; 315 SISchedulerBlockSchedulerVariant Variant; 316 std::vector<SIScheduleBlock*> Blocks; 317 318 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages; 319 std::set<unsigned> LiveRegs; 320 // Num of schedulable unscheduled blocks reading the register. 321 std::map<unsigned, unsigned> LiveRegsConsumers; 322 323 std::vector<unsigned> LastPosHighLatencyParentScheduled; 324 int LastPosWaitedHighLatency; 325 326 std::vector<SIScheduleBlock*> BlocksScheduled; 327 unsigned NumBlockScheduled; 328 std::vector<SIScheduleBlock*> ReadyBlocks; 329 330 unsigned VregCurrentUsage; 331 unsigned SregCurrentUsage; 332 333 // Currently is only approximation. 334 unsigned maxVregUsage; 335 unsigned maxSregUsage; 336 337 std::vector<unsigned> BlockNumPredsLeft; 338 std::vector<unsigned> BlockNumSuccsLeft; 339 340 public: 341 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 342 SISchedulerBlockSchedulerVariant Variant, 343 SIScheduleBlocks BlocksStruct); ~SIScheduleBlockScheduler()344 ~SIScheduleBlockScheduler() {}; 345 getBlocks()346 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }; 347 getVGPRUsage()348 unsigned getVGPRUsage() { return maxVregUsage; }; getSGPRUsage()349 unsigned getSGPRUsage() { return maxSregUsage; }; 350 351 private: 352 struct SIBlockSchedCandidate : SISchedulerCandidate { 353 // The best Block candidate. 354 SIScheduleBlock *Block; 355 356 bool IsHighLatency; 357 int VGPRUsageDiff; 358 unsigned NumSuccessors; 359 unsigned NumHighLatencySuccessors; 360 unsigned LastPosHighLatParentScheduled; 361 unsigned Height; 362 SIBlockSchedCandidateSIBlockSchedCandidate363 SIBlockSchedCandidate() 364 : Block(nullptr) {} 365 isValidSIBlockSchedCandidate366 bool isValid() const { return Block; } 367 368 // Copy the status of another candidate without changing policy. setBestSIBlockSchedCandidate369 void setBest(SIBlockSchedCandidate &Best) { 370 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 371 Block = Best.Block; 372 Reason = Best.Reason; 373 IsHighLatency = Best.IsHighLatency; 374 VGPRUsageDiff = Best.VGPRUsageDiff; 375 NumSuccessors = Best.NumSuccessors; 376 NumHighLatencySuccessors = Best.NumHighLatencySuccessors; 377 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled; 378 Height = Best.Height; 379 } 380 }; 381 382 bool tryCandidateLatency(SIBlockSchedCandidate &Cand, 383 SIBlockSchedCandidate &TryCand); 384 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 385 SIBlockSchedCandidate &TryCand); 386 SIScheduleBlock *pickBlock(); 387 388 void addLiveRegs(std::set<unsigned> &Regs); 389 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs); 390 void releaseBlockSuccs(SIScheduleBlock *Parent); 391 void blockScheduled(SIScheduleBlock *Block); 392 393 // Check register pressure change 394 // by scheduling a block with these LiveIn and LiveOut. 395 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs, 396 std::set<unsigned> &OutRegs); 397 398 void schedule(); 399 }; 400 401 struct SIScheduleBlockResult { 402 std::vector<unsigned> SUs; 403 unsigned MaxSGPRUsage; 404 unsigned MaxVGPRUsage; 405 }; 406 407 class SIScheduler { 408 SIScheduleDAGMI *DAG; 409 SIScheduleBlockCreator BlockCreator; 410 411 public: SIScheduler(SIScheduleDAGMI * DAG)412 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}; 413 ~SIScheduler()414 ~SIScheduler() {}; 415 416 struct SIScheduleBlockResult 417 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 418 SISchedulerBlockSchedulerVariant ScheduleVariant); 419 }; 420 421 class SIScheduleDAGMI final : public ScheduleDAGMILive { 422 const SIInstrInfo *SITII; 423 const SIRegisterInfo *SITRI; 424 425 std::vector<SUnit> SUnitsLinksBackup; 426 427 // For moveLowLatencies. After all Scheduling variants are tested. 428 std::vector<unsigned> ScheduledSUnits; 429 std::vector<unsigned> ScheduledSUnitsInv; 430 431 unsigned VGPRSetID; 432 unsigned SGPRSetID; 433 434 public: 435 SIScheduleDAGMI(MachineSchedContext *C); 436 437 ~SIScheduleDAGMI() override; 438 439 // Entry point for the schedule. 440 void schedule() override; 441 442 // To init Block's RPTracker. initRPTracker(RegPressureTracker & RPTracker)443 void initRPTracker(RegPressureTracker &RPTracker) { 444 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false); 445 } 446 getBB()447 MachineBasicBlock *getBB() { return BB; } getCurrentTop()448 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }; getCurrentBottom()449 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }; getLIS()450 LiveIntervals *getLIS() { return LIS; } getMRI()451 MachineRegisterInfo *getMRI() { return &MRI; } getTRI()452 const TargetRegisterInfo *getTRI() { return TRI; } getEntrySU()453 SUnit& getEntrySU() { return EntrySU; }; getExitSU()454 SUnit& getExitSU() { return ExitSU; }; 455 456 void restoreSULinksLeft(); 457 458 template<typename _Iterator> void fillVgprSgprCost(_Iterator First, 459 _Iterator End, 460 unsigned &VgprUsage, 461 unsigned &SgprUsage); getInRegs()462 std::set<unsigned> getInRegs() { 463 std::set<unsigned> InRegs; 464 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 465 InRegs.insert(RegMaskPair.RegUnit); 466 } 467 return InRegs; 468 }; 469 getVGPRSetID()470 unsigned getVGPRSetID() const { return VGPRSetID; } getSGPRSetID()471 unsigned getSGPRSetID() const { return SGPRSetID; } 472 473 private: 474 void topologicalSort(); 475 // After scheduling is done, improve low latency placements. 476 void moveLowLatencies(); 477 478 public: 479 // Some stats for scheduling inside blocks. 480 std::vector<unsigned> IsLowLatencySU; 481 std::vector<unsigned> LowLatencyOffset; 482 std::vector<unsigned> IsHighLatencySU; 483 // Topological sort 484 // Maps topological index to the node number. 485 std::vector<int> TopDownIndex2SU; 486 std::vector<int> BottomUpIndex2SU; 487 }; 488 489 } // namespace llvm 490 491 #endif /* SIMACHINESCHEDULER_H_ */ 492