1 //===- GCNIterativeScheduler.cpp ------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the class GCNIterativeScheduler.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "GCNIterativeScheduler.h"
15 #include "AMDGPUSubtarget.h"
16 #include "GCNRegPressure.h"
17 #include "GCNSchedStrategy.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/RegisterPressure.h"
26 #include "llvm/CodeGen/ScheduleDAG.h"
27 #include "llvm/Config/llvm-config.h"
28 #include "llvm/Support/Compiler.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <cassert>
33 #include <iterator>
34 #include <limits>
35 #include <memory>
36 #include <type_traits>
37 #include <vector>
38
39 using namespace llvm;
40
41 #define DEBUG_TYPE "machine-scheduler"
42
43 namespace llvm {
44
45 std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
46 const ScheduleDAG &DAG);
47
48 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
49 const ScheduleDAG &DAG);
50 }
51
52 // shim accessors for different order containers
getMachineInstr(MachineInstr * MI)53 static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
54 return MI;
55 }
getMachineInstr(const SUnit * SU)56 static inline MachineInstr *getMachineInstr(const SUnit *SU) {
57 return SU->getInstr();
58 }
getMachineInstr(const SUnit & SU)59 static inline MachineInstr *getMachineInstr(const SUnit &SU) {
60 return SU.getInstr();
61 }
62
63 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
64 LLVM_DUMP_METHOD
printRegion(raw_ostream & OS,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,const LiveIntervals * LIS,unsigned MaxInstNum=std::numeric_limits<unsigned>::max ())65 static void printRegion(raw_ostream &OS,
66 MachineBasicBlock::iterator Begin,
67 MachineBasicBlock::iterator End,
68 const LiveIntervals *LIS,
69 unsigned MaxInstNum =
70 std::numeric_limits<unsigned>::max()) {
71 auto BB = Begin->getParent();
72 OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
73 << BB->getName() << ":\n";
74 auto I = Begin;
75 MaxInstNum = std::max(MaxInstNum, 1u);
76 for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
77 if (!I->isDebugInstr() && LIS)
78 OS << LIS->getInstructionIndex(*I);
79 OS << '\t' << *I;
80 }
81 if (I != End) {
82 OS << "\t...\n";
83 I = std::prev(End);
84 if (!I->isDebugInstr() && LIS)
85 OS << LIS->getInstructionIndex(*I);
86 OS << '\t' << *I;
87 }
88 if (End != BB->end()) { // print boundary inst if present
89 OS << "----\n";
90 if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
91 OS << *End;
92 }
93 }
94
95 LLVM_DUMP_METHOD
printLivenessInfo(raw_ostream & OS,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,const LiveIntervals * LIS)96 static void printLivenessInfo(raw_ostream &OS,
97 MachineBasicBlock::iterator Begin,
98 MachineBasicBlock::iterator End,
99 const LiveIntervals *LIS) {
100 const auto BB = Begin->getParent();
101 const auto &MRI = BB->getParent()->getRegInfo();
102
103 const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
104 OS << "LIn RP: ";
105 getRegPressure(MRI, LiveIns).print(OS);
106
107 const auto BottomMI = End == BB->end() ? std::prev(End) : End;
108 const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
109 OS << "LOt RP: ";
110 getRegPressure(MRI, LiveOuts).print(OS);
111 }
112
113 LLVM_DUMP_METHOD
printRegions(raw_ostream & OS) const114 void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
115 const auto &ST = MF.getSubtarget<GCNSubtarget>();
116 for (const auto R : Regions) {
117 OS << "Region to schedule ";
118 printRegion(OS, R->Begin, R->End, LIS, 1);
119 printLivenessInfo(OS, R->Begin, R->End, LIS);
120 OS << "Max RP: ";
121 R->MaxPressure.print(OS, &ST);
122 }
123 }
124
125 LLVM_DUMP_METHOD
printSchedResult(raw_ostream & OS,const Region * R,const GCNRegPressure & RP) const126 void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
127 const Region *R,
128 const GCNRegPressure &RP) const {
129 OS << "\nAfter scheduling ";
130 printRegion(OS, R->Begin, R->End, LIS);
131 printSchedRP(OS, R->MaxPressure, RP);
132 OS << '\n';
133 }
134
135 LLVM_DUMP_METHOD
printSchedRP(raw_ostream & OS,const GCNRegPressure & Before,const GCNRegPressure & After) const136 void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
137 const GCNRegPressure &Before,
138 const GCNRegPressure &After) const {
139 const auto &ST = MF.getSubtarget<GCNSubtarget>();
140 OS << "RP before: ";
141 Before.print(OS, &ST);
142 OS << "RP after: ";
143 After.print(OS, &ST);
144 }
145 #endif
146
147 // DAG builder helper
148 class GCNIterativeScheduler::BuildDAG {
149 GCNIterativeScheduler &Sch;
150 SmallVector<SUnit *, 8> TopRoots;
151
152 SmallVector<SUnit*, 8> BotRoots;
153 public:
BuildDAG(const Region & R,GCNIterativeScheduler & _Sch)154 BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
155 : Sch(_Sch) {
156 auto BB = R.Begin->getParent();
157 Sch.BaseClass::startBlock(BB);
158 Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
159
160 Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
161 /*TrackLaneMask*/true);
162 Sch.Topo.InitDAGTopologicalSorting();
163 Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
164 }
165
~BuildDAG()166 ~BuildDAG() {
167 Sch.BaseClass::exitRegion();
168 Sch.BaseClass::finishBlock();
169 }
170
getTopRoots() const171 ArrayRef<const SUnit *> getTopRoots() const {
172 return TopRoots;
173 }
getBottomRoots() const174 ArrayRef<SUnit*> getBottomRoots() const {
175 return BotRoots;
176 }
177 };
178
179 class GCNIterativeScheduler::OverrideLegacyStrategy {
180 GCNIterativeScheduler &Sch;
181 Region &Rgn;
182 std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
183 GCNRegPressure SaveMaxRP;
184
185 public:
OverrideLegacyStrategy(Region & R,MachineSchedStrategy & OverrideStrategy,GCNIterativeScheduler & _Sch)186 OverrideLegacyStrategy(Region &R,
187 MachineSchedStrategy &OverrideStrategy,
188 GCNIterativeScheduler &_Sch)
189 : Sch(_Sch)
190 , Rgn(R)
191 , SaveSchedImpl(std::move(_Sch.SchedImpl))
192 , SaveMaxRP(R.MaxPressure) {
193 Sch.SchedImpl.reset(&OverrideStrategy);
194 auto BB = R.Begin->getParent();
195 Sch.BaseClass::startBlock(BB);
196 Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
197 }
198
~OverrideLegacyStrategy()199 ~OverrideLegacyStrategy() {
200 Sch.BaseClass::exitRegion();
201 Sch.BaseClass::finishBlock();
202 Sch.SchedImpl.release();
203 Sch.SchedImpl = std::move(SaveSchedImpl);
204 }
205
schedule()206 void schedule() {
207 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
208 LLVM_DEBUG(dbgs() << "\nScheduling ";
209 printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
210 Sch.BaseClass::schedule();
211
212 // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
213 Sch.RegionEnd = Rgn.End;
214 //assert(Rgn.End == Sch.RegionEnd);
215 Rgn.Begin = Sch.RegionBegin;
216 Rgn.MaxPressure.clear();
217 }
218
restoreOrder()219 void restoreOrder() {
220 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
221 // DAG SUnits are stored using original region's order
222 // so just use SUnits as the restoring schedule
223 Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
224 }
225 };
226
227 namespace {
228
229 // just a stub to make base class happy
230 class SchedStrategyStub : public MachineSchedStrategy {
231 public:
shouldTrackPressure() const232 bool shouldTrackPressure() const override { return false; }
shouldTrackLaneMasks() const233 bool shouldTrackLaneMasks() const override { return false; }
initialize(ScheduleDAGMI * DAG)234 void initialize(ScheduleDAGMI *DAG) override {}
pickNode(bool & IsTopNode)235 SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
schedNode(SUnit * SU,bool IsTopNode)236 void schedNode(SUnit *SU, bool IsTopNode) override {}
releaseTopNode(SUnit * SU)237 void releaseTopNode(SUnit *SU) override {}
releaseBottomNode(SUnit * SU)238 void releaseBottomNode(SUnit *SU) override {}
239 };
240
241 } // end anonymous namespace
242
GCNIterativeScheduler(MachineSchedContext * C,StrategyKind S)243 GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
244 StrategyKind S)
245 : BaseClass(C, std::make_unique<SchedStrategyStub>())
246 , Context(C)
247 , Strategy(S)
248 , UPTracker(*LIS) {
249 }
250
251 // returns max pressure for a region
252 GCNRegPressure
getRegionPressure(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End) const253 GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
254 MachineBasicBlock::iterator End)
255 const {
256 // For the purpose of pressure tracking bottom inst of the region should
257 // be also processed. End is either BB end, BB terminator inst or sched
258 // boundary inst.
259 auto const BBEnd = Begin->getParent()->end();
260 auto const BottomMI = End == BBEnd ? std::prev(End) : End;
261
262 // scheduleRegions walks bottom to top, so its likely we just get next
263 // instruction to track
264 auto AfterBottomMI = std::next(BottomMI);
265 if (AfterBottomMI == BBEnd ||
266 &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
267 UPTracker.reset(*BottomMI);
268 } else {
269 assert(UPTracker.isValid());
270 }
271
272 for (auto I = BottomMI; I != Begin; --I)
273 UPTracker.recede(*I);
274
275 UPTracker.recede(*Begin);
276
277 assert(UPTracker.isValid() ||
278 (dbgs() << "Tracked region ",
279 printRegion(dbgs(), Begin, End, LIS), false));
280 return UPTracker.moveMaxPressure();
281 }
282
283 // returns max pressure for a tentative schedule
284 template <typename Range> GCNRegPressure
getSchedulePressure(const Region & R,Range && Schedule) const285 GCNIterativeScheduler::getSchedulePressure(const Region &R,
286 Range &&Schedule) const {
287 auto const BBEnd = R.Begin->getParent()->end();
288 GCNUpwardRPTracker RPTracker(*LIS);
289 if (R.End != BBEnd) {
290 // R.End points to the boundary instruction but the
291 // schedule doesn't include it
292 RPTracker.reset(*R.End);
293 RPTracker.recede(*R.End);
294 } else {
295 // R.End doesn't point to the boundary instruction
296 RPTracker.reset(*std::prev(BBEnd));
297 }
298 for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
299 RPTracker.recede(*getMachineInstr(*--I));
300 }
301 return RPTracker.moveMaxPressure();
302 }
303
enterRegion(MachineBasicBlock * BB,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)304 void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
305 MachineBasicBlock::iterator Begin,
306 MachineBasicBlock::iterator End,
307 unsigned NumRegionInstrs) {
308 BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
309 if (NumRegionInstrs > 2) {
310 Regions.push_back(
311 new (Alloc.Allocate())
312 Region { Begin, End, NumRegionInstrs,
313 getRegionPressure(Begin, End), nullptr });
314 }
315 }
316
schedule()317 void GCNIterativeScheduler::schedule() { // overriden
318 // do nothing
319 LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
320 if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
321 dbgs() << "Max RP: ";
322 Regions.back()->MaxPressure.print(
323 dbgs(), &MF.getSubtarget<GCNSubtarget>());
324 } dbgs()
325 << '\n';);
326 }
327
finalizeSchedule()328 void GCNIterativeScheduler::finalizeSchedule() { // overriden
329 if (Regions.empty())
330 return;
331 switch (Strategy) {
332 case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
333 case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
334 case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
335 case SCHEDULE_ILP: scheduleILP(false); break;
336 }
337 }
338
339 // Detach schedule from SUnits and interleave it with debug values.
340 // Returned schedule becomes independent of DAG state.
341 std::vector<MachineInstr*>
detachSchedule(ScheduleRef Schedule) const342 GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
343 std::vector<MachineInstr*> Res;
344 Res.reserve(Schedule.size() * 2);
345
346 if (FirstDbgValue)
347 Res.push_back(FirstDbgValue);
348
349 const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
350 for (auto SU : Schedule) {
351 Res.push_back(SU->getInstr());
352 const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
353 return P.second == SU->getInstr();
354 });
355 if (D != DbgE)
356 Res.push_back(D->first);
357 }
358 return Res;
359 }
360
setBestSchedule(Region & R,ScheduleRef Schedule,const GCNRegPressure & MaxRP)361 void GCNIterativeScheduler::setBestSchedule(Region &R,
362 ScheduleRef Schedule,
363 const GCNRegPressure &MaxRP) {
364 R.BestSchedule.reset(
365 new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
366 }
367
scheduleBest(Region & R)368 void GCNIterativeScheduler::scheduleBest(Region &R) {
369 assert(R.BestSchedule.get() && "No schedule specified");
370 scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
371 R.BestSchedule.reset();
372 }
373
374 // minimal required region scheduler, works for ranges of SUnits*,
375 // SUnits or MachineIntrs*
376 template <typename Range>
scheduleRegion(Region & R,Range && Schedule,const GCNRegPressure & MaxRP)377 void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
378 const GCNRegPressure &MaxRP) {
379 assert(RegionBegin == R.Begin && RegionEnd == R.End);
380 assert(LIS != nullptr);
381 #ifndef NDEBUG
382 const auto SchedMaxRP = getSchedulePressure(R, Schedule);
383 #endif
384 auto BB = R.Begin->getParent();
385 auto Top = R.Begin;
386 for (const auto &I : Schedule) {
387 auto MI = getMachineInstr(I);
388 if (MI != &*Top) {
389 BB->remove(MI);
390 BB->insert(Top, MI);
391 if (!MI->isDebugInstr())
392 LIS->handleMove(*MI, true);
393 }
394 if (!MI->isDebugInstr()) {
395 // Reset read - undef flags and update them later.
396 for (auto &Op : MI->operands())
397 if (Op.isReg() && Op.isDef())
398 Op.setIsUndef(false);
399
400 RegisterOperands RegOpers;
401 RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
402 /*IgnoreDead*/false);
403 // Adjust liveness and add missing dead+read-undef flags.
404 auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
405 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
406 }
407 Top = std::next(MI->getIterator());
408 }
409 RegionBegin = getMachineInstr(Schedule.front());
410
411 // Schedule consisting of MachineInstr* is considered 'detached'
412 // and already interleaved with debug values
413 if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
414 placeDebugValues();
415 // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
416 //assert(R.End == RegionEnd);
417 RegionEnd = R.End;
418 }
419
420 R.Begin = RegionBegin;
421 R.MaxPressure = MaxRP;
422
423 #ifndef NDEBUG
424 const auto RegionMaxRP = getRegionPressure(R);
425 const auto &ST = MF.getSubtarget<GCNSubtarget>();
426 #endif
427 assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
428 || (dbgs() << "Max RP mismatch!!!\n"
429 "RP for schedule (calculated): ",
430 SchedMaxRP.print(dbgs(), &ST),
431 dbgs() << "RP for schedule (reported): ",
432 MaxRP.print(dbgs(), &ST),
433 dbgs() << "RP after scheduling: ",
434 RegionMaxRP.print(dbgs(), &ST),
435 false));
436 }
437
438 // Sort recorded regions by pressure - highest at the front
sortRegionsByPressure(unsigned TargetOcc)439 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
440 const auto &ST = MF.getSubtarget<GCNSubtarget>();
441 llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
442 return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
443 });
444 }
445
446 ///////////////////////////////////////////////////////////////////////////////
447 // Legacy MaxOccupancy Strategy
448
449 // Tries to increase occupancy applying minreg scheduler for a sequence of
450 // most demanding regions. Obtained schedules are saved as BestSchedule for a
451 // region.
452 // TargetOcc is the best achievable occupancy for a kernel.
453 // Returns better occupancy on success or current occupancy on fail.
454 // BestSchedules aren't deleted on fail.
tryMaximizeOccupancy(unsigned TargetOcc)455 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
456 // TODO: assert Regions are sorted descending by pressure
457 const auto &ST = MF.getSubtarget<GCNSubtarget>();
458 const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
459 LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
460 << ", current = " << Occ << '\n');
461
462 auto NewOcc = TargetOcc;
463 for (auto R : Regions) {
464 if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
465 break;
466
467 LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
468 printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
469
470 BuildDAG DAG(*R, *this);
471 const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
472 const auto MaxRP = getSchedulePressure(*R, MinSchedule);
473 LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
474 printSchedRP(dbgs(), R->MaxPressure, MaxRP));
475
476 NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
477 if (NewOcc <= Occ)
478 break;
479
480 setBestSchedule(*R, MinSchedule, MaxRP);
481 }
482 LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
483 << ", prev occupancy = " << Occ << '\n');
484 if (NewOcc > Occ) {
485 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
486 MFI->increaseOccupancy(MF, NewOcc);
487 }
488
489 return std::max(NewOcc, Occ);
490 }
491
scheduleLegacyMaxOccupancy(bool TryMaximizeOccupancy)492 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
493 bool TryMaximizeOccupancy) {
494 const auto &ST = MF.getSubtarget<GCNSubtarget>();
495 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
496 auto TgtOcc = MFI->getMinAllowedOccupancy();
497
498 sortRegionsByPressure(TgtOcc);
499 auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
500
501 if (TryMaximizeOccupancy && Occ < TgtOcc)
502 Occ = tryMaximizeOccupancy(TgtOcc);
503
504 // This is really weird but for some magic scheduling regions twice
505 // gives performance improvement
506 const int NumPasses = Occ < TgtOcc ? 2 : 1;
507
508 TgtOcc = std::min(Occ, TgtOcc);
509 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
510 "target occupancy = "
511 << TgtOcc << '\n');
512 GCNMaxOccupancySchedStrategy LStrgy(Context);
513 unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
514
515 for (int I = 0; I < NumPasses; ++I) {
516 // running first pass with TargetOccupancy = 0 mimics previous scheduling
517 // approach and is a performance magic
518 LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
519 for (auto R : Regions) {
520 OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
521
522 Ovr.schedule();
523 const auto RP = getRegionPressure(*R);
524 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
525
526 if (RP.getOccupancy(ST) < TgtOcc) {
527 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
528 if (R->BestSchedule.get() &&
529 R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
530 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
531 scheduleBest(*R);
532 } else {
533 LLVM_DEBUG(dbgs() << ", restoring\n");
534 Ovr.restoreOrder();
535 assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
536 }
537 }
538 FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
539 }
540 }
541 MFI->limitOccupancy(FinalOccupancy);
542 }
543
544 ///////////////////////////////////////////////////////////////////////////////
545 // Minimal Register Strategy
546
scheduleMinReg(bool force)547 void GCNIterativeScheduler::scheduleMinReg(bool force) {
548 const auto &ST = MF.getSubtarget<GCNSubtarget>();
549 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
550 const auto TgtOcc = MFI->getOccupancy();
551 sortRegionsByPressure(TgtOcc);
552
553 auto MaxPressure = Regions.front()->MaxPressure;
554 for (auto R : Regions) {
555 if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
556 break;
557
558 BuildDAG DAG(*R, *this);
559 const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
560
561 const auto RP = getSchedulePressure(*R, MinSchedule);
562 LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
563 dbgs() << "\nWarning: Pressure becomes worse after minreg!";
564 printSchedRP(dbgs(), R->MaxPressure, RP);
565 });
566
567 if (!force && MaxPressure.less(ST, RP, TgtOcc))
568 break;
569
570 scheduleRegion(*R, MinSchedule, RP);
571 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
572
573 MaxPressure = RP;
574 }
575 }
576
577 ///////////////////////////////////////////////////////////////////////////////
578 // ILP scheduler port
579
scheduleILP(bool TryMaximizeOccupancy)580 void GCNIterativeScheduler::scheduleILP(
581 bool TryMaximizeOccupancy) {
582 const auto &ST = MF.getSubtarget<GCNSubtarget>();
583 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
584 auto TgtOcc = MFI->getMinAllowedOccupancy();
585
586 sortRegionsByPressure(TgtOcc);
587 auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
588
589 if (TryMaximizeOccupancy && Occ < TgtOcc)
590 Occ = tryMaximizeOccupancy(TgtOcc);
591
592 TgtOcc = std::min(Occ, TgtOcc);
593 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
594 "target occupancy = "
595 << TgtOcc << '\n');
596
597 unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
598 for (auto R : Regions) {
599 BuildDAG DAG(*R, *this);
600 const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
601
602 const auto RP = getSchedulePressure(*R, ILPSchedule);
603 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
604
605 if (RP.getOccupancy(ST) < TgtOcc) {
606 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
607 if (R->BestSchedule.get() &&
608 R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
609 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
610 scheduleBest(*R);
611 }
612 } else {
613 scheduleRegion(*R, ILPSchedule, RP);
614 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
615 FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
616 }
617 }
618 MFI->limitOccupancy(FinalOccupancy);
619 }
620