//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "HexagonMachineScheduler.h"

#include <queue>

using namespace llvm;

static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
                                  cl::desc("Force top-down list scheduling"));
static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
                                   cl::desc("Force bottom-up list scheduling"));

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
                                      MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool VLIWMachineScheduler::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
                                       MachineBasicBlock::iterator begin,
                                       MachineBasicBlock::iterator end,
                                       unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Set up the register pressure trackers for the top and bottom scheduled
// regions.
void VLIWMachineScheduler::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << " Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");

  TotalPackets = 0;
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void VLIWMachineScheduler::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// Check whether this SU can be scheduled in the current packet.
/// The check is not precise (it is stateful); it is best treated as one
/// more heuristic, and many of its corner cases were tuned empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(SU->getInstr()))
      return false;
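  // The pseudo-operations listed below are never checked against the DFA;
  // only the dependence check that follows applies to them.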
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }

  // Now check that this instruction does not depend on any instruction
  // already in the packet.
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    if (Packet[i]->Succs.size() == 0)
      continue;
    for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
         E = Packet[i]->Succs.end(); I != E; ++I) {
      // Since we do not add pseudos to packets, might as well
      // ignore order dependencies.
      if (I->isCtrl())
        continue;

      if (I->getSUnit() == SU)
        return false;
    }
  }
  return true;
}

/// Keep track of available resources.
bool VLIWResourceModel::reserveResources(SUnit *SU) {
  bool startNewCycle = false;
  // If this SU does not fit in the packet
  // start a new one.
  if (!isResourceAvailable(SU)) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
    startNewCycle = true;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    DEBUG(dbgs() << "\t[" << i << "] SU(");
    DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  // If packet is now full, reset the state so in the next cycle
  // we start fresh.
  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
    startNewCycle = true;
  }

  return startNewCycle;
}

// Release all DAG roots for scheduling.
void VLIWMachineScheduler::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    // A SUnit is ready to top schedule if it has no predecessors.
    if (I->Preds.empty())
      SchedImpl->releaseTopNode(&(*I));
    // A SUnit is ready to bottom schedule if it has no successors.
    if (I->Succs.empty())
      BotRoots.push_back(&(*I));
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  DEBUG(dbgs()
        << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
        << " " << BB->getName()
        << " in_func " << BB->getParent()->getFunction()->getName()
        << " at loop depth " << MLI->getLoopDepth(BB)
        << " \n");

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();

  // To view Height/Depth correctly, they should be accessed at least once.
  DEBUG(unsigned maxH = 0;
        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          if (SUnits[su].getHeight() > maxH)
            maxH = SUnits[su].getHeight();
        dbgs() << "Max Height " << maxH << "\n";);
  DEBUG(unsigned maxD = 0;
        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          if (SUnits[su].getDepth() > maxD)
            maxD = SUnits[su].getDepth();
        dbgs() << "Max Depth " << maxD << "\n";);
  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  SchedImpl->initialize(this);

  // Release edges from the special Entry node or to the special Exit node.
  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  // Release all DAG roots for scheduling.
  releaseRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
  bool IsTopNode = false;
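  // Main scheduling loop: pick a node from the strategy, place its
  // instruction at the current top or bottom boundary, update register
  // pressure, and release the instructions that depend on it.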
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    if (!checkSchedLimit())
      break;

    // Move the instruction to its new location in the instruction stream.
    MachineInstr *MI = SU->getInstr();

    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else {
        moveInstruction(MI, CurrentTop);
        TopRPTracker.setPos(MI);
      }

      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);

      // Release dependent instructions for scheduling.
      releaseSuccessors(SU);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI) {
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
          TopRPTracker.setPos(CurrentTop);
        }
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
      // Update bottom scheduled pressure.
      BotRPTracker.recede();
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);

      // Release dependent instructions for scheduling.
      releasePredecessors(SU);
    }
    SU->isScheduled = true;
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void VLIWMachineScheduler::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

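  // Re-insert each remaining DBG_VALUE immediately after the instruction it
  // originally followed.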
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
  DAG = dag;
  TRI = DAG->TRI;
  Top.DAG = dag;
  Bot.DAG = dag;

  // Initialize the HazardRecognizers.
  const TargetMachine &TM = DAG->MF.getTarget();
  const InstrItineraryData *Itin = TM.getInstrItineraryData();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  Top.ResourceModel = new VLIWResourceModel(TM);
  Bot.ResourceModel = new VLIWResourceModel(TM);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

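  // A node's top-ready cycle is the maximum over its predecessors of the
  // predecessor's ready cycle plus the minimum latency of the connecting edge.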
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

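  // Counter-based check: it is a hazard if issuing this instruction would
  // exceed the issue width for the current cycle.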
  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                         unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = DAG->getIssueWidth();
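  // Carry over any micro-ops that exceeded this cycle's issue width to the
  // next cycle.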
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model.
  startNewCycle = ResourceModel->reserveResources(SU);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += DAG->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
  else
    DEBUG(dbgs() << "*** IssueCount " << IssueCount
          << " at cycle " << CurrCycle << '\n');
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q,
                                             SUnit *SU, PressureElement P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  SU->dump(DAG);
}
#endif

/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
static SUnit *getSingleUnscheduledPred(SUnit *SU) {
  SUnit *OnlyAvailablePred = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    SUnit &Pred = *I->getSUnit();
    if (!Pred.isScheduled) {
      // We found an available, but not scheduled, predecessor. If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
        return 0;
      OnlyAvailablePred = &Pred;
    }
  }
  return OnlyAvailablePred;
}

/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
/// of SU, return it, otherwise return null.
static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
  SUnit *OnlyAvailableSucc = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit &Succ = *I->getSUnit();
    if (!Succ.isScheduled) {
      // We found an available, but not scheduled, successor. If it's the
      // only one we have found, keep track of it... otherwise give up.
      if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
        return 0;
      OnlyAvailableSucc = &Succ;
    }
  }
  return OnlyAvailableSucc;
}

// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 100;
static const unsigned PriorityThree = 50;
static const unsigned PriorityFour = 20;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;

/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Critical path first.
  if (Q.getID() == TopQID) {
    ResCount += (SU->getHeight() * ScaleTwo);

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU))
      ResCount <<= FactorOne;
  } else {
    ResCount += (SU->getDepth() * ScaleTwo);

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU))
      ResCount <<= FactorOne;
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Count the successors for which this node is the sole unscheduled
    // predecessor.
    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I)
      if (getSingleUnscheduledPred(I->getSUnit()) == SU)
        ++NumNodesBlocking;
  } else {
    // Count the predecessors for which this node is the sole unscheduled
    // successor.
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I)
      if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
        ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  // Factor in reg pressure as a heuristic.
  ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
  ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);

  DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");

  return ResCount;
}

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
                  SchedCandidate &Candidate) {
  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      DEBUG(traceCandidate("CCAND", Q, *I));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult = pickNodeFromQueue(Bot.Available,
                                           DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult = pickNodeFromQueue(Top.Available,
                                           DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (BotResult == SingleMax) {
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  } else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}
