• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 
17 #include "HexagonMachineScheduler.h"
18 
19 #include <queue>
20 
21 using namespace llvm;
22 
23 static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
24                                   cl::desc("Force top-down list scheduling"));
25 static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
26                                    cl::desc("Force bottom-up list scheduling"));
27 
28 #ifndef NDEBUG
29 static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
30   cl::desc("Pop up a window to show MISched dags after they are processed"));
31 
32 static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
33   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
34 #else
35 static bool ViewMISchedDAGs = false;
36 #endif // NDEBUG
37 
38 /// Decrement this iterator until reaching the top or a non-debug instr.
39 static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,MachineBasicBlock::iterator Beg)40 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
41   assert(I != Beg && "reached the top of the region, cannot decrement");
42   while (--I != Beg) {
43     if (!I->isDebugValue())
44       break;
45   }
46   return I;
47 }
48 
49 /// If this iterator is a debug value, increment until reaching the End or a
50 /// non-debug instruction.
51 static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,MachineBasicBlock::iterator End)52 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
53   for(; I != End; ++I) {
54     if (!I->isDebugValue())
55       break;
56   }
57   return I;
58 }
59 
60 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
61 /// NumPredsLeft reaches zero, release the successor node.
62 ///
63 /// FIXME: Adjust SuccSU height based on MinLatency.
releaseSucc(SUnit * SU,SDep * SuccEdge)64 void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
65   SUnit *SuccSU = SuccEdge->getSUnit();
66 
67 #ifndef NDEBUG
68   if (SuccSU->NumPredsLeft == 0) {
69     dbgs() << "*** Scheduling failed! ***\n";
70     SuccSU->dump(this);
71     dbgs() << " has been released too many times!\n";
72     llvm_unreachable(0);
73   }
74 #endif
75   --SuccSU->NumPredsLeft;
76   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
77     SchedImpl->releaseTopNode(SuccSU);
78 }
79 
80 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
releaseSuccessors(SUnit * SU)81 void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
82   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
83        I != E; ++I) {
84     releaseSucc(SU, &*I);
85   }
86 }
87 
88 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
89 /// NumSuccsLeft reaches zero, release the predecessor node.
90 ///
91 /// FIXME: Adjust PredSU height based on MinLatency.
releasePred(SUnit * SU,SDep * PredEdge)92 void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
93   SUnit *PredSU = PredEdge->getSUnit();
94 
95 #ifndef NDEBUG
96   if (PredSU->NumSuccsLeft == 0) {
97     dbgs() << "*** Scheduling failed! ***\n";
98     PredSU->dump(this);
99     dbgs() << " has been released too many times!\n";
100     llvm_unreachable(0);
101   }
102 #endif
103   --PredSU->NumSuccsLeft;
104   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
105     SchedImpl->releaseBottomNode(PredSU);
106 }
107 
108 /// releasePredecessors - Call releasePred on each of SU's predecessors.
releasePredecessors(SUnit * SU)109 void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
110   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
111        I != E; ++I) {
112     releasePred(SU, &*I);
113   }
114 }
115 
moveInstruction(MachineInstr * MI,MachineBasicBlock::iterator InsertPos)116 void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
117                                     MachineBasicBlock::iterator InsertPos) {
118   // Advance RegionBegin if the first instruction moves down.
119   if (&*RegionBegin == MI)
120     ++RegionBegin;
121 
122   // Update the instruction stream.
123   BB->splice(InsertPos, BB, MI);
124 
125   // Update LiveIntervals
126   LIS->handleMove(MI);
127 
128   // Recede RegionBegin if an instruction moves above the first.
129   if (RegionBegin == InsertPos)
130     RegionBegin = MI;
131 }
132 
checkSchedLimit()133 bool VLIWMachineScheduler::checkSchedLimit() {
134 #ifndef NDEBUG
135   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
136     CurrentTop = CurrentBottom;
137     return false;
138   }
139   ++NumInstrsScheduled;
140 #endif
141   return true;
142 }
143 
144 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
145 /// crossing a scheduling boundary. [begin, end) includes all instructions in
146 /// the region, including the boundary itself and single-instruction regions
147 /// that don't get scheduled.
enterRegion(MachineBasicBlock * bb,MachineBasicBlock::iterator begin,MachineBasicBlock::iterator end,unsigned endcount)148 void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
149                                 MachineBasicBlock::iterator begin,
150                                 MachineBasicBlock::iterator end,
151                                 unsigned endcount)
152 {
153   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
154 
155   // For convenience remember the end of the liveness region.
156   LiveRegionEnd =
157     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
158 }
159 
160 // Setup the register pressure trackers for the top scheduled top and bottom
161 // scheduled regions.
initRegPressure()162 void VLIWMachineScheduler::initRegPressure() {
163   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
164   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
165 
166   // Close the RPTracker to finalize live ins.
167   RPTracker.closeRegion();
168 
169   DEBUG(RPTracker.getPressure().dump(TRI));
170 
171   // Initialize the live ins and live outs.
172   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
173   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
174 
175   // Close one end of the tracker so we can call
176   // getMaxUpward/DownwardPressureDelta before advancing across any
177   // instructions. This converts currently live regs into live ins/outs.
178   TopRPTracker.closeTop();
179   BotRPTracker.closeBottom();
180 
181   // Account for liveness generated by the region boundary.
182   if (LiveRegionEnd != RegionEnd)
183     BotRPTracker.recede();
184 
185   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
186 
187   // Cache the list of excess pressure sets in this region. This will also track
188   // the max pressure in the scheduled code for these sets.
189   RegionCriticalPSets.clear();
190   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
191   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
192     unsigned Limit = TRI->getRegPressureSetLimit(i);
193     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
194           << "Limit " << Limit
195           << " Actual " << RegionPressure[i] << "\n");
196     if (RegionPressure[i] > Limit)
197       RegionCriticalPSets.push_back(PressureElement(i, 0));
198   }
199   DEBUG(dbgs() << "Excess PSets: ";
200         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
201           dbgs() << TRI->getRegPressureSetName(
202             RegionCriticalPSets[i].PSetID) << " ";
203         dbgs() << "\n");
204 
205   TotalPackets = 0;
206 }
207 
208 // FIXME: When the pressure tracker deals in pressure differences then we won't
209 // iterate over all RegionCriticalPSets[i].
210 void VLIWMachineScheduler::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure)211 updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
212   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
213     unsigned ID = RegionCriticalPSets[i].PSetID;
214     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
215     if ((int)NewMaxPressure[ID] > MaxUnits)
216       MaxUnits = NewMaxPressure[ID];
217   }
218 }
219 
220 /// Check if scheduling of this SU is possible
221 /// in the current packet.
222 /// It is _not_ precise (statefull), it is more like
223 /// another heuristic. Many corner cases are figured
224 /// empirically.
isResourceAvailable(SUnit * SU)225 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
226   if (!SU || !SU->getInstr())
227     return false;
228 
229   // First see if the pipeline could receive this instruction
230   // in the current cycle.
231   switch (SU->getInstr()->getOpcode()) {
232   default:
233     if (!ResourcesModel->canReserveResources(SU->getInstr()))
234       return false;
235   case TargetOpcode::EXTRACT_SUBREG:
236   case TargetOpcode::INSERT_SUBREG:
237   case TargetOpcode::SUBREG_TO_REG:
238   case TargetOpcode::REG_SEQUENCE:
239   case TargetOpcode::IMPLICIT_DEF:
240   case TargetOpcode::COPY:
241   case TargetOpcode::INLINEASM:
242     break;
243   }
244 
245   // Now see if there are no other dependencies to instructions already
246   // in the packet.
247   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
248     if (Packet[i]->Succs.size() == 0)
249       continue;
250     for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
251          E = Packet[i]->Succs.end(); I != E; ++I) {
252       // Since we do not add pseudos to packets, might as well
253       // ignore order dependencies.
254       if (I->isCtrl())
255         continue;
256 
257       if (I->getSUnit() == SU)
258         return false;
259     }
260   }
261   return true;
262 }
263 
264 /// Keep track of available resources.
reserveResources(SUnit * SU)265 bool VLIWResourceModel::reserveResources(SUnit *SU) {
266   bool startNewCycle = false;
267   // If this SU does not fit in the packet
268   // start a new one.
269   if (!isResourceAvailable(SU)) {
270     ResourcesModel->clearResources();
271     Packet.clear();
272     TotalPackets++;
273     startNewCycle = true;
274   }
275 
276   switch (SU->getInstr()->getOpcode()) {
277   default:
278     ResourcesModel->reserveResources(SU->getInstr());
279     break;
280   case TargetOpcode::EXTRACT_SUBREG:
281   case TargetOpcode::INSERT_SUBREG:
282   case TargetOpcode::SUBREG_TO_REG:
283   case TargetOpcode::REG_SEQUENCE:
284   case TargetOpcode::IMPLICIT_DEF:
285   case TargetOpcode::KILL:
286   case TargetOpcode::PROLOG_LABEL:
287   case TargetOpcode::EH_LABEL:
288   case TargetOpcode::COPY:
289   case TargetOpcode::INLINEASM:
290     break;
291   }
292   Packet.push_back(SU);
293 
294 #ifndef NDEBUG
295   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
296   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
297     DEBUG(dbgs() << "\t[" << i << "] SU(");
298     DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
299     DEBUG(Packet[i]->getInstr()->dump());
300   }
301 #endif
302 
303   // If packet is now full, reset the state so in the next cycle
304   // we start fresh.
305   if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
306     ResourcesModel->clearResources();
307     Packet.clear();
308     TotalPackets++;
309     startNewCycle = true;
310   }
311 
312   return startNewCycle;
313 }
314 
315 // Release all DAG roots for scheduling.
releaseRoots()316 void VLIWMachineScheduler::releaseRoots() {
317   SmallVector<SUnit*, 16> BotRoots;
318 
319   for (std::vector<SUnit>::iterator
320          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
321     // A SUnit is ready to top schedule if it has no predecessors.
322     if (I->Preds.empty())
323       SchedImpl->releaseTopNode(&(*I));
324     // A SUnit is ready to bottom schedule if it has no successors.
325     if (I->Succs.empty())
326       BotRoots.push_back(&(*I));
327   }
328   // Release bottom roots in reverse order so the higher priority nodes appear
329   // first. This is more natural and slightly more efficient.
330   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
331          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
332     SchedImpl->releaseBottomNode(*I);
333 }
334 
335 /// schedule - Called back from MachineScheduler::runOnMachineFunction
336 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
337 /// only includes instructions that have DAG nodes, not scheduling boundaries.
schedule()338 void VLIWMachineScheduler::schedule() {
339   DEBUG(dbgs()
340         << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
341         << " " << BB->getName()
342         << " in_func " << BB->getParent()->getFunction()->getName()
343         << " at loop depth "  << MLI->getLoopDepth(BB)
344         << " \n");
345 
346   // Initialize the register pressure tracker used by buildSchedGraph.
347   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
348 
349   // Account for liveness generate by the region boundary.
350   if (LiveRegionEnd != RegionEnd)
351     RPTracker.recede();
352 
353   // Build the DAG, and compute current register pressure.
354   buildSchedGraph(AA, &RPTracker);
355 
356   // Initialize top/bottom trackers after computing region pressure.
357   initRegPressure();
358 
359   // To view Height/Depth correctly, they should be accessed at least once.
360   DEBUG(unsigned maxH = 0;
361         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
362           if (SUnits[su].getHeight() > maxH)
363             maxH = SUnits[su].getHeight();
364         dbgs() << "Max Height " << maxH << "\n";);
365   DEBUG(unsigned maxD = 0;
366         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
367           if (SUnits[su].getDepth() > maxD)
368             maxD = SUnits[su].getDepth();
369         dbgs() << "Max Depth " << maxD << "\n";);
370   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
371           SUnits[su].dumpAll(this));
372 
373   if (ViewMISchedDAGs) viewGraph();
374 
375   SchedImpl->initialize(this);
376 
377   // Release edges from the special Entry node or to the special Exit node.
378   releaseSuccessors(&EntrySU);
379   releasePredecessors(&ExitSU);
380 
381   // Release all DAG roots for scheduling.
382   releaseRoots();
383 
384   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
385   CurrentBottom = RegionEnd;
386   bool IsTopNode = false;
387   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
388     if (!checkSchedLimit())
389       break;
390 
391     // Move the instruction to its new location in the instruction stream.
392     MachineInstr *MI = SU->getInstr();
393 
394     if (IsTopNode) {
395       assert(SU->isTopReady() && "node still has unscheduled dependencies");
396       if (&*CurrentTop == MI)
397         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
398       else {
399         moveInstruction(MI, CurrentTop);
400         TopRPTracker.setPos(MI);
401       }
402 
403       // Update top scheduled pressure.
404       TopRPTracker.advance();
405       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
406       updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
407 
408       // Release dependent instructions for scheduling.
409       releaseSuccessors(SU);
410     } else {
411       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
412       MachineBasicBlock::iterator priorII =
413         priorNonDebug(CurrentBottom, CurrentTop);
414       if (&*priorII == MI)
415         CurrentBottom = priorII;
416       else {
417         if (&*CurrentTop == MI) {
418           CurrentTop = nextIfDebug(++CurrentTop, priorII);
419           TopRPTracker.setPos(CurrentTop);
420         }
421         moveInstruction(MI, CurrentBottom);
422         CurrentBottom = MI;
423       }
424       // Update bottom scheduled pressure.
425       BotRPTracker.recede();
426       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
427       updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
428 
429       // Release dependent instructions for scheduling.
430       releasePredecessors(SU);
431     }
432     SU->isScheduled = true;
433     SchedImpl->schedNode(SU, IsTopNode);
434   }
435   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
436 
437   placeDebugValues();
438 }
439 
440 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
placeDebugValues()441 void VLIWMachineScheduler::placeDebugValues() {
442   // If first instruction was a DBG_VALUE then put it back.
443   if (FirstDbgValue) {
444     BB->splice(RegionBegin, BB, FirstDbgValue);
445     RegionBegin = FirstDbgValue;
446   }
447 
448   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
449          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
450     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
451     MachineInstr *DbgValue = P.first;
452     MachineBasicBlock::iterator OrigPrevMI = P.second;
453     BB->splice(++OrigPrevMI, BB, DbgValue);
454     if (OrigPrevMI == llvm::prior(RegionEnd))
455       RegionEnd = DbgValue;
456   }
457   DbgValues.clear();
458   FirstDbgValue = NULL;
459 }
460 
initialize(VLIWMachineScheduler * dag)461 void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
462   DAG = dag;
463   TRI = DAG->TRI;
464   Top.DAG = dag;
465   Bot.DAG = dag;
466 
467   // Initialize the HazardRecognizers.
468   const TargetMachine &TM = DAG->MF.getTarget();
469   const InstrItineraryData *Itin = TM.getInstrItineraryData();
470   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
471   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
472 
473   Top.ResourceModel = new VLIWResourceModel(TM);
474   Bot.ResourceModel = new VLIWResourceModel(TM);
475 
476   assert((!ForceTopDown || !ForceBottomUp) &&
477          "-misched-topdown incompatible with -misched-bottomup");
478 }
479 
releaseTopNode(SUnit * SU)480 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
481   if (SU->isScheduled)
482     return;
483 
484   for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
485        I != E; ++I) {
486     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
487     unsigned MinLatency = I->getMinLatency();
488 #ifndef NDEBUG
489     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
490 #endif
491     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
492       SU->TopReadyCycle = PredReadyCycle + MinLatency;
493   }
494   Top.releaseNode(SU, SU->TopReadyCycle);
495 }
496 
releaseBottomNode(SUnit * SU)497 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
498   if (SU->isScheduled)
499     return;
500 
501   assert(SU->getInstr() && "Scheduled SUnit must have instr");
502 
503   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
504        I != E; ++I) {
505     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
506     unsigned MinLatency = I->getMinLatency();
507 #ifndef NDEBUG
508     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
509 #endif
510     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
511       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
512   }
513   Bot.releaseNode(SU, SU->BotReadyCycle);
514 }
515 
516 /// Does this SU have a hazard within the current instruction group.
517 ///
518 /// The scheduler supports two modes of hazard recognition. The first is the
519 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
520 /// supports highly complicated in-order reservation tables
521 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
522 ///
523 /// The second is a streamlined mechanism that checks for hazards based on
524 /// simple counters that the scheduler itself maintains. It explicitly checks
525 /// for instruction dispatch limitations, including the number of micro-ops that
526 /// can dispatch per cycle.
527 ///
528 /// TODO: Also check whether the SU must start a new group.
checkHazard(SUnit * SU)529 bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
530   if (HazardRec->isEnabled())
531     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
532 
533   if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
534     return true;
535 
536   return false;
537 }
538 
releaseNode(SUnit * SU,unsigned ReadyCycle)539 void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
540                                                      unsigned ReadyCycle) {
541   if (ReadyCycle < MinReadyCycle)
542     MinReadyCycle = ReadyCycle;
543 
544   // Check for interlocks first. For the purpose of other heuristics, an
545   // instruction that cannot issue appears as if it's not in the ReadyQueue.
546   if (ReadyCycle > CurrCycle || checkHazard(SU))
547 
548     Pending.push(SU);
549   else
550     Available.push(SU);
551 }
552 
553 /// Move the boundary of scheduled code by one cycle.
bumpCycle()554 void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
555   unsigned Width = DAG->getIssueWidth();
556   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
557 
558   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
559   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
560 
561   if (!HazardRec->isEnabled()) {
562     // Bypass HazardRec virtual calls.
563     CurrCycle = NextCycle;
564   } else {
565     // Bypass getHazardType calls in case of long latency.
566     for (; CurrCycle != NextCycle; ++CurrCycle) {
567       if (isTop())
568         HazardRec->AdvanceCycle();
569       else
570         HazardRec->RecedeCycle();
571     }
572   }
573   CheckPending = true;
574 
575   DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
576         << CurrCycle << '\n');
577 }
578 
579 /// Move the boundary of scheduled code by one SUnit.
bumpNode(SUnit * SU)580 void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
581   bool startNewCycle = false;
582 
583   // Update the reservation table.
584   if (HazardRec->isEnabled()) {
585     if (!isTop() && SU->isCall) {
586       // Calls are scheduled with their preceding instructions. For bottom-up
587       // scheduling, clear the pipeline state before emitting.
588       HazardRec->Reset();
589     }
590     HazardRec->EmitInstruction(SU);
591   }
592 
593   // Update DFA model.
594   startNewCycle = ResourceModel->reserveResources(SU);
595 
596   // Check the instruction group dispatch limit.
597   // TODO: Check if this SU must end a dispatch group.
598   IssueCount += DAG->getNumMicroOps(SU->getInstr());
599   if (startNewCycle) {
600     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
601     bumpCycle();
602   }
603   else
604     DEBUG(dbgs() << "*** IssueCount " << IssueCount
605           << " at cycle " << CurrCycle << '\n');
606 }
607 
608 /// Release pending ready nodes in to the available queue. This makes them
609 /// visible to heuristics.
releasePending()610 void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
611   // If the available queue is empty, it is safe to reset MinReadyCycle.
612   if (Available.empty())
613     MinReadyCycle = UINT_MAX;
614 
615   // Check to see if any of the pending instructions are ready to issue.  If
616   // so, add them to the available queue.
617   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
618     SUnit *SU = *(Pending.begin()+i);
619     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
620 
621     if (ReadyCycle < MinReadyCycle)
622       MinReadyCycle = ReadyCycle;
623 
624     if (ReadyCycle > CurrCycle)
625       continue;
626 
627     if (checkHazard(SU))
628       continue;
629 
630     Available.push(SU);
631     Pending.remove(Pending.begin()+i);
632     --i; --e;
633   }
634   CheckPending = false;
635 }
636 
637 /// Remove SU from the ready set for this boundary.
removeReady(SUnit * SU)638 void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
639   if (Available.isInQueue(SU))
640     Available.remove(Available.find(SU));
641   else {
642     assert(Pending.isInQueue(SU) && "bad ready count");
643     Pending.remove(Pending.find(SU));
644   }
645 }
646 
647 /// If this queue only has one ready candidate, return it. As a side effect,
648 /// advance the cycle until at least one node is ready. If multiple instructions
649 /// are ready, return NULL.
pickOnlyChoice()650 SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
651   if (CheckPending)
652     releasePending();
653 
654   for (unsigned i = 0; Available.empty(); ++i) {
655     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
656            "permanent hazard"); (void)i;
657     bumpCycle();
658     releasePending();
659   }
660   if (Available.size() == 1)
661     return *Available.begin();
662   return NULL;
663 }
664 
665 #ifndef NDEBUG
traceCandidate(const char * Label,const ReadyQueue & Q,SUnit * SU,PressureElement P)666 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
667                                              const ReadyQueue &Q,
668                                              SUnit *SU, PressureElement P) {
669   dbgs() << Label << " " << Q.getName() << " ";
670   if (P.isValid())
671     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
672            << " ";
673   else
674     dbgs() << "     ";
675   SU->dump(DAG);
676 }
677 #endif
678 
679 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
680 /// of SU, return it, otherwise return null.
getSingleUnscheduledPred(SUnit * SU)681 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
682   SUnit *OnlyAvailablePred = 0;
683   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
684        I != E; ++I) {
685     SUnit &Pred = *I->getSUnit();
686     if (!Pred.isScheduled) {
687       // We found an available, but not scheduled, predecessor.  If it's the
688       // only one we have found, keep track of it... otherwise give up.
689       if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
690         return 0;
691       OnlyAvailablePred = &Pred;
692     }
693   }
694   return OnlyAvailablePred;
695 }
696 
697 /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
698 /// of SU, return it, otherwise return null.
getSingleUnscheduledSucc(SUnit * SU)699 static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
700   SUnit *OnlyAvailableSucc = 0;
701   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
702        I != E; ++I) {
703     SUnit &Succ = *I->getSUnit();
704     if (!Succ.isScheduled) {
705       // We found an available, but not scheduled, successor.  If it's the
706       // only one we have found, keep track of it... otherwise give up.
707       if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
708         return 0;
709       OnlyAvailableSucc = &Succ;
710     }
711   }
712   return OnlyAvailableSucc;
713 }
714 
715 // Constants used to denote relative importance of
716 // heuristic components for cost computation.
717 static const unsigned PriorityOne = 200;
718 static const unsigned PriorityTwo = 100;
719 static const unsigned PriorityThree = 50;
720 static const unsigned PriorityFour = 20;
721 static const unsigned ScaleTwo = 10;
722 static const unsigned FactorOne = 2;
723 
724 /// Single point to compute overall scheduling cost.
725 /// TODO: More heuristics will be used soon.
SchedulingCost(ReadyQueue & Q,SUnit * SU,SchedCandidate & Candidate,RegPressureDelta & Delta,bool verbose)726 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
727                                             SchedCandidate &Candidate,
728                                             RegPressureDelta &Delta,
729                                             bool verbose) {
730   // Initial trivial priority.
731   int ResCount = 1;
732 
733   // Do not waste time on a node that is already scheduled.
734   if (!SU || SU->isScheduled)
735     return ResCount;
736 
737   // Forced priority is high.
738   if (SU->isScheduleHigh)
739     ResCount += PriorityOne;
740 
741   // Critical path first.
742   if (Q.getID() == TopQID) {
743     ResCount += (SU->getHeight() * ScaleTwo);
744 
745     // If resources are available for it, multiply the
746     // chance of scheduling.
747     if (Top.ResourceModel->isResourceAvailable(SU))
748       ResCount <<= FactorOne;
749   } else {
750     ResCount += (SU->getDepth() * ScaleTwo);
751 
752     // If resources are available for it, multiply the
753     // chance of scheduling.
754     if (Bot.ResourceModel->isResourceAvailable(SU))
755       ResCount <<= FactorOne;
756   }
757 
758   unsigned NumNodesBlocking = 0;
759   if (Q.getID() == TopQID) {
760     // How many SUs does it block from scheduling?
761     // Look at all of the successors of this node.
762     // Count the number of nodes that
763     // this node is the sole unscheduled node for.
764     for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
765          I != E; ++I)
766       if (getSingleUnscheduledPred(I->getSUnit()) == SU)
767         ++NumNodesBlocking;
768   } else {
769     // How many unscheduled predecessors block this node?
770     for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
771          I != E; ++I)
772       if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
773         ++NumNodesBlocking;
774   }
775   ResCount += (NumNodesBlocking * ScaleTwo);
776 
777   // Factor in reg pressure as a heuristic.
778   ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
779   ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
780 
781   DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
782 
783   return ResCount;
784 }
785 
786 /// Pick the best candidate from the top queue.
787 ///
788 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
789 /// DAG building. To adjust for the current scheduling location we need to
790 /// maintain the number of vreg uses remaining to be top-scheduled.
791 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue & Q,const RegPressureTracker & RPTracker,SchedCandidate & Candidate)792 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
793                   SchedCandidate &Candidate) {
794   DEBUG(Q.dump());
795 
796   // getMaxPressureDelta temporarily modifies the tracker.
797   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
798 
799   // BestSU remains NULL if no top candidates beat the best existing candidate.
800   CandResult FoundCandidate = NoCand;
801   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
802     RegPressureDelta RPDelta;
803     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
804                                     DAG->getRegionCriticalPSets(),
805                                     DAG->getRegPressure().MaxSetPressure);
806 
807     int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
808 
809     // Initialize the candidate if needed.
810     if (!Candidate.SU) {
811       Candidate.SU = *I;
812       Candidate.RPDelta = RPDelta;
813       Candidate.SCost = CurrentCost;
814       FoundCandidate = NodeOrder;
815       continue;
816     }
817 
818     // Best cost.
819     if (CurrentCost > Candidate.SCost) {
820       DEBUG(traceCandidate("CCAND", Q, *I));
821       Candidate.SU = *I;
822       Candidate.RPDelta = RPDelta;
823       Candidate.SCost = CurrentCost;
824       FoundCandidate = BestCost;
825       continue;
826     }
827 
828     // Fall through to original instruction order.
829     // Only consider node order if Candidate was chosen from this Q.
830     if (FoundCandidate == NoCand)
831       continue;
832   }
833   return FoundCandidate;
834 }
835 
836 /// Pick the best candidate node from either the top or bottom queue.
pickNodeBidrectional(bool & IsTopNode)837 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
838   // Schedule as far as possible in the direction of no choice. This is most
839   // efficient, but also provides the best heuristics for CriticalPSets.
840   if (SUnit *SU = Bot.pickOnlyChoice()) {
841     IsTopNode = false;
842     return SU;
843   }
844   if (SUnit *SU = Top.pickOnlyChoice()) {
845     IsTopNode = true;
846     return SU;
847   }
848   SchedCandidate BotCand;
849   // Prefer bottom scheduling when heuristics are silent.
850   CandResult BotResult = pickNodeFromQueue(Bot.Available,
851                                            DAG->getBotRPTracker(), BotCand);
852   assert(BotResult != NoCand && "failed to find the first candidate");
853 
854   // If either Q has a single candidate that provides the least increase in
855   // Excess pressure, we can immediately schedule from that Q.
856   //
857   // RegionCriticalPSets summarizes the pressure within the scheduled region and
858   // affects picking from either Q. If scheduling in one direction must
859   // increase pressure for one of the excess PSets, then schedule in that
860   // direction first to provide more freedom in the other direction.
861   if (BotResult == SingleExcess || BotResult == SingleCritical) {
862     IsTopNode = false;
863     return BotCand.SU;
864   }
865   // Check if the top Q has a better candidate.
866   SchedCandidate TopCand;
867   CandResult TopResult = pickNodeFromQueue(Top.Available,
868                                            DAG->getTopRPTracker(), TopCand);
869   assert(TopResult != NoCand && "failed to find the first candidate");
870 
871   if (TopResult == SingleExcess || TopResult == SingleCritical) {
872     IsTopNode = true;
873     return TopCand.SU;
874   }
875   // If either Q has a single candidate that minimizes pressure above the
876   // original region's pressure pick it.
877   if (BotResult == SingleMax) {
878     IsTopNode = false;
879     return BotCand.SU;
880   }
881   if (TopResult == SingleMax) {
882     IsTopNode = true;
883     return TopCand.SU;
884   }
885   if (TopCand.SCost > BotCand.SCost) {
886     IsTopNode = true;
887     return TopCand.SU;
888   }
889   // Otherwise prefer the bottom candidate in node order.
890   IsTopNode = false;
891   return BotCand.SU;
892 }
893 
894 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
pickNode(bool & IsTopNode)895 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
896   if (DAG->top() == DAG->bottom()) {
897     assert(Top.Available.empty() && Top.Pending.empty() &&
898            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
899     return NULL;
900   }
901   SUnit *SU;
902   if (ForceTopDown) {
903     SU = Top.pickOnlyChoice();
904     if (!SU) {
905       SchedCandidate TopCand;
906       CandResult TopResult =
907         pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
908       assert(TopResult != NoCand && "failed to find the first candidate");
909       (void)TopResult;
910       SU = TopCand.SU;
911     }
912     IsTopNode = true;
913   } else if (ForceBottomUp) {
914     SU = Bot.pickOnlyChoice();
915     if (!SU) {
916       SchedCandidate BotCand;
917       CandResult BotResult =
918         pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
919       assert(BotResult != NoCand && "failed to find the first candidate");
920       (void)BotResult;
921       SU = BotCand.SU;
922     }
923     IsTopNode = false;
924   } else {
925     SU = pickNodeBidrectional(IsTopNode);
926   }
927   if (SU->isTopReady())
928     Top.removeReady(SU);
929   if (SU->isBottomReady())
930     Bot.removeReady(SU);
931 
932   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
933         << " Scheduling Instruction in cycle "
934         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
935         SU->dump(DAG));
936   return SU;
937 }
938 
939 /// Update the scheduler's state after scheduling a node. This is the same node
940 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
941 /// to update it's state based on the current cycle before MachineSchedStrategy
942 /// does.
schedNode(SUnit * SU,bool IsTopNode)943 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
944   if (IsTopNode) {
945     SU->TopReadyCycle = Top.CurrCycle;
946     Top.bumpNode(SU);
947   } else {
948     SU->BotReadyCycle = Bot.CurrCycle;
949     Bot.bumpNode(SU);
950   }
951 }
952 
953