• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a hazard recognizer for the SystemZ scheduler.
11 //
12 // This class is used by the SystemZ scheduling strategy to maintain
13 // the state during scheduling, and provide cost functions for
14 // scheduling candidates. This includes:
15 //
16 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
17 // instructions that always begin a new group should be scheduled when
18 // the current decoder group is empty.
19 // * Processor resources usage. It is beneficial to balance the use of
20 // resources.
21 //
22 // A goal is to consider all instructions, also those outside of any
23 // scheduling region. Such instructions are "advanced" past and include
24 // single instructions before a scheduling region, branches etc.
25 //
// A block that has only one predecessor continues scheduling with that
// predecessor's state (which may be updated by emitting branches).
28 //
29 // ===---------------------------------------------------------------------===//
30 
31 #include "SystemZHazardRecognizer.h"
32 #include "llvm/ADT/Statistic.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "machine-scheduler"
37 
38 // This is the limit of processor resource usage at which the
39 // scheduler should try to look for other instructions (not using the
40 // critical resource).
41 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
42                                    cl::desc("The OOO window for processor "
43                                             "resources during scheduling."),
44                                    cl::init(8));
45 
46 unsigned SystemZHazardRecognizer::
getNumDecoderSlots(SUnit * SU) const47 getNumDecoderSlots(SUnit *SU) const {
48   const MCSchedClassDesc *SC = getSchedClass(SU);
49   if (!SC->isValid())
50     return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
51 
52   if (SC->BeginGroup) {
53     if (!SC->EndGroup)
54       return 2; // Cracked instruction
55     else
56       return 3; // Expanded/group-alone instruction
57   }
58 
59   return 1; // Normal instruction
60 }
61 
getCurrCycleIdx(SUnit * SU) const62 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
63   unsigned Idx = CurrGroupSize;
64   if (GrpCount % 2)
65     Idx += 3;
66 
67   if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
68     if (Idx == 1 || Idx == 2)
69       Idx = 3;
70     else if (Idx == 4 || Idx == 5)
71       Idx = 0;
72   }
73 
74   return Idx;
75 }
76 
77 ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
getHazardType(SUnit * m,int Stalls)78 getHazardType(SUnit *m, int Stalls) {
79   return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
80 }
81 
Reset()82 void SystemZHazardRecognizer::Reset() {
83   CurrGroupSize = 0;
84   CurrGroupHas4RegOps = false;
85   clearProcResCounters();
86   GrpCount = 0;
87   LastFPdOpCycleIdx = UINT_MAX;
88   LastEmittedMI = nullptr;
89   LLVM_DEBUG(CurGroupDbg = "";);
90 }
91 
92 bool
fitsIntoCurrentGroup(SUnit * SU) const93 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
94   const MCSchedClassDesc *SC = getSchedClass(SU);
95   if (!SC->isValid())
96     return true;
97 
98   // A cracked instruction only fits into schedule if the current
99   // group is empty.
100   if (SC->BeginGroup)
101     return (CurrGroupSize == 0);
102 
103   // An instruction with 4 register operands will not fit in last slot.
104   assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
105           "Current decoder group is already full!");
106   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
107     return false;
108 
109   // Since a full group is handled immediately in EmitInstruction(),
110   // SU should fit into current group. NumSlots should be 1 or 0,
111   // since it is not a cracked or expanded instruction.
112   assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
113           "Expected normal instruction to fit in non-full group!");
114 
115   return true;
116 }
117 
has4RegOps(const MachineInstr * MI) const118 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
119   const MachineFunction &MF = *MI->getParent()->getParent();
120   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
121   const MCInstrDesc &MID = MI->getDesc();
122   unsigned Count = 0;
123   for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
124     const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
125     if (RC == nullptr)
126       continue;
127     if (OpIdx >= MID.getNumDefs() &&
128         MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
129       continue;
130     Count++;
131   }
132   return Count >= 4;
133 }
134 
nextGroup()135 void SystemZHazardRecognizer::nextGroup() {
136   if (CurrGroupSize == 0)
137     return;
138 
139   LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
140   LLVM_DEBUG(CurGroupDbg = "";);
141 
142   GrpCount++;
143 
144   // Reset counter for next group.
145   CurrGroupSize = 0;
146   CurrGroupHas4RegOps = false;
147 
148   // Decrease counters for execution units by one.
149   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
150     if (ProcResourceCounters[i] > 0)
151       ProcResourceCounters[i]--;
152 
153   // Clear CriticalResourceIdx if it is now below the threshold.
154   if (CriticalResourceIdx != UINT_MAX &&
155       (ProcResourceCounters[CriticalResourceIdx] <=
156        ProcResCostLim))
157     CriticalResourceIdx = UINT_MAX;
158 
159   LLVM_DEBUG(dumpState(););
160 }
161 
162 #ifndef NDEBUG // Debug output
dumpSU(SUnit * SU,raw_ostream & OS) const163 void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
164   OS << "SU(" << SU->NodeNum << "):";
165   OS << TII->getName(SU->getInstr()->getOpcode());
166 
167   const MCSchedClassDesc *SC = getSchedClass(SU);
168   if (!SC->isValid())
169     return;
170 
171   for (TargetSchedModel::ProcResIter
172          PI = SchedModel->getWriteProcResBegin(SC),
173          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
174     const MCProcResourceDesc &PRD =
175       *SchedModel->getProcResource(PI->ProcResourceIdx);
176     std::string FU(PRD.Name);
177     // trim e.g. Z13_FXaUnit -> FXa
178     FU = FU.substr(FU.find("_") + 1);
179     size_t Pos = FU.find("Unit");
180     if (Pos != std::string::npos)
181       FU.resize(Pos);
182     if (FU == "LS") // LSUnit -> LSU
183       FU = "LSU";
184     OS << "/" << FU;
185 
186     if (PI->Cycles > 1)
187       OS << "(" << PI->Cycles << "cyc)";
188   }
189 
190   if (SC->NumMicroOps > 1)
191     OS << "/" << SC->NumMicroOps << "uops";
192   if (SC->BeginGroup && SC->EndGroup)
193     OS << "/GroupsAlone";
194   else if (SC->BeginGroup)
195     OS << "/BeginsGroup";
196   else if (SC->EndGroup)
197     OS << "/EndsGroup";
198   if (SU->isUnbuffered)
199     OS << "/Unbuffered";
200   if (has4RegOps(SU->getInstr()))
201     OS << "/4RegOps";
202 }
203 
dumpCurrGroup(std::string Msg) const204 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
205   dbgs() << "++ " << Msg;
206   dbgs() << ": ";
207 
208   if (CurGroupDbg.empty())
209     dbgs() << " <empty>\n";
210   else {
211     dbgs() << "{ " << CurGroupDbg << " }";
212     dbgs() << " (" << CurrGroupSize << " decoder slot"
213            << (CurrGroupSize > 1 ? "s":"")
214            << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
215            << ")\n";
216   }
217 }
218 
dumpProcResourceCounters() const219 void SystemZHazardRecognizer::dumpProcResourceCounters() const {
220   bool any = false;
221 
222   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
223     if (ProcResourceCounters[i] > 0) {
224       any = true;
225       break;
226     }
227 
228   if (!any)
229     return;
230 
231   dbgs() << "++ | Resource counters: ";
232   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
233     if (ProcResourceCounters[i] > 0)
234       dbgs() << SchedModel->getProcResource(i)->Name
235              << ":" << ProcResourceCounters[i] << " ";
236   dbgs() << "\n";
237 
238   if (CriticalResourceIdx != UINT_MAX)
239     dbgs() << "++ | Critical resource: "
240            << SchedModel->getProcResource(CriticalResourceIdx)->Name
241            << "\n";
242 }
243 
dumpState() const244 void SystemZHazardRecognizer::dumpState() const {
245   dumpCurrGroup("| Current decoder group");
246   dbgs() << "++ | Current cycle index: "
247          << getCurrCycleIdx() << "\n";
248   dumpProcResourceCounters();
249   if (LastFPdOpCycleIdx != UINT_MAX)
250     dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
251 }
252 
253 #endif //NDEBUG
254 
clearProcResCounters()255 void SystemZHazardRecognizer::clearProcResCounters() {
256   ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
257   CriticalResourceIdx = UINT_MAX;
258 }
259 
isBranchRetTrap(MachineInstr * MI)260 static inline bool isBranchRetTrap(MachineInstr *MI) {
261   return (MI->isBranch() || MI->isReturn() ||
262           MI->getOpcode() == SystemZ::CondTrap);
263 }
264 
265 // Update state with SU as the next scheduled unit.
266 void SystemZHazardRecognizer::
EmitInstruction(SUnit * SU)267 EmitInstruction(SUnit *SU) {
268   const MCSchedClassDesc *SC = getSchedClass(SU);
269   LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
270              dbgs() << "\n";);
271   LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
272 
273   // If scheduling an SU that must begin a new decoder group, move on
274   // to next group.
275   if (!fitsIntoCurrentGroup(SU))
276     nextGroup();
277 
278   LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
279              if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
280 
281   LastEmittedMI = SU->getInstr();
282 
283   // After returning from a call, we don't know much about the state.
284   if (SU->isCall) {
285     LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
286     Reset();
287     LastEmittedMI = SU->getInstr();
288     return;
289   }
290 
291   // Increase counter for execution unit(s).
292   for (TargetSchedModel::ProcResIter
293          PI = SchedModel->getWriteProcResBegin(SC),
294          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
295     // Don't handle FPd together with the other resources.
296     if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
297       continue;
298     int &CurrCounter =
299       ProcResourceCounters[PI->ProcResourceIdx];
300     CurrCounter += PI->Cycles;
301     // Check if this is now the new critical resource.
302     if ((CurrCounter > ProcResCostLim) &&
303         (CriticalResourceIdx == UINT_MAX ||
304          (PI->ProcResourceIdx != CriticalResourceIdx &&
305           CurrCounter >
306           ProcResourceCounters[CriticalResourceIdx]))) {
307       LLVM_DEBUG(
308           dbgs() << "++ New critical resource: "
309                  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
310                  << "\n";);
311       CriticalResourceIdx = PI->ProcResourceIdx;
312     }
313   }
314 
315   // Make note of an instruction that uses a blocking resource (FPd).
316   if (SU->isUnbuffered) {
317     LastFPdOpCycleIdx = getCurrCycleIdx(SU);
318     LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
319                       << "\n";);
320   }
321 
322   // Insert SU into current group by increasing number of slots used
323   // in current group.
324   CurrGroupSize += getNumDecoderSlots(SU);
325   CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
326   unsigned GroupLim =
327     ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
328   assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
329 
330   // Check if current group is now full/ended. If so, move on to next
331   // group to be ready to evaluate more candidates.
332   if (CurrGroupSize == GroupLim || SC->EndGroup)
333     nextGroup();
334 }
335 
groupingCost(SUnit * SU) const336 int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
337   const MCSchedClassDesc *SC = getSchedClass(SU);
338   if (!SC->isValid())
339     return 0;
340 
341   // If SU begins new group, it can either break a current group early
342   // or fit naturally if current group is empty (negative cost).
343   if (SC->BeginGroup) {
344     if (CurrGroupSize)
345       return 3 - CurrGroupSize;
346     return -1;
347   }
348 
349   // Similarly, a group-ending SU may either fit well (last in group), or
350   // end the group prematurely.
351   if (SC->EndGroup) {
352     unsigned resultingGroupSize =
353       (CurrGroupSize + getNumDecoderSlots(SU));
354     if (resultingGroupSize < 3)
355       return (3 - resultingGroupSize);
356     return -1;
357   }
358 
359   // An instruction with 4 register operands will not fit in last slot.
360   if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
361     return 1;
362 
363   // Most instructions can be placed in any decoder slot.
364   return 0;
365 }
366 
isFPdOpPreferred_distance(SUnit * SU) const367 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
368   assert (SU->isUnbuffered);
369   // If this is the first FPd op, it should be scheduled high.
370   if (LastFPdOpCycleIdx == UINT_MAX)
371     return true;
372   // If this is not the first PFd op, it should go into the other side
373   // of the processor to use the other FPd unit there. This should
374   // generally happen if two FPd ops are placed with 2 other
375   // instructions between them (modulo 6).
376   unsigned SUCycleIdx = getCurrCycleIdx(SU);
377   if (LastFPdOpCycleIdx > SUCycleIdx)
378     return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
379   return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
380 }
381 
382 int SystemZHazardRecognizer::
resourcesCost(SUnit * SU)383 resourcesCost(SUnit *SU) {
384   int Cost = 0;
385 
386   const MCSchedClassDesc *SC = getSchedClass(SU);
387   if (!SC->isValid())
388     return 0;
389 
390   // For a FPd op, either return min or max value as indicated by the
391   // distance to any prior FPd op.
392   if (SU->isUnbuffered)
393     Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
394   // For other instructions, give a cost to the use of the critical resource.
395   else if (CriticalResourceIdx != UINT_MAX) {
396     for (TargetSchedModel::ProcResIter
397            PI = SchedModel->getWriteProcResBegin(SC),
398            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
399       if (PI->ProcResourceIdx == CriticalResourceIdx)
400         Cost = PI->Cycles;
401   }
402 
403   return Cost;
404 }
405 
emitInstruction(MachineInstr * MI,bool TakenBranch)406 void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
407                                               bool TakenBranch) {
408   // Make a temporary SUnit.
409   SUnit SU(MI, 0);
410 
411   // Set interesting flags.
412   SU.isCall = MI->isCall();
413 
414   const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
415   for (const MCWriteProcResEntry &PRE :
416          make_range(SchedModel->getWriteProcResBegin(SC),
417                     SchedModel->getWriteProcResEnd(SC))) {
418     switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
419     case 0:
420       SU.hasReservedResource = true;
421       break;
422     case 1:
423       SU.isUnbuffered = true;
424       break;
425     default:
426       break;
427     }
428   }
429 
430   unsigned GroupSizeBeforeEmit = CurrGroupSize;
431   EmitInstruction(&SU);
432 
433   if (!TakenBranch && isBranchRetTrap(MI)) {
434     // NT Branch on second slot ends group.
435     if (GroupSizeBeforeEmit == 1)
436       nextGroup();
437   }
438 
439   if (TakenBranch && CurrGroupSize > 0)
440     nextGroup();
441 
442   assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
443           "Scheduler: unhandled terminator!");
444 }
445 
446 void SystemZHazardRecognizer::
copyState(SystemZHazardRecognizer * Incoming)447 copyState(SystemZHazardRecognizer *Incoming) {
448   // Current decoder group
449   CurrGroupSize = Incoming->CurrGroupSize;
450   LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
451 
452   // Processor resources
453   ProcResourceCounters = Incoming->ProcResourceCounters;
454   CriticalResourceIdx = Incoming->CriticalResourceIdx;
455 
456   // FPd
457   LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
458   GrpCount = Incoming->GrpCount;
459 }
460