1 //===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements a simple VLIW packetizer using DFA. The packetizer works on
11 // machine basic blocks. For each instruction I in BB, the packetizer consults
12 // the DFA to see if machine resources are available to execute I. If so, the
13 // packetizer checks if I depends on any instruction J in the current packet.
14 // If no dependency is found, I is added to current packet and machine resource
15 // is marked as taken. If any dependency is found, a target API call is made to
16 // prune the dependence.
17 //
18 //===----------------------------------------------------------------------===//
19 #include "HexagonRegisterInfo.h"
20 #include "HexagonSubtarget.h"
21 #include "HexagonTargetMachine.h"
22 #include "HexagonVLIWPacketizer.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineLoopInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32
33 using namespace llvm;
34
35 #define DEBUG_TYPE "packets"
36
37 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
38 cl::ZeroOrMore, cl::init(false),
39 cl::desc("Disable Hexagon packetizer pass"));
40
41 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
42 cl::ZeroOrMore, cl::Hidden, cl::init(true),
43 cl::desc("Allow non-solo packetization of volatile memory references"));
44
45 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
46 cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
47
48 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
49 cl::init(false), cl::Hidden, cl::ZeroOrMore,
50 cl::desc("Disable vector double new-value-stores"));
51
52 extern cl::opt<bool> ScheduleInlineAsm;
53
54 namespace llvm {
55 FunctionPass *createHexagonPacketizer();
56 void initializeHexagonPacketizerPass(PassRegistry&);
57 }
58
59
60 namespace {
61 class HexagonPacketizer : public MachineFunctionPass {
62 public:
63 static char ID;
HexagonPacketizer()64 HexagonPacketizer() : MachineFunctionPass(ID) {
65 initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
66 }
67
getAnalysisUsage(AnalysisUsage & AU) const68 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 AU.setPreservesCFG();
70 AU.addRequired<AAResultsWrapperPass>();
71 AU.addRequired<MachineBranchProbabilityInfo>();
72 AU.addRequired<MachineDominatorTree>();
73 AU.addRequired<MachineLoopInfo>();
74 AU.addPreserved<MachineDominatorTree>();
75 AU.addPreserved<MachineLoopInfo>();
76 MachineFunctionPass::getAnalysisUsage(AU);
77 }
getPassName() const78 const char *getPassName() const override {
79 return "Hexagon Packetizer";
80 }
81 bool runOnMachineFunction(MachineFunction &Fn) override;
getRequiredProperties() const82 MachineFunctionProperties getRequiredProperties() const override {
83 return MachineFunctionProperties().set(
84 MachineFunctionProperties::Property::AllVRegsAllocated);
85 }
86
87 private:
88 const HexagonInstrInfo *HII;
89 const HexagonRegisterInfo *HRI;
90 };
91
92 char HexagonPacketizer::ID = 0;
93 }
94
95 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
96 false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)97 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
98 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
99 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
100 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
101 INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
102 false, false)
103
104
105 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
106 MachineLoopInfo &MLI, AliasAnalysis *AA,
107 const MachineBranchProbabilityInfo *MBPI)
108 : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
109 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
110 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
111 }
112
113 // Check if FirstI modifies a register that SecondI reads.
hasWriteToReadDep(const MachineInstr & FirstI,const MachineInstr & SecondI,const TargetRegisterInfo * TRI)114 static bool hasWriteToReadDep(const MachineInstr &FirstI,
115 const MachineInstr &SecondI,
116 const TargetRegisterInfo *TRI) {
117 for (auto &MO : FirstI.operands()) {
118 if (!MO.isReg() || !MO.isDef())
119 continue;
120 unsigned R = MO.getReg();
121 if (SecondI.readsRegister(R, TRI))
122 return true;
123 }
124 return false;
125 }
126
127
moveInstrOut(MachineInstr * MI,MachineBasicBlock::iterator BundleIt,bool Before)128 static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
129 MachineBasicBlock::iterator BundleIt, bool Before) {
130 MachineBasicBlock::instr_iterator InsertPt;
131 if (Before)
132 InsertPt = BundleIt.getInstrIterator();
133 else
134 InsertPt = std::next(BundleIt).getInstrIterator();
135
136 MachineBasicBlock &B = *MI->getParent();
137 // The instruction should at least be bundled with the preceding instruction
138 // (there will always be one, i.e. BUNDLE, if nothing else).
139 assert(MI->isBundledWithPred());
140 if (MI->isBundledWithSucc()) {
141 MI->clearFlag(MachineInstr::BundledSucc);
142 MI->clearFlag(MachineInstr::BundledPred);
143 } else {
144 // If it's not bundled with the successor (i.e. it is the last one
145 // in the bundle), then we can simply unbundle it from the predecessor,
146 // which will take care of updating the predecessor's flag.
147 MI->unbundleFromPred();
148 }
149 B.splice(InsertPt, &B, MI);
150
151 // Get the size of the bundle without asserting.
152 MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
153 MachineBasicBlock::const_instr_iterator E = B.instr_end();
154 unsigned Size = 0;
155 for (++I; I != E && I->isBundledWithPred(); ++I)
156 ++Size;
157
158 // If there are still two or more instructions, then there is nothing
159 // else to be done.
160 if (Size > 1)
161 return BundleIt;
162
163 // Otherwise, extract the single instruction out and delete the bundle.
164 MachineBasicBlock::iterator NextIt = std::next(BundleIt);
165 MachineInstr *SingleI = BundleIt->getNextNode();
166 SingleI->unbundleFromPred();
167 assert(!SingleI->isBundledWithSucc());
168 BundleIt->eraseFromParent();
169 return NextIt;
170 }
171
172
runOnMachineFunction(MachineFunction & MF)173 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
174 if (DisablePacketizer || skipFunction(*MF.getFunction()))
175 return false;
176
177 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
178 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
179 auto &MLI = getAnalysis<MachineLoopInfo>();
180 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
181 auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
182
183 if (EnableGenAllInsnClass)
184 HII->genAllInsnTimingClasses(MF);
185
186 // Instantiate the packetizer.
187 HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
188
189 // DFA state table should not be empty.
190 assert(Packetizer.getResourceTracker() && "Empty DFA table!");
191
192 //
193 // Loop over all basic blocks and remove KILL pseudo-instructions
194 // These instructions confuse the dependence analysis. Consider:
195 // D0 = ... (Insn 0)
196 // R0 = KILL R0, D0 (Insn 1)
197 // R0 = ... (Insn 2)
198 // Here, Insn 1 will result in the dependence graph not emitting an output
199 // dependence between Insn 0 and Insn 2. This can lead to incorrect
200 // packetization
201 //
202 for (auto &MB : MF) {
203 auto End = MB.end();
204 auto MI = MB.begin();
205 while (MI != End) {
206 auto NextI = std::next(MI);
207 if (MI->isKill()) {
208 MB.erase(MI);
209 End = MB.end();
210 }
211 MI = NextI;
212 }
213 }
214
215 // Loop over all of the basic blocks.
216 for (auto &MB : MF) {
217 auto Begin = MB.begin(), End = MB.end();
218 while (Begin != End) {
219 // First the first non-boundary starting from the end of the last
220 // scheduling region.
221 MachineBasicBlock::iterator RB = Begin;
222 while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
223 ++RB;
224 // First the first boundary starting from the beginning of the new
225 // region.
226 MachineBasicBlock::iterator RE = RB;
227 while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
228 ++RE;
229 // Add the scheduling boundary if it's not block end.
230 if (RE != End)
231 ++RE;
232 // If RB == End, then RE == End.
233 if (RB != End)
234 Packetizer.PacketizeMIs(&MB, RB, RE);
235
236 Begin = RE;
237 }
238 }
239
240 Packetizer.unpacketizeSoloInstrs(MF);
241 return true;
242 }
243
244
245 // Reserve resources for a constant extender. Trigger an assertion if the
246 // reservation fails.
reserveResourcesForConstExt()247 void HexagonPacketizerList::reserveResourcesForConstExt() {
248 if (!tryAllocateResourcesForConstExt(true))
249 llvm_unreachable("Resources not available");
250 }
251
canReserveResourcesForConstExt()252 bool HexagonPacketizerList::canReserveResourcesForConstExt() {
253 return tryAllocateResourcesForConstExt(false);
254 }
255
256 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
257 // return true, otherwise, return false.
tryAllocateResourcesForConstExt(bool Reserve)258 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
259 auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
260 bool Avail = ResourceTracker->canReserveResources(*ExtMI);
261 if (Reserve && Avail)
262 ResourceTracker->reserveResources(*ExtMI);
263 MF.DeleteMachineInstr(ExtMI);
264 return Avail;
265 }
266
267
isCallDependent(const MachineInstr * MI,SDep::Kind DepType,unsigned DepReg)268 bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
269 SDep::Kind DepType, unsigned DepReg) {
270 // Check for LR dependence.
271 if (DepReg == HRI->getRARegister())
272 return true;
273
274 if (HII->isDeallocRet(MI))
275 if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
276 return true;
277
278 // Check if this is a predicate dependence.
279 const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
280 if (RC == &Hexagon::PredRegsRegClass)
281 return true;
282
283 // Assumes that the first operand of the CALLr is the function address.
284 if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
285 MachineOperand MO = MI->getOperand(0);
286 if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
287 return true;
288 }
289
290 return false;
291 }
292
isRegDependence(const SDep::Kind DepType)293 static bool isRegDependence(const SDep::Kind DepType) {
294 return DepType == SDep::Data || DepType == SDep::Anti ||
295 DepType == SDep::Output;
296 }
297
isDirectJump(const MachineInstr * MI)298 static bool isDirectJump(const MachineInstr* MI) {
299 return MI->getOpcode() == Hexagon::J2_jump;
300 }
301
isSchedBarrier(const MachineInstr * MI)302 static bool isSchedBarrier(const MachineInstr* MI) {
303 switch (MI->getOpcode()) {
304 case Hexagon::Y2_barrier:
305 return true;
306 }
307 return false;
308 }
309
isControlFlow(const MachineInstr * MI)310 static bool isControlFlow(const MachineInstr* MI) {
311 return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
312 }
313
314
315 /// Returns true if the instruction modifies a callee-saved register.
doesModifyCalleeSavedReg(const MachineInstr * MI,const TargetRegisterInfo * TRI)316 static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
317 const TargetRegisterInfo *TRI) {
318 const MachineFunction &MF = *MI->getParent()->getParent();
319 for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
320 if (MI->modifiesRegister(*CSR, TRI))
321 return true;
322 return false;
323 }
324
325 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
326 // Returns true if an instruction can be promoted to .new predicate or
327 // new-value store.
isNewifiable(const MachineInstr * MI)328 bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
329 return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
330 }
331
332 // Promote an instructiont to its .cur form.
333 // At this time, we have already made a call to canPromoteToDotCur and made
334 // sure that it can *indeed* be promoted.
promoteToDotCur(MachineInstr * MI,SDep::Kind DepType,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)335 bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
336 SDep::Kind DepType, MachineBasicBlock::iterator &MII,
337 const TargetRegisterClass* RC) {
338 assert(DepType == SDep::Data);
339 int CurOpcode = HII->getDotCurOp(MI);
340 MI->setDesc(HII->get(CurOpcode));
341 return true;
342 }
343
cleanUpDotCur()344 void HexagonPacketizerList::cleanUpDotCur() {
345 MachineInstr *MI = NULL;
346 for (auto BI : CurrentPacketMIs) {
347 DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
348 if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
349 MI = BI;
350 continue;
351 }
352 if (MI) {
353 for (auto &MO : BI->operands())
354 if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
355 return;
356 }
357 }
358 if (!MI)
359 return;
360 // We did not find a use of the CUR, so de-cur it.
361 MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
362 DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
363 }
364
365 // Check to see if an instruction can be dot cur.
canPromoteToDotCur(const MachineInstr * MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)366 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
367 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
368 const TargetRegisterClass *RC) {
369 if (!HII->isV60VectorInstruction(MI))
370 return false;
371 if (!HII->isV60VectorInstruction(&*MII))
372 return false;
373
374 // Already a dot new instruction.
375 if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
376 return false;
377
378 if (!HII->mayBeCurLoad(MI))
379 return false;
380
381 // The "cur value" cannot come from inline asm.
382 if (PacketSU->getInstr()->isInlineAsm())
383 return false;
384
385 // Make sure candidate instruction uses cur.
386 DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
387 MI->dump();
388 dbgs() << "in packet\n";);
389 MachineInstr &MJ = *MII;
390 DEBUG({
391 dbgs() << "Checking CUR against ";
392 MJ.dump();
393 });
394 unsigned DestReg = MI->getOperand(0).getReg();
395 bool FoundMatch = false;
396 for (auto &MO : MJ.operands())
397 if (MO.isReg() && MO.getReg() == DestReg)
398 FoundMatch = true;
399 if (!FoundMatch)
400 return false;
401
402 // Check for existing uses of a vector register within the packet which
403 // would be affected by converting a vector load into .cur formt.
404 for (auto BI : CurrentPacketMIs) {
405 DEBUG(dbgs() << "packet has "; BI->dump(););
406 if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
407 return false;
408 }
409
410 DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
411 // We can convert the opcode into a .cur.
412 return true;
413 }
414
415 // Promote an instruction to its .new form. At this time, we have already
416 // made a call to canPromoteToDotNew and made sure that it can *indeed* be
417 // promoted.
promoteToDotNew(MachineInstr * MI,SDep::Kind DepType,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)418 bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
419 SDep::Kind DepType, MachineBasicBlock::iterator &MII,
420 const TargetRegisterClass* RC) {
421 assert (DepType == SDep::Data);
422 int NewOpcode;
423 if (RC == &Hexagon::PredRegsRegClass)
424 NewOpcode = HII->getDotNewPredOp(MI, MBPI);
425 else
426 NewOpcode = HII->getDotNewOp(MI);
427 MI->setDesc(HII->get(NewOpcode));
428 return true;
429 }
430
demoteToDotOld(MachineInstr * MI)431 bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
432 int NewOpcode = HII->getDotOldOp(MI->getOpcode());
433 MI->setDesc(HII->get(NewOpcode));
434 return true;
435 }
436
437 enum PredicateKind {
438 PK_False,
439 PK_True,
440 PK_Unknown
441 };
442
443 /// Returns true if an instruction is predicated on p0 and false if it's
444 /// predicated on !p0.
getPredicateSense(const MachineInstr & MI,const HexagonInstrInfo * HII)445 static PredicateKind getPredicateSense(const MachineInstr &MI,
446 const HexagonInstrInfo *HII) {
447 if (!HII->isPredicated(MI))
448 return PK_Unknown;
449 if (HII->isPredicatedTrue(MI))
450 return PK_True;
451 return PK_False;
452 }
453
getPostIncrementOperand(const MachineInstr * MI,const HexagonInstrInfo * HII)454 static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
455 const HexagonInstrInfo *HII) {
456 assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
457 #ifndef NDEBUG
458 // Post Increment means duplicates. Use dense map to find duplicates in the
459 // list. Caution: Densemap initializes with the minimum of 64 buckets,
460 // whereas there are at most 5 operands in the post increment.
461 DenseSet<unsigned> DefRegsSet;
462 for (auto &MO : MI->operands())
463 if (MO.isReg() && MO.isDef())
464 DefRegsSet.insert(MO.getReg());
465
466 for (auto &MO : MI->operands())
467 if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
468 return MO;
469 #else
470 if (MI->mayLoad()) {
471 const MachineOperand &Op1 = MI->getOperand(1);
472 // The 2nd operand is always the post increment operand in load.
473 assert(Op1.isReg() && "Post increment operand has be to a register.");
474 return Op1;
475 }
476 if (MI->getDesc().mayStore()) {
477 const MachineOperand &Op0 = MI->getOperand(0);
478 // The 1st operand is always the post increment operand in store.
479 assert(Op0.isReg() && "Post increment operand has be to a register.");
480 return Op0;
481 }
482 #endif
483 // we should never come here.
484 llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
485 }
486
487 // Get the value being stored.
getStoreValueOperand(const MachineInstr * MI)488 static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
489 // value being stored is always the last operand.
490 return MI->getOperand(MI->getNumOperands()-1);
491 }
492
isLoadAbsSet(const MachineInstr * MI)493 static bool isLoadAbsSet(const MachineInstr *MI) {
494 unsigned Opc = MI->getOpcode();
495 switch (Opc) {
496 case Hexagon::L4_loadrd_ap:
497 case Hexagon::L4_loadrb_ap:
498 case Hexagon::L4_loadrh_ap:
499 case Hexagon::L4_loadrub_ap:
500 case Hexagon::L4_loadruh_ap:
501 case Hexagon::L4_loadri_ap:
502 return true;
503 }
504 return false;
505 }
506
getAbsSetOperand(const MachineInstr * MI)507 static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
508 assert(isLoadAbsSet(MI));
509 return MI->getOperand(1);
510 }
511
512
513 // Can be new value store?
514 // Following restrictions are to be respected in convert a store into
515 // a new value store.
516 // 1. If an instruction uses auto-increment, its address register cannot
517 // be a new-value register. Arch Spec 5.4.2.1
518 // 2. If an instruction uses absolute-set addressing mode, its address
519 // register cannot be a new-value register. Arch Spec 5.4.2.1.
520 // 3. If an instruction produces a 64-bit result, its registers cannot be used
521 // as new-value registers. Arch Spec 5.4.2.2.
522 // 4. If the instruction that sets the new-value register is conditional, then
523 // the instruction that uses the new-value register must also be conditional,
524 // and both must always have their predicates evaluate identically.
525 // Arch Spec 5.4.2.3.
526 // 5. There is an implied restriction that a packet cannot have another store,
527 // if there is a new value store in the packet. Corollary: if there is
528 // already a store in a packet, there can not be a new value store.
529 // Arch Spec: 3.4.4.2
canPromoteToNewValueStore(const MachineInstr * MI,const MachineInstr * PacketMI,unsigned DepReg)530 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
531 const MachineInstr *PacketMI, unsigned DepReg) {
532 // Make sure we are looking at the store, that can be promoted.
533 if (!HII->mayBeNewStore(MI))
534 return false;
535
536 // Make sure there is dependency and can be new value'd.
537 const MachineOperand &Val = getStoreValueOperand(MI);
538 if (Val.isReg() && Val.getReg() != DepReg)
539 return false;
540
541 const MCInstrDesc& MCID = PacketMI->getDesc();
542
543 // First operand is always the result.
544 const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
545 // Double regs can not feed into new value store: PRM section: 5.4.2.2.
546 if (PacketRC == &Hexagon::DoubleRegsRegClass)
547 return false;
548
549 // New-value stores are of class NV (slot 0), dual stores require class ST
550 // in slot 0 (PRM 5.5).
551 for (auto I : CurrentPacketMIs) {
552 SUnit *PacketSU = MIToSUnit.find(I)->second;
553 if (PacketSU->getInstr()->mayStore())
554 return false;
555 }
556
557 // Make sure it's NOT the post increment register that we are going to
558 // new value.
559 if (HII->isPostIncrement(MI) &&
560 getPostIncrementOperand(MI, HII).getReg() == DepReg) {
561 return false;
562 }
563
564 if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
565 getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
566 // If source is post_inc, or absolute-set addressing, it can not feed
567 // into new value store
568 // r3 = memw(r2++#4)
569 // memw(r30 + #-1404) = r2.new -> can not be new value store
570 // arch spec section: 5.4.2.1.
571 return false;
572 }
573
574 if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
575 return false;
576
577 // If the source that feeds the store is predicated, new value store must
578 // also be predicated.
579 if (HII->isPredicated(*PacketMI)) {
580 if (!HII->isPredicated(*MI))
581 return false;
582
583 // Check to make sure that they both will have their predicates
584 // evaluate identically.
585 unsigned predRegNumSrc = 0;
586 unsigned predRegNumDst = 0;
587 const TargetRegisterClass* predRegClass = nullptr;
588
589 // Get predicate register used in the source instruction.
590 for (auto &MO : PacketMI->operands()) {
591 if (!MO.isReg())
592 continue;
593 predRegNumSrc = MO.getReg();
594 predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
595 if (predRegClass == &Hexagon::PredRegsRegClass)
596 break;
597 }
598 assert((predRegClass == &Hexagon::PredRegsRegClass) &&
599 "predicate register not found in a predicated PacketMI instruction");
600
601 // Get predicate register used in new-value store instruction.
602 for (auto &MO : MI->operands()) {
603 if (!MO.isReg())
604 continue;
605 predRegNumDst = MO.getReg();
606 predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
607 if (predRegClass == &Hexagon::PredRegsRegClass)
608 break;
609 }
610 assert((predRegClass == &Hexagon::PredRegsRegClass) &&
611 "predicate register not found in a predicated MI instruction");
612
613 // New-value register producer and user (store) need to satisfy these
614 // constraints:
615 // 1) Both instructions should be predicated on the same register.
616 // 2) If producer of the new-value register is .new predicated then store
617 // should also be .new predicated and if producer is not .new predicated
618 // then store should not be .new predicated.
619 // 3) Both new-value register producer and user should have same predicate
620 // sense, i.e, either both should be negated or both should be non-negated.
621 if (predRegNumDst != predRegNumSrc ||
622 HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
623 getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII))
624 return false;
625 }
626
627 // Make sure that other than the new-value register no other store instruction
628 // register has been modified in the same packet. Predicate registers can be
629 // modified by they should not be modified between the producer and the store
630 // instruction as it will make them both conditional on different values.
631 // We already know this to be true for all the instructions before and
632 // including PacketMI. Howerver, we need to perform the check for the
633 // remaining instructions in the packet.
634
635 unsigned StartCheck = 0;
636
637 for (auto I : CurrentPacketMIs) {
638 SUnit *TempSU = MIToSUnit.find(I)->second;
639 MachineInstr* TempMI = TempSU->getInstr();
640
641 // Following condition is true for all the instructions until PacketMI is
642 // reached (StartCheck is set to 0 before the for loop).
643 // StartCheck flag is 1 for all the instructions after PacketMI.
644 if (TempMI != PacketMI && !StartCheck) // Start processing only after
645 continue; // encountering PacketMI.
646
647 StartCheck = 1;
648 if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
649 continue;
650
651 for (auto &MO : MI->operands())
652 if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
653 return false;
654 }
655
656 // Make sure that for non-POST_INC stores:
657 // 1. The only use of reg is DepReg and no other registers.
658 // This handles V4 base+index registers.
659 // The following store can not be dot new.
660 // Eg. r0 = add(r0, #3)
661 // memw(r1+r0<<#2) = r0
662 if (!HII->isPostIncrement(MI)) {
663 for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
664 const MachineOperand &MO = MI->getOperand(opNum);
665 if (MO.isReg() && MO.getReg() == DepReg)
666 return false;
667 }
668 }
669
670 // If data definition is because of implicit definition of the register,
671 // do not newify the store. Eg.
672 // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
673 // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
674 for (auto &MO : PacketMI->operands()) {
675 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
676 continue;
677 unsigned R = MO.getReg();
678 if (R == DepReg || HRI->isSuperRegister(DepReg, R))
679 return false;
680 }
681
682 // Handle imp-use of super reg case. There is a target independent side
683 // change that should prevent this situation but I am handling it for
684 // just-in-case. For example, we cannot newify R2 in the following case:
685 // %R3<def> = A2_tfrsi 0;
686 // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
687 for (auto &MO : MI->operands()) {
688 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
689 return false;
690 }
691
692 // Can be dot new store.
693 return true;
694 }
695
696 // Can this MI to promoted to either new value store or new value jump.
canPromoteToNewValue(const MachineInstr * MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII)697 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
698 const SUnit *PacketSU, unsigned DepReg,
699 MachineBasicBlock::iterator &MII) {
700 if (!HII->mayBeNewStore(MI))
701 return false;
702
703 // Check to see the store can be new value'ed.
704 MachineInstr *PacketMI = PacketSU->getInstr();
705 if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
706 return true;
707
708 // Check to see the compare/jump can be new value'ed.
709 // This is done as a pass on its own. Don't need to check it here.
710 return false;
711 }
712
isImplicitDependency(const MachineInstr * I,unsigned DepReg)713 static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
714 for (auto &MO : I->operands())
715 if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
716 return true;
717 return false;
718 }
719
720 // Check to see if an instruction can be dot new
721 // There are three kinds.
722 // 1. dot new on predicate - V2/V3/V4
723 // 2. dot new on stores NV/ST - V4
724 // 3. dot new on jump NV/J - V4 -- This is generated in a pass.
canPromoteToDotNew(const MachineInstr * MI,const SUnit * PacketSU,unsigned DepReg,MachineBasicBlock::iterator & MII,const TargetRegisterClass * RC)725 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
726 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
727 const TargetRegisterClass* RC) {
728 // Already a dot new instruction.
729 if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
730 return false;
731
732 if (!isNewifiable(MI))
733 return false;
734
735 const MachineInstr *PI = PacketSU->getInstr();
736
737 // The "new value" cannot come from inline asm.
738 if (PI->isInlineAsm())
739 return false;
740
741 // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
742 // sense.
743 if (PI->isImplicitDef())
744 return false;
745
746 // If dependency is trough an implicitly defined register, we should not
747 // newify the use.
748 if (isImplicitDependency(PI, DepReg))
749 return false;
750
751 const MCInstrDesc& MCID = PI->getDesc();
752 const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
753 if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
754 return false;
755
756 // predicate .new
757 // bug 5670: until that is fixed
758 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
759 if (RC == &Hexagon::PredRegsRegClass)
760 if (HII->isCondInst(MI) || MI->isReturn())
761 return HII->predCanBeUsedAsDotNew(PI, DepReg);
762
763 if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
764 return false;
765
766 // Create a dot new machine instruction to see if resources can be
767 // allocated. If not, bail out now.
768 int NewOpcode = HII->getDotNewOp(MI);
769 const MCInstrDesc &D = HII->get(NewOpcode);
770 MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
771 bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
772 MF.DeleteMachineInstr(NewMI);
773 if (!ResourcesAvailable)
774 return false;
775
776 // New Value Store only. New Value Jump generated as a separate pass.
777 if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
778 return false;
779
780 return true;
781 }
782
783 // Go through the packet instructions and search for an anti dependency between
784 // them and DepReg from MI. Consider this case:
785 // Trying to add
786 // a) %R1<def> = TFRI_cdNotPt %P3, 2
787 // to this packet:
788 // {
789 // b) %P0<def> = C2_or %P3<kill>, %P0<kill>
790 // c) %P3<def> = C2_tfrrp %R23
791 // d) %R1<def> = C2_cmovenewit %P3, 4
792 // }
793 // The P3 from a) and d) will be complements after
794 // a)'s P3 is converted to .new form
795 // Anti-dep between c) and b) is irrelevant for this case
restrictingDepExistInPacket(MachineInstr * MI,unsigned DepReg)796 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
797 unsigned DepReg) {
798 SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
799
800 for (auto I : CurrentPacketMIs) {
801 // We only care for dependencies to predicated instructions
802 if (!HII->isPredicated(*I))
803 continue;
804
805 // Scheduling Unit for current insn in the packet
806 SUnit *PacketSU = MIToSUnit.find(I)->second;
807
808 // Look at dependencies between current members of the packet and
809 // predicate defining instruction MI. Make sure that dependency is
810 // on the exact register we care about.
811 if (PacketSU->isSucc(PacketSUDep)) {
812 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
813 auto &Dep = PacketSU->Succs[i];
814 if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
815 Dep.getReg() == DepReg)
816 return true;
817 }
818 }
819 }
820
821 return false;
822 }
823
824
825 /// Gets the predicate register of a predicated instruction.
getPredicatedRegister(MachineInstr & MI,const HexagonInstrInfo * QII)826 static unsigned getPredicatedRegister(MachineInstr &MI,
827 const HexagonInstrInfo *QII) {
828 /// We use the following rule: The first predicate register that is a use is
829 /// the predicate register of a predicated instruction.
830 assert(QII->isPredicated(MI) && "Must be predicated instruction");
831
832 for (auto &Op : MI.operands()) {
833 if (Op.isReg() && Op.getReg() && Op.isUse() &&
834 Hexagon::PredRegsRegClass.contains(Op.getReg()))
835 return Op.getReg();
836 }
837
838 llvm_unreachable("Unknown instruction operand layout");
839 return 0;
840 }
841
842 // Given two predicated instructions, this function detects whether
843 // the predicates are complements.
arePredicatesComplements(MachineInstr & MI1,MachineInstr & MI2)844 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
845 MachineInstr &MI2) {
846 // If we don't know the predicate sense of the instructions bail out early, we
847 // need it later.
848 if (getPredicateSense(MI1, HII) == PK_Unknown ||
849 getPredicateSense(MI2, HII) == PK_Unknown)
850 return false;
851
852 // Scheduling unit for candidate.
853 SUnit *SU = MIToSUnit[&MI1];
854
855 // One corner case deals with the following scenario:
856 // Trying to add
857 // a) %R24<def> = A2_tfrt %P0, %R25
858 // to this packet:
859 // {
860 // b) %R25<def> = A2_tfrf %P0, %R24
861 // c) %P0<def> = C2_cmpeqi %R26, 1
862 // }
863 //
864 // On general check a) and b) are complements, but presence of c) will
865 // convert a) to .new form, and then it is not a complement.
866 // We attempt to detect it by analyzing existing dependencies in the packet.
867
868 // Analyze relationships between all existing members of the packet.
869 // Look for Anti dependecy on the same predicate reg as used in the
870 // candidate.
871 for (auto I : CurrentPacketMIs) {
872 // Scheduling Unit for current insn in the packet.
873 SUnit *PacketSU = MIToSUnit.find(I)->second;
874
875 // If this instruction in the packet is succeeded by the candidate...
876 if (PacketSU->isSucc(SU)) {
877 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
878 auto Dep = PacketSU->Succs[i];
879 // The corner case exist when there is true data dependency between
880 // candidate and one of current packet members, this dep is on
881 // predicate reg, and there already exist anti dep on the same pred in
882 // the packet.
883 if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
884 Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
885 // Here I know that I is predicate setting instruction with true
886 // data dep to candidate on the register we care about - c) in the
887 // above example. Now I need to see if there is an anti dependency
888 // from c) to any other instruction in the same packet on the pred
889 // reg of interest.
890 if (restrictingDepExistInPacket(I, Dep.getReg()))
891 return false;
892 }
893 }
894 }
895 }
896
897 // If the above case does not apply, check regular complement condition.
898 // Check that the predicate register is the same and that the predicate
899 // sense is different We also need to differentiate .old vs. .new: !p0
900 // is not complementary to p0.new.
901 unsigned PReg1 = getPredicatedRegister(MI1, HII);
902 unsigned PReg2 = getPredicatedRegister(MI2, HII);
903 return PReg1 == PReg2 &&
904 Hexagon::PredRegsRegClass.contains(PReg1) &&
905 Hexagon::PredRegsRegClass.contains(PReg2) &&
906 getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
907 HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2);
908 }
909
910 // Initialize packetizer flags.
initPacketizerState()911 void HexagonPacketizerList::initPacketizerState() {
912 Dependence = false;
913 PromotedToDotNew = false;
914 GlueToNewValueJump = false;
915 GlueAllocframeStore = false;
916 FoundSequentialDependence = false;
917 }
918
919 // Ignore bundling of pseudo instructions.
ignorePseudoInstruction(const MachineInstr & MI,const MachineBasicBlock *)920 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
921 const MachineBasicBlock *) {
922 if (MI.isDebugValue())
923 return true;
924
925 if (MI.isCFIInstruction())
926 return false;
927
928 // We must print out inline assembly.
929 if (MI.isInlineAsm())
930 return false;
931
932 if (MI.isImplicitDef())
933 return false;
934
935 // We check if MI has any functional units mapped to it. If it doesn't,
936 // we ignore the instruction.
937 const MCInstrDesc& TID = MI.getDesc();
938 auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
939 unsigned FuncUnits = IS->getUnits();
940 return !FuncUnits;
941 }
942
isSoloInstruction(const MachineInstr & MI)943 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
944 if (MI.isEHLabel() || MI.isCFIInstruction())
945 return true;
946
947 // Consider inline asm to not be a solo instruction by default.
948 // Inline asm will be put in a packet temporarily, but then it will be
949 // removed, and placed outside of the packet (before or after, depending
950 // on dependencies). This is to reduce the impact of inline asm as a
951 // "packet splitting" instruction.
952 if (MI.isInlineAsm() && !ScheduleInlineAsm)
953 return true;
954
955 // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
956 // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
957 // They must not be grouped with other instructions in a packet.
958 if (isSchedBarrier(&MI))
959 return true;
960
961 if (HII->isSolo(&MI))
962 return true;
963
964 if (MI.getOpcode() == Hexagon::A2_nop)
965 return true;
966
967 return false;
968 }
969
970
971 // Quick check if instructions MI and MJ cannot coexist in the same packet.
972 // Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
973 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
974 // For full test call this function twice:
975 // cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
976 // Doing the test only one way saves the amount of code in this function,
977 // since every test would need to be repeated with the MI and MJ reversed.
cannotCoexistAsymm(const MachineInstr * MI,const MachineInstr * MJ,const HexagonInstrInfo & HII)978 static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
979 const HexagonInstrInfo &HII) {
980 const MachineFunction *MF = MI->getParent()->getParent();
981 if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
982 HII.isHVXMemWithAIndirect(MI, MJ))
983 return true;
984
985 // An inline asm cannot be together with a branch, because we may not be
986 // able to remove the asm out after packetizing (i.e. if the asm must be
987 // moved past the bundle). Similarly, two asms cannot be together to avoid
988 // complications when determining their relative order outside of a bundle.
989 if (MI->isInlineAsm())
990 return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
991 MJ->isCall() || MJ->isTerminator();
992
993 // "False" really means that the quick check failed to determine if
994 // I and J cannot coexist.
995 return false;
996 }
997
998
999 // Full, symmetric check.
cannotCoexist(const MachineInstr * MI,const MachineInstr * MJ)1000 bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
1001 const MachineInstr *MJ) {
1002 return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
1003 }
1004
unpacketizeSoloInstrs(MachineFunction & MF)1005 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
1006 for (auto &B : MF) {
1007 MachineBasicBlock::iterator BundleIt;
1008 MachineBasicBlock::instr_iterator NextI;
1009 for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
1010 NextI = std::next(I);
1011 MachineInstr *MI = &*I;
1012 if (MI->isBundle())
1013 BundleIt = I;
1014 if (!MI->isInsideBundle())
1015 continue;
1016
1017 // Decide on where to insert the instruction that we are pulling out.
1018 // Debug instructions always go before the bundle, but the placement of
1019 // INLINE_ASM depends on potential dependencies. By default, try to
1020 // put it before the bundle, but if the asm writes to a register that
1021 // other instructions in the bundle read, then we need to place it
1022 // after the bundle (to preserve the bundle semantics).
1023 bool InsertBeforeBundle;
1024 if (MI->isInlineAsm())
1025 InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI);
1026 else if (MI->isDebugValue())
1027 InsertBeforeBundle = true;
1028 else
1029 continue;
1030
1031 BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
1032 }
1033 }
1034 }
1035
1036 // Check if a given instruction is of class "system".
isSystemInstr(const MachineInstr * MI)1037 static bool isSystemInstr(const MachineInstr *MI) {
1038 unsigned Opc = MI->getOpcode();
1039 switch (Opc) {
1040 case Hexagon::Y2_barrier:
1041 case Hexagon::Y2_dcfetchbo:
1042 return true;
1043 }
1044 return false;
1045 }
1046
hasDeadDependence(const MachineInstr * I,const MachineInstr * J)1047 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
1048 const MachineInstr *J) {
1049 // The dependence graph may not include edges between dead definitions,
1050 // so without extra checks, we could end up packetizing two instruction
1051 // defining the same (dead) register.
1052 if (I->isCall() || J->isCall())
1053 return false;
1054 if (HII->isPredicated(*I) || HII->isPredicated(*J))
1055 return false;
1056
1057 BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
1058 for (auto &MO : I->operands()) {
1059 if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1060 continue;
1061 DeadDefs[MO.getReg()] = true;
1062 }
1063
1064 for (auto &MO : J->operands()) {
1065 if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1066 continue;
1067 unsigned R = MO.getReg();
1068 if (R != Hexagon::USR_OVF && DeadDefs[R])
1069 return true;
1070 }
1071 return false;
1072 }
1073
hasControlDependence(const MachineInstr * I,const MachineInstr * J)1074 bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
1075 const MachineInstr *J) {
1076 // A save callee-save register function call can only be in a packet
1077 // with instructions that don't write to the callee-save registers.
1078 if ((HII->isSaveCalleeSavedRegsCall(I) &&
1079 doesModifyCalleeSavedReg(J, HRI)) ||
1080 (HII->isSaveCalleeSavedRegsCall(J) &&
1081 doesModifyCalleeSavedReg(I, HRI)))
1082 return true;
1083
1084 // Two control flow instructions cannot go in the same packet.
1085 if (isControlFlow(I) && isControlFlow(J))
1086 return true;
1087
1088 // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
1089 // contain a speculative indirect jump,
1090 // a new-value compare jump or a dealloc_return.
1091 auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
1092 if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
1093 return true;
1094 if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI))
1095 return true;
1096 return false;
1097 };
1098
1099 if (HII->isLoopN(I) && isBadForLoopN(J))
1100 return true;
1101 if (HII->isLoopN(J) && isBadForLoopN(I))
1102 return true;
1103
1104 // dealloc_return cannot appear in the same packet as a conditional or
1105 // unconditional jump.
1106 return HII->isDeallocRet(I) &&
1107 (J->isBranch() || J->isCall() || J->isBarrier());
1108 }
1109
hasV4SpecificDependence(const MachineInstr * I,const MachineInstr * J)1110 bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
1111 const MachineInstr *J) {
1112 bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
1113 bool StoreI = I->mayStore(), StoreJ = J->mayStore();
1114 if ((SysI && StoreJ) || (SysJ && StoreI))
1115 return true;
1116
1117 if (StoreI && StoreJ) {
1118 if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
1119 return true;
1120 } else {
1121 // A memop cannot be in the same packet with another memop or a store.
1122 // Two stores can be together, but here I and J cannot both be stores.
1123 bool MopStI = HII->isMemOp(I) || StoreI;
1124 bool MopStJ = HII->isMemOp(J) || StoreJ;
1125 if (MopStI && MopStJ)
1126 return true;
1127 }
1128
1129 return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
1130 }
1131
1132 // SUI is the current instruction that is out side of the current packet.
1133 // SUJ is the current instruction inside the current packet against which that
1134 // SUI will be packetized.
isLegalToPacketizeTogether(SUnit * SUI,SUnit * SUJ)1135 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
1136 MachineInstr *I = SUI->getInstr();
1137 MachineInstr *J = SUJ->getInstr();
1138 assert(I && J && "Unable to packetize null instruction!");
1139
1140 // Clear IgnoreDepMIs when Packet starts.
1141 if (CurrentPacketMIs.size() == 1)
1142 IgnoreDepMIs.clear();
1143
1144 MachineBasicBlock::iterator II = I;
1145 const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
1146
1147 // Solo instructions cannot go in the packet.
1148 assert(!isSoloInstruction(*I) && "Unexpected solo instr!");
1149
1150 if (cannotCoexist(I, J))
1151 return false;
1152
1153 Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
1154 if (Dependence)
1155 return false;
1156
1157 // V4 allows dual stores. It does not allow second store, if the first
1158 // store is not in SLOT0. New value store, new value jump, dealloc_return
1159 // and memop always take SLOT0. Arch spec 3.4.4.2.
1160 Dependence = hasV4SpecificDependence(I, J);
1161 if (Dependence)
1162 return false;
1163
1164 // If an instruction feeds new value jump, glue it.
1165 MachineBasicBlock::iterator NextMII = I;
1166 ++NextMII;
1167 if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) {
1168 MachineInstr &NextMI = *NextMII;
1169
1170 bool secondRegMatch = false;
1171 const MachineOperand &NOp0 = NextMI.getOperand(0);
1172 const MachineOperand &NOp1 = NextMI.getOperand(1);
1173
1174 if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
1175 secondRegMatch = true;
1176
1177 for (auto I : CurrentPacketMIs) {
1178 SUnit *PacketSU = MIToSUnit.find(I)->second;
1179 MachineInstr *PI = PacketSU->getInstr();
1180 // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
1181 if (PI->isCall()) {
1182 Dependence = true;
1183 break;
1184 }
1185 // Validate:
1186 // 1. Packet does not have a store in it.
1187 // 2. If the first operand of the nvj is newified, and the second
1188 // operand is also a reg, it (second reg) is not defined in
1189 // the same packet.
1190 // 3. If the second operand of the nvj is newified, (which means
1191 // first operand is also a reg), first reg is not defined in
1192 // the same packet.
1193 if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
1194 HII->isLoopN(PI)) {
1195 Dependence = true;
1196 break;
1197 }
1198 // Check #2/#3.
1199 const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
1200 if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
1201 Dependence = true;
1202 break;
1203 }
1204 }
1205
1206 if (Dependence)
1207 return false;
1208 GlueToNewValueJump = true;
1209 }
1210
1211 // There no dependency between a prolog instruction and its successor.
1212 if (!SUJ->isSucc(SUI))
1213 return true;
1214
1215 for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
1216 if (FoundSequentialDependence)
1217 break;
1218
1219 if (SUJ->Succs[i].getSUnit() != SUI)
1220 continue;
1221
1222 SDep::Kind DepType = SUJ->Succs[i].getKind();
1223 // For direct calls:
1224 // Ignore register dependences for call instructions for packetization
1225 // purposes except for those due to r31 and predicate registers.
1226 //
1227 // For indirect calls:
1228 // Same as direct calls + check for true dependences to the register
1229 // used in the indirect call.
1230 //
1231 // We completely ignore Order dependences for call instructions.
1232 //
1233 // For returns:
1234 // Ignore register dependences for return instructions like jumpr,
1235 // dealloc return unless we have dependencies on the explicit uses
1236 // of the registers used by jumpr (like r31) or dealloc return
1237 // (like r29 or r30).
1238 //
1239 // TODO: Currently, jumpr is handling only return of r31. So, the
1240 // following logic (specificaly isCallDependent) is working fine.
1241 // We need to enable jumpr for register other than r31 and then,
1242 // we need to rework the last part, where it handles indirect call
1243 // of that (isCallDependent) function. Bug 6216 is opened for this.
1244 unsigned DepReg = 0;
1245 const TargetRegisterClass *RC = nullptr;
1246 if (DepType == SDep::Data) {
1247 DepReg = SUJ->Succs[i].getReg();
1248 RC = HRI->getMinimalPhysRegClass(DepReg);
1249 }
1250
1251 if (I->isCall() || I->isReturn() || HII->isTailCall(I)) {
1252 if (!isRegDependence(DepType))
1253 continue;
1254 if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
1255 continue;
1256 }
1257
1258 if (DepType == SDep::Data) {
1259 if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
1260 if (promoteToDotCur(J, DepType, II, RC))
1261 continue;
1262 }
1263
1264 // Data dpendence ok if we have load.cur.
1265 if (DepType == SDep::Data && HII->isDotCurInst(J)) {
1266 if (HII->isV60VectorInstruction(I))
1267 continue;
1268 }
1269
1270 // For instructions that can be promoted to dot-new, try to promote.
1271 if (DepType == SDep::Data) {
1272 if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
1273 if (promoteToDotNew(I, DepType, II, RC)) {
1274 PromotedToDotNew = true;
1275 continue;
1276 }
1277 }
1278 if (HII->isNewValueJump(I))
1279 continue;
1280 }
1281
1282 // For predicated instructions, if the predicates are complements then
1283 // there can be no dependence.
1284 if (HII->isPredicated(*I) && HII->isPredicated(*J) &&
1285 arePredicatesComplements(*I, *J)) {
1286 // Not always safe to do this translation.
1287 // DAG Builder attempts to reduce dependence edges using transitive
1288 // nature of dependencies. Here is an example:
1289 //
1290 // r0 = tfr_pt ... (1)
1291 // r0 = tfr_pf ... (2)
1292 // r0 = tfr_pt ... (3)
1293 //
1294 // There will be an output dependence between (1)->(2) and (2)->(3).
1295 // However, there is no dependence edge between (1)->(3). This results
1296 // in all 3 instructions going in the same packet. We ignore dependce
1297 // only once to avoid this situation.
1298 auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
1299 if (Itr != IgnoreDepMIs.end()) {
1300 Dependence = true;
1301 return false;
1302 }
1303 IgnoreDepMIs.push_back(I);
1304 continue;
1305 }
1306
1307 // Ignore Order dependences between unconditional direct branches
1308 // and non-control-flow instructions.
1309 if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
1310 DepType == SDep::Order)
1311 continue;
1312
1313 // Ignore all dependences for jumps except for true and output
1314 // dependences.
1315 if (I->isConditionalBranch() && DepType != SDep::Data &&
1316 DepType != SDep::Output)
1317 continue;
1318
1319 // Ignore output dependences due to superregs. We can write to two
1320 // different subregisters of R1:0 for instance in the same cycle.
1321
1322 // If neither I nor J defines DepReg, then this is a superfluous output
1323 // dependence. The dependence must be of the form:
1324 // R0 = ...
1325 // R1 = ...
1326 // and there is an output dependence between the two instructions with
1327 // DepReg = D0.
1328 // We want to ignore these dependences. Ideally, the dependence
1329 // constructor should annotate such dependences. We can then avoid this
1330 // relatively expensive check.
1331 //
1332 if (DepType == SDep::Output) {
1333 // DepReg is the register that's responsible for the dependence.
1334 unsigned DepReg = SUJ->Succs[i].getReg();
1335
1336 // Check if I and J really defines DepReg.
1337 if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
1338 continue;
1339 FoundSequentialDependence = true;
1340 break;
1341 }
1342
1343 // For Order dependences:
1344 // 1. On V4 or later, volatile loads/stores can be packetized together,
1345 // unless other rules prevent is.
1346 // 2. Store followed by a load is not allowed.
1347 // 3. Store followed by a store is only valid on V4 or later.
1348 // 4. Load followed by any memory operation is allowed.
1349 if (DepType == SDep::Order) {
1350 if (!PacketizeVolatiles) {
1351 bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
1352 if (OrdRefs) {
1353 FoundSequentialDependence = true;
1354 break;
1355 }
1356 }
1357 // J is first, I is second.
1358 bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
1359 bool LoadI = I->mayLoad(), StoreI = I->mayStore();
1360 if (StoreJ) {
1361 // Two stores are only allowed on V4+. Load following store is never
1362 // allowed.
1363 if (LoadI) {
1364 FoundSequentialDependence = true;
1365 break;
1366 }
1367 } else if (!LoadJ || (!LoadI && !StoreI)) {
1368 // If J is neither load nor store, assume a dependency.
1369 // If J is a load, but I is neither, also assume a dependency.
1370 FoundSequentialDependence = true;
1371 break;
1372 }
1373 // Store followed by store: not OK on V2.
1374 // Store followed by load: not OK on all.
1375 // Load followed by store: OK on all.
1376 // Load followed by load: OK on all.
1377 continue;
1378 }
1379
1380 // For V4, special case ALLOCFRAME. Even though there is dependency
1381 // between ALLOCFRAME and subsequent store, allow it to be packetized
1382 // in a same packet. This implies that the store is using the caller's
1383 // SP. Hence, offset needs to be updated accordingly.
1384 if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
1385 unsigned Opc = I->getOpcode();
1386 switch (Opc) {
1387 case Hexagon::S2_storerd_io:
1388 case Hexagon::S2_storeri_io:
1389 case Hexagon::S2_storerh_io:
1390 case Hexagon::S2_storerb_io:
1391 if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
1392 int64_t Imm = I->getOperand(1).getImm();
1393 int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
1394 if (HII->isValidOffset(Opc, NewOff)) {
1395 GlueAllocframeStore = true;
1396 // Since this store is to be glued with allocframe in the same
1397 // packet, it will use SP of the previous stack frame, i.e.
1398 // caller's SP. Therefore, we need to recalculate offset
1399 // according to this change.
1400 I->getOperand(1).setImm(NewOff);
1401 continue;
1402 }
1403 }
1404 default:
1405 break;
1406 }
1407 }
1408
1409 // There are certain anti-dependencies that cannot be ignored.
1410 // Specifically:
1411 // J2_call ... %R0<imp-def> ; SUJ
1412 // R0 = ... ; SUI
1413 // Those cannot be packetized together, since the call will observe
1414 // the effect of the assignment to R0.
1415 if (DepType == SDep::Anti && J->isCall()) {
1416 // Check if I defines any volatile register. We should also check
1417 // registers that the call may read, but these happen to be a
1418 // subset of the volatile register set.
1419 for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) {
1420 if (!I->modifiesRegister(*P, HRI))
1421 continue;
1422 FoundSequentialDependence = true;
1423 break;
1424 }
1425 }
1426
1427 // Skip over remaining anti-dependences. Two instructions that are
1428 // anti-dependent can share a packet, since in most such cases all
1429 // operands are read before any modifications take place.
1430 // The exceptions are branch and call instructions, since they are
1431 // executed after all other instructions have completed (at least
1432 // conceptually).
1433 if (DepType != SDep::Anti) {
1434 FoundSequentialDependence = true;
1435 break;
1436 }
1437 }
1438
1439 if (FoundSequentialDependence) {
1440 Dependence = true;
1441 return false;
1442 }
1443
1444 return true;
1445 }
1446
isLegalToPruneDependencies(SUnit * SUI,SUnit * SUJ)1447 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
1448 MachineInstr *I = SUI->getInstr();
1449 MachineInstr *J = SUJ->getInstr();
1450 assert(I && J && "Unable to packetize null instruction!");
1451
1452 if (cannotCoexist(I, J))
1453 return false;
1454
1455 if (!Dependence)
1456 return true;
1457
1458 // Check if the instruction was promoted to a dot-new. If so, demote it
1459 // back into a dot-old.
1460 if (PromotedToDotNew)
1461 demoteToDotOld(I);
1462
1463 cleanUpDotCur();
1464 // Check if the instruction (must be a store) was glued with an allocframe
1465 // instruction. If so, restore its offset to its original value, i.e. use
1466 // current SP instead of caller's SP.
1467 if (GlueAllocframeStore) {
1468 unsigned FrameSize = MF.getFrameInfo()->getStackSize();
1469 MachineOperand &MOff = I->getOperand(1);
1470 MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
1471 }
1472 return false;
1473 }
1474
1475 MachineBasicBlock::iterator
addToPacket(MachineInstr & MI)1476 HexagonPacketizerList::addToPacket(MachineInstr &MI) {
1477 MachineBasicBlock::iterator MII = MI;
1478 MachineBasicBlock *MBB = MI.getParent();
1479 if (MI.isImplicitDef()) {
1480 unsigned R = MI.getOperand(0).getReg();
1481 if (Hexagon::IntRegsRegClass.contains(R)) {
1482 MCSuperRegIterator S(R, HRI, false);
1483 MI.addOperand(MachineOperand::CreateReg(*S, true, true));
1484 }
1485 return MII;
1486 }
1487 assert(ResourceTracker->canReserveResources(MI));
1488
1489 bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI);
1490 bool Good = true;
1491
1492 if (GlueToNewValueJump) {
1493 MachineInstr &NvjMI = *++MII;
1494 // We need to put both instructions in the same packet: MI and NvjMI.
1495 // Either of them can require a constant extender. Try to add both to
1496 // the current packet, and if that fails, end the packet and start a
1497 // new one.
1498 ResourceTracker->reserveResources(MI);
1499 if (ExtMI)
1500 Good = tryAllocateResourcesForConstExt(true);
1501
1502 bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI);
1503 if (Good) {
1504 if (ResourceTracker->canReserveResources(NvjMI))
1505 ResourceTracker->reserveResources(NvjMI);
1506 else
1507 Good = false;
1508 }
1509 if (Good && ExtNvjMI)
1510 Good = tryAllocateResourcesForConstExt(true);
1511
1512 if (!Good) {
1513 endPacket(MBB, MI);
1514 assert(ResourceTracker->canReserveResources(MI));
1515 ResourceTracker->reserveResources(MI);
1516 if (ExtMI) {
1517 assert(canReserveResourcesForConstExt());
1518 tryAllocateResourcesForConstExt(true);
1519 }
1520 assert(ResourceTracker->canReserveResources(NvjMI));
1521 ResourceTracker->reserveResources(NvjMI);
1522 if (ExtNvjMI) {
1523 assert(canReserveResourcesForConstExt());
1524 reserveResourcesForConstExt();
1525 }
1526 }
1527 CurrentPacketMIs.push_back(&MI);
1528 CurrentPacketMIs.push_back(&NvjMI);
1529 return MII;
1530 }
1531
1532 ResourceTracker->reserveResources(MI);
1533 if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
1534 endPacket(MBB, MI);
1535 if (PromotedToDotNew)
1536 demoteToDotOld(&MI);
1537 ResourceTracker->reserveResources(MI);
1538 reserveResourcesForConstExt();
1539 }
1540
1541 CurrentPacketMIs.push_back(&MI);
1542 return MII;
1543 }
1544
endPacket(MachineBasicBlock * MBB,MachineBasicBlock::iterator MI)1545 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
1546 MachineBasicBlock::iterator MI) {
1547 OldPacketMIs = CurrentPacketMIs;
1548 VLIWPacketizerList::endPacket(MBB, MI);
1549 }
1550
shouldAddToPacket(const MachineInstr & MI)1551 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
1552 return !producesStall(&MI);
1553 }
1554
1555
1556 // Return true when ConsMI uses a register defined by ProdMI.
isDependent(const MachineInstr * ProdMI,const MachineInstr * ConsMI)1557 static bool isDependent(const MachineInstr *ProdMI,
1558 const MachineInstr *ConsMI) {
1559 if (!ProdMI->getOperand(0).isReg())
1560 return false;
1561 unsigned DstReg = ProdMI->getOperand(0).getReg();
1562
1563 for (auto &Op : ConsMI->operands())
1564 if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
1565 // The MIs depend on each other.
1566 return true;
1567
1568 return false;
1569 }
1570
1571 // V60 forward scheduling.
producesStall(const MachineInstr * I)1572 bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
1573 // Check whether the previous packet is in a different loop. If this is the
1574 // case, there is little point in trying to avoid a stall because that would
1575 // favor the rare case (loop entry) over the common case (loop iteration).
1576 //
1577 // TODO: We should really be able to check all the incoming edges if this is
1578 // the first packet in a basic block, so we can avoid stalls from the loop
1579 // backedge.
1580 if (!OldPacketMIs.empty()) {
1581 auto *OldBB = OldPacketMIs.front()->getParent();
1582 auto *ThisBB = I->getParent();
1583 if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
1584 return false;
1585 }
1586
1587 // Check for stall between two vector instructions.
1588 if (HII->isV60VectorInstruction(I)) {
1589 for (auto J : OldPacketMIs) {
1590 if (!HII->isV60VectorInstruction(J))
1591 continue;
1592 if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
1593 return true;
1594 }
1595 return false;
1596 }
1597
1598 // Check for stall between two scalar instructions. First, check that
1599 // there is no definition of a use in the current packet, because it
1600 // may be a candidate for .new.
1601 for (auto J : CurrentPacketMIs)
1602 if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
1603 return false;
1604
1605 // Check for stall between I and instructions in the previous packet.
1606 if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
1607 for (auto J : OldPacketMIs) {
1608 if (HII->isV60VectorInstruction(J))
1609 continue;
1610 if (!HII->isLateInstrFeedsEarlyInstr(J, I))
1611 continue;
1612 if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
1613 return true;
1614 }
1615 }
1616
1617 return false;
1618 }
1619
1620
1621 //===----------------------------------------------------------------------===//
1622 // Public Constructor Functions
1623 //===----------------------------------------------------------------------===//
1624
createHexagonPacketizer()1625 FunctionPass *llvm::createHexagonPacketizer() {
1626 return new HexagonPacketizer();
1627 }
1628