• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- R600MergeVectorRegisters.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
12 ///
13 /// For instance let's consider the following pseudo code :
14 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
15 /// ...
16 /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
17 /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
18 ///
19 /// is turned into :
20 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
21 /// ...
22 /// %7 = INSERT_SUBREG %4, sub3
23 /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
24 ///
/// This allows regalloc to reduce register pressure for vector registers and
/// reduces the MOV count.
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "R600Defines.h"
32 #include "R600InstrInfo.h"
33 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/CodeGen/MachineBasicBlock.h"
38 #include "llvm/CodeGen/MachineDominators.h"
39 #include "llvm/CodeGen/MachineFunction.h"
40 #include "llvm/CodeGen/MachineFunctionPass.h"
41 #include "llvm/CodeGen/MachineInstr.h"
42 #include "llvm/CodeGen/MachineInstrBuilder.h"
43 #include "llvm/CodeGen/MachineLoopInfo.h"
44 #include "llvm/CodeGen/MachineOperand.h"
45 #include "llvm/CodeGen/MachineRegisterInfo.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/ErrorHandling.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <cassert>
52 #include <utility>
53 #include <vector>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "vec-merger"
58 
isImplicitlyDef(MachineRegisterInfo & MRI,unsigned Reg)59 static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
60   assert(MRI.isSSA());
61   if (Register::isPhysicalRegister(Reg))
62     return false;
63   const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
64   return MI && MI->isImplicitDef();
65 }
66 
67 namespace {
68 
69 class RegSeqInfo {
70 public:
71   MachineInstr *Instr;
72   DenseMap<unsigned, unsigned> RegToChan;
73   std::vector<unsigned> UndefReg;
74 
RegSeqInfo(MachineRegisterInfo & MRI,MachineInstr * MI)75   RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
76     assert(MI->getOpcode() == R600::REG_SEQUENCE);
77     for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
78       MachineOperand &MO = Instr->getOperand(i);
79       unsigned Chan = Instr->getOperand(i + 1).getImm();
80       if (isImplicitlyDef(MRI, MO.getReg()))
81         UndefReg.push_back(Chan);
82       else
83         RegToChan[MO.getReg()] = Chan;
84     }
85   }
86 
87   RegSeqInfo() = default;
88 
operator ==(const RegSeqInfo & RSI) const89   bool operator==(const RegSeqInfo &RSI) const {
90     return RSI.Instr == Instr;
91   }
92 };
93 
94 class R600VectorRegMerger : public MachineFunctionPass {
95 private:
96   using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>;
97 
98   MachineRegisterInfo *MRI;
99   const R600InstrInfo *TII = nullptr;
100   DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
101   InstructionSetMap PreviousRegSeqByReg;
102   InstructionSetMap PreviousRegSeqByUndefCount;
103 
104   bool canSwizzle(const MachineInstr &MI) const;
105   bool areAllUsesSwizzeable(unsigned Reg) const;
106   void SwizzleInput(MachineInstr &,
107       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
108   bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
109       std::vector<std::pair<unsigned, unsigned>> &Remap) const;
110   bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
111       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
112   bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
113       std::vector<std::pair<unsigned, unsigned>> &RemapChan);
114   MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
115       const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
116   void RemoveMI(MachineInstr *);
117   void trackRSI(const RegSeqInfo &RSI);
118 
119 public:
120   static char ID;
121 
R600VectorRegMerger()122   R600VectorRegMerger() : MachineFunctionPass(ID) {}
123 
getAnalysisUsage(AnalysisUsage & AU) const124   void getAnalysisUsage(AnalysisUsage &AU) const override {
125     AU.setPreservesCFG();
126     AU.addRequired<MachineDominatorTree>();
127     AU.addPreserved<MachineDominatorTree>();
128     AU.addRequired<MachineLoopInfo>();
129     AU.addPreserved<MachineLoopInfo>();
130     MachineFunctionPass::getAnalysisUsage(AU);
131   }
132 
getPassName() const133   StringRef getPassName() const override {
134     return "R600 Vector Registers Merge Pass";
135   }
136 
137   bool runOnMachineFunction(MachineFunction &Fn) override;
138 };
139 
140 } // end anonymous namespace
141 
142 INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE,
143                      "R600 Vector Reg Merger", false, false)
144 INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE,
145                     "R600 Vector Reg Merger", false, false)
146 
147 char R600VectorRegMerger::ID = 0;
148 
149 char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID;
150 
canSwizzle(const MachineInstr & MI) const151 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
152     const {
153   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
154     return true;
155   switch (MI.getOpcode()) {
156   case R600::R600_ExportSwz:
157   case R600::EG_ExportSwz:
158     return true;
159   default:
160     return false;
161   }
162 }
163 
tryMergeVector(const RegSeqInfo * Untouched,RegSeqInfo * ToMerge,std::vector<std::pair<unsigned,unsigned>> & Remap) const164 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
165     RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
166     const {
167   unsigned CurrentUndexIdx = 0;
168   for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
169       E = ToMerge->RegToChan.end(); It != E; ++It) {
170     DenseMap<unsigned, unsigned>::const_iterator PosInUntouched =
171         Untouched->RegToChan.find((*It).first);
172     if (PosInUntouched != Untouched->RegToChan.end()) {
173       Remap.push_back(std::pair<unsigned, unsigned>
174           ((*It).second, (*PosInUntouched).second));
175       continue;
176     }
177     if (CurrentUndexIdx >= Untouched->UndefReg.size())
178       return false;
179     Remap.push_back(std::pair<unsigned, unsigned>
180         ((*It).second, Untouched->UndefReg[CurrentUndexIdx++]));
181   }
182 
183   return true;
184 }
185 
186 static
getReassignedChan(const std::vector<std::pair<unsigned,unsigned>> & RemapChan,unsigned Chan)187 unsigned getReassignedChan(
188     const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
189     unsigned Chan) {
190   for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
191     if (RemapChan[j].first == Chan)
192       return RemapChan[j].second;
193   }
194   llvm_unreachable("Chan wasn't reassigned");
195 }
196 
RebuildVector(RegSeqInfo * RSI,const RegSeqInfo * BaseRSI,const std::vector<std::pair<unsigned,unsigned>> & RemapChan) const197 MachineInstr *R600VectorRegMerger::RebuildVector(
198     RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
199     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
200   Register Reg = RSI->Instr->getOperand(0).getReg();
201   MachineBasicBlock::iterator Pos = RSI->Instr;
202   MachineBasicBlock &MBB = *Pos->getParent();
203   DebugLoc DL = Pos->getDebugLoc();
204 
205   Register SrcVec = BaseRSI->Instr->getOperand(0).getReg();
206   DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan;
207   std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
208   for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
209       E = RSI->RegToChan.end(); It != E; ++It) {
210     Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
211     unsigned SubReg = (*It).first;
212     unsigned Swizzle = (*It).second;
213     unsigned Chan = getReassignedChan(RemapChan, Swizzle);
214 
215     MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
216         DstReg)
217         .addReg(SrcVec)
218         .addReg(SubReg)
219         .addImm(Chan);
220     UpdatedRegToChan[SubReg] = Chan;
221     std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
222     if (ChanPos != UpdatedUndef.end())
223       UpdatedUndef.erase(ChanPos);
224     assert(!is_contained(UpdatedUndef, Chan) &&
225            "UpdatedUndef shouldn't contain Chan more than once!");
226     LLVM_DEBUG(dbgs() << "    ->"; Tmp->dump(););
227     (void)Tmp;
228     SrcVec = DstReg;
229   }
230   MachineInstr *NewMI =
231       BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
232   LLVM_DEBUG(dbgs() << "    ->"; NewMI->dump(););
233 
234   LLVM_DEBUG(dbgs() << "  Updating Swizzle:\n");
235   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
236       E = MRI->use_instr_end(); It != E; ++It) {
237     LLVM_DEBUG(dbgs() << "    "; (*It).dump(); dbgs() << "    ->");
238     SwizzleInput(*It, RemapChan);
239     LLVM_DEBUG((*It).dump());
240   }
241   RSI->Instr->eraseFromParent();
242 
243   // Update RSI
244   RSI->Instr = NewMI;
245   RSI->RegToChan = UpdatedRegToChan;
246   RSI->UndefReg = UpdatedUndef;
247 
248   return NewMI;
249 }
250 
RemoveMI(MachineInstr * MI)251 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
252   for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
253       E = PreviousRegSeqByReg.end(); It != E; ++It) {
254     std::vector<MachineInstr *> &MIs = (*It).second;
255     MIs.erase(llvm::find(MIs, MI), MIs.end());
256   }
257   for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
258       E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
259     std::vector<MachineInstr *> &MIs = (*It).second;
260     MIs.erase(llvm::find(MIs, MI), MIs.end());
261   }
262 }
263 
SwizzleInput(MachineInstr & MI,const std::vector<std::pair<unsigned,unsigned>> & RemapChan) const264 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
265     const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
266   unsigned Offset;
267   if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
268     Offset = 2;
269   else
270     Offset = 3;
271   for (unsigned i = 0; i < 4; i++) {
272     unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1;
273     for (unsigned j = 0, e = RemapChan.size(); j < e; j++) {
274       if (RemapChan[j].first == Swizzle) {
275         MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1);
276         break;
277       }
278     }
279   }
280 }
281 
areAllUsesSwizzeable(unsigned Reg) const282 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
283   for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg),
284       E = MRI->use_instr_end(); It != E; ++It) {
285     if (!canSwizzle(*It))
286       return false;
287   }
288   return true;
289 }
290 
tryMergeUsingCommonSlot(RegSeqInfo & RSI,RegSeqInfo & CompatibleRSI,std::vector<std::pair<unsigned,unsigned>> & RemapChan)291 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
292     RegSeqInfo &CompatibleRSI,
293     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
294   for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
295       MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
296     if (!MOp->isReg())
297       continue;
298     if (PreviousRegSeqByReg[MOp->getReg()].empty())
299       continue;
300     for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) {
301       CompatibleRSI = PreviousRegSeq[MI];
302       if (RSI == CompatibleRSI)
303         continue;
304       if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan))
305         return true;
306     }
307   }
308   return false;
309 }
310 
tryMergeUsingFreeSlot(RegSeqInfo & RSI,RegSeqInfo & CompatibleRSI,std::vector<std::pair<unsigned,unsigned>> & RemapChan)311 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
312     RegSeqInfo &CompatibleRSI,
313     std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
314   unsigned NeededUndefs = 4 - RSI.UndefReg.size();
315   if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
316     return false;
317   std::vector<MachineInstr *> &MIs =
318       PreviousRegSeqByUndefCount[NeededUndefs];
319   CompatibleRSI = PreviousRegSeq[MIs.back()];
320   tryMergeVector(&CompatibleRSI, &RSI, RemapChan);
321   return true;
322 }
323 
trackRSI(const RegSeqInfo & RSI)324 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) {
325   for (DenseMap<unsigned, unsigned>::const_iterator
326   It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) {
327     PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr);
328   }
329   PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr);
330   PreviousRegSeq[RSI.Instr] = RSI;
331 }
332 
runOnMachineFunction(MachineFunction & Fn)333 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
334   if (skipFunction(Fn.getFunction()))
335     return false;
336 
337   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
338   TII = ST.getInstrInfo();
339   MRI = &Fn.getRegInfo();
340 
341   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
342        MBB != MBBe; ++MBB) {
343     MachineBasicBlock *MB = &*MBB;
344     PreviousRegSeq.clear();
345     PreviousRegSeqByReg.clear();
346     PreviousRegSeqByUndefCount.clear();
347 
348     for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
349          MII != MIIE; ++MII) {
350       MachineInstr &MI = *MII;
351       if (MI.getOpcode() != R600::REG_SEQUENCE) {
352         if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
353           Register Reg = MI.getOperand(1).getReg();
354           for (MachineRegisterInfo::def_instr_iterator
355                It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end();
356                It != E; ++It) {
357             RemoveMI(&(*It));
358           }
359         }
360         continue;
361       }
362 
363       RegSeqInfo RSI(*MRI, &MI);
364 
365       // All uses of MI are swizzeable ?
366       Register Reg = MI.getOperand(0).getReg();
367       if (!areAllUsesSwizzeable(Reg))
368         continue;
369 
370       LLVM_DEBUG({
371         dbgs() << "Trying to optimize ";
372         MI.dump();
373       });
374 
375       RegSeqInfo CandidateRSI;
376       std::vector<std::pair<unsigned, unsigned>> RemapChan;
377       LLVM_DEBUG(dbgs() << "Using common slots...\n";);
378       if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
379         // Remove CandidateRSI mapping
380         RemoveMI(CandidateRSI.Instr);
381         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
382         trackRSI(RSI);
383         continue;
384       }
385       LLVM_DEBUG(dbgs() << "Using free slots...\n";);
386       RemapChan.clear();
387       if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) {
388         RemoveMI(CandidateRSI.Instr);
389         MII = RebuildVector(&RSI, &CandidateRSI, RemapChan);
390         trackRSI(RSI);
391         continue;
392       }
393       //Failed to merge
394       trackRSI(RSI);
395     }
396   }
397   return false;
398 }
399 
createR600VectorRegMerger()400 llvm::FunctionPass *llvm::createR600VectorRegMerger() {
401   return new R600VectorRegMerger();
402 }
403