1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/CodeGen/LiveInterval.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/LiveRegMatrix.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/RegisterClassInfo.h"
27 #include "llvm/CodeGen/VirtRegMap.h"
28 #include "llvm/InitializePasses.h"
29
30 using namespace llvm;
31
32 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
33
34 namespace {
35
36 class SIPreAllocateWWMRegs : public MachineFunctionPass {
37 private:
38 const SIInstrInfo *TII;
39 const SIRegisterInfo *TRI;
40 MachineRegisterInfo *MRI;
41 LiveIntervals *LIS;
42 LiveRegMatrix *Matrix;
43 VirtRegMap *VRM;
44 RegisterClassInfo RegClassInfo;
45
46 std::vector<unsigned> RegsToRewrite;
47
48 public:
49 static char ID;
50
SIPreAllocateWWMRegs()51 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
52 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
53 }
54
55 bool runOnMachineFunction(MachineFunction &MF) override;
56
getAnalysisUsage(AnalysisUsage & AU) const57 void getAnalysisUsage(AnalysisUsage &AU) const override {
58 AU.addRequired<LiveIntervals>();
59 AU.addPreserved<LiveIntervals>();
60 AU.addRequired<VirtRegMap>();
61 AU.addRequired<LiveRegMatrix>();
62 AU.addPreserved<SlotIndexes>();
63 AU.setPreservesCFG();
64 MachineFunctionPass::getAnalysisUsage(AU);
65 }
66
67 private:
68 bool processDef(MachineOperand &MO);
69 void rewriteRegs(MachineFunction &MF);
70 };
71
72 } // End anonymous namespace.
73
74 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
75 "SI Pre-allocate WWM Registers", false, false)
76 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
77 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
78 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
79 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
80 "SI Pre-allocate WWM Registers", false, false)
81
82 char SIPreAllocateWWMRegs::ID = 0;
83
84 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
85
createSIPreAllocateWWMRegsPass()86 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
87 return new SIPreAllocateWWMRegs();
88 }
89
processDef(MachineOperand & MO)90 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
91 if (!MO.isReg())
92 return false;
93
94 Register Reg = MO.getReg();
95
96 if (!TRI->isVGPR(*MRI, Reg))
97 return false;
98
99 if (Register::isPhysicalRegister(Reg))
100 return false;
101
102 if (VRM->hasPhys(Reg))
103 return false;
104
105 LiveInterval &LI = LIS->getInterval(Reg);
106
107 for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
108 if (!MRI->isPhysRegUsed(PhysReg) &&
109 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
110 Matrix->assign(LI, PhysReg);
111 assert(PhysReg != 0);
112 RegsToRewrite.push_back(Reg);
113 return true;
114 }
115 }
116
117 llvm_unreachable("physreg not found for WWM expression");
118 return false;
119 }
120
rewriteRegs(MachineFunction & MF)121 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
122 for (MachineBasicBlock &MBB : MF) {
123 for (MachineInstr &MI : MBB) {
124 for (MachineOperand &MO : MI.operands()) {
125 if (!MO.isReg())
126 continue;
127
128 const Register VirtReg = MO.getReg();
129 if (Register::isPhysicalRegister(VirtReg))
130 continue;
131
132 if (!VRM->hasPhys(VirtReg))
133 continue;
134
135 Register PhysReg = VRM->getPhys(VirtReg);
136 const unsigned SubReg = MO.getSubReg();
137 if (SubReg != 0) {
138 PhysReg = TRI->getSubReg(PhysReg, SubReg);
139 MO.setSubReg(0);
140 }
141
142 MO.setReg(PhysReg);
143 MO.setIsRenamable(false);
144 }
145 }
146 }
147
148 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
149
150 for (unsigned Reg : RegsToRewrite) {
151 LIS->removeInterval(Reg);
152
153 const Register PhysReg = VRM->getPhys(Reg);
154 assert(PhysReg != 0);
155 MFI->ReserveWWMRegister(PhysReg);
156 }
157
158 RegsToRewrite.clear();
159
160 // Update the set of reserved registers to include WWM ones.
161 MRI->freezeReservedRegs(MF);
162 }
163
runOnMachineFunction(MachineFunction & MF)164 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
165 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
166
167 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
168
169 TII = ST.getInstrInfo();
170 TRI = &TII->getRegisterInfo();
171 MRI = &MF.getRegInfo();
172
173 LIS = &getAnalysis<LiveIntervals>();
174 Matrix = &getAnalysis<LiveRegMatrix>();
175 VRM = &getAnalysis<VirtRegMap>();
176
177 RegClassInfo.runOnMachineFunction(MF);
178
179 bool RegsAssigned = false;
180
181 // We use a reverse post-order traversal of the control-flow graph to
182 // guarantee that we visit definitions in dominance order. Since WWM
183 // expressions are guaranteed to never involve phi nodes, and we can only
184 // escape WWM through the special WWM instruction, this means that this is a
185 // perfect elimination order, so we can never do any better.
186 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
187
188 for (MachineBasicBlock *MBB : RPOT) {
189 bool InWWM = false;
190 for (MachineInstr &MI : *MBB) {
191 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
192 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
193 RegsAssigned |= processDef(MI.getOperand(0));
194
195 if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
196 LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
197 InWWM = true;
198 continue;
199 }
200
201 if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
202 LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
203 InWWM = false;
204 }
205
206 if (!InWWM)
207 continue;
208
209 LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
210
211 for (MachineOperand &DefOpnd : MI.defs()) {
212 RegsAssigned |= processDef(DefOpnd);
213 }
214 }
215 }
216
217 if (!RegsAssigned)
218 return false;
219
220 rewriteRegs(MF);
221 return true;
222 }
223