1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 using namespace llvm;
29
30 #define DEBUG_TYPE "r600mergeclause"
31
32 namespace {
33
isCFAlu(const MachineInstr & MI)34 static bool isCFAlu(const MachineInstr &MI) {
35 switch (MI.getOpcode()) {
36 case AMDGPU::CF_ALU:
37 case AMDGPU::CF_ALU_PUSH_BEFORE:
38 return true;
39 default:
40 return false;
41 }
42 }
43
44 class R600ClauseMergePass : public MachineFunctionPass {
45
46 private:
47 static char ID;
48 const R600InstrInfo *TII;
49
50 unsigned getCFAluSize(const MachineInstr &MI) const;
51 bool isCFAluEnabled(const MachineInstr &MI) const;
52
53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54 /// removed and their content affected to the previous alu clause.
55 /// This function parse instructions after CFAlu until it find a disabled
56 /// CFAlu and merge the content, or an enabled CFAlu.
57 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58
59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60 /// it is the case.
61 bool mergeIfPossible(MachineInstr &RootCFAlu,
62 const MachineInstr &LatrCFAlu) const;
63
64 public:
R600ClauseMergePass(TargetMachine & tm)65 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68
69 const char *getPassName() const override;
70 };
71
72 char R600ClauseMergePass::ID = 0;
73
getCFAluSize(const MachineInstr & MI) const74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
75 assert(isCFAlu(MI));
76 return MI
77 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
78 .getImm();
79 }
80
isCFAluEnabled(const MachineInstr & MI) const81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
82 assert(isCFAlu(MI));
83 return MI
84 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
85 .getImm();
86 }
87
cleanPotentialDisabledCFAlu(MachineInstr & CFAlu) const88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89 MachineInstr &CFAlu) const {
90 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
91 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
92 I++;
93 do {
94 while (I != E && !isCFAlu(*I))
95 I++;
96 if (I == E)
97 return;
98 MachineInstr &MI = *I++;
99 if (isCFAluEnabled(MI))
100 break;
101 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102 MI.eraseFromParent();
103 } while (I != E);
104 }
105
mergeIfPossible(MachineInstr & RootCFAlu,const MachineInstr & LatrCFAlu) const106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107 const MachineInstr &LatrCFAlu) const {
108 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
110 unsigned RootInstCount = getCFAluSize(RootCFAlu),
111 LaterInstCount = getCFAluSize(LatrCFAlu);
112 unsigned CumuledInsts = RootInstCount + LaterInstCount;
113 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114 DEBUG(dbgs() << "Excess inst counts\n");
115 return false;
116 }
117 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
118 return false;
119 // Is KCache Bank 0 compatible ?
120 int Mode0Idx =
121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
122 int KBank0Idx =
123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
124 int KBank0LineIdx =
125 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
126 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127 RootCFAlu.getOperand(Mode0Idx).getImm() &&
128 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129 RootCFAlu.getOperand(KBank0Idx).getImm() ||
130 LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131 RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
132 DEBUG(dbgs() << "Wrong KC0\n");
133 return false;
134 }
135 // Is KCache Bank 1 compatible ?
136 int Mode1Idx =
137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
138 int KBank1Idx =
139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
140 int KBank1LineIdx =
141 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
142 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143 RootCFAlu.getOperand(Mode1Idx).getImm() &&
144 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145 RootCFAlu.getOperand(KBank1Idx).getImm() ||
146 LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147 RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
148 DEBUG(dbgs() << "Wrong KC0\n");
149 return false;
150 }
151 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152 RootCFAlu.getOperand(Mode0Idx).setImm(
153 LatrCFAlu.getOperand(Mode0Idx).getImm());
154 RootCFAlu.getOperand(KBank0Idx).setImm(
155 LatrCFAlu.getOperand(KBank0Idx).getImm());
156 RootCFAlu.getOperand(KBank0LineIdx)
157 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
158 }
159 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160 RootCFAlu.getOperand(Mode1Idx).setImm(
161 LatrCFAlu.getOperand(Mode1Idx).getImm());
162 RootCFAlu.getOperand(KBank1Idx).setImm(
163 LatrCFAlu.getOperand(KBank1Idx).getImm());
164 RootCFAlu.getOperand(KBank1LineIdx)
165 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
166 }
167 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
169 return true;
170 }
171
runOnMachineFunction(MachineFunction & MF)172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
173 if (skipFunction(*MF.getFunction()))
174 return false;
175
176 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
177 TII = ST.getInstrInfo();
178
179 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180 BB != BB_E; ++BB) {
181 MachineBasicBlock &MBB = *BB;
182 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
183 MachineBasicBlock::iterator LatestCFAlu = E;
184 while (I != E) {
185 MachineInstr &MI = *I++;
186 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187 TII->mustBeLastInClause(MI.getOpcode()))
188 LatestCFAlu = E;
189 if (!isCFAlu(MI))
190 continue;
191 cleanPotentialDisabledCFAlu(MI);
192
193 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194 MI.eraseFromParent();
195 } else {
196 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197 LatestCFAlu = MI;
198 }
199 }
200 }
201 return false;
202 }
203
getPassName() const204 const char *R600ClauseMergePass::getPassName() const {
205 return "R600 Merge Clause Markers Pass";
206 }
207
208 } // end anonymous namespace
209
210
createR600ClauseMergePass(TargetMachine & TM)211 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
212 return new R600ClauseMergePass(TM);
213 }
214