1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36
37 #include <queue>
38
39 using namespace llvm;
40
41 namespace llvm {
42 extern char &MIRCanonicalizerID;
43 } // namespace llvm
44
45 #define DEBUG_TYPE "mir-canonicalizer"
46
47 static cl::opt<unsigned>
48 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
49 cl::value_desc("N"),
50 cl::desc("Function number to canonicalize."));
51
52 namespace {
53
54 class MIRCanonicalizer : public MachineFunctionPass {
55 public:
56 static char ID;
MIRCanonicalizer()57 MIRCanonicalizer() : MachineFunctionPass(ID) {}
58
getPassName() const59 StringRef getPassName() const override {
60 return "Rename register operands in a canonical ordering.";
61 }
62
getAnalysisUsage(AnalysisUsage & AU) const63 void getAnalysisUsage(AnalysisUsage &AU) const override {
64 AU.setPreservesCFG();
65 MachineFunctionPass::getAnalysisUsage(AU);
66 }
67
68 bool runOnMachineFunction(MachineFunction &MF) override;
69 };
70
71 } // end anonymous namespace
72
73 char MIRCanonicalizer::ID;
74
75 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
76
77 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
78 "Rename Register Operands Canonically", false, false)
79
80 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
81 "Rename Register Operands Canonically", false, false)
82
GetRPOList(MachineFunction & MF)83 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
84 if (MF.empty())
85 return {};
86 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
87 std::vector<MachineBasicBlock *> RPOList;
88 for (auto MBB : RPOT) {
89 RPOList.push_back(MBB);
90 }
91
92 return RPOList;
93 }
94
95 static bool
rescheduleLexographically(std::vector<MachineInstr * > instructions,MachineBasicBlock * MBB,std::function<MachineBasicBlock::iterator ()> getPos)96 rescheduleLexographically(std::vector<MachineInstr *> instructions,
97 MachineBasicBlock *MBB,
98 std::function<MachineBasicBlock::iterator()> getPos) {
99
100 bool Changed = false;
101 using StringInstrPair = std::pair<std::string, MachineInstr *>;
102 std::vector<StringInstrPair> StringInstrMap;
103
104 for (auto *II : instructions) {
105 std::string S;
106 raw_string_ostream OS(S);
107 II->print(OS);
108 OS.flush();
109
110 // Trim the assignment, or start from the begining in the case of a store.
111 const size_t i = S.find("=");
112 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
113 }
114
115 llvm::sort(StringInstrMap,
116 [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
117 return (a.first < b.first);
118 });
119
120 for (auto &II : StringInstrMap) {
121
122 LLVM_DEBUG({
123 dbgs() << "Splicing ";
124 II.second->dump();
125 dbgs() << " right before: ";
126 getPos()->dump();
127 });
128
129 Changed = true;
130 MBB->splice(getPos(), MBB, II.second);
131 }
132
133 return Changed;
134 }
135
rescheduleCanonically(unsigned & PseudoIdempotentInstCount,MachineBasicBlock * MBB)136 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
137 MachineBasicBlock *MBB) {
138
139 bool Changed = false;
140
141 // Calculates the distance of MI from the begining of its parent BB.
142 auto getInstrIdx = [](const MachineInstr &MI) {
143 unsigned i = 0;
144 for (auto &CurMI : *MI.getParent()) {
145 if (&CurMI == &MI)
146 return i;
147 i++;
148 }
149 return ~0U;
150 };
151
152 // Pre-Populate vector of instructions to reschedule so that we don't
153 // clobber the iterator.
154 std::vector<MachineInstr *> Instructions;
155 for (auto &MI : *MBB) {
156 Instructions.push_back(&MI);
157 }
158
159 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
160 std::map<unsigned, MachineInstr *> MultiUserLookup;
161 unsigned UseToBringDefCloserToCount = 0;
162 std::vector<MachineInstr *> PseudoIdempotentInstructions;
163 std::vector<unsigned> PhysRegDefs;
164 for (auto *II : Instructions) {
165 for (unsigned i = 1; i < II->getNumOperands(); i++) {
166 MachineOperand &MO = II->getOperand(i);
167 if (!MO.isReg())
168 continue;
169
170 if (Register::isVirtualRegister(MO.getReg()))
171 continue;
172
173 if (!MO.isDef())
174 continue;
175
176 PhysRegDefs.push_back(MO.getReg());
177 }
178 }
179
180 for (auto *II : Instructions) {
181 if (II->getNumOperands() == 0)
182 continue;
183 if (II->mayLoadOrStore())
184 continue;
185
186 MachineOperand &MO = II->getOperand(0);
187 if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
188 continue;
189 if (!MO.isDef())
190 continue;
191
192 bool IsPseudoIdempotent = true;
193 for (unsigned i = 1; i < II->getNumOperands(); i++) {
194
195 if (II->getOperand(i).isImm()) {
196 continue;
197 }
198
199 if (II->getOperand(i).isReg()) {
200 if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
201 if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
202 PhysRegDefs.end()) {
203 continue;
204 }
205 }
206
207 IsPseudoIdempotent = false;
208 break;
209 }
210
211 if (IsPseudoIdempotent) {
212 PseudoIdempotentInstructions.push_back(II);
213 continue;
214 }
215
216 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
217
218 MachineInstr *Def = II;
219 unsigned Distance = ~0U;
220 MachineInstr *UseToBringDefCloserTo = nullptr;
221 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
222 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
223 MachineInstr *UseInst = UO.getParent();
224
225 const unsigned DefLoc = getInstrIdx(*Def);
226 const unsigned UseLoc = getInstrIdx(*UseInst);
227 const unsigned Delta = (UseLoc - DefLoc);
228
229 if (UseInst->getParent() != Def->getParent())
230 continue;
231 if (DefLoc >= UseLoc)
232 continue;
233
234 if (Delta < Distance) {
235 Distance = Delta;
236 UseToBringDefCloserTo = UseInst;
237 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
238 }
239 }
240
241 const auto BBE = MBB->instr_end();
242 MachineBasicBlock::iterator DefI = BBE;
243 MachineBasicBlock::iterator UseI = BBE;
244
245 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
246
247 if (DefI != BBE && UseI != BBE)
248 break;
249
250 if (&*BBI == Def) {
251 DefI = BBI;
252 continue;
253 }
254
255 if (&*BBI == UseToBringDefCloserTo) {
256 UseI = BBI;
257 continue;
258 }
259 }
260
261 if (DefI == BBE || UseI == BBE)
262 continue;
263
264 LLVM_DEBUG({
265 dbgs() << "Splicing ";
266 DefI->dump();
267 dbgs() << " right before: ";
268 UseI->dump();
269 });
270
271 MultiUsers[UseToBringDefCloserTo].push_back(Def);
272 Changed = true;
273 MBB->splice(UseI, MBB, DefI);
274 }
275
276 // Sort the defs for users of multiple defs lexographically.
277 for (const auto &E : MultiUserLookup) {
278
279 auto UseI =
280 std::find_if(MBB->instr_begin(), MBB->instr_end(),
281 [&](MachineInstr &MI) -> bool { return &MI == E.second; });
282
283 if (UseI == MBB->instr_end())
284 continue;
285
286 LLVM_DEBUG(
287 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
288 Changed |= rescheduleLexographically(
289 MultiUsers[E.second], MBB,
290 [&]() -> MachineBasicBlock::iterator { return UseI; });
291 }
292
293 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
294 LLVM_DEBUG(
295 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
296 Changed |= rescheduleLexographically(
297 PseudoIdempotentInstructions, MBB,
298 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
299
300 return Changed;
301 }
302
propagateLocalCopies(MachineBasicBlock * MBB)303 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
304 bool Changed = false;
305 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
306
307 std::vector<MachineInstr *> Copies;
308 for (MachineInstr &MI : MBB->instrs()) {
309 if (MI.isCopy())
310 Copies.push_back(&MI);
311 }
312
313 for (MachineInstr *MI : Copies) {
314
315 if (!MI->getOperand(0).isReg())
316 continue;
317 if (!MI->getOperand(1).isReg())
318 continue;
319
320 const Register Dst = MI->getOperand(0).getReg();
321 const Register Src = MI->getOperand(1).getReg();
322
323 if (!Register::isVirtualRegister(Dst))
324 continue;
325 if (!Register::isVirtualRegister(Src))
326 continue;
327 // Not folding COPY instructions if regbankselect has not set the RCs.
328 // Why are we only considering Register Classes? Because the verifier
329 // sometimes gets upset if the register classes don't match even if the
330 // types do. A future patch might add COPY folding for matching types in
331 // pre-registerbankselect code.
332 if (!MRI.getRegClassOrNull(Dst))
333 continue;
334 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
335 continue;
336
337 std::vector<MachineOperand *> Uses;
338 for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
339 Uses.push_back(&*UI);
340 for (auto *MO : Uses)
341 MO->setReg(Src);
342
343 Changed = true;
344 MI->eraseFromParent();
345 }
346
347 return Changed;
348 }
349
doDefKillClear(MachineBasicBlock * MBB)350 static bool doDefKillClear(MachineBasicBlock *MBB) {
351 bool Changed = false;
352
353 for (auto &MI : *MBB) {
354 for (auto &MO : MI.operands()) {
355 if (!MO.isReg())
356 continue;
357 if (!MO.isDef() && MO.isKill()) {
358 Changed = true;
359 MO.setIsKill(false);
360 }
361
362 if (MO.isDef() && MO.isDead()) {
363 Changed = true;
364 MO.setIsDead(false);
365 }
366 }
367 }
368
369 return Changed;
370 }
371
runOnBasicBlock(MachineBasicBlock * MBB,unsigned BasicBlockNum,VRegRenamer & Renamer)372 static bool runOnBasicBlock(MachineBasicBlock *MBB,
373 unsigned BasicBlockNum, VRegRenamer &Renamer) {
374 LLVM_DEBUG({
375 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
376 dbgs() << "\n\n================================================\n\n";
377 });
378
379 bool Changed = false;
380
381 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
382
383 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
384 MBB->dump(););
385 Changed |= propagateLocalCopies(MBB);
386 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
387
388 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
389 unsigned IdempotentInstCount = 0;
390 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
391 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
392
393 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
394
395 // TODO: Consider dropping this. Dropping kill defs is probably not
396 // semantically sound.
397 Changed |= doDefKillClear(MBB);
398
399 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
400 dbgs() << "\n";);
401 LLVM_DEBUG(
402 dbgs() << "\n\n================================================\n\n");
403 return Changed;
404 }
405
runOnMachineFunction(MachineFunction & MF)406 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
407
408 static unsigned functionNum = 0;
409 if (CanonicalizeFunctionNumber != ~0U) {
410 if (CanonicalizeFunctionNumber != functionNum++)
411 return false;
412 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
413 << "\n";);
414 }
415
416 // we need a valid vreg to create a vreg type for skipping all those
417 // stray vreg numbers so reach alignment/canonical vreg values.
418 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
419
420 LLVM_DEBUG(
421 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
422 dbgs() << "\n\n================================================\n\n";
423 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
424 for (auto MBB
425 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
426 << "\n\n================================================\n\n";);
427
428 unsigned BBNum = 0;
429 bool Changed = false;
430 MachineRegisterInfo &MRI = MF.getRegInfo();
431 VRegRenamer Renamer(MRI);
432 for (auto MBB : RPOList)
433 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
434
435 return Changed;
436 }
437