• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 #include <queue>
38 
39 using namespace llvm;
40 
41 namespace llvm {
42 extern char &MIRCanonicalizerID;
43 } // namespace llvm
44 
45 #define DEBUG_TYPE "mir-canonicalizer"
46 
47 static cl::opt<unsigned>
48     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
49                                cl::value_desc("N"),
50                                cl::desc("Function number to canonicalize."));
51 
52 namespace {
53 
54 class MIRCanonicalizer : public MachineFunctionPass {
55 public:
56   static char ID;
MIRCanonicalizer()57   MIRCanonicalizer() : MachineFunctionPass(ID) {}
58 
getPassName() const59   StringRef getPassName() const override {
60     return "Rename register operands in a canonical ordering.";
61   }
62 
getAnalysisUsage(AnalysisUsage & AU) const63   void getAnalysisUsage(AnalysisUsage &AU) const override {
64     AU.setPreservesCFG();
65     MachineFunctionPass::getAnalysisUsage(AU);
66   }
67 
68   bool runOnMachineFunction(MachineFunction &MF) override;
69 };
70 
71 } // end anonymous namespace
72 
73 char MIRCanonicalizer::ID;
74 
75 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
76 
77 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
78                       "Rename Register Operands Canonically", false, false)
79 
80 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
81                     "Rename Register Operands Canonically", false, false)
82 
GetRPOList(MachineFunction & MF)83 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
84   if (MF.empty())
85     return {};
86   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
87   std::vector<MachineBasicBlock *> RPOList;
88   for (auto MBB : RPOT) {
89     RPOList.push_back(MBB);
90   }
91 
92   return RPOList;
93 }
94 
95 static bool
rescheduleLexographically(std::vector<MachineInstr * > instructions,MachineBasicBlock * MBB,std::function<MachineBasicBlock::iterator ()> getPos)96 rescheduleLexographically(std::vector<MachineInstr *> instructions,
97                           MachineBasicBlock *MBB,
98                           std::function<MachineBasicBlock::iterator()> getPos) {
99 
100   bool Changed = false;
101   using StringInstrPair = std::pair<std::string, MachineInstr *>;
102   std::vector<StringInstrPair> StringInstrMap;
103 
104   for (auto *II : instructions) {
105     std::string S;
106     raw_string_ostream OS(S);
107     II->print(OS);
108     OS.flush();
109 
110     // Trim the assignment, or start from the begining in the case of a store.
111     const size_t i = S.find("=");
112     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
113   }
114 
115   llvm::sort(StringInstrMap,
116              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
117                return (a.first < b.first);
118              });
119 
120   for (auto &II : StringInstrMap) {
121 
122     LLVM_DEBUG({
123       dbgs() << "Splicing ";
124       II.second->dump();
125       dbgs() << " right before: ";
126       getPos()->dump();
127     });
128 
129     Changed = true;
130     MBB->splice(getPos(), MBB, II.second);
131   }
132 
133   return Changed;
134 }
135 
rescheduleCanonically(unsigned & PseudoIdempotentInstCount,MachineBasicBlock * MBB)136 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
137                                   MachineBasicBlock *MBB) {
138 
139   bool Changed = false;
140 
141   // Calculates the distance of MI from the begining of its parent BB.
142   auto getInstrIdx = [](const MachineInstr &MI) {
143     unsigned i = 0;
144     for (auto &CurMI : *MI.getParent()) {
145       if (&CurMI == &MI)
146         return i;
147       i++;
148     }
149     return ~0U;
150   };
151 
152   // Pre-Populate vector of instructions to reschedule so that we don't
153   // clobber the iterator.
154   std::vector<MachineInstr *> Instructions;
155   for (auto &MI : *MBB) {
156     Instructions.push_back(&MI);
157   }
158 
159   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
160   std::map<unsigned, MachineInstr *> MultiUserLookup;
161   unsigned UseToBringDefCloserToCount = 0;
162   std::vector<MachineInstr *> PseudoIdempotentInstructions;
163   std::vector<unsigned> PhysRegDefs;
164   for (auto *II : Instructions) {
165     for (unsigned i = 1; i < II->getNumOperands(); i++) {
166       MachineOperand &MO = II->getOperand(i);
167       if (!MO.isReg())
168         continue;
169 
170       if (Register::isVirtualRegister(MO.getReg()))
171         continue;
172 
173       if (!MO.isDef())
174         continue;
175 
176       PhysRegDefs.push_back(MO.getReg());
177     }
178   }
179 
180   for (auto *II : Instructions) {
181     if (II->getNumOperands() == 0)
182       continue;
183     if (II->mayLoadOrStore())
184       continue;
185 
186     MachineOperand &MO = II->getOperand(0);
187     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
188       continue;
189     if (!MO.isDef())
190       continue;
191 
192     bool IsPseudoIdempotent = true;
193     for (unsigned i = 1; i < II->getNumOperands(); i++) {
194 
195       if (II->getOperand(i).isImm()) {
196         continue;
197       }
198 
199       if (II->getOperand(i).isReg()) {
200         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
201           if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
202               PhysRegDefs.end()) {
203             continue;
204           }
205       }
206 
207       IsPseudoIdempotent = false;
208       break;
209     }
210 
211     if (IsPseudoIdempotent) {
212       PseudoIdempotentInstructions.push_back(II);
213       continue;
214     }
215 
216     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
217 
218     MachineInstr *Def = II;
219     unsigned Distance = ~0U;
220     MachineInstr *UseToBringDefCloserTo = nullptr;
221     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
222     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
223       MachineInstr *UseInst = UO.getParent();
224 
225       const unsigned DefLoc = getInstrIdx(*Def);
226       const unsigned UseLoc = getInstrIdx(*UseInst);
227       const unsigned Delta = (UseLoc - DefLoc);
228 
229       if (UseInst->getParent() != Def->getParent())
230         continue;
231       if (DefLoc >= UseLoc)
232         continue;
233 
234       if (Delta < Distance) {
235         Distance = Delta;
236         UseToBringDefCloserTo = UseInst;
237         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
238       }
239     }
240 
241     const auto BBE = MBB->instr_end();
242     MachineBasicBlock::iterator DefI = BBE;
243     MachineBasicBlock::iterator UseI = BBE;
244 
245     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
246 
247       if (DefI != BBE && UseI != BBE)
248         break;
249 
250       if (&*BBI == Def) {
251         DefI = BBI;
252         continue;
253       }
254 
255       if (&*BBI == UseToBringDefCloserTo) {
256         UseI = BBI;
257         continue;
258       }
259     }
260 
261     if (DefI == BBE || UseI == BBE)
262       continue;
263 
264     LLVM_DEBUG({
265       dbgs() << "Splicing ";
266       DefI->dump();
267       dbgs() << " right before: ";
268       UseI->dump();
269     });
270 
271     MultiUsers[UseToBringDefCloserTo].push_back(Def);
272     Changed = true;
273     MBB->splice(UseI, MBB, DefI);
274   }
275 
276   // Sort the defs for users of multiple defs lexographically.
277   for (const auto &E : MultiUserLookup) {
278 
279     auto UseI =
280         std::find_if(MBB->instr_begin(), MBB->instr_end(),
281                      [&](MachineInstr &MI) -> bool { return &MI == E.second; });
282 
283     if (UseI == MBB->instr_end())
284       continue;
285 
286     LLVM_DEBUG(
287         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
288     Changed |= rescheduleLexographically(
289         MultiUsers[E.second], MBB,
290         [&]() -> MachineBasicBlock::iterator { return UseI; });
291   }
292 
293   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
294   LLVM_DEBUG(
295       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
296   Changed |= rescheduleLexographically(
297       PseudoIdempotentInstructions, MBB,
298       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
299 
300   return Changed;
301 }
302 
propagateLocalCopies(MachineBasicBlock * MBB)303 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
304   bool Changed = false;
305   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
306 
307   std::vector<MachineInstr *> Copies;
308   for (MachineInstr &MI : MBB->instrs()) {
309     if (MI.isCopy())
310       Copies.push_back(&MI);
311   }
312 
313   for (MachineInstr *MI : Copies) {
314 
315     if (!MI->getOperand(0).isReg())
316       continue;
317     if (!MI->getOperand(1).isReg())
318       continue;
319 
320     const Register Dst = MI->getOperand(0).getReg();
321     const Register Src = MI->getOperand(1).getReg();
322 
323     if (!Register::isVirtualRegister(Dst))
324       continue;
325     if (!Register::isVirtualRegister(Src))
326       continue;
327     // Not folding COPY instructions if regbankselect has not set the RCs.
328     // Why are we only considering Register Classes? Because the verifier
329     // sometimes gets upset if the register classes don't match even if the
330     // types do. A future patch might add COPY folding for matching types in
331     // pre-registerbankselect code.
332     if (!MRI.getRegClassOrNull(Dst))
333       continue;
334     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
335       continue;
336 
337     std::vector<MachineOperand *> Uses;
338     for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
339       Uses.push_back(&*UI);
340     for (auto *MO : Uses)
341       MO->setReg(Src);
342 
343     Changed = true;
344     MI->eraseFromParent();
345   }
346 
347   return Changed;
348 }
349 
doDefKillClear(MachineBasicBlock * MBB)350 static bool doDefKillClear(MachineBasicBlock *MBB) {
351   bool Changed = false;
352 
353   for (auto &MI : *MBB) {
354     for (auto &MO : MI.operands()) {
355       if (!MO.isReg())
356         continue;
357       if (!MO.isDef() && MO.isKill()) {
358         Changed = true;
359         MO.setIsKill(false);
360       }
361 
362       if (MO.isDef() && MO.isDead()) {
363         Changed = true;
364         MO.setIsDead(false);
365       }
366     }
367   }
368 
369   return Changed;
370 }
371 
runOnBasicBlock(MachineBasicBlock * MBB,unsigned BasicBlockNum,VRegRenamer & Renamer)372 static bool runOnBasicBlock(MachineBasicBlock *MBB,
373                             unsigned BasicBlockNum, VRegRenamer &Renamer) {
374   LLVM_DEBUG({
375     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
376     dbgs() << "\n\n================================================\n\n";
377   });
378 
379   bool Changed = false;
380 
381   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
382 
383   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
384              MBB->dump(););
385   Changed |= propagateLocalCopies(MBB);
386   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
387 
388   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
389   unsigned IdempotentInstCount = 0;
390   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
391   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
392 
393   Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
394 
395   // TODO: Consider dropping this. Dropping kill defs is probably not
396   // semantically sound.
397   Changed |= doDefKillClear(MBB);
398 
399   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
400              dbgs() << "\n";);
401   LLVM_DEBUG(
402       dbgs() << "\n\n================================================\n\n");
403   return Changed;
404 }
405 
runOnMachineFunction(MachineFunction & MF)406 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
407 
408   static unsigned functionNum = 0;
409   if (CanonicalizeFunctionNumber != ~0U) {
410     if (CanonicalizeFunctionNumber != functionNum++)
411       return false;
412     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
413                       << "\n";);
414   }
415 
416   // we need a valid vreg to create a vreg type for skipping all those
417   // stray vreg numbers so reach alignment/canonical vreg values.
418   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
419 
420   LLVM_DEBUG(
421       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
422       dbgs() << "\n\n================================================\n\n";
423       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
424       for (auto MBB
425            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
426       << "\n\n================================================\n\n";);
427 
428   unsigned BBNum = 0;
429   bool Changed = false;
430   MachineRegisterInfo &MRI = MF.getRegInfo();
431   VRegRenamer Renamer(MRI);
432   for (auto MBB : RPOList)
433     Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
434 
435   return Changed;
436 }
437