1 //===-- MCFunction.cpp ----------------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the algorithm to break down a region of machine code
11 // into basic blocks and try to reconstruct a CFG from it.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "MCFunction.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/MC/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/Support/MemoryObject.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/Support/system_error.h"
26 #include <set>
27 using namespace llvm;
28
29 MCFunction
createFunctionFromMC(StringRef Name,const MCDisassembler * DisAsm,const MemoryObject & Region,uint64_t Start,uint64_t End,const MCInstrAnalysis * Ana,raw_ostream & DebugOut,SmallVectorImpl<uint64_t> & Calls)30 MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
31 const MemoryObject &Region, uint64_t Start,
32 uint64_t End, const MCInstrAnalysis *Ana,
33 raw_ostream &DebugOut,
34 SmallVectorImpl<uint64_t> &Calls) {
35 std::vector<MCDecodedInst> Instructions;
36 std::set<uint64_t> Splits;
37 Splits.insert(Start);
38 uint64_t Size;
39
40 MCFunction f(Name);
41
42 {
43 DenseSet<uint64_t> VisitedInsts;
44 SmallVector<uint64_t, 16> WorkList;
45 WorkList.push_back(Start);
46 // Disassemble code and gather basic block split points.
47 while (!WorkList.empty()) {
48 uint64_t Index = WorkList.pop_back_val();
49 if (VisitedInsts.find(Index) != VisitedInsts.end())
50 continue; // Already visited this location.
51
52 for (;Index < End; Index += Size) {
53 VisitedInsts.insert(Index);
54
55 MCInst Inst;
56 if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
57 Instructions.push_back(MCDecodedInst(Index, Size, Inst));
58 if (Ana->isBranch(Inst)) {
59 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
60 if (targ != -1ULL && targ == Index+Size)
61 continue; // Skip nop jumps.
62
63 // If we could determine the branch target, make a note to start a
64 // new basic block there and add the target to the worklist.
65 if (targ != -1ULL) {
66 Splits.insert(targ);
67 WorkList.push_back(targ);
68 WorkList.push_back(Index+Size);
69 }
70 Splits.insert(Index+Size);
71 break;
72 } else if (Ana->isReturn(Inst)) {
73 // Return instruction. This basic block ends here.
74 Splits.insert(Index+Size);
75 break;
76 } else if (Ana->isCall(Inst)) {
77 uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
78 // Add the call to the call list if the destination is known.
79 if (targ != -1ULL && targ != Index+Size)
80 Calls.push_back(targ);
81 }
82 } else {
83 errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
84 if (Size == 0)
85 Size = 1; // skip illegible bytes
86 }
87 }
88 }
89 }
90
91 // Make sure the instruction list is sorted.
92 std::sort(Instructions.begin(), Instructions.end());
93
94 // Create basic blocks.
95 unsigned ii = 0, ie = Instructions.size();
96 for (std::set<uint64_t>::iterator spi = Splits.begin(),
97 spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
98 MCBasicBlock BB;
99 uint64_t BlockEnd = *llvm::next(spi);
100 // Add instructions to the BB.
101 for (; ii != ie; ++ii) {
102 if (Instructions[ii].Address < *spi ||
103 Instructions[ii].Address >= BlockEnd)
104 break;
105 BB.addInst(Instructions[ii]);
106 }
107 f.addBlock(*spi, BB);
108 }
109
110 std::sort(f.Blocks.begin(), f.Blocks.end());
111
112 // Calculate successors of each block.
113 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
114 MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
115 if (BB.getInsts().empty()) continue;
116 const MCDecodedInst &Inst = BB.getInsts().back();
117
118 if (Ana->isBranch(Inst.Inst)) {
119 uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
120 if (targ == -1ULL) {
121 // Indirect branch. Bail and add all blocks of the function as a
122 // successor.
123 for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
124 BB.addSucc(i->first);
125 } else if (targ != Inst.Address+Inst.Size)
126 BB.addSucc(targ);
127 // Conditional branches can also fall through to the next block.
128 if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
129 BB.addSucc(llvm::next(i)->first);
130 } else {
131 // No branch. Fall through to the next block.
132 if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
133 BB.addSucc(llvm::next(i)->first);
134 }
135 }
136
137 return f;
138 }
139