1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Enhanced Disassembly library's disassembler class.
11 // The disassembler is responsible for vending individual instructions according
12 // to a given architecture and disassembly syntax.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "EDDisassembler.h"
17 #include "EDInst.h"
18 #include "llvm/MC/EDInstInfo.h"
19 #include "llvm/MC/MCAsmInfo.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCDisassembler.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstPrinter.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCStreamer.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCParser/AsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/Support/MemoryBuffer.h"
32 #include "llvm/Support/MemoryObject.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Target/TargetAsmLexer.h"
35 #include "llvm/Target/TargetAsmParser.h"
36 #include "llvm/Target/TargetRegistry.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Target/TargetRegisterInfo.h"
39 #include "llvm/Target/TargetSelect.h"
40 using namespace llvm;
41
42 bool EDDisassembler::sInitialized = false;
43 EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
44
45 struct TripleMap {
46 Triple::ArchType Arch;
47 const char *String;
48 };
49
50 static struct TripleMap triplemap[] = {
51 { Triple::x86, "i386-unknown-unknown" },
52 { Triple::x86_64, "x86_64-unknown-unknown" },
53 { Triple::arm, "arm-unknown-unknown" },
54 { Triple::thumb, "thumb-unknown-unknown" },
55 { Triple::InvalidArch, NULL, }
56 };
57
58 /// infoFromArch - Returns the TripleMap corresponding to a given architecture,
59 /// or NULL if there is an error
60 ///
61 /// @arg arch - The Triple::ArchType for the desired architecture
tripleFromArch(Triple::ArchType arch)62 static const char *tripleFromArch(Triple::ArchType arch) {
63 unsigned int infoIndex;
64
65 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
66 if (arch == triplemap[infoIndex].Arch)
67 return triplemap[infoIndex].String;
68 }
69
70 return NULL;
71 }
72
73 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
74 /// for the desired assembly syntax, suitable for passing to
75 /// Target::createMCInstPrinter()
76 ///
77 /// @arg arch - The target architecture
78 /// @arg syntax - The assembly syntax in sd form
getLLVMSyntaxVariant(Triple::ArchType arch,EDDisassembler::AssemblySyntax syntax)79 static int getLLVMSyntaxVariant(Triple::ArchType arch,
80 EDDisassembler::AssemblySyntax syntax) {
81 switch (syntax) {
82 default:
83 return -1;
84 // Mappings below from X86AsmPrinter.cpp
85 case EDDisassembler::kEDAssemblySyntaxX86ATT:
86 if (arch == Triple::x86 || arch == Triple::x86_64)
87 return 0;
88 else
89 return -1;
90 case EDDisassembler::kEDAssemblySyntaxX86Intel:
91 if (arch == Triple::x86 || arch == Triple::x86_64)
92 return 1;
93 else
94 return -1;
95 case EDDisassembler::kEDAssemblySyntaxARMUAL:
96 if (arch == Triple::arm || arch == Triple::thumb)
97 return 0;
98 else
99 return -1;
100 }
101 }
102
initialize()103 void EDDisassembler::initialize() {
104 if (sInitialized)
105 return;
106
107 sInitialized = true;
108
109 InitializeAllTargetInfos();
110 InitializeAllTargets();
111 InitializeAllMCCodeGenInfos();
112 InitializeAllMCAsmInfos();
113 InitializeAllMCRegisterInfos();
114 InitializeAllMCSubtargetInfos();
115 InitializeAllAsmPrinters();
116 InitializeAllAsmParsers();
117 InitializeAllDisassemblers();
118 }
119
120 #undef BRINGUP_TARGET
121
getDisassembler(Triple::ArchType arch,AssemblySyntax syntax)122 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
123 AssemblySyntax syntax) {
124 CPUKey key;
125 key.Arch = arch;
126 key.Syntax = syntax;
127
128 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
129
130 if (i != sDisassemblers.end()) {
131 return i->second;
132 } else {
133 EDDisassembler* sdd = new EDDisassembler(key);
134 if (!sdd->valid()) {
135 delete sdd;
136 return NULL;
137 }
138
139 sDisassemblers[key] = sdd;
140
141 return sdd;
142 }
143
144 return NULL;
145 }
146
getDisassembler(StringRef str,AssemblySyntax syntax)147 EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
148 AssemblySyntax syntax) {
149 return getDisassembler(Triple(str).getArch(), syntax);
150 }
151
EDDisassembler(CPUKey & key)152 EDDisassembler::EDDisassembler(CPUKey &key) :
153 Valid(false),
154 HasSemantics(false),
155 ErrorStream(nulls()),
156 Key(key) {
157 const char *triple = tripleFromArch(key.Arch);
158
159 if (!triple)
160 return;
161
162 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
163
164 if (LLVMSyntaxVariant < 0)
165 return;
166
167 std::string tripleString(triple);
168 std::string errorString;
169
170 Tgt = TargetRegistry::lookupTarget(tripleString,
171 errorString);
172
173 if (!Tgt)
174 return;
175
176 std::string CPU;
177 std::string featureString;
178 TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
179 featureString));
180
181 const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
182
183 if (!registerInfo)
184 return;
185
186 initMaps(*registerInfo);
187
188 AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
189
190 if (!AsmInfo)
191 return;
192
193 MRI.reset(Tgt->createMCRegInfo(tripleString));
194
195 if (!MRI)
196 return;
197
198 Disassembler.reset(Tgt->createMCDisassembler());
199
200 if (!Disassembler)
201 return;
202
203 InstInfos = Disassembler->getEDInfo();
204
205 InstString.reset(new std::string);
206 InstStream.reset(new raw_string_ostream(*InstString));
207 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
208
209 if (!InstPrinter)
210 return;
211
212 GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
213 SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
214 SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
215
216 initMaps(*TargetMachine->getRegisterInfo());
217
218 Valid = true;
219 }
220
~EDDisassembler()221 EDDisassembler::~EDDisassembler() {
222 if (!valid())
223 return;
224 }
225
226 namespace {
227 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
228 /// as provided by the sd interface. See MemoryObject.
229 class EDMemoryObject : public llvm::MemoryObject {
230 private:
231 EDByteReaderCallback Callback;
232 void *Arg;
233 public:
EDMemoryObject(EDByteReaderCallback callback,void * arg)234 EDMemoryObject(EDByteReaderCallback callback,
235 void *arg) : Callback(callback), Arg(arg) { }
~EDMemoryObject()236 ~EDMemoryObject() { }
getBase() const237 uint64_t getBase() const { return 0x0; }
getExtent() const238 uint64_t getExtent() const { return (uint64_t)-1; }
readByte(uint64_t address,uint8_t * ptr) const239 int readByte(uint64_t address, uint8_t *ptr) const {
240 if (!Callback)
241 return -1;
242
243 if (Callback(ptr, address, Arg))
244 return -1;
245
246 return 0;
247 }
248 };
249 }
250
createInst(EDByteReaderCallback byteReader,uint64_t address,void * arg)251 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
252 uint64_t address,
253 void *arg) {
254 EDMemoryObject memoryObject(byteReader, arg);
255
256 MCInst* inst = new MCInst;
257 uint64_t byteSize;
258
259 if (!Disassembler->getInstruction(*inst,
260 byteSize,
261 memoryObject,
262 address,
263 ErrorStream)) {
264 delete inst;
265 return NULL;
266 } else {
267 const llvm::EDInstInfo *thisInstInfo = NULL;
268
269 if (InstInfos) {
270 thisInstInfo = &InstInfos[inst->getOpcode()];
271 }
272
273 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
274 return sdInst;
275 }
276 }
277
initMaps(const TargetRegisterInfo & registerInfo)278 void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) {
279 unsigned numRegisters = registerInfo.getNumRegs();
280 unsigned registerIndex;
281
282 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
283 const char* registerName = registerInfo.get(registerIndex).Name;
284
285 RegVec.push_back(registerName);
286 RegRMap[registerName] = registerIndex;
287 }
288
289 switch (Key.Arch) {
290 default:
291 break;
292 case Triple::x86:
293 case Triple::x86_64:
294 stackPointers.insert(registerIDWithName("SP"));
295 stackPointers.insert(registerIDWithName("ESP"));
296 stackPointers.insert(registerIDWithName("RSP"));
297
298 programCounters.insert(registerIDWithName("IP"));
299 programCounters.insert(registerIDWithName("EIP"));
300 programCounters.insert(registerIDWithName("RIP"));
301 break;
302 case Triple::arm:
303 case Triple::thumb:
304 stackPointers.insert(registerIDWithName("SP"));
305
306 programCounters.insert(registerIDWithName("PC"));
307 break;
308 }
309 }
310
nameWithRegisterID(unsigned registerID) const311 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
312 if (registerID >= RegVec.size())
313 return NULL;
314 else
315 return RegVec[registerID].c_str();
316 }
317
registerIDWithName(const char * name) const318 unsigned EDDisassembler::registerIDWithName(const char *name) const {
319 regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
320 if (iter == RegRMap.end())
321 return 0;
322 else
323 return (*iter).second;
324 }
325
registerIsStackPointer(unsigned registerID)326 bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
327 return (stackPointers.find(registerID) != stackPointers.end());
328 }
329
registerIsProgramCounter(unsigned registerID)330 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
331 return (programCounters.find(registerID) != programCounters.end());
332 }
333
printInst(std::string & str,MCInst & inst)334 int EDDisassembler::printInst(std::string &str, MCInst &inst) {
335 PrinterMutex.acquire();
336
337 InstPrinter->printInst(&inst, *InstStream);
338 InstStream->flush();
339 str = *InstString;
340 InstString->clear();
341
342 PrinterMutex.release();
343
344 return 0;
345 }
346
diag_handler(const SMDiagnostic & diag,void * context)347 static void diag_handler(const SMDiagnostic &diag,
348 void *context)
349 {
350 if (context) {
351 EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
352 diag.Print("", disassembler->ErrorStream);
353 }
354 }
355
parseInst(SmallVectorImpl<MCParsedAsmOperand * > & operands,SmallVectorImpl<AsmToken> & tokens,const std::string & str)356 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
357 SmallVectorImpl<AsmToken> &tokens,
358 const std::string &str) {
359 int ret = 0;
360
361 switch (Key.Arch) {
362 default:
363 return -1;
364 case Triple::x86:
365 case Triple::x86_64:
366 case Triple::arm:
367 case Triple::thumb:
368 break;
369 }
370
371 const char *cStr = str.c_str();
372 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
373
374 StringRef instName;
375 SMLoc instLoc;
376
377 SourceMgr sourceMgr;
378 sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
379 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
380 MCContext context(*AsmInfo, *MRI, NULL, NULL);
381 OwningPtr<MCStreamer> streamer(createNullStreamer(context));
382 OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
383 context, *streamer,
384 *AsmInfo));
385
386 StringRef triple = tripleFromArch(Key.Arch);
387 OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
388 OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
389 *genericParser));
390
391 AsmToken OpcodeToken = genericParser->Lex();
392 AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
393
394 if (OpcodeToken.is(AsmToken::Identifier)) {
395 instName = OpcodeToken.getString();
396 instLoc = OpcodeToken.getLoc();
397
398 if (NextToken.isNot(AsmToken::Eof) &&
399 TargetParser->ParseInstruction(instName, instLoc, operands))
400 ret = -1;
401 } else {
402 ret = -1;
403 }
404
405 ParserMutex.acquire();
406
407 if (!ret) {
408 GenericAsmLexer->setBuffer(buf);
409
410 while (SpecificAsmLexer->Lex(),
411 SpecificAsmLexer->isNot(AsmToken::Eof) &&
412 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
413 if (SpecificAsmLexer->is(AsmToken::Error)) {
414 ret = -1;
415 break;
416 }
417 tokens.push_back(SpecificAsmLexer->getTok());
418 }
419 }
420
421 ParserMutex.release();
422
423 return ret;
424 }
425
llvmSyntaxVariant() const426 int EDDisassembler::llvmSyntaxVariant() const {
427 return LLVMSyntaxVariant;
428 }
429