• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Enhanced Disassembly library's  disassembler class.
11 // The disassembler is responsible for vending individual instructions according
12 // to a given architecture and disassembly syntax.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "EDDisassembler.h"
17 #include "EDInst.h"
18 #include "llvm/MC/EDInstInfo.h"
19 #include "llvm/MC/MCAsmInfo.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCDisassembler.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstPrinter.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCStreamer.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCParser/AsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/Support/MemoryBuffer.h"
32 #include "llvm/Support/MemoryObject.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Target/TargetAsmLexer.h"
35 #include "llvm/Target/TargetAsmParser.h"
36 #include "llvm/Target/TargetRegistry.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Target/TargetRegisterInfo.h"
39 #include "llvm/Target/TargetSelect.h"
40 using namespace llvm;
41 
42 bool EDDisassembler::sInitialized = false;
43 EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
44 
45 struct TripleMap {
46   Triple::ArchType Arch;
47   const char *String;
48 };
49 
50 static struct TripleMap triplemap[] = {
51   { Triple::x86,          "i386-unknown-unknown"    },
52   { Triple::x86_64,       "x86_64-unknown-unknown"  },
53   { Triple::arm,          "arm-unknown-unknown"     },
54   { Triple::thumb,        "thumb-unknown-unknown"   },
55   { Triple::InvalidArch,  NULL,                     }
56 };
57 
58 /// infoFromArch - Returns the TripleMap corresponding to a given architecture,
59 ///   or NULL if there is an error
60 ///
61 /// @arg arch - The Triple::ArchType for the desired architecture
tripleFromArch(Triple::ArchType arch)62 static const char *tripleFromArch(Triple::ArchType arch) {
63   unsigned int infoIndex;
64 
65   for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
66     if (arch == triplemap[infoIndex].Arch)
67       return triplemap[infoIndex].String;
68   }
69 
70   return NULL;
71 }
72 
73 /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
74 ///   for the desired assembly syntax, suitable for passing to
75 ///   Target::createMCInstPrinter()
76 ///
77 /// @arg arch   - The target architecture
78 /// @arg syntax - The assembly syntax in sd form
getLLVMSyntaxVariant(Triple::ArchType arch,EDDisassembler::AssemblySyntax syntax)79 static int getLLVMSyntaxVariant(Triple::ArchType arch,
80                                 EDDisassembler::AssemblySyntax syntax) {
81   switch (syntax) {
82   default:
83     return -1;
84   // Mappings below from X86AsmPrinter.cpp
85   case EDDisassembler::kEDAssemblySyntaxX86ATT:
86     if (arch == Triple::x86 || arch == Triple::x86_64)
87       return 0;
88     else
89       return -1;
90   case EDDisassembler::kEDAssemblySyntaxX86Intel:
91     if (arch == Triple::x86 || arch == Triple::x86_64)
92       return 1;
93     else
94       return -1;
95   case EDDisassembler::kEDAssemblySyntaxARMUAL:
96     if (arch == Triple::arm || arch == Triple::thumb)
97       return 0;
98     else
99       return -1;
100   }
101 }
102 
initialize()103 void EDDisassembler::initialize() {
104   if (sInitialized)
105     return;
106 
107   sInitialized = true;
108 
109   InitializeAllTargetInfos();
110   InitializeAllTargets();
111   InitializeAllMCCodeGenInfos();
112   InitializeAllMCAsmInfos();
113   InitializeAllMCRegisterInfos();
114   InitializeAllMCSubtargetInfos();
115   InitializeAllAsmPrinters();
116   InitializeAllAsmParsers();
117   InitializeAllDisassemblers();
118 }
119 
120 #undef BRINGUP_TARGET
121 
getDisassembler(Triple::ArchType arch,AssemblySyntax syntax)122 EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
123                                                 AssemblySyntax syntax) {
124   CPUKey key;
125   key.Arch = arch;
126   key.Syntax = syntax;
127 
128   EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
129 
130   if (i != sDisassemblers.end()) {
131     return i->second;
132   } else {
133     EDDisassembler* sdd = new EDDisassembler(key);
134     if (!sdd->valid()) {
135       delete sdd;
136       return NULL;
137     }
138 
139     sDisassemblers[key] = sdd;
140 
141     return sdd;
142   }
143 
144   return NULL;
145 }
146 
getDisassembler(StringRef str,AssemblySyntax syntax)147 EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
148                                                 AssemblySyntax syntax) {
149   return getDisassembler(Triple(str).getArch(), syntax);
150 }
151 
EDDisassembler(CPUKey & key)152 EDDisassembler::EDDisassembler(CPUKey &key) :
153   Valid(false),
154   HasSemantics(false),
155   ErrorStream(nulls()),
156   Key(key) {
157   const char *triple = tripleFromArch(key.Arch);
158 
159   if (!triple)
160     return;
161 
162   LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
163 
164   if (LLVMSyntaxVariant < 0)
165     return;
166 
167   std::string tripleString(triple);
168   std::string errorString;
169 
170   Tgt = TargetRegistry::lookupTarget(tripleString,
171                                      errorString);
172 
173   if (!Tgt)
174     return;
175 
176   std::string CPU;
177   std::string featureString;
178   TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
179                                                featureString));
180 
181   const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
182 
183   if (!registerInfo)
184     return;
185 
186   initMaps(*registerInfo);
187 
188   AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
189 
190   if (!AsmInfo)
191     return;
192 
193   MRI.reset(Tgt->createMCRegInfo(tripleString));
194 
195   if (!MRI)
196     return;
197 
198   Disassembler.reset(Tgt->createMCDisassembler());
199 
200   if (!Disassembler)
201     return;
202 
203   InstInfos = Disassembler->getEDInfo();
204 
205   InstString.reset(new std::string);
206   InstStream.reset(new raw_string_ostream(*InstString));
207   InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
208 
209   if (!InstPrinter)
210     return;
211 
212   GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
213   SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
214   SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
215 
216   initMaps(*TargetMachine->getRegisterInfo());
217 
218   Valid = true;
219 }
220 
~EDDisassembler()221 EDDisassembler::~EDDisassembler() {
222   if (!valid())
223     return;
224 }
225 
226 namespace {
227   /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
228   ///   as provided by the sd interface.  See MemoryObject.
229   class EDMemoryObject : public llvm::MemoryObject {
230   private:
231     EDByteReaderCallback Callback;
232     void *Arg;
233   public:
EDMemoryObject(EDByteReaderCallback callback,void * arg)234     EDMemoryObject(EDByteReaderCallback callback,
235                    void *arg) : Callback(callback), Arg(arg) { }
~EDMemoryObject()236     ~EDMemoryObject() { }
getBase() const237     uint64_t getBase() const { return 0x0; }
getExtent() const238     uint64_t getExtent() const { return (uint64_t)-1; }
readByte(uint64_t address,uint8_t * ptr) const239     int readByte(uint64_t address, uint8_t *ptr) const {
240       if (!Callback)
241         return -1;
242 
243       if (Callback(ptr, address, Arg))
244         return -1;
245 
246       return 0;
247     }
248   };
249 }
250 
createInst(EDByteReaderCallback byteReader,uint64_t address,void * arg)251 EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
252                                    uint64_t address,
253                                    void *arg) {
254   EDMemoryObject memoryObject(byteReader, arg);
255 
256   MCInst* inst = new MCInst;
257   uint64_t byteSize;
258 
259   if (!Disassembler->getInstruction(*inst,
260                                     byteSize,
261                                     memoryObject,
262                                     address,
263                                     ErrorStream)) {
264     delete inst;
265     return NULL;
266   } else {
267     const llvm::EDInstInfo *thisInstInfo = NULL;
268 
269     if (InstInfos) {
270       thisInstInfo = &InstInfos[inst->getOpcode()];
271     }
272 
273     EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
274     return sdInst;
275   }
276 }
277 
initMaps(const TargetRegisterInfo & registerInfo)278 void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
279   unsigned numRegisters = registerInfo.getNumRegs();
280   unsigned registerIndex;
281 
282   for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
283     const char* registerName = registerInfo.get(registerIndex).Name;
284 
285     RegVec.push_back(registerName);
286     RegRMap[registerName] = registerIndex;
287   }
288 
289   switch (Key.Arch) {
290   default:
291     break;
292   case Triple::x86:
293   case Triple::x86_64:
294     stackPointers.insert(registerIDWithName("SP"));
295     stackPointers.insert(registerIDWithName("ESP"));
296     stackPointers.insert(registerIDWithName("RSP"));
297 
298     programCounters.insert(registerIDWithName("IP"));
299     programCounters.insert(registerIDWithName("EIP"));
300     programCounters.insert(registerIDWithName("RIP"));
301     break;
302   case Triple::arm:
303   case Triple::thumb:
304     stackPointers.insert(registerIDWithName("SP"));
305 
306     programCounters.insert(registerIDWithName("PC"));
307     break;
308   }
309 }
310 
nameWithRegisterID(unsigned registerID) const311 const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
312   if (registerID >= RegVec.size())
313     return NULL;
314   else
315     return RegVec[registerID].c_str();
316 }
317 
registerIDWithName(const char * name) const318 unsigned EDDisassembler::registerIDWithName(const char *name) const {
319   regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
320   if (iter == RegRMap.end())
321     return 0;
322   else
323     return (*iter).second;
324 }
325 
registerIsStackPointer(unsigned registerID)326 bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
327   return (stackPointers.find(registerID) != stackPointers.end());
328 }
329 
registerIsProgramCounter(unsigned registerID)330 bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
331   return (programCounters.find(registerID) != programCounters.end());
332 }
333 
printInst(std::string & str,MCInst & inst)334 int EDDisassembler::printInst(std::string &str, MCInst &inst) {
335   PrinterMutex.acquire();
336 
337   InstPrinter->printInst(&inst, *InstStream);
338   InstStream->flush();
339   str = *InstString;
340   InstString->clear();
341 
342   PrinterMutex.release();
343 
344   return 0;
345 }
346 
diag_handler(const SMDiagnostic & diag,void * context)347 static void diag_handler(const SMDiagnostic &diag,
348                          void *context)
349 {
350   if (context) {
351     EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
352     diag.Print("", disassembler->ErrorStream);
353   }
354 }
355 
parseInst(SmallVectorImpl<MCParsedAsmOperand * > & operands,SmallVectorImpl<AsmToken> & tokens,const std::string & str)356 int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
357                               SmallVectorImpl<AsmToken> &tokens,
358                               const std::string &str) {
359   int ret = 0;
360 
361   switch (Key.Arch) {
362   default:
363     return -1;
364   case Triple::x86:
365   case Triple::x86_64:
366   case Triple::arm:
367   case Triple::thumb:
368     break;
369   }
370 
371   const char *cStr = str.c_str();
372   MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
373 
374   StringRef instName;
375   SMLoc instLoc;
376 
377   SourceMgr sourceMgr;
378   sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
379   sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
380   MCContext context(*AsmInfo, *MRI, NULL, NULL);
381   OwningPtr<MCStreamer> streamer(createNullStreamer(context));
382   OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
383                                                          context, *streamer,
384                                                          *AsmInfo));
385 
386   StringRef triple = tripleFromArch(Key.Arch);
387   OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
388   OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
389                                                                *genericParser));
390 
391   AsmToken OpcodeToken = genericParser->Lex();
392   AsmToken NextToken = genericParser->Lex();  // consume next token, because specificParser expects us to
393 
394   if (OpcodeToken.is(AsmToken::Identifier)) {
395     instName = OpcodeToken.getString();
396     instLoc = OpcodeToken.getLoc();
397 
398     if (NextToken.isNot(AsmToken::Eof) &&
399         TargetParser->ParseInstruction(instName, instLoc, operands))
400       ret = -1;
401   } else {
402     ret = -1;
403   }
404 
405   ParserMutex.acquire();
406 
407   if (!ret) {
408     GenericAsmLexer->setBuffer(buf);
409 
410     while (SpecificAsmLexer->Lex(),
411            SpecificAsmLexer->isNot(AsmToken::Eof) &&
412            SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
413       if (SpecificAsmLexer->is(AsmToken::Error)) {
414         ret = -1;
415         break;
416       }
417       tokens.push_back(SpecificAsmLexer->getTok());
418     }
419   }
420 
421   ParserMutex.release();
422 
423   return ret;
424 }
425 
llvmSyntaxVariant() const426 int EDDisassembler::llvmSyntaxVariant() const {
427   return LLVMSyntaxVariant;
428 }
429