• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the interface for the Enhanced Disassembly library's
11 // disassembler class.  The disassembler is responsible for vending individual
12 // instructions according to a given architecture and disassembly syntax.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_EDDISASSEMBLER_H
17 #define LLVM_EDDISASSEMBLER_H
18 
19 #include "EDInfo.h"
20 
21 #include "llvm/ADT/OwningPtr.h"
22 #include "llvm/ADT/Triple.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Support/Mutex.h"
25 
26 #include <map>
27 #include <set>
28 #include <string>
29 #include <vector>
30 
31 namespace llvm {
32 class AsmLexer;
33 class AsmParser;
34 class AsmToken;
35 class MCContext;
36 class MCAsmInfo;
37 class MCAsmLexer;
38 class MCDisassembler;
39 class MCInst;
40 class MCInstPrinter;
41 class MCInstrInfo;
42 class MCParsedAsmOperand;
43 class MCRegisterInfo;
44 class MCStreamer;
45 class MCSubtargetInfo;
46 class MCTargetAsmLexer;
47 class MCTargetAsmParser;
48 template <typename T> class SmallVectorImpl;
49 class SourceMgr;
50 class Target;
51 
52 struct EDInstInfo;
53 struct EDInst;
54 struct EDOperand;
55 struct EDToken;
56 
57 typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
58 
59 /// EDDisassembler - Encapsulates a disassembler for a single architecture and
60 ///   disassembly syntax.  Also manages the static disassembler registry.
61 struct EDDisassembler {
62   typedef enum {
63     /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
64     kEDAssemblySyntaxX86Intel  = 0,
65     /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
66     kEDAssemblySyntaxX86ATT    = 1,
67     kEDAssemblySyntaxARMUAL    = 2
68   } AssemblySyntax;
69 
70 
71   ////////////////////
72   // Static members //
73   ////////////////////
74 
75   /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
76   ///   pair
77   struct CPUKey {
78     /// The architecture type
79     std::string Triple;
80 
81     /// The assembly syntax
82     AssemblySyntax Syntax;
83 
84     /// operator== - Equality operator
85     bool operator==(const CPUKey &key) const {
86       return (Triple == key.Triple &&
87               Syntax == key.Syntax);
88     }
89 
90     /// operator< - Less-than operator
91     bool operator<(const CPUKey &key) const {
92       return ((Triple < key.Triple) ||
93               ((Triple == key.Triple) && Syntax < (key.Syntax)));
94     }
95   };
96 
97   typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
98 
99   /// A map from disassembler specifications to disassemblers.  Populated
100   ///   lazily.
101   static DisassemblerMap_t sDisassemblers;
102 
103   /// getDisassembler - Returns the specified disassemble, or NULL on failure
104   ///
105   /// @arg arch   - The desired architecture
106   /// @arg syntax - The desired disassembly syntax
107   static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
108                                          AssemblySyntax syntax);
109 
110   /// getDisassembler - Returns the disassembler for a given combination of
111   ///   CPU type, CPU subtype, and assembly syntax, or NULL on failure
112   ///
113   /// @arg str    - The string representation of the architecture triple, e.g.,
114   ///               "x86_64-apple-darwin"
115   /// @arg syntax - The disassembly syntax for the required disassembler
116   static EDDisassembler *getDisassembler(llvm::StringRef str,
117                                          AssemblySyntax syntax);
118 
119   ////////////////////////
120   // Per-object members //
121   ////////////////////////
122 
123   /// True only if the object has been successfully initialized
124   bool Valid;
125   /// True if the disassembler can provide semantic information
126   bool HasSemantics;
127 
128   /// The stream to write errors to
129   llvm::raw_ostream &ErrorStream;
130 
131   /// The triple/syntax pair for the current architecture
132   CPUKey Key;
133   /// The Triple fur the current architecture
134   Triple TgtTriple;
135   /// The LLVM target corresponding to the disassembler
136   const llvm::Target *Tgt;
137   /// The assembly information for the target architecture
138   llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
139   /// The subtarget information for the target architecture
140   llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
141   // The instruction information for the target architecture.
142   llvm::OwningPtr<const llvm::MCInstrInfo> MII;
143   // The register information for the target architecture.
144   llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
145   /// The disassembler for the target architecture
146   llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
147   /// The output string for the instruction printer; must be guarded with
148   ///   PrinterMutex
149   llvm::OwningPtr<std::string> InstString;
150   /// The output stream for the disassembler; must be guarded with
151   ///   PrinterMutex
152   llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
153   /// The instruction printer for the target architecture; must be guarded with
154   ///   PrinterMutex when printing
155   llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
156   /// The mutex that guards the instruction printer's printing functions, which
157   ///   use a shared stream
158   llvm::sys::Mutex PrinterMutex;
159   /// The array of instruction information provided by the TableGen backend for
160   ///   the target architecture
161   const llvm::EDInstInfo *InstInfos;
162   /// The target-specific lexer for use in tokenizing strings, in
163   ///   target-independent and target-specific portions
164   llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
165   llvm::OwningPtr<llvm::MCTargetAsmLexer> SpecificAsmLexer;
166   /// The guard for the above
167   llvm::sys::Mutex ParserMutex;
168   /// The LLVM number used for the target disassembly syntax variant
169   int LLVMSyntaxVariant;
170 
171   typedef std::vector<std::string> regvec_t;
172   typedef std::map<std::string, unsigned> regrmap_t;
173 
174   /// A vector of registers for quick mapping from LLVM register IDs to names
175   regvec_t RegVec;
176   /// A map of registers for quick mapping from register names to LLVM IDs
177   regrmap_t RegRMap;
178 
179   /// A set of register IDs for aliases of the stack pointer for the current
180   ///   architecture
181   std::set<unsigned> stackPointers;
182   /// A set of register IDs for aliases of the program counter for the current
183   ///   architecture
184   std::set<unsigned> programCounters;
185 
186   /// Constructor - initializes a disassembler with all the necessary objects,
187   ///   which come pre-allocated from the registry accessor function
188   ///
189   /// @arg key                - the architecture and disassembly syntax for the
190   ///                           disassembler
191   EDDisassembler(CPUKey& key);
192 
193   /// valid - reports whether there was a failure in the constructor.
validEDDisassembler194   bool valid() {
195     return Valid;
196   }
197 
198   /// hasSemantics - reports whether the disassembler can provide operands and
199   ///   tokens.
hasSemanticsEDDisassembler200   bool hasSemantics() {
201     return HasSemantics;
202   }
203 
204   ~EDDisassembler();
205 
206   /// createInst - creates and returns an instruction given a callback and
207   ///   memory address, or NULL on failure
208   ///
209   /// @arg byteReader - A callback function that provides machine code bytes
210   /// @arg address    - The address of the first byte of the instruction,
211   ///                   suitable for passing to byteReader
212   /// @arg arg        - An opaque argument for byteReader
213   EDInst *createInst(EDByteReaderCallback byteReader,
214                      uint64_t address,
215                      void *arg);
216 
217   /// initMaps - initializes regVec and regRMap using the provided register
218   ///   info
219   ///
220   /// @arg registerInfo - the register information to use as a source
221   void initMaps(const llvm::MCRegisterInfo &registerInfo);
222   /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
223   ///   register for a given register ID, or NULL on failure
224   ///
225   /// @arg registerID - the ID of the register to be queried
226   const char *nameWithRegisterID(unsigned registerID) const;
227   /// registerIDWithName - Returns the ID of a register for a given register
228   ///   name, or (unsigned)-1 on failure
229   ///
230   /// @arg name - The name of the register
231   unsigned registerIDWithName(const char *name) const;
232 
233   /// registerIsStackPointer - reports whether a register ID is an alias for the
234   ///   stack pointer register
235   ///
236   /// @arg registerID - The LLVM register ID
237   bool registerIsStackPointer(unsigned registerID);
238   /// registerIsStackPointer - reports whether a register ID is an alias for the
239   ///   stack pointer register
240   ///
241   /// @arg registerID - The LLVM register ID
242   bool registerIsProgramCounter(unsigned registerID);
243 
244   /// printInst - prints an MCInst to a string, returning 0 on success, or -1
245   ///   otherwise
246   ///
247   /// @arg str  - A reference to a string which is filled in with the string
248   ///             representation of the instruction
249   /// @arg inst - A reference to the MCInst to be printed
250   int printInst(std::string& str,
251                 llvm::MCInst& inst);
252 
253   /// parseInst - extracts operands and tokens from a string for use in
254   ///   tokenizing the string.  Returns 0 on success, or -1 otherwise.
255   ///
256   /// @arg operands - A reference to a vector that will be filled in with the
257   ///                 parsed operands
258   /// @arg tokens   - A reference to a vector that will be filled in with the
259   ///                 tokens
260   /// @arg str      - The string representation of the instruction
261   int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
262                 llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
263                 const std::string &str);
264 
265   /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
266   int llvmSyntaxVariant() const;
267 };
268 
269 } // end namespace llvm
270 
271 #endif
272