/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the public interface of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

#ifndef X86DISASSEMBLERDECODER_H
#define X86DISASSEMBLERDECODER_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Extra member declarations supplied by this header.  They are defined just
 * ahead of the include of X86DisassemblerDecoderCommon.h and removed again
 * right after it, so they are presumably expanded inside that header's record
 * definitions -- confirm against X86DisassemblerDecoderCommon.h.
 * Each expands to a complete member declaration, trailing semicolon included.
 */
#define INSTRUCTION_SPECIFIER_FIELDS \
  uint16_t operands;

#define INSTRUCTION_IDS     \
  uint16_t instructionIDs;

#include "X86DisassemblerDecoderCommon.h"

#undef INSTRUCTION_SPECIFIER_FIELDS
#undef INSTRUCTION_IDS

/*
 * Accessor macros for the bit fields of an Intel instruction's encoding bytes.
 * Every macro masks out one field and shifts it down so that the result starts
 * at bit 0.  Arguments are evaluated exactly once and are fully parenthesized.
 */

/* ModR/M byte: mod = bits 7-6, reg = bits 5-3, r/m = bits 2-0 */
#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM)   ((modRM) & 0x7)

/* SIB byte: scale = bits 7-6, index = bits 5-3, base = bits 2-0 */
#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
#define baseFromSIB(sib)     ((sib) & 0x7)

/* REX prefix: W = bit 3, R = bit 2, X = bit 1, B = bit 0 */
#define wFromREX(rex)        (((rex) & 0x8) >> 3)
#define rFromREX(rex)        (((rex) & 0x4) >> 2)
#define xFromREX(rex)        (((rex) & 0x2) >> 1)
#define bFromREX(rex)        ((rex) & 0x1)

/*
 * VEX prefix bytes.  The NofM suffix names byte N of an M-byte VEX prefix.
 * R, X, B and vvvv are complemented before extraction (the ~), so a bit that
 * is clear in the prefix byte reads back as 1; the mask is applied before the
 * shift so the sign bits produced by ~ never reach the result.
 */
#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX3of3(vex)      ((vex) & 0x3)

#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex)      ((vex) & 0x3)

/*
 * The lists below name the Intel registers used by the decoder.  Each list is
 * an X-macro: it expands to one ENTRY(name) invocation per register, and the
 * code that uses a list defines ENTRY first (for example to build enumerator
 * names).  Entry order is significant because generated enumerators take
 * consecutive values in list order.
 */

/* 8-bit registers: 20 entries; SPL, BPL, SIL and DIL come last. */
#define REGS_8BIT     \
  ENTRY(AL)           \
  ENTRY(CL)           \
  ENTRY(DL)           \
  ENTRY(BL)           \
  ENTRY(AH)           \
  ENTRY(CH)           \
  ENTRY(DH)           \
  ENTRY(BH)           \
  ENTRY(R8B)          \
  ENTRY(R9B)          \
  ENTRY(R10B)         \
  ENTRY(R11B)         \
  ENTRY(R12B)         \
  ENTRY(R13B)         \
  ENTRY(R14B)         \
  ENTRY(R15B)         \
  ENTRY(SPL)          \
  ENTRY(BPL)          \
  ENTRY(SIL)          \
  ENTRY(DIL)

/*
 * Effective-address bases for 16-bit addressing (X-macro list; the user
 * defines ENTRY).  The first four entries are register pairs (BX_SI, BX_DI,
 * BP_SI, BP_DI) rather than single registers.  16 entries in total.
 */
#define EA_BASES_16BIT  \
  ENTRY(BX_SI)          \
  ENTRY(BX_DI)          \
  ENTRY(BP_SI)          \
  ENTRY(BP_DI)          \
  ENTRY(SI)             \
  ENTRY(DI)             \
  ENTRY(BP)             \
  ENTRY(BX)             \
  ENTRY(R8W)            \
  ENTRY(R9W)            \
  ENTRY(R10W)           \
  ENTRY(R11W)           \
  ENTRY(R12W)           \
  ENTRY(R13W)           \
  ENTRY(R14W)           \
  ENTRY(R15W)

/* 16-bit general-purpose registers (X-macro list; 16 entries). */
#define REGS_16BIT    \
  ENTRY(AX)           \
  ENTRY(CX)           \
  ENTRY(DX)           \
  ENTRY(BX)           \
  ENTRY(SP)           \
  ENTRY(BP)           \
  ENTRY(SI)           \
  ENTRY(DI)           \
  ENTRY(R8W)          \
  ENTRY(R9W)          \
  ENTRY(R10W)         \
  ENTRY(R11W)         \
  ENTRY(R12W)         \
  ENTRY(R13W)         \
  ENTRY(R14W)         \
  ENTRY(R15W)

/*
 * Effective-address bases for 32-bit addressing (X-macro list; 16 entries).
 * The fifth entry is the placeholder `sib`, in the position where REGS_32BIT
 * lists ESP; presumably it marks encodings that are followed by a SIB byte --
 * confirm against the decoder implementation.
 */
#define EA_BASES_32BIT  \
  ENTRY(EAX)            \
  ENTRY(ECX)            \
  ENTRY(EDX)            \
  ENTRY(EBX)            \
  ENTRY(sib)            \
  ENTRY(EBP)            \
  ENTRY(ESI)            \
  ENTRY(EDI)            \
  ENTRY(R8D)            \
  ENTRY(R9D)            \
  ENTRY(R10D)           \
  ENTRY(R11D)           \
  ENTRY(R12D)           \
  ENTRY(R13D)           \
  ENTRY(R14D)           \
  ENTRY(R15D)

/* 32-bit general-purpose registers (X-macro list; 16 entries). */
#define REGS_32BIT  \
  ENTRY(EAX)        \
  ENTRY(ECX)        \
  ENTRY(EDX)        \
  ENTRY(EBX)        \
  ENTRY(ESP)        \
  ENTRY(EBP)        \
  ENTRY(ESI)        \
  ENTRY(EDI)        \
  ENTRY(R8D)        \
  ENTRY(R9D)        \
  ENTRY(R10D)       \
  ENTRY(R11D)       \
  ENTRY(R12D)       \
  ENTRY(R13D)       \
  ENTRY(R14D)       \
  ENTRY(R15D)

/*
 * Effective-address bases for 64-bit addressing (X-macro list; 16 entries).
 * The fifth entry is the placeholder `sib64`, in the position where
 * REGS_64BIT lists RSP; presumably it marks encodings that are followed by a
 * SIB byte -- confirm against the decoder implementation.
 */
#define EA_BASES_64BIT  \
  ENTRY(RAX)            \
  ENTRY(RCX)            \
  ENTRY(RDX)            \
  ENTRY(RBX)            \
  ENTRY(sib64)          \
  ENTRY(RBP)            \
  ENTRY(RSI)            \
  ENTRY(RDI)            \
  ENTRY(R8)             \
  ENTRY(R9)             \
  ENTRY(R10)            \
  ENTRY(R11)            \
  ENTRY(R12)            \
  ENTRY(R13)            \
  ENTRY(R14)            \
  ENTRY(R15)

/* 64-bit general-purpose registers (X-macro list; 16 entries). */
#define REGS_64BIT  \
  ENTRY(RAX)        \
  ENTRY(RCX)        \
  ENTRY(RDX)        \
  ENTRY(RBX)        \
  ENTRY(RSP)        \
  ENTRY(RBP)        \
  ENTRY(RSI)        \
  ENTRY(RDI)        \
  ENTRY(R8)         \
  ENTRY(R9)         \
  ENTRY(R10)        \
  ENTRY(R11)        \
  ENTRY(R12)        \
  ENTRY(R13)        \
  ENTRY(R14)        \
  ENTRY(R15)

/* MMX registers MM0-MM7 (X-macro list; 8 entries). */
#define REGS_MMX  \
  ENTRY(MM0)      \
  ENTRY(MM1)      \
  ENTRY(MM2)      \
  ENTRY(MM3)      \
  ENTRY(MM4)      \
  ENTRY(MM5)      \
  ENTRY(MM6)      \
  ENTRY(MM7)

/* XMM registers XMM0-XMM15 (X-macro list; 16 entries). */
#define REGS_XMM  \
  ENTRY(XMM0)     \
  ENTRY(XMM1)     \
  ENTRY(XMM2)     \
  ENTRY(XMM3)     \
  ENTRY(XMM4)     \
  ENTRY(XMM5)     \
  ENTRY(XMM6)     \
  ENTRY(XMM7)     \
  ENTRY(XMM8)     \
  ENTRY(XMM9)     \
  ENTRY(XMM10)    \
  ENTRY(XMM11)    \
  ENTRY(XMM12)    \
  ENTRY(XMM13)    \
  ENTRY(XMM14)    \
  ENTRY(XMM15)

/* YMM registers YMM0-YMM15 (X-macro list; 16 entries). */
#define REGS_YMM  \
  ENTRY(YMM0)     \
  ENTRY(YMM1)     \
  ENTRY(YMM2)     \
  ENTRY(YMM3)     \
  ENTRY(YMM4)     \
  ENTRY(YMM5)     \
  ENTRY(YMM6)     \
  ENTRY(YMM7)     \
  ENTRY(YMM8)     \
  ENTRY(YMM9)     \
  ENTRY(YMM10)    \
  ENTRY(YMM11)    \
  ENTRY(YMM12)    \
  ENTRY(YMM13)    \
  ENTRY(YMM14)    \
  ENTRY(YMM15)

/* Segment registers in the order ES, CS, SS, DS, FS, GS (X-macro list). */
#define REGS_SEGMENT \
  ENTRY(ES)          \
  ENTRY(CS)          \
  ENTRY(SS)          \
  ENTRY(DS)          \
  ENTRY(FS)          \
  ENTRY(GS)

/* Debug registers DR0-DR7 (X-macro list; 8 entries). */
#define REGS_DEBUG  \
  ENTRY(DR0)        \
  ENTRY(DR1)        \
  ENTRY(DR2)        \
  ENTRY(DR3)        \
  ENTRY(DR4)        \
  ENTRY(DR5)        \
  ENTRY(DR6)        \
  ENTRY(DR7)

/* Control registers CR0-CR8 (X-macro list; 9 entries, CR8 included). */
#define REGS_CONTROL  \
  ENTRY(CR0)          \
  ENTRY(CR1)          \
  ENTRY(CR2)          \
  ENTRY(CR3)          \
  ENTRY(CR4)          \
  ENTRY(CR5)          \
  ENTRY(CR6)          \
  ENTRY(CR7)          \
  ENTRY(CR8)

/*
 * Aggregate X-macro lists.  Each one simply concatenates the component lists
 * in the order written, so ENTRY must be defined by the user exactly as for
 * the individual lists.
 */

/* Every effective-address base: 16-bit, then 32-bit, then 64-bit. */
#define ALL_EA_BASES  \
  EA_BASES_16BIT      \
  EA_BASES_32BIT      \
  EA_BASES_64BIT

/* Every SIB base register: 32-bit, then 64-bit. */
#define ALL_SIB_BASES \
  REGS_32BIT          \
  REGS_64BIT

/* Every register known to the decoder; RIP is appended as the final entry. */
#define ALL_REGS      \
  REGS_8BIT           \
  REGS_16BIT          \
  REGS_32BIT          \
  REGS_64BIT          \
  REGS_MMX            \
  REGS_XMM            \
  REGS_YMM            \
  REGS_SEGMENT        \
  REGS_DEBUG          \
  REGS_CONTROL        \
  ENTRY(RIP)

293 /*
294  * EABase - All possible values of the base field for effective-address
295  *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
296  *   distinguish between bases (EA_BASE_*) and registers that just happen to be
297  *   referred to when Mod == 0b11 (EA_REG_*).
298  */
299 typedef enum {
300   EA_BASE_NONE,
301 #define ENTRY(x) EA_BASE_##x,
302   ALL_EA_BASES
303 #undef ENTRY
304 #define ENTRY(x) EA_REG_##x,
305   ALL_REGS
306 #undef ENTRY
307   EA_max
308 } EABase;
309 
310 /*
311  * SIBIndex - All possible values of the SIB index field.
312  *   Borrows entries from ALL_EA_BASES with the special case that
313  *   sib is synonymous with NONE.
314  * Vector SIB: index can be XMM or YMM.
315  */
316 typedef enum {
317   SIB_INDEX_NONE,
318 #define ENTRY(x) SIB_INDEX_##x,
319   ALL_EA_BASES
320   REGS_XMM
321   REGS_YMM
322 #undef ENTRY
323   SIB_INDEX_max
324 } SIBIndex;
325 
326 /*
327  * SIBBase - All possible values of the SIB base field.
328  */
329 typedef enum {
330   SIB_BASE_NONE,
331 #define ENTRY(x) SIB_BASE_##x,
332   ALL_SIB_BASES
333 #undef ENTRY
334   SIB_BASE_max
335 } SIBBase;
336 
/*
 * EADisplacement - Possible displacement types for effective-address
 *   computations.  The numeric suffix is the displacement width in bits;
 *   EA_DISP_NONE (value 0) means no displacement.
 */
typedef enum {
  EA_DISP_NONE,
  EA_DISP_8,
  EA_DISP_16,
  EA_DISP_32
} EADisplacement;

348 /*
349  * Reg - All possible values of the reg field in the ModR/M byte.
350  */
351 typedef enum {
352 #define ENTRY(x) MODRM_REG_##x,
353   ALL_REGS
354 #undef ENTRY
355   MODRM_REG_max
356 } Reg;
357 
/*
 * SegmentOverride - All possible segment overrides.
 *   SEG_OVERRIDE_NONE is 0 and SEG_OVERRIDE_max is the sentinel.  Note that
 *   the order here (CS, SS, DS, ES, FS, GS) differs from REGS_SEGMENT
 *   (ES, CS, SS, DS, FS, GS), so values must not be converted between the two
 *   by simple arithmetic.
 */
typedef enum {
  SEG_OVERRIDE_NONE,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
} SegmentOverride;

/*
 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field.
 *   Each enumerator is named after the leading opcode byte sequence it stands
 *   for (0F, 0F 38, 0F 3A); the values are the raw field encodings.
 */
typedef enum {
  VEX_LOB_0F = 0x1,
  VEX_LOB_0F38 = 0x2,
  VEX_LOB_0F3A = 0x3
} VEXLeadingOpcodeByte;

/*
 * VEXPrefixCode - Possible values for the VEX.pp field.
 *   Each enumerator is named after the prefix byte it stands for (none, 66,
 *   F3, F2); the values are the raw two-bit field encodings.
 */
typedef enum {
  VEX_PREFIX_NONE = 0x0,
  VEX_PREFIX_66 = 0x1,
  VEX_PREFIX_F3 = 0x2,
  VEX_PREFIX_F2 = 0x3
} VEXPrefixCode;

/* BOOL - One-byte flag type used for the consumed* members of the decoder. */
typedef uint8_t BOOL;

/*
 * byteReader_t - Type for the byte reader that the consumer must provide to
 *   the decoder.  Reads a single byte from the instruction's address space.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param byte    - A pointer to a single byte in memory that should be set to
 *                  contain the value at address.
 * @param address - The address in the instruction's address space that should
 *                  be read from.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);

/*
 * dlog_t - Type for the logging function that the consumer can provide to
 *   get debugging output from the decoder.
 * @param arg     - A baton that the consumer can associate with any internal
 *                  state that it needs.
 * @param log     - A string that contains the message.  Will be reused after
 *                  the logger returns, so the logger must copy it if it needs
 *                  to keep the text.
 */
typedef void (*dlog_t)(void *arg, const char *log);

418 /*
419  * The x86 internal instruction, which is produced by the decoder.
420  */
421 struct InternalInstruction {
422   /* Reader interface (C) */
423   byteReader_t reader;
424   /* Opaque value passed to the reader */
425   const void* readerArg;
426   /* The address of the next byte to read via the reader */
427   uint64_t readerCursor;
428 
429   /* Logger interface (C) */
430   dlog_t dlog;
431   /* Opaque value passed to the logger */
432   void* dlogArg;
433 
434   /* General instruction information */
435 
436   /* The mode to disassemble for (64-bit, protected, real) */
437   DisassemblerMode mode;
438   /* The start of the instruction, usable with the reader */
439   uint64_t startLocation;
440   /* The length of the instruction, in bytes */
441   size_t length;
442 
443   /* Prefix state */
444 
445   /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
446   uint8_t prefixPresent[0x100];
447   /* contains the location (for use with the reader) of the prefix byte */
448   uint64_t prefixLocations[0x100];
449   /* The value of the VEX prefix, if present */
450   uint8_t vexPrefix[3];
451   /* The length of the VEX prefix (0 if not present) */
452   uint8_t vexSize;
453   /* The value of the REX prefix, if present */
454   uint8_t rexPrefix;
455   /* The location where a mandatory prefix would have to be (i.e., right before
456      the opcode, or right before the REX prefix if one is present) */
457   uint64_t necessaryPrefixLocation;
458   /* The segment override type */
459   SegmentOverride segmentOverride;
460 
461   /* Sizes of various critical pieces of data, in bytes */
462   uint8_t registerSize;
463   uint8_t addressSize;
464   uint8_t displacementSize;
465   uint8_t immediateSize;
466 
467   /* Offsets from the start of the instruction to the pieces of data, which is
468      needed to find relocation entries for adding symbolic operands */
469   uint8_t displacementOffset;
470   uint8_t immediateOffset;
471 
472   /* opcode state */
473 
474   /* The value of the two-byte escape prefix (usually 0x0f) */
475   uint8_t twoByteEscape;
476   /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
477   uint8_t threeByteEscape;
478   /* The last byte of the opcode, not counting any ModR/M extension */
479   uint8_t opcode;
480   /* The ModR/M byte of the instruction, if it is an opcode extension */
481   uint8_t modRMExtension;
482 
483   /* decode state */
484 
485   /* The type of opcode, used for indexing into the array of decode tables */
486   OpcodeType opcodeType;
487   /* The instruction ID, extracted from the decode table */
488   uint16_t instructionID;
489   /* The specifier for the instruction, from the instruction info table */
490   const struct InstructionSpecifier *spec;
491 
492   /* state for additional bytes, consumed during operand decode.  Pattern:
493      consumed___ indicates that the byte was already consumed and does not
494      need to be consumed again */
495 
496   /* The VEX.vvvv field, which contains a third register operand for some AVX
497      instructions */
498   Reg                           vvvv;
499 
500   /* The ModR/M byte, which contains most register operands and some portion of
501      all memory operands */
502   BOOL                          consumedModRM;
503   uint8_t                       modRM;
504 
505   /* The SIB byte, used for more complex 32- or 64-bit memory operands */
506   BOOL                          consumedSIB;
507   uint8_t                       sib;
508 
509   /* The displacement, used for memory operands */
510   BOOL                          consumedDisplacement;
511   int32_t                       displacement;
512 
513   /* Immediates.  There can be two in some cases */
514   uint8_t                       numImmediatesConsumed;
515   uint8_t                       numImmediatesTranslated;
516   uint64_t                      immediates[2];
517 
518   /* A register or immediate operand encoded into the opcode */
519   BOOL                          consumedOpcodeModifier;
520   uint8_t                       opcodeModifier;
521   Reg                           opcodeRegister;
522 
523   /* Portions of the ModR/M byte */
524 
525   /* These fields determine the allowable values for the ModR/M fields, which
526      depend on operand and address widths */
527   EABase                        eaBaseBase;
528   EABase                        eaRegBase;
529   Reg                           regBase;
530 
531   /* The Mod and R/M fields can encode a base for an effective address, or a
532      register.  These are separated into two fields here */
533   EABase                        eaBase;
534   EADisplacement                eaDisplacement;
535   /* The reg field always encodes a register */
536   Reg                           reg;
537 
538   /* SIB state */
539   SIBIndex                      sibIndex;
540   uint8_t                       sibScale;
541   SIBBase                       sibBase;
542 
543   const struct OperandSpecifier *operands;
544 };
545 
546 /* decodeInstruction - Decode one instruction and store the decoding results in
547  *   a buffer provided by the consumer.
548  * @param insn      - The buffer to store the instruction in.  Allocated by the
549  *                    consumer.
550  * @param reader    - The byteReader_t for the bytes to be read.
551  * @param readerArg - An argument to pass to the reader for storing context
552  *                    specific to the consumer.  May be NULL.
553  * @param logger    - The dlog_t to be used in printing status messages from the
554  *                    disassembler.  May be NULL.
555  * @param loggerArg - An argument to pass to the logger for storing context
556  *                    specific to the logger.  May be NULL.
557  * @param startLoc  - The address (in the reader's address space) of the first
558  *                    byte in the instruction.
559  * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
560  * @return          - Nonzero if there was an error during decode, 0 otherwise.
561  */
562 int decodeInstruction(struct InternalInstruction* insn,
563                       byteReader_t reader,
564                       const void* readerArg,
565                       dlog_t logger,
566                       void* loggerArg,
567                       const void* miiArg,
568                       uint64_t startLoc,
569                       DisassemblerMode mode);
570 
/* x86DisassemblerDebug - C-accessible function for printing a message to
 *   debugs()
 * @param file  - The name of the file printing the debug message.
 * @param line  - The line number that printed the debug message.
 * @param s     - The message to print.
 */
void x86DisassemblerDebug(const char *file,
                          unsigned line,
                          const char *s);

/* x86DisassemblerGetInstrName - C-accessible lookup of an instruction's name.
 * @param Opcode - The opcode whose name is requested (presumably an
 *                 instruction ID such as InternalInstruction.instructionID --
 *                 confirm against the implementation).
 * @param mii    - An opaque pointer to the consumer's instruction information.
 * @return       - The name as a C string.  Ownership and lifetime of the
 *                 returned string are not specified by this header.
 */
const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);

#ifdef __cplusplus
}
#endif

#endif