• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
2   *
3   *                     The LLVM Compiler Infrastructure
4   *
5   * This file is distributed under the University of Illinois Open Source
6   * License. See LICENSE.TXT for details.
7   *
8   *===----------------------------------------------------------------------===*
9   *
10   * This file is part of the X86 Disassembler.
11   * It contains the public interface of the instruction decoder.
12   * Documentation for the disassembler can be found in X86Disassembler.h.
13   *
14   *===----------------------------------------------------------------------===*/
15  
16  #ifndef X86DISASSEMBLERDECODER_H
17  #define X86DISASSEMBLERDECODER_H
18  
19  #ifdef __cplusplus
20  extern "C" {
21  #endif
22  
23  #define INSTRUCTION_SPECIFIER_FIELDS \
24    uint16_t operands;
25  
26  #define INSTRUCTION_IDS     \
27    uint16_t instructionIDs;
28  
29  #include "X86DisassemblerDecoderCommon.h"
30  
31  #undef INSTRUCTION_SPECIFIER_FIELDS
32  #undef INSTRUCTION_IDS
33  
/*
 * Field extractors for the ModR/M, SIB and REX bytes of an Intel instruction.
 * Every macro isolates one field and shifts it down so that the result starts
 * at bit 0.  Each mask is spelled as (field-width mask << field position).
 *
 *   ModR/M and SIB: a 2-bit field at bit 6, a 3-bit field at bit 3 and a
 *                   3-bit field at bit 0.
 *   REX:            single-bit flags W (bit 3), R (bit 2), X (bit 1), B (bit 0).
 */
#define modFromModRM(modRM)  (((modRM) & (0x3 << 6)) >> 6)
#define regFromModRM(modRM)  (((modRM) & (0x7 << 3)) >> 3)
#define rmFromModRM(modRM)   ((modRM) & (0x7 << 0))
#define scaleFromSIB(sib)    (((sib) & (0x3 << 6)) >> 6)
#define indexFromSIB(sib)    (((sib) & (0x7 << 3)) >> 3)
#define baseFromSIB(sib)     ((sib) & (0x7 << 0))
#define wFromREX(rex)        (((rex) & (0x1 << 3)) >> 3)
#define rFromREX(rex)        (((rex) & (0x1 << 2)) >> 2)
#define xFromREX(rex)        (((rex) & (0x1 << 1)) >> 1)
#define bFromREX(rex)        ((rex) & (0x1 << 0))
47  
/*
 * Field extractors for the bytes of a VEX prefix.  The R, X, B and vvvv
 * extractors complement the byte before masking, so a stored bit of 0 reads
 * back as 1 and vice versa; the remaining fields are read as stored.
 * NOTE(review): the "NofM" suffix presumably means byte N of an M-byte VEX
 * prefix - confirm against the decoder implementation.
 */
#define rFromVEX2of3(vex)       (((~(vex)) & (0x1 << 7)) >> 7)
#define xFromVEX2of3(vex)       (((~(vex)) & (0x1 << 6)) >> 6)
#define bFromVEX2of3(vex)       (((~(vex)) & (0x1 << 5)) >> 5)
#define mmmmmFromVEX2of3(vex)   ((vex) & (0x1f << 0))
#define wFromVEX3of3(vex)       (((vex) & (0x1 << 7)) >> 7)
#define vvvvFromVEX3of3(vex)    (((~(vex)) & (0xf << 3)) >> 3)
#define lFromVEX3of3(vex)       (((vex) & (0x1 << 2)) >> 2)
#define ppFromVEX3of3(vex)      ((vex) & (0x3 << 0))

#define rFromVEX2of2(vex)       (((~(vex)) & (0x1 << 7)) >> 7)
#define vvvvFromVEX2of2(vex)    (((~(vex)) & (0xf << 3)) >> 3)
#define lFromVEX2of2(vex)       (((vex) & (0x1 << 2)) >> 2)
#define ppFromVEX2of2(vex)      ((vex) & (0x3 << 0))
61  
/*
 * Register name lists used by the decoder, written as X-macros.  Each list is
 * a sequence of ENTRY(name) invocations: the user defines ENTRY, expands a
 * list, and undefines ENTRY again.  When a list is expanded inside an enum the
 * position of a name fixes the value of its enumerator, so the order of the
 * entries below must be preserved.
 */

/* 8-bit registers. */
#define REGS_8BIT                                         \
  ENTRY(AL)    ENTRY(CL)    ENTRY(DL)    ENTRY(BL)        \
  ENTRY(AH)    ENTRY(CH)    ENTRY(DH)    ENTRY(BH)        \
  ENTRY(R8B)   ENTRY(R9B)   ENTRY(R10B)  ENTRY(R11B)      \
  ENTRY(R12B)  ENTRY(R13B)  ENTRY(R14B)  ENTRY(R15B)      \
  ENTRY(SPL)   ENTRY(BPL)   ENTRY(SIL)   ENTRY(DIL)

/* 16-bit effective-address bases: four register pairs, then single bases. */
#define EA_BASES_16BIT                                    \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI)     \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)        \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)      \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)

/* 16-bit registers. */
#define REGS_16BIT                                        \
  ENTRY(AX)    ENTRY(CX)    ENTRY(DX)    ENTRY(BX)        \
  ENTRY(SP)    ENTRY(BP)    ENTRY(SI)    ENTRY(DI)        \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)      \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)

/* 32-bit effective-address bases: like REGS_32BIT, but the slot that holds
   ESP there holds the `sib` placeholder here. */
#define EA_BASES_32BIT                                    \
  ENTRY(EAX)   ENTRY(ECX)   ENTRY(EDX)   ENTRY(EBX)       \
  ENTRY(sib)   ENTRY(EBP)   ENTRY(ESI)   ENTRY(EDI)       \
  ENTRY(R8D)   ENTRY(R9D)   ENTRY(R10D)  ENTRY(R11D)      \
  ENTRY(R12D)  ENTRY(R13D)  ENTRY(R14D)  ENTRY(R15D)

/* 32-bit registers. */
#define REGS_32BIT                                        \
  ENTRY(EAX)   ENTRY(ECX)   ENTRY(EDX)   ENTRY(EBX)       \
  ENTRY(ESP)   ENTRY(EBP)   ENTRY(ESI)   ENTRY(EDI)       \
  ENTRY(R8D)   ENTRY(R9D)   ENTRY(R10D)  ENTRY(R11D)      \
  ENTRY(R12D)  ENTRY(R13D)  ENTRY(R14D)  ENTRY(R15D)

/* 64-bit effective-address bases: like REGS_64BIT, but the slot that holds
   RSP there holds the `sib64` placeholder here. */
#define EA_BASES_64BIT                                    \
  ENTRY(RAX)   ENTRY(RCX)   ENTRY(RDX)   ENTRY(RBX)       \
  ENTRY(sib64) ENTRY(RBP)   ENTRY(RSI)   ENTRY(RDI)       \
  ENTRY(R8)    ENTRY(R9)    ENTRY(R10)   ENTRY(R11)       \
  ENTRY(R12)   ENTRY(R13)   ENTRY(R14)   ENTRY(R15)

/* 64-bit registers. */
#define REGS_64BIT                                        \
  ENTRY(RAX)   ENTRY(RCX)   ENTRY(RDX)   ENTRY(RBX)       \
  ENTRY(RSP)   ENTRY(RBP)   ENTRY(RSI)   ENTRY(RDI)       \
  ENTRY(R8)    ENTRY(R9)    ENTRY(R10)   ENTRY(R11)       \
  ENTRY(R12)   ENTRY(R13)   ENTRY(R14)   ENTRY(R15)
195  
/*
 * X-macro lists for the MMX, XMM, YMM, segment, debug and control registers.
 * As with the other ENTRY() lists, the order of the entries determines the
 * values of any enumerators generated from them and must be preserved.
 */

#define REGS_MMX                                          \
  ENTRY(MM0)   ENTRY(MM1)   ENTRY(MM2)   ENTRY(MM3)       \
  ENTRY(MM4)   ENTRY(MM5)   ENTRY(MM6)   ENTRY(MM7)

#define REGS_XMM                                          \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)      \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)      \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11)     \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15)

#define REGS_YMM                                          \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)      \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)      \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11)     \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15)

#define REGS_SEGMENT                                      \
  ENTRY(ES)    ENTRY(CS)    ENTRY(SS)                     \
  ENTRY(DS)    ENTRY(FS)    ENTRY(GS)

#define REGS_DEBUG                                        \
  ENTRY(DR0)   ENTRY(DR1)   ENTRY(DR2)   ENTRY(DR3)       \
  ENTRY(DR4)   ENTRY(DR5)   ENTRY(DR6)   ENTRY(DR7)

#define REGS_CONTROL                                      \
  ENTRY(CR0)   ENTRY(CR1)   ENTRY(CR2)   ENTRY(CR3)       \
  ENTRY(CR4)   ENTRY(CR5)   ENTRY(CR6)   ENTRY(CR7)       \
  ENTRY(CR8)
270  
/*
 * Aggregate X-macro lists.  Each one simply concatenates the lists named on
 * its right-hand side, in the order given; ALL_REGS additionally appends RIP
 * as its final entry.
 */
#define ALL_EA_BASES  EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT

#define ALL_SIB_BASES REGS_32BIT REGS_64BIT

#define ALL_REGS                                          \
  REGS_8BIT REGS_16BIT REGS_32BIT REGS_64BIT              \
  REGS_MMX REGS_XMM REGS_YMM                              \
  REGS_SEGMENT REGS_DEBUG REGS_CONTROL                    \
  ENTRY(RIP)
292  
293  /*
294   * EABase - All possible values of the base field for effective-address
295   *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We
296   *   distinguish between bases (EA_BASE_*) and registers that just happen to be
297   *   referred to when Mod == 0b11 (EA_REG_*).
298   */
299  typedef enum {
300    EA_BASE_NONE,
301  #define ENTRY(x) EA_BASE_##x,
302    ALL_EA_BASES
303  #undef ENTRY
304  #define ENTRY(x) EA_REG_##x,
305    ALL_REGS
306  #undef ENTRY
307    EA_max
308  } EABase;
309  
310  /*
311   * SIBIndex - All possible values of the SIB index field.
312   *   Borrows entries from ALL_EA_BASES with the special case that
313   *   sib is synonymous with NONE.
314   * Vector SIB: index can be XMM or YMM.
315   */
316  typedef enum {
317    SIB_INDEX_NONE,
318  #define ENTRY(x) SIB_INDEX_##x,
319    ALL_EA_BASES
320    REGS_XMM
321    REGS_YMM
322  #undef ENTRY
323    SIB_INDEX_max
324  } SIBIndex;
325  
326  /*
327   * SIBBase - All possible values of the SIB base field.
328   */
329  typedef enum {
330    SIB_BASE_NONE,
331  #define ENTRY(x) SIB_BASE_##x,
332    ALL_SIB_BASES
333  #undef ENTRY
334    SIB_BASE_max
335  } SIBBase;
336  
/*
 * EADisplacement - The kinds of displacement an effective-address computation
 *   can carry: none, or a displacement of 8, 16 or 32 bits.
 */
typedef enum {
  EA_DISP_NONE = 0,
  EA_DISP_8    = 1,
  EA_DISP_16   = 2,
  EA_DISP_32   = 3
} EADisplacement;
347  
348  /*
349   * Reg - All possible values of the reg field in the ModR/M byte.
350   */
351  typedef enum {
352  #define ENTRY(x) MODRM_REG_##x,
353    ALL_REGS
354  #undef ENTRY
355    MODRM_REG_max
356  } Reg;
357  
/*
 * SegmentOverride - The segment overrides an instruction can carry.
 *   SEG_OVERRIDE_NONE means no override; SEG_OVERRIDE_max is a terminator
 *   that follows the last real value.
 */
typedef enum {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS   = 1,
  SEG_OVERRIDE_SS   = 2,
  SEG_OVERRIDE_DS   = 3,
  SEG_OVERRIDE_ES   = 4,
  SEG_OVERRIDE_FS   = 5,
  SEG_OVERRIDE_GS   = 6,
  SEG_OVERRIDE_max  = 7
} SegmentOverride;
371  
/*
 * VEXLeadingOpcodeByte - Values of the VEX.m-mmmm field.  Each enumerator is
 *   named after the leading opcode byte sequence it stands for.
 */
typedef enum {
  VEX_LOB_0F   = 1,
  VEX_LOB_0F38 = 2,
  VEX_LOB_0F3A = 3
} VEXLeadingOpcodeByte;
381  
/*
 * VEXPrefixCode - Values of the VEX.pp field.  Each enumerator is named after
 *   the prefix byte it stands for, or NONE when there is none.
 */
typedef enum {
  VEX_PREFIX_NONE = 0,
  VEX_PREFIX_66   = 1,
  VEX_PREFIX_F3   = 2,
  VEX_PREFIX_F2   = 3
} VEXPrefixCode;
392  
/* BOOL - One-byte flag type used for the decoder's yes/no state. */
typedef uint8_t BOOL;

/*
 * byteReader_t - Callback the consumer must supply so that the decoder can
 *   fetch instruction bytes.  One call reads one byte from the instruction's
 *   address space.
 * @param arg     - Baton for whatever internal state the consumer wants to
 *                  associate with the reader.
 * @param byte    - Where to store the byte found at address.
 * @param address - The address, in the instruction's address space, to read.
 * @return        - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);

/*
 * dlog_t - Callback the consumer may supply to receive debugging output from
 *   the decoder.
 * @param arg     - Baton for whatever internal state the consumer wants to
 *                  associate with the logger.
 * @param log     - The message text.  The string will be reused after the
 *                  logger returns, so it must not be retained.
 */
typedef void (*dlog_t)(void *arg, const char *log);
417  
418  /*
419   * The x86 internal instruction, which is produced by the decoder.
420   */
421  struct InternalInstruction {
422    /* Reader interface (C) */
423    byteReader_t reader;
424    /* Opaque value passed to the reader */
425    const void* readerArg;
426    /* The address of the next byte to read via the reader */
427    uint64_t readerCursor;
428  
429    /* Logger interface (C) */
430    dlog_t dlog;
431    /* Opaque value passed to the logger */
432    void* dlogArg;
433  
434    /* General instruction information */
435  
436    /* The mode to disassemble for (64-bit, protected, real) */
437    DisassemblerMode mode;
438    /* The start of the instruction, usable with the reader */
439    uint64_t startLocation;
440    /* The length of the instruction, in bytes */
441    size_t length;
442  
443    /* Prefix state */
444  
445    /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
446    uint8_t prefixPresent[0x100];
447    /* contains the location (for use with the reader) of the prefix byte */
448    uint64_t prefixLocations[0x100];
449    /* The value of the VEX prefix, if present */
450    uint8_t vexPrefix[3];
451    /* The length of the VEX prefix (0 if not present) */
452    uint8_t vexSize;
453    /* The value of the REX prefix, if present */
454    uint8_t rexPrefix;
455    /* The location where a mandatory prefix would have to be (i.e., right before
456       the opcode, or right before the REX prefix if one is present) */
457    uint64_t necessaryPrefixLocation;
458    /* The segment override type */
459    SegmentOverride segmentOverride;
460  
461    /* Sizes of various critical pieces of data, in bytes */
462    uint8_t registerSize;
463    uint8_t addressSize;
464    uint8_t displacementSize;
465    uint8_t immediateSize;
466  
467    /* Offsets from the start of the instruction to the pieces of data, which is
468       needed to find relocation entries for adding symbolic operands */
469    uint8_t displacementOffset;
470    uint8_t immediateOffset;
471  
472    /* opcode state */
473  
474    /* The value of the two-byte escape prefix (usually 0x0f) */
475    uint8_t twoByteEscape;
476    /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
477    uint8_t threeByteEscape;
478    /* The last byte of the opcode, not counting any ModR/M extension */
479    uint8_t opcode;
480    /* The ModR/M byte of the instruction, if it is an opcode extension */
481    uint8_t modRMExtension;
482  
483    /* decode state */
484  
485    /* The type of opcode, used for indexing into the array of decode tables */
486    OpcodeType opcodeType;
487    /* The instruction ID, extracted from the decode table */
488    uint16_t instructionID;
489    /* The specifier for the instruction, from the instruction info table */
490    const struct InstructionSpecifier *spec;
491  
492    /* state for additional bytes, consumed during operand decode.  Pattern:
493       consumed___ indicates that the byte was already consumed and does not
494       need to be consumed again */
495  
496    /* The VEX.vvvv field, which contains a third register operand for some AVX
497       instructions */
498    Reg                           vvvv;
499  
500    /* The ModR/M byte, which contains most register operands and some portion of
501       all memory operands */
502    BOOL                          consumedModRM;
503    uint8_t                       modRM;
504  
505    /* The SIB byte, used for more complex 32- or 64-bit memory operands */
506    BOOL                          consumedSIB;
507    uint8_t                       sib;
508  
509    /* The displacement, used for memory operands */
510    BOOL                          consumedDisplacement;
511    int32_t                       displacement;
512  
513    /* Immediates.  There can be two in some cases */
514    uint8_t                       numImmediatesConsumed;
515    uint8_t                       numImmediatesTranslated;
516    uint64_t                      immediates[2];
517  
518    /* A register or immediate operand encoded into the opcode */
519    BOOL                          consumedOpcodeModifier;
520    uint8_t                       opcodeModifier;
521    Reg                           opcodeRegister;
522  
523    /* Portions of the ModR/M byte */
524  
525    /* These fields determine the allowable values for the ModR/M fields, which
526       depend on operand and address widths */
527    EABase                        eaBaseBase;
528    EABase                        eaRegBase;
529    Reg                           regBase;
530  
531    /* The Mod and R/M fields can encode a base for an effective address, or a
532       register.  These are separated into two fields here */
533    EABase                        eaBase;
534    EADisplacement                eaDisplacement;
535    /* The reg field always encodes a register */
536    Reg                           reg;
537  
538    /* SIB state */
539    SIBIndex                      sibIndex;
540    uint8_t                       sibScale;
541    SIBBase                       sibBase;
542  
543    const struct OperandSpecifier *operands;
544  };
545  
546  /* decodeInstruction - Decode one instruction and store the decoding results in
547   *   a buffer provided by the consumer.
548   * @param insn      - The buffer to store the instruction in.  Allocated by the
549   *                    consumer.
550   * @param reader    - The byteReader_t for the bytes to be read.
551   * @param readerArg - An argument to pass to the reader for storing context
552   *                    specific to the consumer.  May be NULL.
553   * @param logger    - The dlog_t to be used in printing status messages from the
554   *                    disassembler.  May be NULL.
555   * @param loggerArg - An argument to pass to the logger for storing context
556   *                    specific to the logger.  May be NULL.
557   * @param startLoc  - The address (in the reader's address space) of the first
558   *                    byte in the instruction.
559   * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
560   * @return          - Nonzero if there was an error during decode, 0 otherwise.
561   */
562  int decodeInstruction(struct InternalInstruction* insn,
563                        byteReader_t reader,
564                        const void* readerArg,
565                        dlog_t logger,
566                        void* loggerArg,
567                        const void* miiArg,
568                        uint64_t startLoc,
569                        DisassemblerMode mode);
570  
/* x86DisassemblerDebug - Function callable from C that prints a message to
 *   debugs().
 * @param file  - Name of the file that is printing the debug message.
 * @param line  - Line number that printed the debug message.
 * @param s     - The message to print.
 */
void x86DisassemblerDebug(const char *file, unsigned line, const char *s);
581  
/* x86DisassemblerGetInstrName - Look up a string for an opcode.
 * @param Opcode - The opcode to look up.
 * @param mii    - Opaque pointer supplied by the caller.
 *                 NOTE(review): presumably an instruction-info object -
 *                 confirm against the implementation.
 * @return       - A string; judging by the function name this is the
 *                 instruction's name (not verifiable from this header).
 */
const char *x86DisassemblerGetInstrName(unsigned Opcode,
                                        const void *mii);
583  
584  #ifdef __cplusplus
585  }
586  #endif
587  
588  #endif
589