• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
2  *
3  *                     The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains common definitions used by both the disassembler and the table
12  *  generator.
13  * Documentation for the disassembler can be found in X86Disassembler.h.
14  *
15  *===----------------------------------------------------------------------===*/
16 
17 /*
18  * This header file provides those definitions that need to be shared between
19  * the decoder and the table generator in a C-friendly manner.
20  */
21 
22 #ifndef X86DISASSEMBLERDECODERCOMMON_H
23 #define X86DISASSEMBLERDECODERCOMMON_H
24 
25 #include "llvm/Support/DataTypes.h"
26 
/*
 * Shared names, in two spellings each: X_SYM expands to a C identifier and
 * X_STR expands to the same spelling as a string literal.  The two spellings
 * of every pair are written out by hand and must be kept identical.
 */

/* Instruction specifiers. */
#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"

/* Contexts. */
#define CONTEXTS_SYM      x86DisassemblerContexts
#define CONTEXTS_STR      "x86DisassemblerContexts"

/* One-byte and two-byte opcodes. */
#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"

/* Three-byte opcodes (38, 3A, A6 and A7 variants). */
#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes
#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"
44 
/*
 * Attributes of an instruction that must be known before the opcode can be
 * processed correctly.  Most of them record the presence of a particular
 * prefix; ATTR_64BIT is the exception and simply describes the decoding
 * context.
 *
 * ATTRIBUTE_BITS is an X-macro list: define ENUM_ENTRY(name, value) to the
 * desired expansion, expand ATTRIBUTE_BITS, then #undef ENUM_ENTRY.  Every
 * attribute other than ATTR_NONE occupies its own single bit.
 */
#define ATTRIBUTE_BITS            \
  ENUM_ENTRY(ATTR_NONE,   0x00)   \
  ENUM_ENTRY(ATTR_64BIT,  0x01)   \
  ENUM_ENTRY(ATTR_XS,     0x02)   \
  ENUM_ENTRY(ATTR_XD,     0x04)   \
  ENUM_ENTRY(ATTR_REXW,   0x08)   \
  ENUM_ENTRY(ATTR_OPSIZE, 0x10)   \
  ENUM_ENTRY(ATTR_VEX,    0x20)   \
  ENUM_ENTRY(ATTR_VEXL,   0x40)

/*
 * ATTR_max follows the last list entry, so its value is ATTR_VEXL + 1 (0x41);
 * it is not the number of attributes.
 */
#define ENUM_ENTRY(name, value) name = value,
enum attributeBits {
  ATTRIBUTE_BITS
  ATTR_max
};
#undef ENUM_ENTRY
66 
/*
 * Combinations of the ATTR_* attributes that are relevant to instruction
 * decode.  Although other combinations are possible, they can be reduced to
 * these without affecting the ultimately decoded instruction.
 *
 * INSTRUCTION_CONTEXTS is an X-macro list.  Each entry is
 * ENUM_ENTRY(class name, rank, rationale for the rank); define ENUM_ENTRY
 * with three parameters before expanding the list and #undef it afterwards.
 */

/*           Class name           Rank  Rationale for rank assignment         */
#define INSTRUCTION_CONTEXTS                                                   \
  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
                                        "64-bit mode but no more")             \
  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
                                        "change width; overrides IC_OPSIZE")   \
  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
                                        "secondary")                           \
  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
                                        "opcode")                              \
  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
                                        "IC_64BIT_REXW_XS")                    \
  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                        "else because this changes most "      \
                                        "operands' meaning")                   \
  ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
  ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
  ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
  ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
  ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
  ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
  ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")

/*
 * InstructionContext - one enumerator per list entry, numbered from 0 in list
 * order (rank and rationale are ignored here).  IC_max equals the number of
 * contexts.
 */
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
  INSTRUCTION_CONTEXTS
  IC_max
} InstructionContext;
#undef ENUM_ENTRY
117 
/*
 * OpcodeType - the opcode types.  The type determines which decode table to
 * use, both in the Intel manual and in the decoder.  The numeric values are
 * pinned explicitly and run from 0 to 5.
 */
typedef enum {
  ONEBYTE      = 0,
  TWOBYTE      = 1,
  THREEBYTE_38 = 2,
  THREEBYTE_3A = 3,
  THREEBYTE_A6 = 4,
  THREEBYTE_A7 = 5
} OpcodeType;
130 
/*
 * The structs declared after this comment form the hierarchical decode
 * table.  Lookup proceeds in stages: the decoder first settles the
 * instruction's class (i.e., which IC_* constant applies to it) and then
 * reads the opcode.  Because some instructions require specific values of
 * the ModR/M byte, the ModR/M byte indexes into the final table.
 *
 * If a ModR/M byte is not required, the values for each instructionID are
 * identical.
 * NOTE(review): the earlier wording also said that "required" is left unset
 * in that case; no member of that name is declared in this header - confirm
 * whether that sentence is stale.
 */

/* InstrUID - 16-bit unsigned instruction identifier. */
typedef uint16_t InstrUID;
142 
/*
 * ModRMDecisionType - describes the type of a ModR/M decision, which lets
 * the consumer determine how many entries the decision holds.
 *
 * MODRM_ONEENTRY - The decoded instruction is the same no matter what the
 *                  value of the ModR/M byte is.
 * MODRM_SPLITRM  - A ModR/M byte between 0x00 and 0xbf corresponds to one
 *                  instruction; any other value corresponds to a different
 *                  instruction.
 * MODRM_FULL     - Potentially, each value of the ModR/M byte could
 *                  correspond to a different instruction.
 *
 * MODRMTYPES is an X-macro list taking a one-parameter ENUM_ENTRY.
 */

#define MODRMTYPES              \
  ENUM_ENTRY(MODRM_ONEENTRY)    \
  ENUM_ENTRY(MODRM_SPLITRM)     \
  ENUM_ENTRY(MODRM_FULL)

/* Enumerators are numbered from 0 in list order; MODRM_max is their count. */
#define ENUM_ENTRY(name) name,
typedef enum {
  MODRMTYPES
  MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY
167 
168 /*
169  * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
170  *  instruction each possible value of the ModR/M byte corresponds to.  Once
171  *  this information is known, we have narrowed down to a single instruction.
172  */
173 struct ModRMDecision {
174   uint8_t     modrm_type;
175 
176   /* The macro below must be defined wherever this file is included. */
177   INSTRUCTION_IDS
178 };
179 
180 /*
181  * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
182  *   given a particular opcode.
183  */
184 struct OpcodeDecision {
185   struct ModRMDecision modRMDecisions[256];
186 };
187 
188 /*
189  * ContextDecision - Specifies which opcode->instruction tables to look at given
190  *   a particular context (set of attributes).  Since there are many possible
191  *   contexts, the decoder first uses CONTEXTS_SYM to determine which context
192  *   applies given a specific set of attributes.  Hence there are only IC_max
193  *   entries in this table, rather than 2^(ATTR_max).
194  */
195 struct ContextDecision {
196   struct OpcodeDecision opcodeDecisions[IC_max];
197 };
198 
/*
 * Physical encodings of instruction operands.
 *
 * ENCODINGS is an X-macro list; each entry is
 * ENUM_ENTRY(encoding name, description string).
 */

#define ENCODINGS                                                              \
  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
  ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
                              "the opcode byte")                               \
  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
                              "opcode byte")                                   \
                                                                               \
  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                              "opcode byte")                                   \
  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                              "in type")

/*
 * OperandEncoding - enumerators numbered from 0 in list order (descriptions
 * are dropped here); ENCODING_max is the number of encodings.
 */
#define ENUM_ENTRY(name, description) name,
typedef enum {
  ENCODINGS
  ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
239 
/*
 * Semantic interpretations of instruction operands.
 *
 * TYPES is an X-macro list; each entry is
 * ENUM_ENTRY(type name, description string).  Within a run of related
 * entries, a description that gives only a size continues the description of
 * the first entry of that run.
 */

#define TYPES                                                                  \
  ENUM_ENTRY(TYPE_NONE,       "")                                              \
  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
  ENUM_ENTRY(TYPE_M256,       "32-byte (AVX)")                                 \
  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
                              "base)")                                         \
  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \
  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
                              "floating-point instructions")                   \
  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
  ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
  ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
                                                                               \
  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")

/*
 * OperandType - enumerators numbered from 0 in list order (descriptions are
 * dropped here); TYPE_max is the number of operand types.
 */
#define ENUM_ENTRY(n, d) n,
typedef enum {
  TYPES
  TYPE_max
} OperandType;
#undef ENUM_ENTRY
326 
327 /*
328  * OperandSpecifier - The specification for how to extract and interpret one
329  *   operand.
330  */
331 struct OperandSpecifier {
332   OperandEncoding  encoding;
333   OperandType      type;
334 };
335 
/*
 * ModifierType - says where the opcode modifier, if there is one, is to be
 * found.  Extended opcodes with AddRegFrm have the opcode modifier in the
 * ModR/M byte.
 *
 * MODIFIER_TYPES is an X-macro list taking a one-parameter ENUM_ENTRY.
 */

#define MODIFIER_TYPES          \
  ENUM_ENTRY(MODIFIER_NONE)     \
  ENUM_ENTRY(MODIFIER_OPCODE)   \
  ENUM_ENTRY(MODIFIER_MODRM)

/* Enumerators are numbered from 0 in list order; MODIFIER_max is the count. */
#define ENUM_ENTRY(name) name,
typedef enum {
  MODIFIER_TYPES
  MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY
352 
353 #define X86_MAX_OPERANDS 5
354 
355 /*
356  * The specification for how to extract and interpret a full instruction and
357  * its operands.
358  */
359 struct InstructionSpecifier {
360   ModifierType modifierType;
361   uint8_t modifierBase;
362   struct OperandSpecifier operands[X86_MAX_OPERANDS];
363 
364   /* The macro below must be defined wherever this file is included. */
365   INSTRUCTION_SPECIFIER_FIELDS
366 };
367 
/*
 * DisassemblerMode - Decoding mode for the Intel disassembler.  The 16-bit,
 * 32-bit, and 64-bit modes are supported; they represent real mode, IA-32e,
 * and IA-32e in 64-bit mode, respectively.
 */
typedef enum {
  MODE_16BIT = 0,
  MODE_32BIT = 1,
  MODE_64BIT = 2
} DisassemblerMode;
378 
379 #endif
380