• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2  *
3  *                     The LLVM Compiler Infrastructure
4  *
5  * This file is distributed under the University of Illinois Open Source
6  * License. See LICENSE.TXT for details.
7  *
8  *===----------------------------------------------------------------------===*
9  *
10  * This file is part of the X86 Disassembler.
11  * It contains the implementation of the instruction decoder.
12  * Documentation for the disassembler can be found in X86Disassembler.h.
13  *
14  *===----------------------------------------------------------------------===*/
15 
16 #include <stdarg.h>   /* for va_*()       */
17 #include <stdio.h>    /* for vsnprintf()  */
18 #include <stdlib.h>   /* for exit()       */
19 #include <string.h>   /* for memset()     */
20 
21 #include "X86DisassemblerDecoder.h"
22 
23 #include "X86GenDisassemblerTables.inc"
24 
/* Boolean constants used throughout this file. */
25 #define TRUE  1
26 #define FALSE 0
27 
/* Boolean type for this file, stored as a signed 8-bit integer. */
28 typedef int8_t bool;
29 
/*
 * debug - Reports an internal diagnostic message.  When NDEBUG is not defined
 *   the message is forwarded to x86DisassemblerDebug() together with the
 *   current file name and line number; when NDEBUG is defined the macro
 *   expands to an empty statement.
 */
30 #ifndef NDEBUG
31 #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
32 #else
33 #define debug(s) do { } while (0)
34 #endif
35 
36 
37 /*
38  * contextForAttrs - Client for the instruction context table.  Takes a set of
39  *   attributes and returns the appropriate decode context.
40  *
41  * @param attrMask  - Attributes, from the enumeration attributeBits.
42  * @return          - The InstructionContext to use when looking up an
43  *                    an instruction with these attributes.
44  */
contextForAttrs(uint8_t attrMask)45 static InstructionContext contextForAttrs(uint8_t attrMask) {
46   return CONTEXTS_SYM[attrMask];
47 }
48 
49 /*
50  * modRMRequired - Reads the appropriate instruction table to determine whether
51  *   the ModR/M byte is required to decode a particular instruction.
52  *
53  * @param type        - The opcode type (i.e., how many bytes it has).
54  * @param insnContext - The context for the instruction, as returned by
55  *                      contextForAttrs.
56  * @param opcode      - The last byte of the instruction's opcode, not counting
57  *                      ModR/M extensions and escapes.
58  * @return            - TRUE if the ModR/M byte is required, FALSE otherwise.
59  */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint8_t opcode)60 static int modRMRequired(OpcodeType type,
61                          InstructionContext insnContext,
62                          uint8_t opcode) {
63   const struct ContextDecision* decision = 0;
64 
65   switch (type) {
66   case ONEBYTE:
67     decision = &ONEBYTE_SYM;
68     break;
69   case TWOBYTE:
70     decision = &TWOBYTE_SYM;
71     break;
72   case THREEBYTE_38:
73     decision = &THREEBYTE38_SYM;
74     break;
75   case THREEBYTE_3A:
76     decision = &THREEBYTE3A_SYM;
77     break;
78   case THREEBYTE_A6:
79     decision = &THREEBYTEA6_SYM;
80     break;
81   case THREEBYTE_A7:
82     decision = &THREEBYTEA7_SYM;
83     break;
84   }
85 
86   return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87     modrm_type != MODRM_ONEENTRY;
88 }
89 
90 /*
91  * decode - Reads the appropriate instruction table to obtain the unique ID of
92  *   an instruction.
93  *
94  * @param type        - See modRMRequired().
95  * @param insnContext - See modRMRequired().
96  * @param opcode      - See modRMRequired().
97  * @param modRM       - The ModR/M byte if required, or any value if not.
98  * @return            - The UID of the instruction, or 0 on failure.
99  */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)100 static InstrUID decode(OpcodeType type,
101                        InstructionContext insnContext,
102                        uint8_t opcode,
103                        uint8_t modRM) {
104   const struct ModRMDecision* dec = 0;
105 
106   switch (type) {
107   case ONEBYTE:
108     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109     break;
110   case TWOBYTE:
111     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112     break;
113   case THREEBYTE_38:
114     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115     break;
116   case THREEBYTE_3A:
117     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118     break;
119   case THREEBYTE_A6:
120     dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121     break;
122   case THREEBYTE_A7:
123     dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124     break;
125   }
126 
127   switch (dec->modrm_type) {
128   default:
129     debug("Corrupt table!  Unknown modrm_type");
130     return 0;
131   case MODRM_ONEENTRY:
132     return modRMTable[dec->instructionIDs];
133   case MODRM_SPLITRM:
134     if (modFromModRM(modRM) == 0x3)
135       return modRMTable[dec->instructionIDs+1];
136     return modRMTable[dec->instructionIDs];
137   case MODRM_SPLITREG:
138     if (modFromModRM(modRM) == 0x3)
139       return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
140     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
141   case MODRM_SPLITMISC:
142     if (modFromModRM(modRM) == 0x3)
143       return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
144     return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
145   case MODRM_FULL:
146     return modRMTable[dec->instructionIDs+modRM];
147   }
148 }
149 
150 /*
151  * specifierForUID - Given a UID, returns the name and operand specification for
152  *   that instruction.
153  *
154  * @param uid - The unique ID for the instruction.  This should be returned by
155  *              decode(); specifierForUID will not check bounds.
156  * @return    - A pointer to the specification for that instruction.
157  */
specifierForUID(InstrUID uid)158 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
159   return &INSTRUCTIONS_SYM[uid];
160 }
161 
162 /*
163  * consumeByte - Uses the reader function provided by the user to consume one
164  *   byte from the instruction's memory and advance the cursor.
165  *
166  * @param insn  - The instruction with the reader function to use.  The cursor
167  *                for this instruction is advanced.
168  * @param byte  - A pointer to a pre-allocated memory buffer to be populated
169  *                with the data read.
170  * @return      - 0 if the read was successful; nonzero otherwise.
171  */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)172 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
173   int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
174 
175   if (!ret)
176     ++(insn->readerCursor);
177 
178   return ret;
179 }
180 
181 /*
182  * lookAtByte - Like consumeByte, but does not advance the cursor.
183  *
184  * @param insn  - See consumeByte().
185  * @param byte  - See consumeByte().
186  * @return      - See consumeByte().
187  */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)188 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
189   return insn->reader(insn->readerArg, byte, insn->readerCursor);
190 }
191 
unconsumeByte(struct InternalInstruction * insn)192 static void unconsumeByte(struct InternalInstruction* insn) {
193   insn->readerCursor--;
194 }
195 
/*
 * CONSUME_FUNC - Defines a static reader function called 'name' that reads
 *   sizeof(type) bytes starting at the instruction's cursor, assembles them
 *   with the first byte as the least significant, stores the result through
 *   ptr and advances the cursor.  If any byte cannot be read, the reader's
 *   nonzero result is returned and neither *ptr nor the cursor is modified.
 */
#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    type value = 0;                                               \
    unsigned index = 0;                                           \
    while (index < sizeof(type)) {                                \
      uint8_t current;                                            \
      int status = insn->reader(insn->readerArg,                  \
                                &current,                         \
                                insn->readerCursor + index);      \
      if (status != 0)                                            \
        return status;                                            \
      value = value | ((uint64_t)current << (index * 8));         \
      ++index;                                                    \
    }                                                             \
    *ptr = value;                                                 \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }
213 
214 /*
215  * consume* - Use the reader function provided by the user to consume data
216  *   values of various sizes from the instruction's memory and advance the
217  *   cursor appropriately.  These readers perform endian conversion.
218  *
219  * @param insn    - See consumeByte().
220  * @param ptr     - A pointer to a pre-allocated memory of appropriate size to
221  *                  be populated with the data read.
222  * @return        - See consumeByte().
223  */
CONSUME_FUNC(consumeInt8,int8_t)224 CONSUME_FUNC(consumeInt8, int8_t)
225 CONSUME_FUNC(consumeInt16, int16_t)
226 CONSUME_FUNC(consumeInt32, int32_t)
227 CONSUME_FUNC(consumeUInt16, uint16_t)
228 CONSUME_FUNC(consumeUInt32, uint32_t)
229 CONSUME_FUNC(consumeUInt64, uint64_t)
230 
231 /*
232  * dbgprintf - Uses the logging function provided by the user to log a single
233  *   message, typically without a carriage-return.
234  *
235  * @param insn    - The instruction containing the logging function.
236  * @param format  - See printf().
237  * @param ...     - See printf().
238  */
239 static void dbgprintf(struct InternalInstruction* insn,
240                       const char* format,
241                       ...) {
242   char buffer[256];
243   va_list ap;
244 
245   if (!insn->dlog)
246     return;
247 
248   va_start(ap, format);
249   (void)vsnprintf(buffer, sizeof(buffer), format, ap);
250   va_end(ap);
251 
252   insn->dlog(insn->dlogArg, buffer);
253 
254   return;
255 }
256 
257 /*
258  * setPrefixPresent - Marks that a particular prefix is present at a particular
259  *   location.
260  *
261  * @param insn      - The instruction to be marked as having the prefix.
262  * @param prefix    - The prefix that is present.
263  * @param location  - The location where the prefix is located (in the address
264  *                    space of the instruction's reader).
265  */
setPrefixPresent(struct InternalInstruction * insn,uint8_t prefix,uint64_t location)266 static void setPrefixPresent(struct InternalInstruction* insn,
267                                     uint8_t prefix,
268                                     uint64_t location)
269 {
270   insn->prefixPresent[prefix] = 1;
271   insn->prefixLocations[prefix] = location;
272 }
273 
274 /*
275  * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
276  *   present at a given location.
277  *
278  * @param insn      - The instruction to be queried.
279  * @param prefix    - The prefix.
280  * @param location  - The location to query.
281  * @return          - Whether the prefix is at that location.
282  */
isPrefixAtLocation(struct InternalInstruction * insn,uint8_t prefix,uint64_t location)283 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
284                                uint8_t prefix,
285                                uint64_t location)
286 {
287   if (insn->prefixPresent[prefix] == 1 &&
288      insn->prefixLocations[prefix] == location)
289     return TRUE;
290   else
291     return FALSE;
292 }
293 
294 /*
295  * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
296  *   instruction as having them.  Also sets the instruction's default operand,
297  *   address, and other relevant data sizes to report operands correctly.
298  *
299  * @param insn  - The instruction whose prefixes are to be read.
300  * @return      - 0 if the instruction could be read until the end of the prefix
301  *                bytes, and no prefixes conflicted; nonzero otherwise.
302  */
readPrefixes(struct InternalInstruction * insn)303 static int readPrefixes(struct InternalInstruction* insn) {
304   BOOL isPrefix = TRUE;
305   BOOL prefixGroups[4] = { FALSE };
306   uint64_t prefixLocation;
307   uint8_t byte = 0;
308 
309   BOOL hasAdSize = FALSE;
310   BOOL hasOpSize = FALSE;
311 
312   dbgprintf(insn, "readPrefixes()");
313 
314   while (isPrefix) {
315     prefixLocation = insn->readerCursor;
316 
317     if (consumeByte(insn, &byte))
318       return -1;
319 
320     /*
321      * If the first byte is a LOCK prefix break and let it be disassembled
322      * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
323      * FIXME there is currently no way to get the disassembler to print the
324      * lock prefix if it is not the first byte.
325      */
326     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
327       break;
328 
329     switch (byte) {
330     case 0xf0:  /* LOCK */
331     case 0xf2:  /* REPNE/REPNZ */
332     case 0xf3:  /* REP or REPE/REPZ */
333       if (prefixGroups[0])
334         dbgprintf(insn, "Redundant Group 1 prefix");
335       prefixGroups[0] = TRUE;
336       setPrefixPresent(insn, byte, prefixLocation);
337       break;
338     case 0x2e:  /* CS segment override -OR- Branch not taken */
339     case 0x36:  /* SS segment override -OR- Branch taken */
340     case 0x3e:  /* DS segment override */
341     case 0x26:  /* ES segment override */
342     case 0x64:  /* FS segment override */
343     case 0x65:  /* GS segment override */
344       switch (byte) {
345       case 0x2e:
346         insn->segmentOverride = SEG_OVERRIDE_CS;
347         break;
348       case 0x36:
349         insn->segmentOverride = SEG_OVERRIDE_SS;
350         break;
351       case 0x3e:
352         insn->segmentOverride = SEG_OVERRIDE_DS;
353         break;
354       case 0x26:
355         insn->segmentOverride = SEG_OVERRIDE_ES;
356         break;
357       case 0x64:
358         insn->segmentOverride = SEG_OVERRIDE_FS;
359         break;
360       case 0x65:
361         insn->segmentOverride = SEG_OVERRIDE_GS;
362         break;
363       default:
364         debug("Unhandled override");
365         return -1;
366       }
367       if (prefixGroups[1])
368         dbgprintf(insn, "Redundant Group 2 prefix");
369       prefixGroups[1] = TRUE;
370       setPrefixPresent(insn, byte, prefixLocation);
371       break;
372     case 0x66:  /* Operand-size override */
373       if (prefixGroups[2])
374         dbgprintf(insn, "Redundant Group 3 prefix");
375       prefixGroups[2] = TRUE;
376       hasOpSize = TRUE;
377       setPrefixPresent(insn, byte, prefixLocation);
378       break;
379     case 0x67:  /* Address-size override */
380       if (prefixGroups[3])
381         dbgprintf(insn, "Redundant Group 4 prefix");
382       prefixGroups[3] = TRUE;
383       hasAdSize = TRUE;
384       setPrefixPresent(insn, byte, prefixLocation);
385       break;
386     default:    /* Not a prefix byte */
387       isPrefix = FALSE;
388       break;
389     }
390 
391     if (isPrefix)
392       dbgprintf(insn, "Found prefix 0x%hhx", byte);
393   }
394 
395   insn->vexSize = 0;
396 
397   if (byte == 0xc4) {
398     uint8_t byte1;
399 
400     if (lookAtByte(insn, &byte1)) {
401       dbgprintf(insn, "Couldn't read second byte of VEX");
402       return -1;
403     }
404 
405     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
406       insn->vexSize = 3;
407       insn->necessaryPrefixLocation = insn->readerCursor - 1;
408     }
409     else {
410       unconsumeByte(insn);
411       insn->necessaryPrefixLocation = insn->readerCursor - 1;
412     }
413 
414     if (insn->vexSize == 3) {
415       insn->vexPrefix[0] = byte;
416       consumeByte(insn, &insn->vexPrefix[1]);
417       consumeByte(insn, &insn->vexPrefix[2]);
418 
419       /* We simulate the REX prefix for simplicity's sake */
420 
421       if (insn->mode == MODE_64BIT) {
422         insn->rexPrefix = 0x40
423                         | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
424                         | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
425                         | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
426                         | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
427       }
428 
429       switch (ppFromVEX3of3(insn->vexPrefix[2]))
430       {
431       default:
432         break;
433       case VEX_PREFIX_66:
434         hasOpSize = TRUE;
435         break;
436       }
437 
438       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
439     }
440   }
441   else if (byte == 0xc5) {
442     uint8_t byte1;
443 
444     if (lookAtByte(insn, &byte1)) {
445       dbgprintf(insn, "Couldn't read second byte of VEX");
446       return -1;
447     }
448 
449     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
450       insn->vexSize = 2;
451     }
452     else {
453       unconsumeByte(insn);
454     }
455 
456     if (insn->vexSize == 2) {
457       insn->vexPrefix[0] = byte;
458       consumeByte(insn, &insn->vexPrefix[1]);
459 
460       if (insn->mode == MODE_64BIT) {
461         insn->rexPrefix = 0x40
462                         | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
463       }
464 
465       switch (ppFromVEX2of2(insn->vexPrefix[1]))
466       {
467       default:
468         break;
469       case VEX_PREFIX_66:
470         hasOpSize = TRUE;
471         break;
472       }
473 
474       dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
475     }
476   }
477   else {
478     if (insn->mode == MODE_64BIT) {
479       if ((byte & 0xf0) == 0x40) {
480         uint8_t opcodeByte;
481 
482         if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
483           dbgprintf(insn, "Redundant REX prefix");
484           return -1;
485         }
486 
487         insn->rexPrefix = byte;
488         insn->necessaryPrefixLocation = insn->readerCursor - 2;
489 
490         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
491       } else {
492         unconsumeByte(insn);
493         insn->necessaryPrefixLocation = insn->readerCursor - 1;
494       }
495     } else {
496       unconsumeByte(insn);
497       insn->necessaryPrefixLocation = insn->readerCursor - 1;
498     }
499   }
500 
501   if (insn->mode == MODE_16BIT) {
502     insn->registerSize       = (hasOpSize ? 4 : 2);
503     insn->addressSize        = (hasAdSize ? 4 : 2);
504     insn->displacementSize   = (hasAdSize ? 4 : 2);
505     insn->immediateSize      = (hasOpSize ? 4 : 2);
506   } else if (insn->mode == MODE_32BIT) {
507     insn->registerSize       = (hasOpSize ? 2 : 4);
508     insn->addressSize        = (hasAdSize ? 2 : 4);
509     insn->displacementSize   = (hasAdSize ? 2 : 4);
510     insn->immediateSize      = (hasOpSize ? 2 : 4);
511   } else if (insn->mode == MODE_64BIT) {
512     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
513       insn->registerSize       = 8;
514       insn->addressSize        = (hasAdSize ? 4 : 8);
515       insn->displacementSize   = 4;
516       insn->immediateSize      = 4;
517     } else if (insn->rexPrefix) {
518       insn->registerSize       = (hasOpSize ? 2 : 4);
519       insn->addressSize        = (hasAdSize ? 4 : 8);
520       insn->displacementSize   = (hasOpSize ? 2 : 4);
521       insn->immediateSize      = (hasOpSize ? 2 : 4);
522     } else {
523       insn->registerSize       = (hasOpSize ? 2 : 4);
524       insn->addressSize        = (hasAdSize ? 4 : 8);
525       insn->displacementSize   = (hasOpSize ? 2 : 4);
526       insn->immediateSize      = (hasOpSize ? 2 : 4);
527     }
528   }
529 
530   return 0;
531 }
532 
533 /*
534  * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
535  *   extended or escape opcodes).
536  *
537  * @param insn  - The instruction whose opcode is to be read.
538  * @return      - 0 if the opcode could be read successfully; nonzero otherwise.
539  */
readOpcode(struct InternalInstruction * insn)540 static int readOpcode(struct InternalInstruction* insn) {
541   /* Determine the length of the primary opcode */
542 
543   uint8_t current;
544 
545   dbgprintf(insn, "readOpcode()");
546 
547   insn->opcodeType = ONEBYTE;
548 
549   if (insn->vexSize == 3)
550   {
551     switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
552     {
553     default:
554       dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
555       return -1;
556     case 0:
557       break;
558     case VEX_LOB_0F:
559       insn->twoByteEscape = 0x0f;
560       insn->opcodeType = TWOBYTE;
561       return consumeByte(insn, &insn->opcode);
562     case VEX_LOB_0F38:
563       insn->twoByteEscape = 0x0f;
564       insn->threeByteEscape = 0x38;
565       insn->opcodeType = THREEBYTE_38;
566       return consumeByte(insn, &insn->opcode);
567     case VEX_LOB_0F3A:
568       insn->twoByteEscape = 0x0f;
569       insn->threeByteEscape = 0x3a;
570       insn->opcodeType = THREEBYTE_3A;
571       return consumeByte(insn, &insn->opcode);
572     }
573   }
574   else if (insn->vexSize == 2)
575   {
576     insn->twoByteEscape = 0x0f;
577     insn->opcodeType = TWOBYTE;
578     return consumeByte(insn, &insn->opcode);
579   }
580 
581   if (consumeByte(insn, &current))
582     return -1;
583 
584   if (current == 0x0f) {
585     dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
586 
587     insn->twoByteEscape = current;
588 
589     if (consumeByte(insn, &current))
590       return -1;
591 
592     if (current == 0x38) {
593       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
594 
595       insn->threeByteEscape = current;
596 
597       if (consumeByte(insn, &current))
598         return -1;
599 
600       insn->opcodeType = THREEBYTE_38;
601     } else if (current == 0x3a) {
602       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
603 
604       insn->threeByteEscape = current;
605 
606       if (consumeByte(insn, &current))
607         return -1;
608 
609       insn->opcodeType = THREEBYTE_3A;
610     } else if (current == 0xa6) {
611       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
612 
613       insn->threeByteEscape = current;
614 
615       if (consumeByte(insn, &current))
616         return -1;
617 
618       insn->opcodeType = THREEBYTE_A6;
619     } else if (current == 0xa7) {
620       dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
621 
622       insn->threeByteEscape = current;
623 
624       if (consumeByte(insn, &current))
625         return -1;
626 
627       insn->opcodeType = THREEBYTE_A7;
628     } else {
629       dbgprintf(insn, "Didn't find a three-byte escape prefix");
630 
631       insn->opcodeType = TWOBYTE;
632     }
633   }
634 
635   /*
636    * At this point we have consumed the full opcode.
637    * Anything we consume from here on must be unconsumed.
638    */
639 
640   insn->opcode = current;
641 
642   return 0;
643 }
644 
645 static int readModRM(struct InternalInstruction* insn);
646 
647 /*
648  * getIDWithAttrMask - Determines the ID of an instruction, consuming
649  *   the ModR/M byte as appropriate for extended and escape opcodes,
650  *   and using a supplied attribute mask.
651  *
652  * @param instructionID - A pointer whose target is filled in with the ID of the
653  *                        instruction.
654  * @param insn          - The instruction whose ID is to be determined.
655  * @param attrMask      - The attribute mask to search.
656  * @return              - 0 if the ModR/M could be read when needed or was not
657  *                        needed; nonzero otherwise.
658  */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint8_t attrMask)659 static int getIDWithAttrMask(uint16_t* instructionID,
660                              struct InternalInstruction* insn,
661                              uint8_t attrMask) {
662   BOOL hasModRMExtension;
663 
664   uint8_t instructionClass;
665 
666   instructionClass = contextForAttrs(attrMask);
667 
668   hasModRMExtension = modRMRequired(insn->opcodeType,
669                                     instructionClass,
670                                     insn->opcode);
671 
672   if (hasModRMExtension) {
673     if (readModRM(insn))
674       return -1;
675 
676     *instructionID = decode(insn->opcodeType,
677                             instructionClass,
678                             insn->opcode,
679                             insn->modRM);
680   } else {
681     *instructionID = decode(insn->opcodeType,
682                             instructionClass,
683                             insn->opcode,
684                             0);
685   }
686 
687   return 0;
688 }
689 
690 /*
691  * is16BitEquivalent - Determines whether two instruction names refer to
692  * equivalent instructions but one is 16-bit whereas the other is not.
693  *
694  * @param orig  - The instruction that is not 16-bit
695  * @param equiv - The instruction that is 16-bit
696  */
is16BitEquivalent(const char * orig,const char * equiv)697 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
698   off_t i;
699 
700   for (i = 0;; i++) {
701     if (orig[i] == '\0' && equiv[i] == '\0')
702       return TRUE;
703     if (orig[i] == '\0' || equiv[i] == '\0')
704       return FALSE;
705     if (orig[i] != equiv[i]) {
706       if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
707         continue;
708       if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
709         continue;
710       if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
711         continue;
712       return FALSE;
713     }
714   }
715 }
716 
717 /*
718  * getID - Determines the ID of an instruction, consuming the ModR/M byte as
719  *   appropriate for extended and escape opcodes.  Determines the attributes and
720  *   context for the instruction before doing so.
721  *
722  * @param insn  - The instruction whose ID is to be determined.
723  * @return      - 0 if the ModR/M could be read when needed or was not needed;
724  *                nonzero otherwise.
725  */
getID(struct InternalInstruction * insn,const void * miiArg)726 static int getID(struct InternalInstruction* insn, const void *miiArg) {
727   uint8_t attrMask;
728   uint16_t instructionID;
729 
730   dbgprintf(insn, "getID()");
731 
732   attrMask = ATTR_NONE;
733 
734   if (insn->mode == MODE_64BIT)
735     attrMask |= ATTR_64BIT;
736 
737   if (insn->vexSize) {
738     attrMask |= ATTR_VEX;
739 
740     if (insn->vexSize == 3) {
741       switch (ppFromVEX3of3(insn->vexPrefix[2])) {
742       case VEX_PREFIX_66:
743         attrMask |= ATTR_OPSIZE;
744         break;
745       case VEX_PREFIX_F3:
746         attrMask |= ATTR_XS;
747         break;
748       case VEX_PREFIX_F2:
749         attrMask |= ATTR_XD;
750         break;
751       }
752 
753       if (lFromVEX3of3(insn->vexPrefix[2]))
754         attrMask |= ATTR_VEXL;
755     }
756     else if (insn->vexSize == 2) {
757       switch (ppFromVEX2of2(insn->vexPrefix[1])) {
758       case VEX_PREFIX_66:
759         attrMask |= ATTR_OPSIZE;
760         break;
761       case VEX_PREFIX_F3:
762         attrMask |= ATTR_XS;
763         break;
764       case VEX_PREFIX_F2:
765         attrMask |= ATTR_XD;
766         break;
767       }
768 
769       if (lFromVEX2of2(insn->vexPrefix[1]))
770         attrMask |= ATTR_VEXL;
771     }
772     else {
773       return -1;
774     }
775   }
776   else {
777     if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
778       attrMask |= ATTR_OPSIZE;
779     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
780       attrMask |= ATTR_ADSIZE;
781     else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
782       attrMask |= ATTR_XS;
783     else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
784       attrMask |= ATTR_XD;
785   }
786 
787   if (insn->rexPrefix & 0x08)
788     attrMask |= ATTR_REXW;
789 
790   if (getIDWithAttrMask(&instructionID, insn, attrMask))
791     return -1;
792 
793   /* The following clauses compensate for limitations of the tables. */
794 
795   if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
796       !(attrMask & ATTR_OPSIZE)) {
797     /*
798      * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
799      * has precedence since there are no L-bit with W-bit entries in the tables.
800      * So if the L-bit isn't significant we should use the W-bit instead.
801      * We only need to do this if the instruction doesn't specify OpSize since
802      * there is a VEX_L_W_OPSIZE table.
803      */
804 
805     const struct InstructionSpecifier *spec;
806     uint16_t instructionIDWithWBit;
807     const struct InstructionSpecifier *specWithWBit;
808 
809     spec = specifierForUID(instructionID);
810 
811     if (getIDWithAttrMask(&instructionIDWithWBit,
812                           insn,
813                           (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
814       insn->instructionID = instructionID;
815       insn->spec = spec;
816       return 0;
817     }
818 
819     specWithWBit = specifierForUID(instructionIDWithWBit);
820 
821     if (instructionID != instructionIDWithWBit) {
822       insn->instructionID = instructionIDWithWBit;
823       insn->spec = specWithWBit;
824     } else {
825       insn->instructionID = instructionID;
826       insn->spec = spec;
827     }
828     return 0;
829   }
830 
831   if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
832     /*
833      * The instruction tables make no distinction between instructions that
834      * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
835      * particular spot (i.e., many MMX operations).  In general we're
836      * conservative, but in the specific case where OpSize is present but not
837      * in the right place we check if there's a 16-bit operation.
838      */
839 
840     const struct InstructionSpecifier *spec;
841     uint16_t instructionIDWithOpsize;
842     const char *specName, *specWithOpSizeName;
843 
844     spec = specifierForUID(instructionID);
845 
846     if (getIDWithAttrMask(&instructionIDWithOpsize,
847                           insn,
848                           attrMask | ATTR_OPSIZE)) {
849       /*
850        * ModRM required with OpSize but not present; give up and return version
851        * without OpSize set
852        */
853 
854       insn->instructionID = instructionID;
855       insn->spec = spec;
856       return 0;
857     }
858 
859     specName = x86DisassemblerGetInstrName(instructionID, miiArg);
860     specWithOpSizeName =
861       x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
862 
863     if (is16BitEquivalent(specName, specWithOpSizeName)) {
864       insn->instructionID = instructionIDWithOpsize;
865       insn->spec = specifierForUID(instructionIDWithOpsize);
866     } else {
867       insn->instructionID = instructionID;
868       insn->spec = spec;
869     }
870     return 0;
871   }
872 
873   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
874       insn->rexPrefix & 0x01) {
875     /*
876      * NOOP shouldn't decode as NOOP if REX.b is set. Instead
877      * it should decode as XCHG %r8, %eax.
878      */
879 
880     const struct InstructionSpecifier *spec;
881     uint16_t instructionIDWithNewOpcode;
882     const struct InstructionSpecifier *specWithNewOpcode;
883 
884     spec = specifierForUID(instructionID);
885 
886     /* Borrow opcode from one of the other XCHGar opcodes */
887     insn->opcode = 0x91;
888 
889     if (getIDWithAttrMask(&instructionIDWithNewOpcode,
890                           insn,
891                           attrMask)) {
892       insn->opcode = 0x90;
893 
894       insn->instructionID = instructionID;
895       insn->spec = spec;
896       return 0;
897     }
898 
899     specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
900 
901     /* Change back */
902     insn->opcode = 0x90;
903 
904     insn->instructionID = instructionIDWithNewOpcode;
905     insn->spec = specWithNewOpcode;
906 
907     return 0;
908   }
909 
910   insn->instructionID = instructionID;
911   insn->spec = specifierForUID(insn->instructionID);
912 
913   return 0;
914 }
915 
916 /*
917  * readSIB - Consumes the SIB byte to determine addressing information for an
918  *   instruction.
919  *
920  * @param insn  - The instruction whose SIB byte is to be read.
921  * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.
922  */
readSIB(struct InternalInstruction * insn)923 static int readSIB(struct InternalInstruction* insn) {
924   SIBIndex sibIndexBase = 0;
925   SIBBase sibBaseBase = 0;
926   uint8_t index, base;
927 
928   dbgprintf(insn, "readSIB()");
929 
930   if (insn->consumedSIB)
931     return 0;
932 
933   insn->consumedSIB = TRUE;
934 
935   switch (insn->addressSize) {
936   case 2:
937     dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
938     return -1;
939     break;
940   case 4:
941     sibIndexBase = SIB_INDEX_EAX;
942     sibBaseBase = SIB_BASE_EAX;
943     break;
944   case 8:
945     sibIndexBase = SIB_INDEX_RAX;
946     sibBaseBase = SIB_BASE_RAX;
947     break;
948   }
949 
950   if (consumeByte(insn, &insn->sib))
951     return -1;
952 
953   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
954 
955   switch (index) {
956   case 0x4:
957     insn->sibIndex = SIB_INDEX_NONE;
958     break;
959   default:
960     insn->sibIndex = (SIBIndex)(sibIndexBase + index);
961     if (insn->sibIndex == SIB_INDEX_sib ||
962         insn->sibIndex == SIB_INDEX_sib64)
963       insn->sibIndex = SIB_INDEX_NONE;
964     break;
965   }
966 
967   switch (scaleFromSIB(insn->sib)) {
968   case 0:
969     insn->sibScale = 1;
970     break;
971   case 1:
972     insn->sibScale = 2;
973     break;
974   case 2:
975     insn->sibScale = 4;
976     break;
977   case 3:
978     insn->sibScale = 8;
979     break;
980   }
981 
982   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
983 
984   switch (base) {
985   case 0x5:
986     switch (modFromModRM(insn->modRM)) {
987     case 0x0:
988       insn->eaDisplacement = EA_DISP_32;
989       insn->sibBase = SIB_BASE_NONE;
990       break;
991     case 0x1:
992       insn->eaDisplacement = EA_DISP_8;
993       insn->sibBase = (insn->addressSize == 4 ?
994                        SIB_BASE_EBP : SIB_BASE_RBP);
995       break;
996     case 0x2:
997       insn->eaDisplacement = EA_DISP_32;
998       insn->sibBase = (insn->addressSize == 4 ?
999                        SIB_BASE_EBP : SIB_BASE_RBP);
1000       break;
1001     case 0x3:
1002       debug("Cannot have Mod = 0b11 and a SIB byte");
1003       return -1;
1004     }
1005     break;
1006   default:
1007     insn->sibBase = (SIBBase)(sibBaseBase + base);
1008     break;
1009   }
1010 
1011   return 0;
1012 }
1013 
1014 /*
1015  * readDisplacement - Consumes the displacement of an instruction.
1016  *
1017  * @param insn  - The instruction whose displacement is to be read.
1018  * @return      - 0 if the displacement byte was successfully read; nonzero
1019  *                otherwise.
1020  */
readDisplacement(struct InternalInstruction * insn)1021 static int readDisplacement(struct InternalInstruction* insn) {
1022   int8_t d8;
1023   int16_t d16;
1024   int32_t d32;
1025 
1026   dbgprintf(insn, "readDisplacement()");
1027 
1028   if (insn->consumedDisplacement)
1029     return 0;
1030 
1031   insn->consumedDisplacement = TRUE;
1032   insn->displacementOffset = insn->readerCursor - insn->startLocation;
1033 
1034   switch (insn->eaDisplacement) {
1035   case EA_DISP_NONE:
1036     insn->consumedDisplacement = FALSE;
1037     break;
1038   case EA_DISP_8:
1039     if (consumeInt8(insn, &d8))
1040       return -1;
1041     insn->displacement = d8;
1042     break;
1043   case EA_DISP_16:
1044     if (consumeInt16(insn, &d16))
1045       return -1;
1046     insn->displacement = d16;
1047     break;
1048   case EA_DISP_32:
1049     if (consumeInt32(insn, &d32))
1050       return -1;
1051     insn->displacement = d32;
1052     break;
1053   }
1054 
1055   insn->consumedDisplacement = TRUE;
1056   return 0;
1057 }
1058 
1059 /*
1060  * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1061  *   displacement) for an instruction and interprets it.
1062  *
1063  * @param insn  - The instruction whose addressing information is to be read.
1064  * @return      - 0 if the information was successfully read; nonzero otherwise.
1065  */
readModRM(struct InternalInstruction * insn)1066 static int readModRM(struct InternalInstruction* insn) {
1067   uint8_t mod, rm, reg;
1068 
1069   dbgprintf(insn, "readModRM()");
1070 
1071   if (insn->consumedModRM)
1072     return 0;
1073 
1074   if (consumeByte(insn, &insn->modRM))
1075     return -1;
1076   insn->consumedModRM = TRUE;
1077 
1078   mod     = modFromModRM(insn->modRM);
1079   rm      = rmFromModRM(insn->modRM);
1080   reg     = regFromModRM(insn->modRM);
1081 
1082   /*
1083    * This goes by insn->registerSize to pick the correct register, which messes
1084    * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in
1085    * fixupReg().
1086    */
1087   switch (insn->registerSize) {
1088   case 2:
1089     insn->regBase = MODRM_REG_AX;
1090     insn->eaRegBase = EA_REG_AX;
1091     break;
1092   case 4:
1093     insn->regBase = MODRM_REG_EAX;
1094     insn->eaRegBase = EA_REG_EAX;
1095     break;
1096   case 8:
1097     insn->regBase = MODRM_REG_RAX;
1098     insn->eaRegBase = EA_REG_RAX;
1099     break;
1100   }
1101 
1102   reg |= rFromREX(insn->rexPrefix) << 3;
1103   rm  |= bFromREX(insn->rexPrefix) << 3;
1104 
1105   insn->reg = (Reg)(insn->regBase + reg);
1106 
1107   switch (insn->addressSize) {
1108   case 2:
1109     insn->eaBaseBase = EA_BASE_BX_SI;
1110 
1111     switch (mod) {
1112     case 0x0:
1113       if (rm == 0x6) {
1114         insn->eaBase = EA_BASE_NONE;
1115         insn->eaDisplacement = EA_DISP_16;
1116         if (readDisplacement(insn))
1117           return -1;
1118       } else {
1119         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1120         insn->eaDisplacement = EA_DISP_NONE;
1121       }
1122       break;
1123     case 0x1:
1124       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1125       insn->eaDisplacement = EA_DISP_8;
1126       if (readDisplacement(insn))
1127         return -1;
1128       break;
1129     case 0x2:
1130       insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1131       insn->eaDisplacement = EA_DISP_16;
1132       if (readDisplacement(insn))
1133         return -1;
1134       break;
1135     case 0x3:
1136       insn->eaBase = (EABase)(insn->eaRegBase + rm);
1137       if (readDisplacement(insn))
1138         return -1;
1139       break;
1140     }
1141     break;
1142   case 4:
1143   case 8:
1144     insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1145 
1146     switch (mod) {
1147     case 0x0:
1148       insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1149       switch (rm) {
1150       case 0x4:
1151       case 0xc:   /* in case REXW.b is set */
1152         insn->eaBase = (insn->addressSize == 4 ?
1153                         EA_BASE_sib : EA_BASE_sib64);
1154         readSIB(insn);
1155         if (readDisplacement(insn))
1156           return -1;
1157         break;
1158       case 0x5:
1159         insn->eaBase = EA_BASE_NONE;
1160         insn->eaDisplacement = EA_DISP_32;
1161         if (readDisplacement(insn))
1162           return -1;
1163         break;
1164       default:
1165         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1166         break;
1167       }
1168       break;
1169     case 0x1:
1170     case 0x2:
1171       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1172       switch (rm) {
1173       case 0x4:
1174       case 0xc:   /* in case REXW.b is set */
1175         insn->eaBase = EA_BASE_sib;
1176         readSIB(insn);
1177         if (readDisplacement(insn))
1178           return -1;
1179         break;
1180       default:
1181         insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1182         if (readDisplacement(insn))
1183           return -1;
1184         break;
1185       }
1186       break;
1187     case 0x3:
1188       insn->eaDisplacement = EA_DISP_NONE;
1189       insn->eaBase = (EABase)(insn->eaRegBase + rm);
1190       break;
1191     }
1192     break;
1193   } /* switch (insn->addressSize) */
1194 
1195   return 0;
1196 }
1197 
1198 #define GENERIC_FIXUP_FUNC(name, base, prefix)            \
1199   static uint8_t name(struct InternalInstruction *insn,   \
1200                       OperandType type,                   \
1201                       uint8_t index,                      \
1202                       uint8_t *valid) {                   \
1203     *valid = 1;                                           \
1204     switch (type) {                                       \
1205     default:                                              \
1206       debug("Unhandled register type");                   \
1207       *valid = 0;                                         \
1208       return 0;                                           \
1209     case TYPE_Rv:                                         \
1210       return base + index;                                \
1211     case TYPE_R8:                                         \
1212       if (insn->rexPrefix &&                              \
1213          index >= 4 && index <= 7) {                      \
1214         return prefix##_SPL + (index - 4);                \
1215       } else {                                            \
1216         return prefix##_AL + index;                       \
1217       }                                                   \
1218     case TYPE_R16:                                        \
1219       return prefix##_AX + index;                         \
1220     case TYPE_R32:                                        \
1221       return prefix##_EAX + index;                        \
1222     case TYPE_R64:                                        \
1223       return prefix##_RAX + index;                        \
1224     case TYPE_XMM256:                                     \
1225       return prefix##_YMM0 + index;                       \
1226     case TYPE_XMM128:                                     \
1227     case TYPE_XMM64:                                      \
1228     case TYPE_XMM32:                                      \
1229     case TYPE_XMM:                                        \
1230       return prefix##_XMM0 + index;                       \
1231     case TYPE_MM64:                                       \
1232     case TYPE_MM32:                                       \
1233     case TYPE_MM:                                         \
1234       if (index > 7)                                      \
1235         *valid = 0;                                       \
1236       return prefix##_MM0 + index;                        \
1237     case TYPE_SEGMENTREG:                                 \
1238       if (index > 5)                                      \
1239         *valid = 0;                                       \
1240       return prefix##_ES + index;                         \
1241     case TYPE_DEBUGREG:                                   \
1242       if (index > 7)                                      \
1243         *valid = 0;                                       \
1244       return prefix##_DR0 + index;                        \
1245     case TYPE_CONTROLREG:                                 \
1246       if (index > 8)                                      \
1247         *valid = 0;                                       \
1248       return prefix##_CR0 + index;                        \
1249     }                                                     \
1250   }
1251 
1252 /*
1253  * fixup*Value - Consults an operand type to determine the meaning of the
1254  *   reg or R/M field.  If the operand is an XMM operand, for example, an
1255  *   operand would be XMM0 instead of AX, which readModRM() would otherwise
1256  *   misinterpret it as.
1257  *
1258  * @param insn  - The instruction containing the operand.
1259  * @param type  - The operand type.
1260  * @param index - The existing value of the field as reported by readModRM().
1261  * @param valid - The address of a uint8_t.  The target is set to 1 if the
1262  *                field is valid for the register class; 0 if not.
1263  * @return      - The proper value.
1264  */
1265 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
1266 GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
1267 
1268 /*
1269  * fixupReg - Consults an operand specifier to determine which of the
1270  *   fixup*Value functions to use in correcting readModRM()'ss interpretation.
1271  *
1272  * @param insn  - See fixup*Value().
1273  * @param op    - The operand specifier.
1274  * @return      - 0 if fixup was successful; -1 if the register returned was
1275  *                invalid for its class.
1276  */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)1277 static int fixupReg(struct InternalInstruction *insn,
1278                     const struct OperandSpecifier *op) {
1279   uint8_t valid;
1280 
1281   dbgprintf(insn, "fixupReg()");
1282 
1283   switch ((OperandEncoding)op->encoding) {
1284   default:
1285     debug("Expected a REG or R/M encoding in fixupReg");
1286     return -1;
1287   case ENCODING_VVVV:
1288     insn->vvvv = (Reg)fixupRegValue(insn,
1289                                     (OperandType)op->type,
1290                                     insn->vvvv,
1291                                     &valid);
1292     if (!valid)
1293       return -1;
1294     break;
1295   case ENCODING_REG:
1296     insn->reg = (Reg)fixupRegValue(insn,
1297                                    (OperandType)op->type,
1298                                    insn->reg - insn->regBase,
1299                                    &valid);
1300     if (!valid)
1301       return -1;
1302     break;
1303   case ENCODING_RM:
1304     if (insn->eaBase >= insn->eaRegBase) {
1305       insn->eaBase = (EABase)fixupRMValue(insn,
1306                                           (OperandType)op->type,
1307                                           insn->eaBase - insn->eaRegBase,
1308                                           &valid);
1309       if (!valid)
1310         return -1;
1311     }
1312     break;
1313   }
1314 
1315   return 0;
1316 }
1317 
1318 /*
1319  * readOpcodeModifier - Reads an operand from the opcode field of an
1320  *   instruction.  Handles AddRegFrm instructions.
1321  *
1322  * @param insn    - The instruction whose opcode field is to be read.
1323  * @param inModRM - Indicates that the opcode field is to be read from the
1324  *                  ModR/M extension; useful for escape opcodes
1325  * @return        - 0 on success; nonzero otherwise.
1326  */
readOpcodeModifier(struct InternalInstruction * insn)1327 static int readOpcodeModifier(struct InternalInstruction* insn) {
1328   dbgprintf(insn, "readOpcodeModifier()");
1329 
1330   if (insn->consumedOpcodeModifier)
1331     return 0;
1332 
1333   insn->consumedOpcodeModifier = TRUE;
1334 
1335   switch (insn->spec->modifierType) {
1336   default:
1337     debug("Unknown modifier type.");
1338     return -1;
1339   case MODIFIER_NONE:
1340     debug("No modifier but an operand expects one.");
1341     return -1;
1342   case MODIFIER_OPCODE:
1343     insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1344     return 0;
1345   case MODIFIER_MODRM:
1346     insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1347     return 0;
1348   }
1349 }
1350 
1351 /*
1352  * readOpcodeRegister - Reads an operand from the opcode field of an
1353  *   instruction and interprets it appropriately given the operand width.
1354  *   Handles AddRegFrm instructions.
1355  *
1356  * @param insn  - See readOpcodeModifier().
1357  * @param size  - The width (in bytes) of the register being specified.
1358  *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1359  *                RAX.
1360  * @return      - 0 on success; nonzero otherwise.
1361  */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1362 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1363   dbgprintf(insn, "readOpcodeRegister()");
1364 
1365   if (readOpcodeModifier(insn))
1366     return -1;
1367 
1368   if (size == 0)
1369     size = insn->registerSize;
1370 
1371   switch (size) {
1372   case 1:
1373     insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1374                                                   | insn->opcodeModifier));
1375     if (insn->rexPrefix &&
1376         insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1377         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1378       insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1379                                    + (insn->opcodeRegister - MODRM_REG_AL - 4));
1380     }
1381 
1382     break;
1383   case 2:
1384     insn->opcodeRegister = (Reg)(MODRM_REG_AX
1385                                  + ((bFromREX(insn->rexPrefix) << 3)
1386                                     | insn->opcodeModifier));
1387     break;
1388   case 4:
1389     insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1390                                  + ((bFromREX(insn->rexPrefix) << 3)
1391                                     | insn->opcodeModifier));
1392     break;
1393   case 8:
1394     insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1395                                  + ((bFromREX(insn->rexPrefix) << 3)
1396                                     | insn->opcodeModifier));
1397     break;
1398   }
1399 
1400   return 0;
1401 }
1402 
1403 /*
1404  * readImmediate - Consumes an immediate operand from an instruction, given the
1405  *   desired operand size.
1406  *
1407  * @param insn  - The instruction whose operand is to be read.
1408  * @param size  - The width (in bytes) of the operand.
1409  * @return      - 0 if the immediate was successfully consumed; nonzero
1410  *                otherwise.
1411  */
readImmediate(struct InternalInstruction * insn,uint8_t size)1412 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1413   uint8_t imm8;
1414   uint16_t imm16;
1415   uint32_t imm32;
1416   uint64_t imm64;
1417 
1418   dbgprintf(insn, "readImmediate()");
1419 
1420   if (insn->numImmediatesConsumed == 2) {
1421     debug("Already consumed two immediates");
1422     return -1;
1423   }
1424 
1425   if (size == 0)
1426     size = insn->immediateSize;
1427   else
1428     insn->immediateSize = size;
1429   insn->immediateOffset = insn->readerCursor - insn->startLocation;
1430 
1431   switch (size) {
1432   case 1:
1433     if (consumeByte(insn, &imm8))
1434       return -1;
1435     insn->immediates[insn->numImmediatesConsumed] = imm8;
1436     break;
1437   case 2:
1438     if (consumeUInt16(insn, &imm16))
1439       return -1;
1440     insn->immediates[insn->numImmediatesConsumed] = imm16;
1441     break;
1442   case 4:
1443     if (consumeUInt32(insn, &imm32))
1444       return -1;
1445     insn->immediates[insn->numImmediatesConsumed] = imm32;
1446     break;
1447   case 8:
1448     if (consumeUInt64(insn, &imm64))
1449       return -1;
1450     insn->immediates[insn->numImmediatesConsumed] = imm64;
1451     break;
1452   }
1453 
1454   insn->numImmediatesConsumed++;
1455 
1456   return 0;
1457 }
1458 
1459 /*
1460  * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1461  *
1462  * @param insn  - The instruction whose operand is to be read.
1463  * @return      - 0 if the vvvv was successfully consumed; nonzero
1464  *                otherwise.
1465  */
readVVVV(struct InternalInstruction * insn)1466 static int readVVVV(struct InternalInstruction* insn) {
1467   dbgprintf(insn, "readVVVV()");
1468 
1469   if (insn->vexSize == 3)
1470     insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
1471   else if (insn->vexSize == 2)
1472     insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
1473   else
1474     return -1;
1475 
1476   if (insn->mode != MODE_64BIT)
1477     insn->vvvv &= 0x7;
1478 
1479   return 0;
1480 }
1481 
1482 /*
1483  * readOperands - Consults the specifier for an instruction and consumes all
1484  *   operands for that instruction, interpreting them as it goes.
1485  *
1486  * @param insn  - The instruction whose operands are to be read and interpreted.
1487  * @return      - 0 if all operands could be read; nonzero otherwise.
1488  */
readOperands(struct InternalInstruction * insn)1489 static int readOperands(struct InternalInstruction* insn) {
1490   int index;
1491   int hasVVVV, needVVVV;
1492   int sawRegImm = 0;
1493 
1494   dbgprintf(insn, "readOperands()");
1495 
1496   /* If non-zero vvvv specified, need to make sure one of the operands
1497      uses it. */
1498   hasVVVV = !readVVVV(insn);
1499   needVVVV = hasVVVV && (insn->vvvv != 0);
1500 
1501   for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1502     switch (x86OperandSets[insn->spec->operands][index].encoding) {
1503     case ENCODING_NONE:
1504       break;
1505     case ENCODING_REG:
1506     case ENCODING_RM:
1507       if (readModRM(insn))
1508         return -1;
1509       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1510         return -1;
1511       break;
1512     case ENCODING_CB:
1513     case ENCODING_CW:
1514     case ENCODING_CD:
1515     case ENCODING_CP:
1516     case ENCODING_CO:
1517     case ENCODING_CT:
1518       dbgprintf(insn, "We currently don't hande code-offset encodings");
1519       return -1;
1520     case ENCODING_IB:
1521       if (sawRegImm) {
1522         /* Saw a register immediate so don't read again and instead split the
1523            previous immediate.  FIXME: This is a hack. */
1524         insn->immediates[insn->numImmediatesConsumed] =
1525           insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1526         ++insn->numImmediatesConsumed;
1527         break;
1528       }
1529       if (readImmediate(insn, 1))
1530         return -1;
1531       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
1532           insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1533         return -1;
1534       if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
1535           insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1536         return -1;
1537       if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1538           x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
1539         sawRegImm = 1;
1540       break;
1541     case ENCODING_IW:
1542       if (readImmediate(insn, 2))
1543         return -1;
1544       break;
1545     case ENCODING_ID:
1546       if (readImmediate(insn, 4))
1547         return -1;
1548       break;
1549     case ENCODING_IO:
1550       if (readImmediate(insn, 8))
1551         return -1;
1552       break;
1553     case ENCODING_Iv:
1554       if (readImmediate(insn, insn->immediateSize))
1555         return -1;
1556       break;
1557     case ENCODING_Ia:
1558       if (readImmediate(insn, insn->addressSize))
1559         return -1;
1560       break;
1561     case ENCODING_RB:
1562       if (readOpcodeRegister(insn, 1))
1563         return -1;
1564       break;
1565     case ENCODING_RW:
1566       if (readOpcodeRegister(insn, 2))
1567         return -1;
1568       break;
1569     case ENCODING_RD:
1570       if (readOpcodeRegister(insn, 4))
1571         return -1;
1572       break;
1573     case ENCODING_RO:
1574       if (readOpcodeRegister(insn, 8))
1575         return -1;
1576       break;
1577     case ENCODING_Rv:
1578       if (readOpcodeRegister(insn, 0))
1579         return -1;
1580       break;
1581     case ENCODING_I:
1582       if (readOpcodeModifier(insn))
1583         return -1;
1584       break;
1585     case ENCODING_VVVV:
1586       needVVVV = 0; /* Mark that we have found a VVVV operand. */
1587       if (!hasVVVV)
1588         return -1;
1589       if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1590         return -1;
1591       break;
1592     case ENCODING_DUP:
1593       break;
1594     default:
1595       dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1596       return -1;
1597     }
1598   }
1599 
1600   /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1601   if (needVVVV) return -1;
1602 
1603   return 0;
1604 }
1605 
1606 /*
1607  * decodeInstruction - Reads and interprets a full instruction provided by the
1608  *   user.
1609  *
1610  * @param insn      - A pointer to the instruction to be populated.  Must be
1611  *                    pre-allocated.
1612  * @param reader    - The function to be used to read the instruction's bytes.
1613  * @param readerArg - A generic argument to be passed to the reader to store
1614  *                    any internal state.
1615  * @param logger    - If non-NULL, the function to be used to write log messages
1616  *                    and warnings.
1617  * @param loggerArg - A generic argument to be passed to the logger to store
1618  *                    any internal state.
1619  * @param startLoc  - The address (in the reader's address space) of the first
1620  *                    byte in the instruction.
1621  * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1622  *                    decode the instruction in.
1623  * @return          - 0 if the instruction's memory could be read; nonzero if
1624  *                    not.
1625  */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,dlog_t logger,void * loggerArg,const void * miiArg,uint64_t startLoc,DisassemblerMode mode)1626 int decodeInstruction(struct InternalInstruction* insn,
1627                       byteReader_t reader,
1628                       const void* readerArg,
1629                       dlog_t logger,
1630                       void* loggerArg,
1631                       const void* miiArg,
1632                       uint64_t startLoc,
1633                       DisassemblerMode mode) {
1634   memset(insn, 0, sizeof(struct InternalInstruction));
1635 
1636   insn->reader = reader;
1637   insn->readerArg = readerArg;
1638   insn->dlog = logger;
1639   insn->dlogArg = loggerArg;
1640   insn->startLocation = startLoc;
1641   insn->readerCursor = startLoc;
1642   insn->mode = mode;
1643   insn->numImmediatesConsumed = 0;
1644 
1645   if (readPrefixes(insn)       ||
1646       readOpcode(insn)         ||
1647       getID(insn, miiArg)      ||
1648       insn->instructionID == 0 ||
1649       readOperands(insn))
1650     return -1;
1651 
1652   insn->operands = &x86OperandSets[insn->spec->operands][0];
1653 
1654   insn->length = insn->readerCursor - insn->startLocation;
1655 
1656   dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1657             startLoc, insn->readerCursor, insn->length);
1658 
1659   if (insn->length > 15)
1660     dbgprintf(insn, "Instruction exceeds 15-byte limit");
1661 
1662   return 0;
1663 }
1664