1 /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for memset() */
20
21 #include "X86DisassemblerDecoder.h"
22
23 #include "X86GenDisassemblerTables.inc"
24
/* Truth values used for the decoder's flag fields and boolean returns. */
#define TRUE 1
#define FALSE 0

/*
 * Local lowercase boolean type.
 * NOTE(review): the functions in this file are declared with BOOL, which is
 * not defined here - presumably it comes from X86DisassemblerDecoder.h;
 * confirm.
 */
typedef int8_t bool;

/*
 * debug - Forwards the message s to x86DisassemblerDebug() together with the
 * current file name and line number. Expands to an empty statement when
 * NDEBUG is defined.
 */
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
35
36
37 /*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
contextForAttrs(uint8_t attrMask)45 static InstructionContext contextForAttrs(uint8_t attrMask) {
46 return CONTEXTS_SYM[attrMask];
47 }
48
49 /*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
modRMRequired(OpcodeType type,InstructionContext insnContext,uint8_t opcode)60 static int modRMRequired(OpcodeType type,
61 InstructionContext insnContext,
62 uint8_t opcode) {
63 const struct ContextDecision* decision = 0;
64
65 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
78 case THREEBYTE_A6:
79 decision = &THREEBYTEA6_SYM;
80 break;
81 case THREEBYTE_A7:
82 decision = &THREEBYTEA7_SYM;
83 break;
84 }
85
86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
88 }
89
90 /*
91 * decode - Reads the appropriate instruction table to obtain the unique ID of
92 * an instruction.
93 *
94 * @param type - See modRMRequired().
95 * @param insnContext - See modRMRequired().
96 * @param opcode - See modRMRequired().
97 * @param modRM - The ModR/M byte if required, or any value if not.
98 * @return - The UID of the instruction, or 0 on failure.
99 */
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)100 static InstrUID decode(OpcodeType type,
101 InstructionContext insnContext,
102 uint8_t opcode,
103 uint8_t modRM) {
104 const struct ModRMDecision* dec = 0;
105
106 switch (type) {
107 case ONEBYTE:
108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109 break;
110 case TWOBYTE:
111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112 break;
113 case THREEBYTE_38:
114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115 break;
116 case THREEBYTE_3A:
117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118 break;
119 case THREEBYTE_A6:
120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121 break;
122 case THREEBYTE_A7:
123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124 break;
125 }
126
127 switch (dec->modrm_type) {
128 default:
129 debug("Corrupt table! Unknown modrm_type");
130 return 0;
131 case MODRM_ONEENTRY:
132 return modRMTable[dec->instructionIDs];
133 case MODRM_SPLITRM:
134 if (modFromModRM(modRM) == 0x3)
135 return modRMTable[dec->instructionIDs+1];
136 return modRMTable[dec->instructionIDs];
137 case MODRM_SPLITREG:
138 if (modFromModRM(modRM) == 0x3)
139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
141 case MODRM_FULL:
142 return modRMTable[dec->instructionIDs+modRM];
143 }
144 }
145
146 /*
147 * specifierForUID - Given a UID, returns the name and operand specification for
148 * that instruction.
149 *
150 * @param uid - The unique ID for the instruction. This should be returned by
151 * decode(); specifierForUID will not check bounds.
152 * @return - A pointer to the specification for that instruction.
153 */
specifierForUID(InstrUID uid)154 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
155 return &INSTRUCTIONS_SYM[uid];
156 }
157
158 /*
159 * consumeByte - Uses the reader function provided by the user to consume one
160 * byte from the instruction's memory and advance the cursor.
161 *
162 * @param insn - The instruction with the reader function to use. The cursor
163 * for this instruction is advanced.
164 * @param byte - A pointer to a pre-allocated memory buffer to be populated
165 * with the data read.
166 * @return - 0 if the read was successful; nonzero otherwise.
167 */
consumeByte(struct InternalInstruction * insn,uint8_t * byte)168 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
169 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
170
171 if (!ret)
172 ++(insn->readerCursor);
173
174 return ret;
175 }
176
177 /*
178 * lookAtByte - Like consumeByte, but does not advance the cursor.
179 *
180 * @param insn - See consumeByte().
181 * @param byte - See consumeByte().
182 * @return - See consumeByte().
183 */
lookAtByte(struct InternalInstruction * insn,uint8_t * byte)184 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
185 return insn->reader(insn->readerArg, byte, insn->readerCursor);
186 }
187
unconsumeByte(struct InternalInstruction * insn)188 static void unconsumeByte(struct InternalInstruction* insn) {
189 insn->readerCursor--;
190 }
191
/*
 * CONSUME_FUNC - Defines a static reader function called `name` that reads
 * sizeof(type) consecutive bytes through insn->reader, starting at the current
 * cursor, and combines them with the byte at the lowest offset in the least
 * significant position. *ptr is written and the cursor is advanced only after
 * every byte has been read; if any read fails, the reader's nonzero result is
 * returned immediately and neither is modified.
 */
#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    type combined = 0;                                            \
    unsigned offset;                                              \
    for (offset = 0; offset < sizeof(type); ++offset) {           \
      uint8_t byte;                                               \
      int ret = insn->reader(insn->readerArg,                     \
                             &byte,                               \
                             insn->readerCursor + offset);        \
      if (ret)                                                    \
        return ret;                                               \
      combined = combined | ((uint64_t)byte << (offset * 8));     \
    }                                                             \
    *ptr = combined;                                              \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }
209
/*
 * consume* - Use the reader function provided by the user to consume data
 *   values of various sizes from the instruction's memory and advance the
 *   cursor appropriately. Each value is assembled byte by byte, least
 *   significant byte first, so the result does not depend on the byte order
 *   of the host.
 *
 * @param insn    - See consumeByte().
 * @param ptr     - A pointer to pre-allocated memory of the appropriate size,
 *                  to be populated with the data read.
 * @return        - See consumeByte().
 */
CONSUME_FUNC(consumeInt8, int8_t)
CONSUME_FUNC(consumeInt16, int16_t)
CONSUME_FUNC(consumeInt32, int32_t)
CONSUME_FUNC(consumeUInt16, uint16_t)
CONSUME_FUNC(consumeUInt32, uint32_t)
CONSUME_FUNC(consumeUInt64, uint64_t)
226
227 /*
228 * dbgprintf - Uses the logging function provided by the user to log a single
229 * message, typically without a carriage-return.
230 *
231 * @param insn - The instruction containing the logging function.
232 * @param format - See printf().
233 * @param ... - See printf().
234 */
235 static void dbgprintf(struct InternalInstruction* insn,
236 const char* format,
237 ...) {
238 char buffer[256];
239 va_list ap;
240
241 if (!insn->dlog)
242 return;
243
244 va_start(ap, format);
245 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
246 va_end(ap);
247
248 insn->dlog(insn->dlogArg, buffer);
249
250 return;
251 }
252
253 /*
254 * setPrefixPresent - Marks that a particular prefix is present at a particular
255 * location.
256 *
257 * @param insn - The instruction to be marked as having the prefix.
258 * @param prefix - The prefix that is present.
259 * @param location - The location where the prefix is located (in the address
260 * space of the instruction's reader).
261 */
setPrefixPresent(struct InternalInstruction * insn,uint8_t prefix,uint64_t location)262 static void setPrefixPresent(struct InternalInstruction* insn,
263 uint8_t prefix,
264 uint64_t location)
265 {
266 insn->prefixPresent[prefix] = 1;
267 insn->prefixLocations[prefix] = location;
268 }
269
270 /*
271 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
272 * present at a given location.
273 *
274 * @param insn - The instruction to be queried.
275 * @param prefix - The prefix.
276 * @param location - The location to query.
277 * @return - Whether the prefix is at that location.
278 */
isPrefixAtLocation(struct InternalInstruction * insn,uint8_t prefix,uint64_t location)279 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
280 uint8_t prefix,
281 uint64_t location)
282 {
283 if (insn->prefixPresent[prefix] == 1 &&
284 insn->prefixLocations[prefix] == location)
285 return TRUE;
286 else
287 return FALSE;
288 }
289
290 /*
291 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
292 * instruction as having them. Also sets the instruction's default operand,
293 * address, and other relevant data sizes to report operands correctly.
294 *
295 * @param insn - The instruction whose prefixes are to be read.
296 * @return - 0 if the instruction could be read until the end of the prefix
297 * bytes, and no prefixes conflicted; nonzero otherwise.
298 */
readPrefixes(struct InternalInstruction * insn)299 static int readPrefixes(struct InternalInstruction* insn) {
300 BOOL isPrefix = TRUE;
301 BOOL prefixGroups[4] = { FALSE };
302 uint64_t prefixLocation;
303 uint8_t byte = 0;
304
305 BOOL hasAdSize = FALSE;
306 BOOL hasOpSize = FALSE;
307
308 dbgprintf(insn, "readPrefixes()");
309
310 while (isPrefix) {
311 prefixLocation = insn->readerCursor;
312
313 if (consumeByte(insn, &byte))
314 return -1;
315
316 /*
317 * If the first byte is a LOCK prefix break and let it be disassembled
318 * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
319 * FIXME there is currently no way to get the disassembler to print the
320 * lock prefix if it is not the first byte.
321 */
322 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
323 break;
324
325 switch (byte) {
326 case 0xf0: /* LOCK */
327 case 0xf2: /* REPNE/REPNZ */
328 case 0xf3: /* REP or REPE/REPZ */
329 if (prefixGroups[0])
330 dbgprintf(insn, "Redundant Group 1 prefix");
331 prefixGroups[0] = TRUE;
332 setPrefixPresent(insn, byte, prefixLocation);
333 break;
334 case 0x2e: /* CS segment override -OR- Branch not taken */
335 case 0x36: /* SS segment override -OR- Branch taken */
336 case 0x3e: /* DS segment override */
337 case 0x26: /* ES segment override */
338 case 0x64: /* FS segment override */
339 case 0x65: /* GS segment override */
340 switch (byte) {
341 case 0x2e:
342 insn->segmentOverride = SEG_OVERRIDE_CS;
343 break;
344 case 0x36:
345 insn->segmentOverride = SEG_OVERRIDE_SS;
346 break;
347 case 0x3e:
348 insn->segmentOverride = SEG_OVERRIDE_DS;
349 break;
350 case 0x26:
351 insn->segmentOverride = SEG_OVERRIDE_ES;
352 break;
353 case 0x64:
354 insn->segmentOverride = SEG_OVERRIDE_FS;
355 break;
356 case 0x65:
357 insn->segmentOverride = SEG_OVERRIDE_GS;
358 break;
359 default:
360 debug("Unhandled override");
361 return -1;
362 }
363 if (prefixGroups[1])
364 dbgprintf(insn, "Redundant Group 2 prefix");
365 prefixGroups[1] = TRUE;
366 setPrefixPresent(insn, byte, prefixLocation);
367 break;
368 case 0x66: /* Operand-size override */
369 if (prefixGroups[2])
370 dbgprintf(insn, "Redundant Group 3 prefix");
371 prefixGroups[2] = TRUE;
372 hasOpSize = TRUE;
373 setPrefixPresent(insn, byte, prefixLocation);
374 break;
375 case 0x67: /* Address-size override */
376 if (prefixGroups[3])
377 dbgprintf(insn, "Redundant Group 4 prefix");
378 prefixGroups[3] = TRUE;
379 hasAdSize = TRUE;
380 setPrefixPresent(insn, byte, prefixLocation);
381 break;
382 default: /* Not a prefix byte */
383 isPrefix = FALSE;
384 break;
385 }
386
387 if (isPrefix)
388 dbgprintf(insn, "Found prefix 0x%hhx", byte);
389 }
390
391 insn->vexSize = 0;
392
393 if (byte == 0xc4) {
394 uint8_t byte1;
395
396 if (lookAtByte(insn, &byte1)) {
397 dbgprintf(insn, "Couldn't read second byte of VEX");
398 return -1;
399 }
400
401 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
402 insn->vexSize = 3;
403 insn->necessaryPrefixLocation = insn->readerCursor - 1;
404 }
405 else {
406 unconsumeByte(insn);
407 insn->necessaryPrefixLocation = insn->readerCursor - 1;
408 }
409
410 if (insn->vexSize == 3) {
411 insn->vexPrefix[0] = byte;
412 consumeByte(insn, &insn->vexPrefix[1]);
413 consumeByte(insn, &insn->vexPrefix[2]);
414
415 /* We simulate the REX prefix for simplicity's sake */
416
417 if (insn->mode == MODE_64BIT) {
418 insn->rexPrefix = 0x40
419 | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
420 | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
421 | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
422 | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
423 }
424
425 switch (ppFromVEX3of3(insn->vexPrefix[2]))
426 {
427 default:
428 break;
429 case VEX_PREFIX_66:
430 hasOpSize = TRUE;
431 break;
432 }
433
434 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
435 }
436 }
437 else if (byte == 0xc5) {
438 uint8_t byte1;
439
440 if (lookAtByte(insn, &byte1)) {
441 dbgprintf(insn, "Couldn't read second byte of VEX");
442 return -1;
443 }
444
445 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
446 insn->vexSize = 2;
447 }
448 else {
449 unconsumeByte(insn);
450 }
451
452 if (insn->vexSize == 2) {
453 insn->vexPrefix[0] = byte;
454 consumeByte(insn, &insn->vexPrefix[1]);
455
456 if (insn->mode == MODE_64BIT) {
457 insn->rexPrefix = 0x40
458 | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
459 }
460
461 switch (ppFromVEX2of2(insn->vexPrefix[1]))
462 {
463 default:
464 break;
465 case VEX_PREFIX_66:
466 hasOpSize = TRUE;
467 break;
468 }
469
470 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
471 }
472 }
473 else {
474 if (insn->mode == MODE_64BIT) {
475 if ((byte & 0xf0) == 0x40) {
476 uint8_t opcodeByte;
477
478 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
479 dbgprintf(insn, "Redundant REX prefix");
480 return -1;
481 }
482
483 insn->rexPrefix = byte;
484 insn->necessaryPrefixLocation = insn->readerCursor - 2;
485
486 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
487 } else {
488 unconsumeByte(insn);
489 insn->necessaryPrefixLocation = insn->readerCursor - 1;
490 }
491 } else {
492 unconsumeByte(insn);
493 insn->necessaryPrefixLocation = insn->readerCursor - 1;
494 }
495 }
496
497 if (insn->mode == MODE_16BIT) {
498 insn->registerSize = (hasOpSize ? 4 : 2);
499 insn->addressSize = (hasAdSize ? 4 : 2);
500 insn->displacementSize = (hasAdSize ? 4 : 2);
501 insn->immediateSize = (hasOpSize ? 4 : 2);
502 } else if (insn->mode == MODE_32BIT) {
503 insn->registerSize = (hasOpSize ? 2 : 4);
504 insn->addressSize = (hasAdSize ? 2 : 4);
505 insn->displacementSize = (hasAdSize ? 2 : 4);
506 insn->immediateSize = (hasOpSize ? 2 : 4);
507 } else if (insn->mode == MODE_64BIT) {
508 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
509 insn->registerSize = 8;
510 insn->addressSize = (hasAdSize ? 4 : 8);
511 insn->displacementSize = 4;
512 insn->immediateSize = 4;
513 } else if (insn->rexPrefix) {
514 insn->registerSize = (hasOpSize ? 2 : 4);
515 insn->addressSize = (hasAdSize ? 4 : 8);
516 insn->displacementSize = (hasOpSize ? 2 : 4);
517 insn->immediateSize = (hasOpSize ? 2 : 4);
518 } else {
519 insn->registerSize = (hasOpSize ? 2 : 4);
520 insn->addressSize = (hasAdSize ? 4 : 8);
521 insn->displacementSize = (hasOpSize ? 2 : 4);
522 insn->immediateSize = (hasOpSize ? 2 : 4);
523 }
524 }
525
526 return 0;
527 }
528
529 /*
530 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
531 * extended or escape opcodes).
532 *
533 * @param insn - The instruction whose opcode is to be read.
534 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
535 */
readOpcode(struct InternalInstruction * insn)536 static int readOpcode(struct InternalInstruction* insn) {
537 /* Determine the length of the primary opcode */
538
539 uint8_t current;
540
541 dbgprintf(insn, "readOpcode()");
542
543 insn->opcodeType = ONEBYTE;
544
545 if (insn->vexSize == 3)
546 {
547 switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
548 {
549 default:
550 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
551 return -1;
552 case 0:
553 break;
554 case VEX_LOB_0F:
555 insn->twoByteEscape = 0x0f;
556 insn->opcodeType = TWOBYTE;
557 return consumeByte(insn, &insn->opcode);
558 case VEX_LOB_0F38:
559 insn->twoByteEscape = 0x0f;
560 insn->threeByteEscape = 0x38;
561 insn->opcodeType = THREEBYTE_38;
562 return consumeByte(insn, &insn->opcode);
563 case VEX_LOB_0F3A:
564 insn->twoByteEscape = 0x0f;
565 insn->threeByteEscape = 0x3a;
566 insn->opcodeType = THREEBYTE_3A;
567 return consumeByte(insn, &insn->opcode);
568 }
569 }
570 else if (insn->vexSize == 2)
571 {
572 insn->twoByteEscape = 0x0f;
573 insn->opcodeType = TWOBYTE;
574 return consumeByte(insn, &insn->opcode);
575 }
576
577 if (consumeByte(insn, ¤t))
578 return -1;
579
580 if (current == 0x0f) {
581 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
582
583 insn->twoByteEscape = current;
584
585 if (consumeByte(insn, ¤t))
586 return -1;
587
588 if (current == 0x38) {
589 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
590
591 insn->threeByteEscape = current;
592
593 if (consumeByte(insn, ¤t))
594 return -1;
595
596 insn->opcodeType = THREEBYTE_38;
597 } else if (current == 0x3a) {
598 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
599
600 insn->threeByteEscape = current;
601
602 if (consumeByte(insn, ¤t))
603 return -1;
604
605 insn->opcodeType = THREEBYTE_3A;
606 } else if (current == 0xa6) {
607 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
608
609 insn->threeByteEscape = current;
610
611 if (consumeByte(insn, ¤t))
612 return -1;
613
614 insn->opcodeType = THREEBYTE_A6;
615 } else if (current == 0xa7) {
616 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
617
618 insn->threeByteEscape = current;
619
620 if (consumeByte(insn, ¤t))
621 return -1;
622
623 insn->opcodeType = THREEBYTE_A7;
624 } else {
625 dbgprintf(insn, "Didn't find a three-byte escape prefix");
626
627 insn->opcodeType = TWOBYTE;
628 }
629 }
630
631 /*
632 * At this point we have consumed the full opcode.
633 * Anything we consume from here on must be unconsumed.
634 */
635
636 insn->opcode = current;
637
638 return 0;
639 }
640
641 static int readModRM(struct InternalInstruction* insn);
642
643 /*
644 * getIDWithAttrMask - Determines the ID of an instruction, consuming
645 * the ModR/M byte as appropriate for extended and escape opcodes,
646 * and using a supplied attribute mask.
647 *
648 * @param instructionID - A pointer whose target is filled in with the ID of the
649 * instruction.
650 * @param insn - The instruction whose ID is to be determined.
651 * @param attrMask - The attribute mask to search.
652 * @return - 0 if the ModR/M could be read when needed or was not
653 * needed; nonzero otherwise.
654 */
getIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint8_t attrMask)655 static int getIDWithAttrMask(uint16_t* instructionID,
656 struct InternalInstruction* insn,
657 uint8_t attrMask) {
658 BOOL hasModRMExtension;
659
660 uint8_t instructionClass;
661
662 instructionClass = contextForAttrs(attrMask);
663
664 hasModRMExtension = modRMRequired(insn->opcodeType,
665 instructionClass,
666 insn->opcode);
667
668 if (hasModRMExtension) {
669 if (readModRM(insn))
670 return -1;
671
672 *instructionID = decode(insn->opcodeType,
673 instructionClass,
674 insn->opcode,
675 insn->modRM);
676 } else {
677 *instructionID = decode(insn->opcodeType,
678 instructionClass,
679 insn->opcode,
680 0);
681 }
682
683 return 0;
684 }
685
686 /*
687 * is16BitEquivalent - Determines whether two instruction names refer to
688 * equivalent instructions but one is 16-bit whereas the other is not.
689 *
690 * @param orig - The instruction that is not 16-bit
691 * @param equiv - The instruction that is 16-bit
692 */
is16BitEquvalent(const char * orig,const char * equiv)693 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
694 off_t i;
695
696 for (i = 0;; i++) {
697 if (orig[i] == '\0' && equiv[i] == '\0')
698 return TRUE;
699 if (orig[i] == '\0' || equiv[i] == '\0')
700 return FALSE;
701 if (orig[i] != equiv[i]) {
702 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
703 continue;
704 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
705 continue;
706 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
707 continue;
708 return FALSE;
709 }
710 }
711 }
712
/*
 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
 *   appropriate for extended and escape opcodes.  Determines the attributes and
 *   context for the instruction before doing so, then applies a series of
 *   fix-ups for cases the instruction tables cannot express. On success the
 *   result is stored in insn->instructionID and insn->spec.
 *
 * @param insn    - The instruction whose ID is to be determined.
 * @param miiArg  - Opaque argument passed through to
 *                  x86DisassemblerGetInstrName() when instruction names have
 *                  to be compared.
 * @return        - 0 if the ModR/M could be read when needed or was not needed;
 *                  nonzero otherwise (also when vexSize is nonzero but is
 *                  neither 2 nor 3).
 */
static int getID(struct InternalInstruction* insn, const void *miiArg) {
  uint8_t attrMask;
  uint16_t instructionID;

  dbgprintf(insn, "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  if (insn->vexSize) {
    /* With VEX, the pp and L fields of the prefix supply the attributes. */
    attrMask |= ATTR_VEX;

    if (insn->vexSize == 3) {
      switch (ppFromVEX3of3(insn->vexPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vexPrefix[2]))
        attrMask |= ATTR_VEXL;
    }
    else if (insn->vexSize == 2) {
      switch (ppFromVEX2of2(insn->vexPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vexPrefix[1]))
        attrMask |= ATTR_VEXL;
    }
    else {
      return -1;
    }
  }
  else {
    /*
     * Without VEX, only the prefix recorded at necessaryPrefixLocation is
     * considered, and at most one of these attributes is set.
     */
    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
      attrMask |= ATTR_OPSIZE;
    else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
      attrMask |= ATTR_ADSIZE;
    else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XS;
    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
      attrMask |= ATTR_XD;
  }

  /* Bit 3 of the (real or simulated) REX prefix is the W bit. */
  if (insn->rexPrefix & 0x08)
    attrMask |= ATTR_REXW;

  if (getIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  /* The following clauses compensate for limitations of the tables. */

  if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
      !(attrMask & ATTR_OPSIZE)) {
    /*
     * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
     * has precedence since there are no L-bit with W-bit entries in the tables.
     * So if the L-bit isn't significant we should use the W-bit instead.
     * We only need to do this if the instruction doesn't specify OpSize since
     * there is a VEX_L_W_OPSIZE table.
     */

    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithWBit;
    const struct InstructionSpecifier *specWithWBit;

    spec = specifierForUID(instructionID);

    /* Look the instruction up again with the L attribute cleared. */
    if (getIDWithAttrMask(&instructionIDWithWBit,
                          insn,
                          (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithWBit = specifierForUID(instructionIDWithWBit);

    /* Prefer the lookup without L whenever it gives a different ID. */
    if (instructionID != instructionIDWithWBit) {
      insn->instructionID = instructionIDWithWBit;
      insn->spec = specWithWBit;
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
    /*
     * The instruction tables make no distinction between instructions that
     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
     * particular spot (i.e., many MMX operations).  In general we're
     * conservative, but in the specific case where OpSize is present but not
     * in the right place we check if there's a 16-bit operation.
     */

    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    const char *specName, *specWithOpSizeName;

    spec = specifierForUID(instructionID);

    if (getIDWithAttrMask(&instructionIDWithOpsize,
                          insn,
                          attrMask | ATTR_OPSIZE)) {
      /*
       * ModRM required with OpSize but not present; give up and return version
       * without OpSize set
       */

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = x86DisassemblerGetInstrName(instructionID, miiArg);
    specWithOpSizeName =
      x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);

    /* Use the OpSize variant only if its name is the 16-bit form of ours. */
    if (is16BitEquvalent(specName, specWithOpSizeName)) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = specifierForUID(instructionIDWithOpsize);
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    /*
     * NOOP shouldn't decode as NOOP if REX.b is set. Instead
     * it should decode as XCHG %r8, %eax.
     */

    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = specifierForUID(instructionID);

    /* Borrow opcode from one of the other XCHGar opcodes */
    insn->opcode = 0x91;

    if (getIDWithAttrMask(&instructionIDWithNewOpcode,
                          insn,
                          attrMask)) {
      /* Lookup failed: restore the opcode and keep the original decoding. */
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);

    /* Change back */
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  /* No fix-up applied: use the result of the first lookup. */
  insn->instructionID = instructionID;
  insn->spec = specifierForUID(insn->instructionID);

  return 0;
}
911
912 /*
913 * readSIB - Consumes the SIB byte to determine addressing information for an
914 * instruction.
915 *
916 * @param insn - The instruction whose SIB byte is to be read.
917 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
918 */
readSIB(struct InternalInstruction * insn)919 static int readSIB(struct InternalInstruction* insn) {
920 SIBIndex sibIndexBase = 0;
921 SIBBase sibBaseBase = 0;
922 uint8_t index, base;
923
924 dbgprintf(insn, "readSIB()");
925
926 if (insn->consumedSIB)
927 return 0;
928
929 insn->consumedSIB = TRUE;
930
931 switch (insn->addressSize) {
932 case 2:
933 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
934 return -1;
935 break;
936 case 4:
937 sibIndexBase = SIB_INDEX_EAX;
938 sibBaseBase = SIB_BASE_EAX;
939 break;
940 case 8:
941 sibIndexBase = SIB_INDEX_RAX;
942 sibBaseBase = SIB_BASE_RAX;
943 break;
944 }
945
946 if (consumeByte(insn, &insn->sib))
947 return -1;
948
949 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
950
951 switch (index) {
952 case 0x4:
953 insn->sibIndex = SIB_INDEX_NONE;
954 break;
955 default:
956 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
957 if (insn->sibIndex == SIB_INDEX_sib ||
958 insn->sibIndex == SIB_INDEX_sib64)
959 insn->sibIndex = SIB_INDEX_NONE;
960 break;
961 }
962
963 switch (scaleFromSIB(insn->sib)) {
964 case 0:
965 insn->sibScale = 1;
966 break;
967 case 1:
968 insn->sibScale = 2;
969 break;
970 case 2:
971 insn->sibScale = 4;
972 break;
973 case 3:
974 insn->sibScale = 8;
975 break;
976 }
977
978 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
979
980 switch (base) {
981 case 0x5:
982 switch (modFromModRM(insn->modRM)) {
983 case 0x0:
984 insn->eaDisplacement = EA_DISP_32;
985 insn->sibBase = SIB_BASE_NONE;
986 break;
987 case 0x1:
988 insn->eaDisplacement = EA_DISP_8;
989 insn->sibBase = (insn->addressSize == 4 ?
990 SIB_BASE_EBP : SIB_BASE_RBP);
991 break;
992 case 0x2:
993 insn->eaDisplacement = EA_DISP_32;
994 insn->sibBase = (insn->addressSize == 4 ?
995 SIB_BASE_EBP : SIB_BASE_RBP);
996 break;
997 case 0x3:
998 debug("Cannot have Mod = 0b11 and a SIB byte");
999 return -1;
1000 }
1001 break;
1002 default:
1003 insn->sibBase = (SIBBase)(sibBaseBase + base);
1004 break;
1005 }
1006
1007 return 0;
1008 }
1009
1010 /*
1011 * readDisplacement - Consumes the displacement of an instruction.
1012 *
1013 * @param insn - The instruction whose displacement is to be read.
1014 * @return - 0 if the displacement byte was successfully read; nonzero
1015 * otherwise.
1016 */
readDisplacement(struct InternalInstruction * insn)1017 static int readDisplacement(struct InternalInstruction* insn) {
1018 int8_t d8;
1019 int16_t d16;
1020 int32_t d32;
1021
1022 dbgprintf(insn, "readDisplacement()");
1023
1024 if (insn->consumedDisplacement)
1025 return 0;
1026
1027 insn->consumedDisplacement = TRUE;
1028 insn->displacementOffset = insn->readerCursor - insn->startLocation;
1029
1030 switch (insn->eaDisplacement) {
1031 case EA_DISP_NONE:
1032 insn->consumedDisplacement = FALSE;
1033 break;
1034 case EA_DISP_8:
1035 if (consumeInt8(insn, &d8))
1036 return -1;
1037 insn->displacement = d8;
1038 break;
1039 case EA_DISP_16:
1040 if (consumeInt16(insn, &d16))
1041 return -1;
1042 insn->displacement = d16;
1043 break;
1044 case EA_DISP_32:
1045 if (consumeInt32(insn, &d32))
1046 return -1;
1047 insn->displacement = d32;
1048 break;
1049 }
1050
1051 insn->consumedDisplacement = TRUE;
1052 return 0;
1053 }
1054
1055 /*
1056 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1057 * displacement) for an instruction and interprets it.
1058 *
1059 * @param insn - The instruction whose addressing information is to be read.
1060 * @return - 0 if the information was successfully read; nonzero otherwise.
1061 */
readModRM(struct InternalInstruction * insn)1062 static int readModRM(struct InternalInstruction* insn) {
1063 uint8_t mod, rm, reg;
1064
1065 dbgprintf(insn, "readModRM()");
1066
1067 if (insn->consumedModRM)
1068 return 0;
1069
1070 if (consumeByte(insn, &insn->modRM))
1071 return -1;
1072 insn->consumedModRM = TRUE;
1073
1074 mod = modFromModRM(insn->modRM);
1075 rm = rmFromModRM(insn->modRM);
1076 reg = regFromModRM(insn->modRM);
1077
1078 /*
1079 * This goes by insn->registerSize to pick the correct register, which messes
1080 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1081 * fixupReg().
1082 */
1083 switch (insn->registerSize) {
1084 case 2:
1085 insn->regBase = MODRM_REG_AX;
1086 insn->eaRegBase = EA_REG_AX;
1087 break;
1088 case 4:
1089 insn->regBase = MODRM_REG_EAX;
1090 insn->eaRegBase = EA_REG_EAX;
1091 break;
1092 case 8:
1093 insn->regBase = MODRM_REG_RAX;
1094 insn->eaRegBase = EA_REG_RAX;
1095 break;
1096 }
1097
1098 reg |= rFromREX(insn->rexPrefix) << 3;
1099 rm |= bFromREX(insn->rexPrefix) << 3;
1100
1101 insn->reg = (Reg)(insn->regBase + reg);
1102
1103 switch (insn->addressSize) {
1104 case 2:
1105 insn->eaBaseBase = EA_BASE_BX_SI;
1106
1107 switch (mod) {
1108 case 0x0:
1109 if (rm == 0x6) {
1110 insn->eaBase = EA_BASE_NONE;
1111 insn->eaDisplacement = EA_DISP_16;
1112 if (readDisplacement(insn))
1113 return -1;
1114 } else {
1115 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1116 insn->eaDisplacement = EA_DISP_NONE;
1117 }
1118 break;
1119 case 0x1:
1120 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1121 insn->eaDisplacement = EA_DISP_8;
1122 if (readDisplacement(insn))
1123 return -1;
1124 break;
1125 case 0x2:
1126 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1127 insn->eaDisplacement = EA_DISP_16;
1128 if (readDisplacement(insn))
1129 return -1;
1130 break;
1131 case 0x3:
1132 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1133 if (readDisplacement(insn))
1134 return -1;
1135 break;
1136 }
1137 break;
1138 case 4:
1139 case 8:
1140 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1141
1142 switch (mod) {
1143 case 0x0:
1144 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1145 switch (rm) {
1146 case 0x4:
1147 case 0xc: /* in case REXW.b is set */
1148 insn->eaBase = (insn->addressSize == 4 ?
1149 EA_BASE_sib : EA_BASE_sib64);
1150 readSIB(insn);
1151 if (readDisplacement(insn))
1152 return -1;
1153 break;
1154 case 0x5:
1155 insn->eaBase = EA_BASE_NONE;
1156 insn->eaDisplacement = EA_DISP_32;
1157 if (readDisplacement(insn))
1158 return -1;
1159 break;
1160 default:
1161 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1162 break;
1163 }
1164 break;
1165 case 0x1:
1166 case 0x2:
1167 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1168 switch (rm) {
1169 case 0x4:
1170 case 0xc: /* in case REXW.b is set */
1171 insn->eaBase = EA_BASE_sib;
1172 readSIB(insn);
1173 if (readDisplacement(insn))
1174 return -1;
1175 break;
1176 default:
1177 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1178 if (readDisplacement(insn))
1179 return -1;
1180 break;
1181 }
1182 break;
1183 case 0x3:
1184 insn->eaDisplacement = EA_DISP_NONE;
1185 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1186 break;
1187 }
1188 break;
1189 } /* switch (insn->addressSize) */
1190
1191 return 0;
1192 }
1193
/*
 * GENERIC_FIXUP_FUNC - Defines a static function that maps a raw register
 *   index onto the enumerator that matches an operand type.
 *
 * @param name    - The name of the function to define.
 * @param base    - Expression yielding the first enumerator used for TYPE_Rv
 *                  operands; it may refer to the function's insn parameter.
 * @param prefix  - Enumerator name prefix (for example MODRM_REG or EA_REG)
 *                  that is pasted in front of _AL, _AX, _XMM0, and so on.
 *
 * The generated function stores 1 in *valid when the index is acceptable for
 * the register class and 0 otherwise; unhandled operand types store 0 and
 * return 0.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)                        \
  static uint8_t name(struct InternalInstruction *insn,               \
                      OperandType type,                               \
                      uint8_t index,                                  \
                      uint8_t *valid) {                               \
    *valid = 1;                                                       \
    switch (type) {                                                   \
    case TYPE_Rv:                                                     \
      return (base) + index;                                          \
    case TYPE_R8:                                                     \
      /* With a REX prefix, indices 4-7 map onto SPL and onwards. */  \
      if (insn->rexPrefix && index >= 4 && index <= 7)                \
        return prefix##_SPL + (index - 4);                            \
      return prefix##_AL + index;                                     \
    case TYPE_R16:                                                    \
      return prefix##_AX + index;                                     \
    case TYPE_R32:                                                    \
      return prefix##_EAX + index;                                    \
    case TYPE_R64:                                                    \
      return prefix##_RAX + index;                                    \
    case TYPE_XMM256:                                                 \
      return prefix##_YMM0 + index;                                   \
    case TYPE_XMM128:                                                 \
    case TYPE_XMM64:                                                  \
    case TYPE_XMM32:                                                  \
    case TYPE_XMM:                                                    \
      return prefix##_XMM0 + index;                                   \
    case TYPE_MM64:                                                   \
    case TYPE_MM32:                                                   \
    case TYPE_MM:                                                     \
      *valid = (index <= 7);                                          \
      return prefix##_MM0 + index;                                    \
    case TYPE_SEGMENTREG:                                             \
      *valid = (index <= 5);                                          \
      return prefix##_ES + index;                                     \
    case TYPE_DEBUGREG:                                               \
      *valid = (index <= 7);                                          \
      return prefix##_DR0 + index;                                    \
    case TYPE_CONTROLREG:                                             \
      *valid = (index <= 8);                                          \
      return prefix##_CR0 + index;                                    \
    default:                                                          \
      debug("Unhandled register type");                               \
      *valid = 0;                                                     \
      return 0;                                                       \
    }                                                                 \
  }
1247
/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  readModRM() interprets both fields as general-purpose
 *   registers of the instruction's register size; if the operand is really
 *   (for example) an XMM operand, the field must name XMM0 rather than AX.
 *   These functions perform that correction.
 *
 *   fixupRegValue works in terms of the MODRM_REG_* enumerators and is based
 *   at insn->regBase; fixupRMValue works in terms of the EA_REG_* enumerators
 *   and is based at insn->eaRegBase.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM(),
 *                relative to the corresponding register base.
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1263
1264 /*
1265 * fixupReg - Consults an operand specifier to determine which of the
1266 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1267 *
1268 * @param insn - See fixup*Value().
1269 * @param op - The operand specifier.
1270 * @return - 0 if fixup was successful; -1 if the register returned was
1271 * invalid for its class.
1272 */
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)1273 static int fixupReg(struct InternalInstruction *insn,
1274 const struct OperandSpecifier *op) {
1275 uint8_t valid;
1276
1277 dbgprintf(insn, "fixupReg()");
1278
1279 switch ((OperandEncoding)op->encoding) {
1280 default:
1281 debug("Expected a REG or R/M encoding in fixupReg");
1282 return -1;
1283 case ENCODING_VVVV:
1284 insn->vvvv = (Reg)fixupRegValue(insn,
1285 (OperandType)op->type,
1286 insn->vvvv,
1287 &valid);
1288 if (!valid)
1289 return -1;
1290 break;
1291 case ENCODING_REG:
1292 insn->reg = (Reg)fixupRegValue(insn,
1293 (OperandType)op->type,
1294 insn->reg - insn->regBase,
1295 &valid);
1296 if (!valid)
1297 return -1;
1298 break;
1299 case ENCODING_RM:
1300 if (insn->eaBase >= insn->eaRegBase) {
1301 insn->eaBase = (EABase)fixupRMValue(insn,
1302 (OperandType)op->type,
1303 insn->eaBase - insn->eaRegBase,
1304 &valid);
1305 if (!valid)
1306 return -1;
1307 }
1308 break;
1309 }
1310
1311 return 0;
1312 }
1313
1314 /*
1315 * readOpcodeModifier - Reads an operand from the opcode field of an
1316 * instruction. Handles AddRegFrm instructions.
1317 *
1318 * @param insn - The instruction whose opcode field is to be read.
1319 * @param inModRM - Indicates that the opcode field is to be read from the
1320 * ModR/M extension; useful for escape opcodes
1321 * @return - 0 on success; nonzero otherwise.
1322 */
readOpcodeModifier(struct InternalInstruction * insn)1323 static int readOpcodeModifier(struct InternalInstruction* insn) {
1324 dbgprintf(insn, "readOpcodeModifier()");
1325
1326 if (insn->consumedOpcodeModifier)
1327 return 0;
1328
1329 insn->consumedOpcodeModifier = TRUE;
1330
1331 switch (insn->spec->modifierType) {
1332 default:
1333 debug("Unknown modifier type.");
1334 return -1;
1335 case MODIFIER_NONE:
1336 debug("No modifier but an operand expects one.");
1337 return -1;
1338 case MODIFIER_OPCODE:
1339 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1340 return 0;
1341 case MODIFIER_MODRM:
1342 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1343 return 0;
1344 }
1345 }
1346
1347 /*
1348 * readOpcodeRegister - Reads an operand from the opcode field of an
1349 * instruction and interprets it appropriately given the operand width.
1350 * Handles AddRegFrm instructions.
1351 *
1352 * @param insn - See readOpcodeModifier().
1353 * @param size - The width (in bytes) of the register being specified.
1354 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1355 * RAX.
1356 * @return - 0 on success; nonzero otherwise.
1357 */
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1358 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1359 dbgprintf(insn, "readOpcodeRegister()");
1360
1361 if (readOpcodeModifier(insn))
1362 return -1;
1363
1364 if (size == 0)
1365 size = insn->registerSize;
1366
1367 switch (size) {
1368 case 1:
1369 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1370 | insn->opcodeModifier));
1371 if (insn->rexPrefix &&
1372 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1373 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1374 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1375 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1376 }
1377
1378 break;
1379 case 2:
1380 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1381 + ((bFromREX(insn->rexPrefix) << 3)
1382 | insn->opcodeModifier));
1383 break;
1384 case 4:
1385 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1386 + ((bFromREX(insn->rexPrefix) << 3)
1387 | insn->opcodeModifier));
1388 break;
1389 case 8:
1390 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1391 + ((bFromREX(insn->rexPrefix) << 3)
1392 | insn->opcodeModifier));
1393 break;
1394 }
1395
1396 return 0;
1397 }
1398
1399 /*
1400 * readImmediate - Consumes an immediate operand from an instruction, given the
1401 * desired operand size.
1402 *
1403 * @param insn - The instruction whose operand is to be read.
1404 * @param size - The width (in bytes) of the operand.
1405 * @return - 0 if the immediate was successfully consumed; nonzero
1406 * otherwise.
1407 */
readImmediate(struct InternalInstruction * insn,uint8_t size)1408 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1409 uint8_t imm8;
1410 uint16_t imm16;
1411 uint32_t imm32;
1412 uint64_t imm64;
1413
1414 dbgprintf(insn, "readImmediate()");
1415
1416 if (insn->numImmediatesConsumed == 2) {
1417 debug("Already consumed two immediates");
1418 return -1;
1419 }
1420
1421 if (size == 0)
1422 size = insn->immediateSize;
1423 else
1424 insn->immediateSize = size;
1425 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1426
1427 switch (size) {
1428 case 1:
1429 if (consumeByte(insn, &imm8))
1430 return -1;
1431 insn->immediates[insn->numImmediatesConsumed] = imm8;
1432 break;
1433 case 2:
1434 if (consumeUInt16(insn, &imm16))
1435 return -1;
1436 insn->immediates[insn->numImmediatesConsumed] = imm16;
1437 break;
1438 case 4:
1439 if (consumeUInt32(insn, &imm32))
1440 return -1;
1441 insn->immediates[insn->numImmediatesConsumed] = imm32;
1442 break;
1443 case 8:
1444 if (consumeUInt64(insn, &imm64))
1445 return -1;
1446 insn->immediates[insn->numImmediatesConsumed] = imm64;
1447 break;
1448 }
1449
1450 insn->numImmediatesConsumed++;
1451
1452 return 0;
1453 }
1454
1455 /*
1456 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1457 *
1458 * @param insn - The instruction whose operand is to be read.
1459 * @return - 0 if the vvvv was successfully consumed; nonzero
1460 * otherwise.
1461 */
readVVVV(struct InternalInstruction * insn)1462 static int readVVVV(struct InternalInstruction* insn) {
1463 dbgprintf(insn, "readVVVV()");
1464
1465 if (insn->vexSize == 3)
1466 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
1467 else if (insn->vexSize == 2)
1468 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
1469 else
1470 return -1;
1471
1472 if (insn->mode != MODE_64BIT)
1473 insn->vvvv &= 0x7;
1474
1475 return 0;
1476 }
1477
1478 /*
1479 * readOperands - Consults the specifier for an instruction and consumes all
1480 * operands for that instruction, interpreting them as it goes.
1481 *
1482 * @param insn - The instruction whose operands are to be read and interpreted.
1483 * @return - 0 if all operands could be read; nonzero otherwise.
1484 */
readOperands(struct InternalInstruction * insn)1485 static int readOperands(struct InternalInstruction* insn) {
1486 int index;
1487 int hasVVVV, needVVVV;
1488 int sawRegImm = 0;
1489
1490 dbgprintf(insn, "readOperands()");
1491
1492 /* If non-zero vvvv specified, need to make sure one of the operands
1493 uses it. */
1494 hasVVVV = !readVVVV(insn);
1495 needVVVV = hasVVVV && (insn->vvvv != 0);
1496
1497 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1498 switch (x86OperandSets[insn->spec->operands][index].encoding) {
1499 case ENCODING_NONE:
1500 break;
1501 case ENCODING_REG:
1502 case ENCODING_RM:
1503 if (readModRM(insn))
1504 return -1;
1505 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1506 return -1;
1507 break;
1508 case ENCODING_CB:
1509 case ENCODING_CW:
1510 case ENCODING_CD:
1511 case ENCODING_CP:
1512 case ENCODING_CO:
1513 case ENCODING_CT:
1514 dbgprintf(insn, "We currently don't hande code-offset encodings");
1515 return -1;
1516 case ENCODING_IB:
1517 if (sawRegImm) {
1518 /* Saw a register immediate so don't read again and instead split the
1519 previous immediate. FIXME: This is a hack. */
1520 insn->immediates[insn->numImmediatesConsumed] =
1521 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1522 ++insn->numImmediatesConsumed;
1523 break;
1524 }
1525 if (readImmediate(insn, 1))
1526 return -1;
1527 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
1528 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1529 return -1;
1530 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
1531 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1532 return -1;
1533 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1534 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
1535 sawRegImm = 1;
1536 break;
1537 case ENCODING_IW:
1538 if (readImmediate(insn, 2))
1539 return -1;
1540 break;
1541 case ENCODING_ID:
1542 if (readImmediate(insn, 4))
1543 return -1;
1544 break;
1545 case ENCODING_IO:
1546 if (readImmediate(insn, 8))
1547 return -1;
1548 break;
1549 case ENCODING_Iv:
1550 if (readImmediate(insn, insn->immediateSize))
1551 return -1;
1552 break;
1553 case ENCODING_Ia:
1554 if (readImmediate(insn, insn->addressSize))
1555 return -1;
1556 break;
1557 case ENCODING_RB:
1558 if (readOpcodeRegister(insn, 1))
1559 return -1;
1560 break;
1561 case ENCODING_RW:
1562 if (readOpcodeRegister(insn, 2))
1563 return -1;
1564 break;
1565 case ENCODING_RD:
1566 if (readOpcodeRegister(insn, 4))
1567 return -1;
1568 break;
1569 case ENCODING_RO:
1570 if (readOpcodeRegister(insn, 8))
1571 return -1;
1572 break;
1573 case ENCODING_Rv:
1574 if (readOpcodeRegister(insn, 0))
1575 return -1;
1576 break;
1577 case ENCODING_I:
1578 if (readOpcodeModifier(insn))
1579 return -1;
1580 break;
1581 case ENCODING_VVVV:
1582 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1583 if (!hasVVVV)
1584 return -1;
1585 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1586 return -1;
1587 break;
1588 case ENCODING_DUP:
1589 break;
1590 default:
1591 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1592 return -1;
1593 }
1594 }
1595
1596 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1597 if (needVVVV) return -1;
1598
1599 return 0;
1600 }
1601
1602 /*
1603 * decodeInstruction - Reads and interprets a full instruction provided by the
1604 * user.
1605 *
1606 * @param insn - A pointer to the instruction to be populated. Must be
1607 * pre-allocated.
1608 * @param reader - The function to be used to read the instruction's bytes.
1609 * @param readerArg - A generic argument to be passed to the reader to store
1610 * any internal state.
1611 * @param logger - If non-NULL, the function to be used to write log messages
1612 * and warnings.
1613 * @param loggerArg - A generic argument to be passed to the logger to store
1614 * any internal state.
1615 * @param startLoc - The address (in the reader's address space) of the first
1616 * byte in the instruction.
1617 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1618 * decode the instruction in.
1619 * @return - 0 if the instruction's memory could be read; nonzero if
1620 * not.
1621 */
decodeInstruction(struct InternalInstruction * insn,byteReader_t reader,const void * readerArg,dlog_t logger,void * loggerArg,const void * miiArg,uint64_t startLoc,DisassemblerMode mode)1622 int decodeInstruction(struct InternalInstruction* insn,
1623 byteReader_t reader,
1624 const void* readerArg,
1625 dlog_t logger,
1626 void* loggerArg,
1627 const void* miiArg,
1628 uint64_t startLoc,
1629 DisassemblerMode mode) {
1630 memset(insn, 0, sizeof(struct InternalInstruction));
1631
1632 insn->reader = reader;
1633 insn->readerArg = readerArg;
1634 insn->dlog = logger;
1635 insn->dlogArg = loggerArg;
1636 insn->startLocation = startLoc;
1637 insn->readerCursor = startLoc;
1638 insn->mode = mode;
1639 insn->numImmediatesConsumed = 0;
1640
1641 if (readPrefixes(insn) ||
1642 readOpcode(insn) ||
1643 getID(insn, miiArg) ||
1644 insn->instructionID == 0 ||
1645 readOperands(insn))
1646 return -1;
1647
1648 insn->operands = &x86OperandSets[insn->spec->operands][0];
1649
1650 insn->length = insn->readerCursor - insn->startLocation;
1651
1652 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1653 startLoc, insn->readerCursor, insn->length);
1654
1655 if (insn->length > 15)
1656 dbgprintf(insn, "Instruction exceeds 15-byte limit");
1657
1658 return 0;
1659 }
1660