/* udis86 - libudis86/decode.c
 *
 * Copyright (c) 2002-2009 Vivek Thampi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26 #include "udint.h"
27 #include "types.h"
28 #include "input.h"
29 #include "decode.h"
30 
31 #ifndef __UD_STANDALONE__
32 # include <string.h>
33 #endif /* __UD_STANDALONE__ */
34 
/* The max number of prefixes to an instruction */
#define MAX_PREFIXES    15

/*
 * REX prefix bit accessors; each one isolates a single flag bit
 * (W = bit 3, R = bit 2, X = bit 1, B = bit 0) and yields 0 or 1.
 */
#define REX_W(r)        ( ( ( r ) >> 3 ) & 1 )
#define REX_R(r)        ( ( ( r ) >> 2 ) & 1 )
#define REX_X(r)        ( ( ( r ) >> 1 ) & 1 )
#define REX_B(r)        ( ( ( r ) >> 0 ) & 1 )

/* Assemble a W/R/X/B bit mask from the P_REX* flags of n. */
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base (SIB) byte fields */
#define SIB_S(b)        ( ( b ) >> 6 )
#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)        ( ( b ) & 7 )

/* modrm byte fields (MODRM_REG and MODRM_NNN name the same bits) */
#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_RM(b)     ( ( b ) & 7 )

/* defined further down; needed early because decode_ssepfx() calls it */
static int decode_ext(struct ud *u, uint16_t ptr);
60 
/*
 * Register classes; decode_reg() uses the class to pick the register
 * bank an encoded register number refers to.
 */
enum reg_class {
  REGCLASS_NONE,  /* no register class */
  REGCLASS_GPR,   /* general purpose registers */
  REGCLASS_MMX,   /* mm0.. */
  REGCLASS_CR,    /* cr0.. */
  REGCLASS_DB,    /* dr0.. */
  REGCLASS_SEG,   /* segment registers */
  REGCLASS_XMM    /* xmm0.. */
};
70 
71 
/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */

/* Fetch the next single byte from the input. */
static uint8_t
inp_uint8(struct ud* u)
{
  const uint8_t byte = ud_inp_next(u);

  return byte;
}
84 
/* Fetch two input bytes and combine them, low byte first. */
static uint16_t
inp_uint16(struct ud* u)
{
  const uint16_t lo = ud_inp_next(u);
  const uint16_t hi = ud_inp_next(u);

  return lo | (hi << 8);
}
94 
/* Fetch four input bytes and combine them, least significant first. */
static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 32; shift += 8) {
    const uint32_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
108 
/* Fetch eight input bytes and combine them, least significant first. */
static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t value = 0;
  unsigned int shift;

  for (shift = 0; shift < 64; shift += 8) {
    const uint64_t byte = ud_inp_next(u);
    value |= byte << shift;
  }
  return value;
}
130 
131 
/*
 * Effective operand size (16, 32 or 64) for a disassembly mode, given
 * whether rex.w and the operand-size prefix are present. rex.w is only
 * consulted in 64-bit mode, where it wins over the prefix.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
144 
145 
/*
 * Effective address size (16, 32 or 64) for a disassembly mode, given
 * whether the address-size prefix is present.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
158 
159 
160 /* Looks up mnemonic code in the mnemonic string table
161  * Returns NULL if the mnemonic code is invalid
162  */
163 const char*
ud_lookup_mnemonic(enum ud_mnemonic_code c)164 ud_lookup_mnemonic(enum ud_mnemonic_code c)
165 {
166   if (c < UD_MAX_MNEMONIC_CODE) {
167     return ud_mnemonics_str[c];
168   } else {
169     return NULL;
170   }
171 }
172 
173 
174 /*
175  * decode_prefixes
176  *
177  *  Extracts instruction prefixes.
178  */
179 static int
decode_prefixes(struct ud * u)180 decode_prefixes(struct ud *u)
181 {
182   int done = 0;
183   uint8_t curr;
184   UD_RETURN_ON_ERROR(u);
185 
186   do {
187     ud_inp_next(u);
188     UD_RETURN_ON_ERROR(u);
189     if (inp_len(u) == MAX_INSN_LENGTH) {
190       UD_RETURN_WITH_ERROR(u, "max instruction length");
191     }
192     curr = inp_curr(u);
193 
194     switch (curr)
195     {
196     case 0x2E :
197       u->pfx_seg = UD_R_CS;
198       break;
199     case 0x36 :
200       u->pfx_seg = UD_R_SS;
201       break;
202     case 0x3E :
203       u->pfx_seg = UD_R_DS;
204       break;
205     case 0x26 :
206       u->pfx_seg = UD_R_ES;
207       break;
208     case 0x64 :
209       u->pfx_seg = UD_R_FS;
210       break;
211     case 0x65 :
212       u->pfx_seg = UD_R_GS;
213       break;
214     case 0x67 : /* adress-size override prefix */
215       u->pfx_adr = 0x67;
216       break;
217     case 0xF0 :
218       u->pfx_lock = 0xF0;
219       break;
220     case 0x66:
221       u->pfx_opr = 0x66;
222       break;
223     case 0xF2:
224       u->pfx_str = 0xf2;
225       break;
226     case 0xF3:
227       u->pfx_str = 0xf3;
228       break;
229     default:
230       done = 1;
231       break;
232     }
233   } while (!done);
234 
235   if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) {
236     /* rex prefixes in 64bit mode, must be the last prefix
237      */
238     u->pfx_rex = curr;
239   } else {
240     /* rewind back one byte in stream, since the above loop
241      * stops with a non-prefix byte.
242      */
243     inp_back(u);
244   }
245   return 0;
246 }
247 
248 
modrm(struct ud * u)249 static inline unsigned int modrm( struct ud * u )
250 {
251     if ( !u->have_modrm ) {
252         u->modrm = ud_inp_next( u );
253         u->have_modrm = 1;
254     }
255     return u->modrm;
256 }
257 
258 
259 static unsigned int
resolve_operand_size(const struct ud * u,unsigned int s)260 resolve_operand_size( const struct ud * u, unsigned int s )
261 {
262     switch ( s )
263     {
264     case SZ_V:
265         return ( u->opr_mode );
266     case SZ_Z:
267         return ( u->opr_mode == 16 ) ? 16 : 32;
268     case SZ_Y:
269         return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
270     case SZ_RDQ:
271         return ( u->dis_mode == 64 ) ? 64 : 32;
272     default:
273         return s;
274     }
275 }
276 
277 
resolve_mnemonic(struct ud * u)278 static int resolve_mnemonic( struct ud* u )
279 {
280   /* resolve 3dnow weirdness. */
281   if ( u->mnemonic == UD_I3dnow ) {
282     u->mnemonic = ud_itab[ u->le->table[ inp_curr( u )  ] ].mnemonic;
283   }
284   /* SWAPGS is only valid in 64bits mode */
285   if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
286     UDERR(u, "swapgs invalid in 64bits mode");
287     return -1;
288   }
289 
290   if (u->mnemonic == UD_Ixchg) {
291     if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
292          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
293         (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
294          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
295       u->operand[0].type = UD_NONE;
296       u->operand[1].type = UD_NONE;
297       u->mnemonic = UD_Inop;
298     }
299   }
300 
301   if (u->mnemonic == UD_Inop && u->pfx_repe) {
302     u->pfx_repe = 0;
303     u->mnemonic = UD_Ipause;
304   }
305   return 0;
306 }
307 
308 
309 /* -----------------------------------------------------------------------------
310  * decode_a()- Decodes operands of the type seg:offset
311  * -----------------------------------------------------------------------------
312  */
313 static void
decode_a(struct ud * u,struct ud_operand * op)314 decode_a(struct ud* u, struct ud_operand *op)
315 {
316   if (u->opr_mode == 16) {
317     /* seg16:off16 */
318     op->type = UD_OP_PTR;
319     op->size = 32;
320     op->lval.ptr.off = inp_uint16(u);
321     op->lval.ptr.seg = inp_uint16(u);
322   } else {
323     /* seg16:off32 */
324     op->type = UD_OP_PTR;
325     op->size = 48;
326     op->lval.ptr.off = inp_uint32(u);
327     op->lval.ptr.seg = inp_uint16(u);
328   }
329 }
330 
331 /* -----------------------------------------------------------------------------
332  * decode_gpr() - Returns decoded General Purpose Register
333  * -----------------------------------------------------------------------------
334  */
335 static enum ud_type
decode_gpr(register struct ud * u,unsigned int s,unsigned char rm)336 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
337 {
338   switch (s) {
339     case 64:
340         return UD_R_RAX + rm;
341     case 32:
342         return UD_R_EAX + rm;
343     case 16:
344         return UD_R_AX  + rm;
345     case  8:
346         if (u->dis_mode == 64 && u->pfx_rex) {
347             if (rm >= 4)
348                 return UD_R_SPL + (rm-4);
349             return UD_R_AL + rm;
350         } else return UD_R_AL + rm;
351     default:
352         UD_ASSERT(!"invalid operand size");
353         return 0;
354   }
355 }
356 
357 static void
decode_reg(struct ud * u,struct ud_operand * opr,int type,int num,int size)358 decode_reg(struct ud *u,
359            struct ud_operand *opr,
360            int type,
361            int num,
362            int size)
363 {
364   int reg;
365   size = resolve_operand_size(u, size);
366   switch (type) {
367     case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
368     case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
369     case REGCLASS_XMM : reg = UD_R_XMM0 + num; break;
370     case REGCLASS_CR : reg = UD_R_CR0  + num; break;
371     case REGCLASS_DB : reg = UD_R_DR0  + num; break;
372     case REGCLASS_SEG : {
373       /*
374        * Only 6 segment registers, anything else is an error.
375        */
376       if ((num & 7) > 5) {
377         UDERR(u, "invalid segment register value");
378         return;
379       } else {
380         reg = UD_R_ES + (num & 7);
381       }
382       break;
383     }
384     default:
385       UD_ASSERT(!"invalid register type");
386       break;
387   }
388   opr->type = UD_OP_REG;
389   opr->base = reg;
390   opr->size = size;
391 }
392 
393 
394 /*
395  * decode_imm
396  *
397  *    Decode Immediate values.
398  */
399 static void
decode_imm(struct ud * u,unsigned int size,struct ud_operand * op)400 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
401 {
402   op->size = resolve_operand_size(u, size);
403   op->type = UD_OP_IMM;
404 
405   switch (op->size) {
406   case  8: op->lval.sbyte = inp_uint8(u);   break;
407   case 16: op->lval.uword = inp_uint16(u);  break;
408   case 32: op->lval.udword = inp_uint32(u); break;
409   case 64: op->lval.uqword = inp_uint64(u); break;
410   default: return;
411   }
412 }
413 
414 
415 /*
416  * decode_mem_disp
417  *
418  *    Decode mem address displacement.
419  */
420 static void
decode_mem_disp(struct ud * u,unsigned int size,struct ud_operand * op)421 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
422 {
423   switch (size) {
424   case 8:
425     op->offset = 8;
426     op->lval.ubyte  = inp_uint8(u);
427     break;
428   case 16:
429     op->offset = 16;
430     op->lval.uword  = inp_uint16(u);
431     break;
432   case 32:
433     op->offset = 32;
434     op->lval.udword = inp_uint32(u);
435     break;
436   case 64:
437     op->offset = 64;
438     op->lval.uqword = inp_uint64(u);
439     break;
440   default:
441       return;
442   }
443 }
444 
445 
446 /*
447  * decode_modrm_reg
448  *
449  *    Decodes reg field of mod/rm byte
450  *
451  */
452 static inline void
decode_modrm_reg(struct ud * u,struct ud_operand * operand,unsigned int type,unsigned int size)453 decode_modrm_reg(struct ud         *u,
454                  struct ud_operand *operand,
455                  unsigned int       type,
456                  unsigned int       size)
457 {
458   uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
459   decode_reg(u, operand, type, reg, size);
460 }
461 
462 
463 /*
464  * decode_modrm_rm
465  *
466  *    Decodes rm field of mod/rm byte
467  *
468  */
469 static void
decode_modrm_rm(struct ud * u,struct ud_operand * op,unsigned char type,unsigned int size)470 decode_modrm_rm(struct ud         *u,
471                 struct ud_operand *op,
472                 unsigned char      type,    /* register type */
473                 unsigned int       size)    /* operand size */
474 
475 {
476   size_t offset = 0;
477   unsigned char mod, rm;
478 
479   /* get mod, r/m and reg fields */
480   mod = MODRM_MOD(modrm(u));
481   rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
482 
483   /*
484    * If mod is 11b, then the modrm.rm specifies a register.
485    *
486    */
487   if (mod == 3) {
488     decode_reg(u, op, type, rm, size);
489     return;
490   }
491 
492   /*
493    * !11b => Memory Address
494    */
495   op->type = UD_OP_MEM;
496   op->size = resolve_operand_size(u, size);
497 
498   if (u->adr_mode == 64) {
499     op->base = UD_R_RAX + rm;
500     if (mod == 1) {
501       offset = 8;
502     } else if (mod == 2) {
503       offset = 32;
504     } else if (mod == 0 && (rm & 7) == 5) {
505       op->base = UD_R_RIP;
506       offset = 32;
507     } else {
508       offset = 0;
509     }
510     /*
511      * Scale-Index-Base (SIB)
512      */
513     if ((rm & 7) == 4) {
514       ud_inp_next(u);
515 
516       op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
517       op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
518       op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
519 
520       /* special conditions for base reference */
521       if (op->index == UD_R_RSP) {
522         op->index = UD_NONE;
523         op->scale = UD_NONE;
524       }
525 
526       if (op->base == UD_R_RBP || op->base == UD_R_R13) {
527         if (mod == 0) {
528           op->base = UD_NONE;
529         }
530         if (mod == 1) {
531           offset = 8;
532         } else {
533           offset = 32;
534         }
535       }
536     }
537   } else if (u->adr_mode == 32) {
538     op->base = UD_R_EAX + rm;
539     if (mod == 1) {
540       offset = 8;
541     } else if (mod == 2) {
542       offset = 32;
543     } else if (mod == 0 && rm == 5) {
544       op->base = UD_NONE;
545       offset = 32;
546     } else {
547       offset = 0;
548     }
549 
550     /* Scale-Index-Base (SIB) */
551     if ((rm & 7) == 4) {
552       ud_inp_next(u);
553 
554       op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
555       op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
556       op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
557 
558       if (op->index == UD_R_ESP) {
559         op->index = UD_NONE;
560         op->scale = UD_NONE;
561       }
562 
563       /* special condition for base reference */
564       if (op->base == UD_R_EBP) {
565         if (mod == 0) {
566           op->base = UD_NONE;
567         }
568         if (mod == 1) {
569           offset = 8;
570         } else {
571           offset = 32;
572         }
573       }
574     }
575   } else {
576     const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
577                                      UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
578     const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
579                                      UD_NONE, UD_NONE, UD_NONE, UD_NONE };
580     op->base  = bases[rm & 7];
581     op->index = indices[rm & 7];
582     if (mod == 0 && rm == 6) {
583       offset = 16;
584       op->base = UD_NONE;
585     } else if (mod == 1) {
586       offset = 8;
587     } else if (mod == 2) {
588       offset = 16;
589     }
590   }
591 
592   if (offset) {
593     decode_mem_disp(u, offset, op);
594   }
595 }
596 
597 
598 /*
599  * decode_moffset
600  *    Decode offset-only memory operand
601  */
602 static void
decode_moffset(struct ud * u,unsigned int size,struct ud_operand * opr)603 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
604 {
605   opr->type = UD_OP_MEM;
606   opr->size = resolve_operand_size(u, size);
607   decode_mem_disp(u, u->adr_mode, opr);
608 }
609 
610 
611 /* -----------------------------------------------------------------------------
612  * decode_operands() - Disassembles Operands.
613  * -----------------------------------------------------------------------------
614  */
615 static int
decode_operand(struct ud * u,struct ud_operand * operand,enum ud_operand_code type,unsigned int size)616 decode_operand(struct ud           *u,
617                struct ud_operand   *operand,
618                enum ud_operand_code type,
619                unsigned int         size)
620 {
621   operand->_oprcode = type;
622 
623   switch (type) {
624     case OP_A :
625       decode_a(u, operand);
626       break;
627     case OP_MR:
628       decode_modrm_rm(u, operand, REGCLASS_GPR,
629                       MODRM_MOD(modrm(u)) == 3 ?
630                         Mx_reg_size(size) : Mx_mem_size(size));
631       break;
632     case OP_F:
633       u->br_far  = 1;
634       /* intended fall through */
635     case OP_M:
636       if (MODRM_MOD(modrm(u)) == 3) {
637         UDERR(u, "expected modrm.mod != 3");
638       }
639       /* intended fall through */
640     case OP_E:
641       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
642       break;
643     case OP_G:
644       decode_modrm_reg(u, operand, REGCLASS_GPR, size);
645       break;
646     case OP_sI:
647     case OP_I:
648       decode_imm(u, size, operand);
649       break;
650     case OP_I1:
651       operand->type = UD_OP_CONST;
652       operand->lval.udword = 1;
653       break;
654     case OP_N:
655       if (MODRM_MOD(modrm(u)) != 3) {
656         UDERR(u, "expected modrm.mod == 3");
657       }
658       /* intended fall through */
659     case OP_Q:
660       decode_modrm_rm(u, operand, REGCLASS_MMX, size);
661       break;
662     case OP_P:
663       decode_modrm_reg(u, operand, REGCLASS_MMX, size);
664       break;
665     case OP_U:
666       if (MODRM_MOD(modrm(u)) != 3) {
667         UDERR(u, "expected modrm.mod == 3");
668       }
669       /* intended fall through */
670     case OP_W:
671       decode_modrm_rm(u, operand, REGCLASS_XMM, size);
672       break;
673     case OP_V:
674       decode_modrm_reg(u, operand, REGCLASS_XMM, size);
675       break;
676     case OP_MU:
677       decode_modrm_rm(u, operand, REGCLASS_XMM,
678                       MODRM_MOD(modrm(u)) == 3 ?
679                         Mx_reg_size(size) : Mx_mem_size(size));
680       break;
681     case OP_S:
682       decode_modrm_reg(u, operand, REGCLASS_SEG, size);
683       break;
684     case OP_O:
685       decode_moffset(u, size, operand);
686       break;
687     case OP_R0:
688     case OP_R1:
689     case OP_R2:
690     case OP_R3:
691     case OP_R4:
692     case OP_R5:
693     case OP_R6:
694     case OP_R7:
695       decode_reg(u, operand, REGCLASS_GPR,
696                  (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
697       break;
698     case OP_AL:
699     case OP_AX:
700     case OP_eAX:
701     case OP_rAX:
702       decode_reg(u, operand, REGCLASS_GPR, 0, size);
703       break;
704     case OP_CL:
705     case OP_CX:
706     case OP_eCX:
707       decode_reg(u, operand, REGCLASS_GPR, 1, size);
708       break;
709     case OP_DL:
710     case OP_DX:
711     case OP_eDX:
712       decode_reg(u, operand, REGCLASS_GPR, 2, size);
713       break;
714     case OP_ES:
715     case OP_CS:
716     case OP_DS:
717     case OP_SS:
718     case OP_FS:
719     case OP_GS:
720       /* in 64bits mode, only fs and gs are allowed */
721       if (u->dis_mode == 64) {
722         if (type != OP_FS && type != OP_GS) {
723           UDERR(u, "invalid segment register in 64bits");
724         }
725       }
726       operand->type = UD_OP_REG;
727       operand->base = (type - OP_ES) + UD_R_ES;
728       operand->size = 16;
729       break;
730     case OP_J :
731       decode_imm(u, size, operand);
732       operand->type = UD_OP_JIMM;
733       break ;
734     case OP_R :
735       if (MODRM_MOD(modrm(u)) != 3) {
736         UDERR(u, "expected modrm.mod == 3");
737       }
738       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
739       break;
740     case OP_C:
741       decode_modrm_reg(u, operand, REGCLASS_CR, size);
742       break;
743     case OP_D:
744       decode_modrm_reg(u, operand, REGCLASS_DB, size);
745       break;
746     case OP_I3 :
747       operand->type = UD_OP_CONST;
748       operand->lval.sbyte = 3;
749       break;
750     case OP_ST0:
751     case OP_ST1:
752     case OP_ST2:
753     case OP_ST3:
754     case OP_ST4:
755     case OP_ST5:
756     case OP_ST6:
757     case OP_ST7:
758       operand->type = UD_OP_REG;
759       operand->base = (type - OP_ST0) + UD_R_ST0;
760       operand->size = 80;
761       break;
762     default :
763       break;
764   }
765   return 0;
766 }
767 
768 
769 /*
770  * decode_operands
771  *
772  *    Disassemble upto 3 operands of the current instruction being
773  *    disassembled. By the end of the function, the operand fields
774  *    of the ud structure will have been filled.
775  */
776 static int
decode_operands(struct ud * u)777 decode_operands(struct ud* u)
778 {
779   decode_operand(u, &u->operand[0],
780                     u->itab_entry->operand1.type,
781                     u->itab_entry->operand1.size);
782   decode_operand(u, &u->operand[1],
783                     u->itab_entry->operand2.type,
784                     u->itab_entry->operand2.size);
785   decode_operand(u, &u->operand[2],
786                     u->itab_entry->operand3.type,
787                     u->itab_entry->operand3.size);
788   return 0;
789 }
790 
791 /* -----------------------------------------------------------------------------
792  * clear_insn() - clear instruction structure
793  * -----------------------------------------------------------------------------
794  */
795 static void
clear_insn(register struct ud * u)796 clear_insn(register struct ud* u)
797 {
798   u->error     = 0;
799   u->pfx_seg   = 0;
800   u->pfx_opr   = 0;
801   u->pfx_adr   = 0;
802   u->pfx_lock  = 0;
803   u->pfx_repne = 0;
804   u->pfx_rep   = 0;
805   u->pfx_repe  = 0;
806   u->pfx_rex   = 0;
807   u->pfx_str   = 0;
808   u->mnemonic  = UD_Inone;
809   u->itab_entry = NULL;
810   u->have_modrm = 0;
811   u->br_far    = 0;
812 
813   memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
814   memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
815   memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
816 }
817 
818 
819 static inline int
resolve_pfx_str(struct ud * u)820 resolve_pfx_str(struct ud* u)
821 {
822   if (u->pfx_str == 0xf3) {
823     if (P_STR(u->itab_entry->prefix)) {
824         u->pfx_rep  = 0xf3;
825     } else {
826         u->pfx_repe = 0xf3;
827     }
828   } else if (u->pfx_str == 0xf2) {
829     u->pfx_repne = 0xf3;
830   }
831   return 0;
832 }
833 
834 
835 static int
resolve_mode(struct ud * u)836 resolve_mode( struct ud* u )
837 {
838   /* if in error state, bail out */
839   if ( u->error ) return -1;
840 
841   /* propagate prefix effects */
842   if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
843 
844     /* Check validity of  instruction m64 */
845     if ( P_INV64( u->itab_entry->prefix ) ) {
846       UDERR(u, "instruction invalid in 64bits");
847       return -1;
848     }
849 
850     /* effective rex prefix is the  effective mask for the
851      * instruction hard-coded in the opcode map.
852      */
853     u->pfx_rex = ( u->pfx_rex & 0x40 ) |
854                  ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
855 
856     /* whether this instruction has a default operand size of
857      * 64bit, also hardcoded into the opcode map.
858      */
859     u->default64 = P_DEF64( u->itab_entry->prefix );
860     /* calculate effective operand size */
861     if ( REX_W( u->pfx_rex ) ) {
862         u->opr_mode = 64;
863     } else if ( u->pfx_opr ) {
864         u->opr_mode = 16;
865     } else {
866         /* unless the default opr size of instruction is 64,
867          * the effective operand size in the absence of rex.w
868          * prefix is 32.
869          */
870         u->opr_mode = ( u->default64 ) ? 64 : 32;
871     }
872 
873     /* calculate effective address size */
874     u->adr_mode = (u->pfx_adr) ? 32 : 64;
875   } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
876     u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
877     u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
878   } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
879     u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
880     u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
881   }
882 
883   /* set flags for implicit addressing */
884   u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
885 
886   return 0;
887 }
888 
889 
890 static inline int
decode_insn(struct ud * u,uint16_t ptr)891 decode_insn(struct ud *u, uint16_t ptr)
892 {
893   UD_ASSERT((ptr & 0x8000) == 0);
894   u->itab_entry = &ud_itab[ ptr ];
895   u->mnemonic = u->itab_entry->mnemonic;
896   return (resolve_pfx_str(u)  == 0 &&
897           resolve_mode(u)     == 0 &&
898           decode_operands(u)  == 0 &&
899           resolve_mnemonic(u) == 0) ? 0 : -1;
900 }
901 
902 
903 /*
904  * decode_3dnow()
905  *
906  *    Decoding 3dnow is a little tricky because of its strange opcode
907  *    structure. The final opcode disambiguation depends on the last
908  *    byte that comes after the operands have been decoded. Fortunately,
909  *    all 3dnow instructions have the same set of operand types. So we
910  *    go ahead and decode the instruction by picking an arbitrarily chosen
911  *    valid entry in the table, decode the operands, and read the final
912  *    byte to resolve the menmonic.
913  */
914 static inline int
decode_3dnow(struct ud * u)915 decode_3dnow(struct ud* u)
916 {
917   uint16_t ptr;
918   UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
919   UD_ASSERT(u->le->table[0xc] != 0);
920   decode_insn(u, u->le->table[0xc]);
921   ud_inp_next(u);
922   if (u->error) {
923     return -1;
924   }
925   ptr = u->le->table[inp_curr(u)];
926   UD_ASSERT((ptr & 0x8000) == 0);
927   u->mnemonic = ud_itab[ptr].mnemonic;
928   return 0;
929 }
930 
931 
932 static int
decode_ssepfx(struct ud * u)933 decode_ssepfx(struct ud *u)
934 {
935   uint8_t idx;
936   uint8_t pfx;
937 
938   /*
939    * String prefixes (f2, f3) take precedence over operand
940    * size prefix (66).
941    */
942   pfx = u->pfx_str;
943   if (pfx == 0) {
944     pfx = u->pfx_opr;
945   }
946   idx = ((pfx & 0xf) + 1) / 2;
947   if (u->le->table[idx] == 0) {
948     idx = 0;
949   }
950   if (idx && u->le->table[idx] != 0) {
951     /*
952      * "Consume" the prefix as a part of the opcode, so it is no
953      * longer exported as an instruction prefix.
954      */
955     u->pfx_str = 0;
956     if (pfx == 0x66) {
957         /*
958          * consume "66" only if it was used for decoding, leaving
959          * it to be used as an operands size override for some
960          * simd instructions.
961          */
962         u->pfx_opr = 0;
963     }
964   }
965   return decode_ext(u, u->le->table[idx]);
966 }
967 
968 
969 /*
970  * decode_ext()
971  *
972  *    Decode opcode extensions (if any)
973  */
974 static int
decode_ext(struct ud * u,uint16_t ptr)975 decode_ext(struct ud *u, uint16_t ptr)
976 {
977   uint8_t idx = 0;
978   if ((ptr & 0x8000) == 0) {
979     return decode_insn(u, ptr);
980   }
981   u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
982   if (u->le->type == UD_TAB__OPC_3DNOW) {
983     return decode_3dnow(u);
984   }
985 
986   switch (u->le->type) {
987     case UD_TAB__OPC_MOD:
988       /* !11 = 0, 11 = 1 */
989       idx = (MODRM_MOD(modrm(u)) + 1) / 4;
990       break;
991       /* disassembly mode/operand size/address size based tables.
992        * 16 = 0,, 32 = 1, 64 = 2
993        */
994     case UD_TAB__OPC_MODE:
995       idx = u->dis_mode != 64 ? 0 : 1;
996       break;
997     case UD_TAB__OPC_OSIZE:
998       idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
999       break;
1000     case UD_TAB__OPC_ASIZE:
1001       idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1002       break;
1003     case UD_TAB__OPC_X87:
1004       idx = modrm(u) - 0xC0;
1005       break;
1006     case UD_TAB__OPC_VENDOR:
1007       if (u->vendor == UD_VENDOR_ANY) {
1008         /* choose a valid entry */
1009         idx = (u->le->table[idx] != 0) ? 0 : 1;
1010       } else if (u->vendor == UD_VENDOR_AMD) {
1011         idx = 0;
1012       } else {
1013         idx = 1;
1014       }
1015       break;
1016     case UD_TAB__OPC_RM:
1017       idx = MODRM_RM(modrm(u));
1018       break;
1019     case UD_TAB__OPC_REG:
1020       idx = MODRM_REG(modrm(u));
1021       break;
1022     case UD_TAB__OPC_SSE:
1023       return decode_ssepfx(u);
1024     default:
1025       UD_ASSERT(!"not reached");
1026       break;
1027   }
1028 
1029   return decode_ext(u, u->le->table[idx]);
1030 }
1031 
1032 
1033 static int
decode_opcode(struct ud * u)1034 decode_opcode(struct ud *u)
1035 {
1036   uint16_t ptr;
1037   UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1038   ud_inp_next(u);
1039   if (u->error) {
1040     return -1;
1041   }
1042   u->primary_opcode = inp_curr(u);
1043   ptr = u->le->table[inp_curr(u)];
1044   if (ptr & 0x8000) {
1045     u->le = &ud_lookup_table_list[ptr & ~0x8000];
1046     if (u->le->type == UD_TAB__OPC_TABLE) {
1047       return decode_opcode(u);
1048     }
1049   }
1050   return decode_ext(u, ptr);
1051 }
1052 
1053 
1054 /* =============================================================================
1055  * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1056  * =============================================================================
1057  */
1058 unsigned int
ud_decode(struct ud * u)1059 ud_decode(struct ud *u)
1060 {
1061   inp_start(u);
1062   clear_insn(u);
1063   u->le = &ud_lookup_table_list[0];
1064   u->error = decode_prefixes(u) == -1 ||
1065              decode_opcode(u)   == -1 ||
1066              u->error;
1067   /* Handle decode error. */
1068   if (u->error) {
1069     /* clear out the decode data. */
1070     clear_insn(u);
1071     /* mark the sequence of bytes as invalid. */
1072     u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
1073     u->mnemonic = u->itab_entry->mnemonic;
1074   }
1075 
1076     /* maybe this stray segment override byte
1077      * should be spewed out?
1078      */
1079     if ( !P_SEG( u->itab_entry->prefix ) &&
1080             u->operand[0].type != UD_OP_MEM &&
1081             u->operand[1].type != UD_OP_MEM )
1082         u->pfx_seg = 0;
1083 
1084   u->insn_offset = u->pc; /* set offset of instruction */
1085   u->asm_buf_fill = 0;   /* set translation buffer index to 0 */
1086   u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1087 
1088   /* return number of bytes disassembled. */
1089   return u->inp_ctr;
1090 }
1091 
/*
vim: set ts=2 sw=2 expandtab
*/