• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 /**
18  * @author Alexander V. Astapchuk
19  */
20 
21 /**
22  * @file
23  * @brief Main decoding (disassembling) routines implementation.
24  */
25 
#include "dec_base.h"
#include "enc_prvt.h"
#include <stdio.h>
#include <string.h>
//#include "open/common.h"
30 
is_prefix(const unsigned char * bytes)31 bool DecoderBase::is_prefix(const unsigned char * bytes)
32 {
33     unsigned char b0 = *bytes;
34     unsigned char b1 = *(bytes+1);
35     if (b0 == 0xF0) { // LOCK
36         return true;
37     }
38     if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes
39         if (b1 == 0x0F) {   // .... but may be a part of SIMD opcode
40             return false;
41         }
42         return true;
43     }
44     if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) {
45         // branch hints, segment prefixes
46         return true;
47     }
48     if (b0==0x66) { // operand-size prefix
49         if (b1 == 0x0F) {   // .... but may be a part of SIMD opcode
50             return false;
51         }
52         return false; //XXX - currently considered as part of opcode//true;
53     }
54     if (b0==0x67) { // address size prefix
55         return true;
56     }
57     return false;
58 }
59 
60 // Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
fill_prefs(const unsigned char * bytes,Inst * pinst)61 unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
62 {
63     const unsigned char * my_bytes = bytes;
64 
65     while( 1 )
66     {
67         unsigned char by1 = *my_bytes;
68         unsigned char by2 = *(my_bytes + 1);
69         Inst::PrefGroups where;
70 
71         switch( by1 )
72         {
73         case InstPrefix_REPNE:
74         case InstPrefix_REP:
75         {
76             if( 0x0F == by2)
77             {
78                 return pinst->prefc;
79             }
80         }
81         case InstPrefix_LOCK:
82         {
83             where = Inst::Group1;
84             break;
85         }
86         case InstPrefix_CS:
87         case InstPrefix_SS:
88         case InstPrefix_DS:
89         case InstPrefix_ES:
90         case InstPrefix_FS:
91         case InstPrefix_GS:
92 //      case InstPrefix_HintTaken: the same as CS override
93 //      case InstPrefix_HintNotTaken: the same as DS override
94         {
95             where = Inst::Group2;
96             break;
97         }
98         case InstPrefix_OpndSize:
99         {
100 //NOTE:   prefix does not work for JMP Sz16, the opcode is 0x66 0xe9
101 //        here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9
102 //        it will match JMP Sz32 ...
103 //HACK:   assume it is the last prefix, return any way
104             if( 0x0F == by2)
105             {
106                 return pinst->prefc;
107             }
108             return pinst->prefc;
109             where = Inst::Group3;
110             break;
111         }
112         case InstPrefix_AddrSize:
113         {
114             where = Inst::Group4;
115             break;
116         }
117         default:
118         {
119             return pinst->prefc;
120         }
121         }
122         // Assertions are not allowed here.
123         // Error situations should result in returning error status
124         if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
125             return (unsigned int)-1;
126 
127         pinst->pref[where] = (InstPrefix)by1;
128 
129         if (pinst->prefc >= 4) //no more than 4 prefixes
130             return (unsigned int)-1;
131 
132         pinst->prefc++;
133         ++my_bytes;
134     }
135 }
136 
137 
138 
decode(const void * addr,Inst * pinst)139 unsigned DecoderBase::decode(const void * addr, Inst * pinst)
140 {
141     Inst tmp;
142 
143     //assert( *(unsigned char*)addr != 0x66);
144 
145     const unsigned char * bytes = (unsigned char*)addr;
146 
147     // Load up to 4 prefixes
148     // for each Mnemonic
149     unsigned int pref_count = fill_prefs(bytes, &tmp);
150 
151     if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
152         return 0; // Error
153 
154     bytes += pref_count;
155 
156     //  for each opcodedesc
157     //      if (raw_len == 0) memcmp(, raw_len)
158     //  else check the mixed state which is one of the following:
159     //      /digit /i /rw /rd /rb
160 
161     bool found = false;
162     const unsigned char * saveBytes = bytes;
163     for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
164         bytes = saveBytes;
165         found=try_mn((Mnemonic)mn, &bytes, &tmp);
166         if (found) {
167             tmp.mn = (Mnemonic)mn;
168             break;
169         }
170     }
171     if (!found) {
172         // Unknown opcode
173         return 0;
174     }
175     tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
176     if (pinst) {
177         *pinst = tmp;
178     }
179     return tmp.size;
180 }
181 
// EXTEND_REG(reg, flag) yields the register index to use for a 3-bit
// register field. With _EM64T_ it adds 8 to 'reg' when a 'rex' pointer is
// in scope at the point of use, is non-NULL and has 'flag' set; otherwise
// (and always without _EM64T_) it is just 'reg'.
// The argument is parenthesized so that compound expressions expand safely.
#ifdef _EM64T_
#define EXTEND_REG(reg, flag)                        \
    ((NULL == rex || 0 == rex->flag) ? (reg) : ((reg) + 8))
#else
#define EXTEND_REG(reg, flag) (reg)
#endif
188 
189 //don't know the use of rex, seems not used when _EM64T_ is not enabled
decode_aux(const EncoderBase::OpcodeDesc & odesc,unsigned aux,const unsigned char ** pbuf,Inst * pinst,const Rex UNREF * rex)190 bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
191     const unsigned char ** pbuf, Inst * pinst
192 #ifdef _EM64T_
193     , const Rex UNREF *rex
194 #endif
195     )
196 {
197     OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
198     unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
199     unsigned data_byte = **pbuf;
200     EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
201     const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
202 
203     switch (kind) {
204     case OpcodeByteKind_SlashR:
205         {
206             RegName reg;
207             OpndKind okind;
208             const ModRM& modrm = *(ModRM*)*pbuf;
209             if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
210 #ifdef _EM64T_
211                 decodeModRM(odesc, pbuf, pinst, rex);
212 #else
213                 decodeModRM(odesc, pbuf, pinst);
214 #endif
215                 ++pinst->argc;
216                 const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
217                 okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
218                 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
219                 reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
220                 regOpnd = EncoderBase::Operand(reg);
221             } else {                            // 2nd operand is memory
222                 okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
223                 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
224                 reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
225                 regOpnd = EncoderBase::Operand(reg);
226                 ++pinst->argc;
227 #ifdef _EM64T_
228                 decodeModRM(odesc, pbuf, pinst, rex);
229 #else
230                 decodeModRM(odesc, pbuf, pinst);
231 #endif
232             }
233             ++pinst->argc;
234         }
235         return true;
236     case OpcodeByteKind_rb:
237     case OpcodeByteKind_rw:
238     case OpcodeByteKind_rd:
239         {
240             // Gregory -
241             // Here we don't parse register because for current needs
242             // disassembler doesn't require to parse all operands
243             unsigned regid = data_byte - byte;
244             if (regid>7) {
245                 return false;
246             }
247             OpndSize opnd_size;
248             switch(kind)
249             {
250             case OpcodeByteKind_rb:
251             {
252                 opnd_size = OpndSize_8;
253                 break;
254             }
255             case OpcodeByteKind_rw:
256             {
257                 opnd_size = OpndSize_16;
258                 break;
259             }
260             case OpcodeByteKind_rd:
261             {
262                 opnd_size = OpndSize_32;
263                 break;
264             }
265             default:
266                 opnd_size = OpndSize_32;  // so there is no compiler warning
267                 assert( false );
268             }
269             opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );
270 
271             ++pinst->argc;
272             ++*pbuf;
273             return true;
274         }
275     case OpcodeByteKind_cb:
276         {
277         char offset = *(char*)*pbuf;
278         *pbuf += 1;
279         opnd = EncoderBase::Operand(offset);
280         ++pinst->argc;
281         //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
282         }
283         return true;
284     case OpcodeByteKind_cw:
285         // not an error, but not expected in current env
286         // Android x86
287         {
288         short offset = *(short*)*pbuf;
289         *pbuf += 2;
290         opnd = EncoderBase::Operand(offset);
291         ++pinst->argc;
292         }
293         return true;
294         //return false;
295     case OpcodeByteKind_cd:
296         {
297         int offset = *(int*)*pbuf;
298         *pbuf += 4;
299         opnd = EncoderBase::Operand(offset);
300         ++pinst->argc;
301         }
302         return true;
303     case OpcodeByteKind_SlashNum:
304         {
305         const ModRM& modrm = *(ModRM*)*pbuf;
306         if (modrm.reg != byte) {
307             return false;
308         }
309         decodeModRM(odesc, pbuf, pinst
310 #ifdef _EM64T_
311                         , rex
312 #endif
313                         );
314         ++pinst->argc;
315         }
316         return true;
317     case OpcodeByteKind_ib:
318         {
319         char ival = *(char*)*pbuf;
320         opnd = EncoderBase::Operand(ival);
321         ++pinst->argc;
322         *pbuf += 1;
323         }
324         return true;
325     case OpcodeByteKind_iw:
326         {
327         short ival = *(short*)*pbuf;
328         opnd = EncoderBase::Operand(ival);
329         ++pinst->argc;
330         *pbuf += 2;
331         }
332         return true;
333     case OpcodeByteKind_id:
334         {
335         int ival = *(int*)*pbuf;
336         opnd = EncoderBase::Operand(ival);
337         ++pinst->argc;
338         *pbuf += 4;
339         }
340         return true;
341 #ifdef _EM64T_
342     case OpcodeByteKind_io:
343         {
344         long long int ival = *(long long int*)*pbuf;
345         opnd = EncoderBase::Operand(OpndSize_64, ival);
346         ++pinst->argc;
347         *pbuf += 8;
348         }
349         return true;
350 #endif
351     case OpcodeByteKind_plus_i:
352         {
353             unsigned regid = data_byte - byte;
354             if (regid>7) {
355                 return false;
356             }
357             ++*pbuf;
358             return true;
359         }
360     case OpcodeByteKind_ZeroOpcodeByte: // cant be here
361         return false;
362     default:
363         // unknown kind ? how comes ?
364         break;
365     }
366     return false;
367 }
368 
try_mn(Mnemonic mn,const unsigned char ** pbuf,Inst * pinst)369 bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
370     const unsigned char * save_pbuf = *pbuf;
371     EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];
372 
373     for (unsigned i=0; !opcodes[i].last; i++) {
374         const EncoderBase::OpcodeDesc& odesc = opcodes[i];
375         char *opcode_ptr = const_cast<char *>(odesc.opcode);
376         int opcode_len = odesc.opcode_len;
377 #ifdef _EM64T_
378         Rex *prex = NULL;
379         Rex rex;
380 #endif
381 
382         *pbuf = save_pbuf;
383 #ifdef _EM64T_
384         // Match REX prefixes
385         unsigned char rex_byte = (*pbuf)[0];
386         if ((rex_byte & 0xf0) == 0x40)
387         {
388             if ((rex_byte & 0x08) != 0)
389             {
390                 // Have REX.W
391                 if (opcode_len > 0 && opcode_ptr[0] == 0x48)
392                 {
393                     // Have REX.W in opcode. All mnemonics that allow
394                     // REX.W have to have specified it in opcode,
395                     // otherwise it is not allowed
396                     rex = *(Rex *)*pbuf;
397                     prex = &rex;
398                     (*pbuf)++;
399                     opcode_ptr++;
400                     opcode_len--;
401                 }
402             }
403             else
404             {
405                 // No REX.W, so it doesn't have to be in opcode. We
406                 // have REX.B, REX.X, REX.R or their combination, but
407                 // not in opcode, they may extend any part of the
408                 // instruction
409                 rex = *(Rex *)*pbuf;
410                 prex = &rex;
411                 (*pbuf)++;
412             }
413         }
414 #endif
415         if (opcode_len != 0) {
416             if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
417                 continue;
418             }
419             *pbuf += opcode_len;
420         }
421         if (odesc.aux0 != 0) {
422 
423             if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
424 #ifdef _EM64T_
425                             , prex
426 #endif
427                             )) {
428                 continue;
429             }
430             if (odesc.aux1 != 0) {
431                 if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
432 #ifdef _EM64T_
433                             , prex
434 #endif
435                             )) {
436                     continue;
437                 }
438             }
439             pinst->odesc = &opcodes[i];
440             return true;
441         }
442         else {
443             // Can't have empty opcode
444             assert(opcode_len != 0);
445             pinst->odesc = &opcodes[i];
446             return true;
447         }
448     }
449     return false;
450 }
451 
decodeModRM(const EncoderBase::OpcodeDesc & odesc,const unsigned char ** pbuf,Inst * pinst,const Rex * rex)452 bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
453     const unsigned char ** pbuf, Inst * pinst
454 #ifdef _EM64T_
455     , const Rex *rex
456 #endif
457     )
458 {
459     EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
460     const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
461 
462     //XXX debug ///assert(0x66 != *(*pbuf-2));
463     const ModRM& modrm = *(ModRM*)*pbuf;
464     *pbuf += 1;
465 
466     RegName base = RegName_Null;
467     RegName index = RegName_Null;
468     int disp = 0;
469     unsigned scale = 0;
470 
471     // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
472     // Therefore REX.W is simply ignored, and opndDesc.size is used
473 
474     if (modrm.mod == 3) {
475         // we have only modrm. no sib, no disp.
476         // Android x86: Use XMMReg for 64b operand.
477         OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
478         RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
479         opnd = EncoderBase::Operand(reg);
480         return true;
481     }
482     //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory
483     //base and index should be 32 bits!!!
484     const SIB& sib = *(SIB*)*pbuf;
485     // check whether we have a sib
486     if (modrm.rm == 4) {
487         // yes, we have SIB
488         *pbuf += 1;
489         // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
490         scale = (1<<sib.scale);
491         if (sib.index != 4) {
492             index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
493         } else {
494             // (sib.index == 4) => no index
495             //%esp can't be sib.index
496         }
497 
498         if (sib.base != 5 || modrm.mod != 0) {
499             base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
500         } else {
501             // (sib.base == 5 && modrm.mod == 0) => no base
502         }
503     }
504     else {
505         if (modrm.mod != 0 || modrm.rm != 5) {
506             base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
507         }
508         else {
509             // mod=0 && rm == 5 => only disp32
510         }
511     }
512 
513     //update disp and pbuf
514     if (modrm.mod == 2) {
515         // have disp32
516         disp = *(int*)*pbuf;
517         *pbuf += 4;
518     }
519     else if (modrm.mod == 1) {
520         // have disp8
521         disp = *(char*)*pbuf;
522         *pbuf += 1;
523     }
524     else {
525         assert(modrm.mod == 0);
526         if (modrm.rm == 5) {
527             // have disp32 w/o sib
528             disp = *(int*)*pbuf;
529             *pbuf += 4;
530         }
531         else if (modrm.rm == 4 && sib.base == 5) {
532             // have disp32 with SI in sib
533             disp = *(int*)*pbuf;
534             *pbuf += 4;
535         }
536     }
537     opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
538     return true;
539 }
540 
541