1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 /**
18 * @author Alexander V. Astapchuk
19 */
20
21 /**
22 * @file
23 * @brief Main decoding (disassembling) routines implementation.
24 */
25
26 #include "dec_base.h"
27 #include "enc_prvt.h"
28 #include <stdio.h>
29 //#include "open/common.h"
30
is_prefix(const unsigned char * bytes)31 bool DecoderBase::is_prefix(const unsigned char * bytes)
32 {
33 unsigned char b0 = *bytes;
34 unsigned char b1 = *(bytes+1);
35 if (b0 == 0xF0) { // LOCK
36 return true;
37 }
38 if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes
39 if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
40 return false;
41 }
42 return true;
43 }
44 if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) {
45 // branch hints, segment prefixes
46 return true;
47 }
48 if (b0==0x66) { // operand-size prefix
49 if (b1 == 0x0F) { // .... but may be a part of SIMD opcode
50 return false;
51 }
52 return false; //XXX - currently considered as part of opcode//true;
53 }
54 if (b0==0x67) { // address size prefix
55 return true;
56 }
57 return false;
58 }
59
60 // Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
fill_prefs(const unsigned char * bytes,Inst * pinst)61 unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
62 {
63 const unsigned char * my_bytes = bytes;
64
65 while( 1 )
66 {
67 unsigned char by1 = *my_bytes;
68 unsigned char by2 = *(my_bytes + 1);
69 Inst::PrefGroups where;
70
71 switch( by1 )
72 {
73 case InstPrefix_REPNE:
74 case InstPrefix_REP:
75 {
76 if( 0x0F == by2)
77 {
78 return pinst->prefc;
79 }
80 }
81 case InstPrefix_LOCK:
82 {
83 where = Inst::Group1;
84 break;
85 }
86 case InstPrefix_CS:
87 case InstPrefix_SS:
88 case InstPrefix_DS:
89 case InstPrefix_ES:
90 case InstPrefix_FS:
91 case InstPrefix_GS:
92 // case InstPrefix_HintTaken: the same as CS override
93 // case InstPrefix_HintNotTaken: the same as DS override
94 {
95 where = Inst::Group2;
96 break;
97 }
98 case InstPrefix_OpndSize:
99 {
100 //NOTE: prefix does not work for JMP Sz16, the opcode is 0x66 0xe9
101 // here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9
102 // it will match JMP Sz32 ...
103 //HACK: assume it is the last prefix, return any way
104 if( 0x0F == by2)
105 {
106 return pinst->prefc;
107 }
108 return pinst->prefc;
109 where = Inst::Group3;
110 break;
111 }
112 case InstPrefix_AddrSize:
113 {
114 where = Inst::Group4;
115 break;
116 }
117 default:
118 {
119 return pinst->prefc;
120 }
121 }
122 // Assertions are not allowed here.
123 // Error situations should result in returning error status
124 if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
125 return (unsigned int)-1;
126
127 pinst->pref[where] = (InstPrefix)by1;
128
129 if (pinst->prefc >= 4) //no more than 4 prefixes
130 return (unsigned int)-1;
131
132 pinst->prefc++;
133 ++my_bytes;
134 }
135 }
136
137
138
decode(const void * addr,Inst * pinst)139 unsigned DecoderBase::decode(const void * addr, Inst * pinst)
140 {
141 Inst tmp;
142
143 //assert( *(unsigned char*)addr != 0x66);
144
145 const unsigned char * bytes = (unsigned char*)addr;
146
147 // Load up to 4 prefixes
148 // for each Mnemonic
149 unsigned int pref_count = fill_prefs(bytes, &tmp);
150
151 if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
152 return 0; // Error
153
154 bytes += pref_count;
155
156 // for each opcodedesc
157 // if (raw_len == 0) memcmp(, raw_len)
158 // else check the mixed state which is one of the following:
159 // /digit /i /rw /rd /rb
160
161 bool found = false;
162 const unsigned char * saveBytes = bytes;
163 for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
164 bytes = saveBytes;
165 found=try_mn((Mnemonic)mn, &bytes, &tmp);
166 if (found) {
167 tmp.mn = (Mnemonic)mn;
168 break;
169 }
170 }
171 if (!found) {
172 // Unknown opcode
173 return 0;
174 }
175 tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
176 if (pinst) {
177 *pinst = tmp;
178 }
179 return tmp.size;
180 }
181
// EXTEND_REG(reg, flag) - applies a REX extension bit to a register index.
//
// With _EM64T_ defined the macro reads a pointer named `rex` from the scope
// where it is expanded (it is NOT a macro parameter): when `rex` is non-NULL
// and rex->flag is set the result is reg + 8, otherwise reg.
// Without _EM64T_ the macro is the identity and `flag` is ignored.
#ifdef _EM64T_
#define EXTEND_REG(reg, flag) \
    ((NULL == rex || 0 == rex->flag) ? (reg) : ((reg) + 8))
#else
#define EXTEND_REG(reg, flag) (reg)
#endif
188
189 //don't know the use of rex, seems not used when _EM64T_ is not enabled
decode_aux(const EncoderBase::OpcodeDesc & odesc,unsigned aux,const unsigned char ** pbuf,Inst * pinst,const Rex UNREF * rex)190 bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
191 const unsigned char ** pbuf, Inst * pinst
192 #ifdef _EM64T_
193 , const Rex UNREF *rex
194 #endif
195 )
196 {
197 OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
198 unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
199 unsigned data_byte = **pbuf;
200 EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
201 const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
202
203 switch (kind) {
204 case OpcodeByteKind_SlashR:
205 {
206 RegName reg;
207 OpndKind okind;
208 const ModRM& modrm = *(ModRM*)*pbuf;
209 if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
210 #ifdef _EM64T_
211 decodeModRM(odesc, pbuf, pinst, rex);
212 #else
213 decodeModRM(odesc, pbuf, pinst);
214 #endif
215 ++pinst->argc;
216 const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
217 okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
218 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
219 reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
220 regOpnd = EncoderBase::Operand(reg);
221 } else { // 2nd operand is memory
222 okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
223 EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
224 reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
225 regOpnd = EncoderBase::Operand(reg);
226 ++pinst->argc;
227 #ifdef _EM64T_
228 decodeModRM(odesc, pbuf, pinst, rex);
229 #else
230 decodeModRM(odesc, pbuf, pinst);
231 #endif
232 }
233 ++pinst->argc;
234 }
235 return true;
236 case OpcodeByteKind_rb:
237 case OpcodeByteKind_rw:
238 case OpcodeByteKind_rd:
239 {
240 // Gregory -
241 // Here we don't parse register because for current needs
242 // disassembler doesn't require to parse all operands
243 unsigned regid = data_byte - byte;
244 if (regid>7) {
245 return false;
246 }
247 OpndSize opnd_size;
248 switch(kind)
249 {
250 case OpcodeByteKind_rb:
251 {
252 opnd_size = OpndSize_8;
253 break;
254 }
255 case OpcodeByteKind_rw:
256 {
257 opnd_size = OpndSize_16;
258 break;
259 }
260 case OpcodeByteKind_rd:
261 {
262 opnd_size = OpndSize_32;
263 break;
264 }
265 default:
266 opnd_size = OpndSize_32; // so there is no compiler warning
267 assert( false );
268 }
269 opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );
270
271 ++pinst->argc;
272 ++*pbuf;
273 return true;
274 }
275 case OpcodeByteKind_cb:
276 {
277 char offset = *(char*)*pbuf;
278 *pbuf += 1;
279 opnd = EncoderBase::Operand(offset);
280 ++pinst->argc;
281 //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
282 }
283 return true;
284 case OpcodeByteKind_cw:
285 // not an error, but not expected in current env
286 // Android x86
287 {
288 short offset = *(short*)*pbuf;
289 *pbuf += 2;
290 opnd = EncoderBase::Operand(offset);
291 ++pinst->argc;
292 }
293 return true;
294 //return false;
295 case OpcodeByteKind_cd:
296 {
297 int offset = *(int*)*pbuf;
298 *pbuf += 4;
299 opnd = EncoderBase::Operand(offset);
300 ++pinst->argc;
301 }
302 return true;
303 case OpcodeByteKind_SlashNum:
304 {
305 const ModRM& modrm = *(ModRM*)*pbuf;
306 if (modrm.reg != byte) {
307 return false;
308 }
309 decodeModRM(odesc, pbuf, pinst
310 #ifdef _EM64T_
311 , rex
312 #endif
313 );
314 ++pinst->argc;
315 }
316 return true;
317 case OpcodeByteKind_ib:
318 {
319 char ival = *(char*)*pbuf;
320 opnd = EncoderBase::Operand(ival);
321 ++pinst->argc;
322 *pbuf += 1;
323 }
324 return true;
325 case OpcodeByteKind_iw:
326 {
327 short ival = *(short*)*pbuf;
328 opnd = EncoderBase::Operand(ival);
329 ++pinst->argc;
330 *pbuf += 2;
331 }
332 return true;
333 case OpcodeByteKind_id:
334 {
335 int ival = *(int*)*pbuf;
336 opnd = EncoderBase::Operand(ival);
337 ++pinst->argc;
338 *pbuf += 4;
339 }
340 return true;
341 #ifdef _EM64T_
342 case OpcodeByteKind_io:
343 {
344 long long int ival = *(long long int*)*pbuf;
345 opnd = EncoderBase::Operand(OpndSize_64, ival);
346 ++pinst->argc;
347 *pbuf += 8;
348 }
349 return true;
350 #endif
351 case OpcodeByteKind_plus_i:
352 {
353 unsigned regid = data_byte - byte;
354 if (regid>7) {
355 return false;
356 }
357 ++*pbuf;
358 return true;
359 }
360 case OpcodeByteKind_ZeroOpcodeByte: // cant be here
361 return false;
362 default:
363 // unknown kind ? how comes ?
364 break;
365 }
366 return false;
367 }
368
try_mn(Mnemonic mn,const unsigned char ** pbuf,Inst * pinst)369 bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
370 const unsigned char * save_pbuf = *pbuf;
371 EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];
372
373 for (unsigned i=0; !opcodes[i].last; i++) {
374 const EncoderBase::OpcodeDesc& odesc = opcodes[i];
375 char *opcode_ptr = const_cast<char *>(odesc.opcode);
376 int opcode_len = odesc.opcode_len;
377 #ifdef _EM64T_
378 Rex *prex = NULL;
379 Rex rex;
380 #endif
381
382 *pbuf = save_pbuf;
383 #ifdef _EM64T_
384 // Match REX prefixes
385 unsigned char rex_byte = (*pbuf)[0];
386 if ((rex_byte & 0xf0) == 0x40)
387 {
388 if ((rex_byte & 0x08) != 0)
389 {
390 // Have REX.W
391 if (opcode_len > 0 && opcode_ptr[0] == 0x48)
392 {
393 // Have REX.W in opcode. All mnemonics that allow
394 // REX.W have to have specified it in opcode,
395 // otherwise it is not allowed
396 rex = *(Rex *)*pbuf;
397 prex = &rex;
398 (*pbuf)++;
399 opcode_ptr++;
400 opcode_len--;
401 }
402 }
403 else
404 {
405 // No REX.W, so it doesn't have to be in opcode. We
406 // have REX.B, REX.X, REX.R or their combination, but
407 // not in opcode, they may extend any part of the
408 // instruction
409 rex = *(Rex *)*pbuf;
410 prex = &rex;
411 (*pbuf)++;
412 }
413 }
414 #endif
415 if (opcode_len != 0) {
416 if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
417 continue;
418 }
419 *pbuf += opcode_len;
420 }
421 if (odesc.aux0 != 0) {
422
423 if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
424 #ifdef _EM64T_
425 , prex
426 #endif
427 )) {
428 continue;
429 }
430 if (odesc.aux1 != 0) {
431 if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
432 #ifdef _EM64T_
433 , prex
434 #endif
435 )) {
436 continue;
437 }
438 }
439 pinst->odesc = &opcodes[i];
440 return true;
441 }
442 else {
443 // Can't have empty opcode
444 assert(opcode_len != 0);
445 pinst->odesc = &opcodes[i];
446 return true;
447 }
448 }
449 return false;
450 }
451
decodeModRM(const EncoderBase::OpcodeDesc & odesc,const unsigned char ** pbuf,Inst * pinst,const Rex * rex)452 bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
453 const unsigned char ** pbuf, Inst * pinst
454 #ifdef _EM64T_
455 , const Rex *rex
456 #endif
457 )
458 {
459 EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
460 const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];
461
462 //XXX debug ///assert(0x66 != *(*pbuf-2));
463 const ModRM& modrm = *(ModRM*)*pbuf;
464 *pbuf += 1;
465
466 RegName base = RegName_Null;
467 RegName index = RegName_Null;
468 int disp = 0;
469 unsigned scale = 0;
470
471 // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
472 // Therefore REX.W is simply ignored, and opndDesc.size is used
473
474 if (modrm.mod == 3) {
475 // we have only modrm. no sib, no disp.
476 // Android x86: Use XMMReg for 64b operand.
477 OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
478 RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
479 opnd = EncoderBase::Operand(reg);
480 return true;
481 }
482 //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory
483 //base and index should be 32 bits!!!
484 const SIB& sib = *(SIB*)*pbuf;
485 // check whether we have a sib
486 if (modrm.rm == 4) {
487 // yes, we have SIB
488 *pbuf += 1;
489 // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
490 scale = (1<<sib.scale);
491 if (sib.index != 4) {
492 index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
493 } else {
494 // (sib.index == 4) => no index
495 //%esp can't be sib.index
496 }
497
498 if (sib.base != 5 || modrm.mod != 0) {
499 base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
500 } else {
501 // (sib.base == 5 && modrm.mod == 0) => no base
502 }
503 }
504 else {
505 if (modrm.mod != 0 || modrm.rm != 5) {
506 base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
507 }
508 else {
509 // mod=0 && rm == 5 => only disp32
510 }
511 }
512
513 //update disp and pbuf
514 if (modrm.mod == 2) {
515 // have disp32
516 disp = *(int*)*pbuf;
517 *pbuf += 4;
518 }
519 else if (modrm.mod == 1) {
520 // have disp8
521 disp = *(char*)*pbuf;
522 *pbuf += 1;
523 }
524 else {
525 assert(modrm.mod == 0);
526 if (modrm.rm == 5) {
527 // have disp32 w/o sib
528 disp = *(int*)*pbuf;
529 *pbuf += 4;
530 }
531 else if (modrm.rm == 4 && sib.base == 5) {
532 // have disp32 with SI in sib
533 disp = *(int*)*pbuf;
534 *pbuf += 4;
535 }
536 }
537 opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
538 return true;
539 }
540
541