1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Implementation of MiniDisassembler.
6
7 #ifdef _WIN64
8 #error The code in this file should not be used on 64-bit Windows.
9 #endif
10
11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
12
13 namespace sidestep {
14
MiniDisassembler(bool operand_default_is_32_bits,bool address_default_is_32_bits)15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
16 bool address_default_is_32_bits)
17 : operand_default_is_32_bits_(operand_default_is_32_bits),
18 address_default_is_32_bits_(address_default_is_32_bits) {
19 Initialize();
20 }
21
MiniDisassembler()22 MiniDisassembler::MiniDisassembler()
23 : operand_default_is_32_bits_(true),
24 address_default_is_32_bits_(true) {
25 Initialize();
26 }
27
Disassemble(unsigned char * start_byte,unsigned int * instruction_bytes)28 InstructionType MiniDisassembler::Disassemble(
29 unsigned char* start_byte,
30 unsigned int* instruction_bytes) {
31 // Clean up any state from previous invocations.
32 Initialize();
33
34 // Start by processing any prefixes.
35 unsigned char* current_byte = start_byte;
36 unsigned int size = 0;
37 InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
38
39 if (IT_UNKNOWN == instruction_type)
40 return instruction_type;
41
42 current_byte += size;
43 size = 0;
44
45 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
46 // and address_is_32_bits_ flags are correctly set.
47
48 instruction_type = ProcessOpcode(current_byte, 0, &size);
49
50 // Check for error processing instruction
51 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
52 return IT_UNKNOWN;
53 }
54
55 current_byte += size;
56
57 // Invariant: operand_bytes_ indicates the total size of operands
58 // specified by the opcode and/or ModR/M byte and/or SIB byte.
59 // pCurrentByte points to the first byte after the ModR/M byte, or after
60 // the SIB byte if it is present (i.e. the first byte of any operands
61 // encoded in the instruction).
62
63 // We get the total length of any prefixes, the opcode, and the ModR/M and
64 // SIB bytes if present, by taking the difference of the original starting
65 // address and the current byte (which points to the first byte of the
66 // operands if present, or to the first byte of the next instruction if
67 // they are not). Adding the count of bytes in the operands encoded in
68 // the instruction gives us the full length of the instruction in bytes.
69 *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
70
71 // Return the instruction type, which was set by ProcessOpcode().
72 return instruction_type_;
73 }
74
Initialize()75 void MiniDisassembler::Initialize() {
76 operand_is_32_bits_ = operand_default_is_32_bits_;
77 address_is_32_bits_ = address_default_is_32_bits_;
78 operand_bytes_ = 0;
79 have_modrm_ = false;
80 should_decode_modrm_ = false;
81 instruction_type_ = IT_UNKNOWN;
82 got_f2_prefix_ = false;
83 got_f3_prefix_ = false;
84 got_66_prefix_ = false;
85 }
86
ProcessPrefixes(unsigned char * start_byte,unsigned int * size)87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
88 unsigned int* size) {
89 InstructionType instruction_type = IT_GENERIC;
90 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
91
92 switch (opcode.type_) {
93 case IT_PREFIX_ADDRESS:
94 address_is_32_bits_ = !address_default_is_32_bits_;
95 goto nochangeoperand;
96 case IT_PREFIX_OPERAND:
97 operand_is_32_bits_ = !operand_default_is_32_bits_;
98 nochangeoperand:
99 case IT_PREFIX:
100
101 if (0xF2 == (*start_byte))
102 got_f2_prefix_ = true;
103 else if (0xF3 == (*start_byte))
104 got_f3_prefix_ = true;
105 else if (0x66 == (*start_byte))
106 got_66_prefix_ = true;
107
108 instruction_type = opcode.type_;
109 (*size)++;
110 // we got a prefix, so add one and check next byte
111 ProcessPrefixes(start_byte + 1, size);
112 default:
113 break; // not a prefix byte
114 }
115
116 return instruction_type;
117 }
118
ProcessOpcode(unsigned char * start_byte,unsigned int table_index,unsigned int * size)119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
120 unsigned int table_index,
121 unsigned int* size) {
122 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
123 unsigned char current_byte = (*start_byte) >> table.shift_;
124 current_byte = current_byte & table.mask_; // Mask out the bits we will use
125
126 // Check whether the byte we have is inside the table we have.
127 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
128 instruction_type_ = IT_UNKNOWN;
129 return instruction_type_;
130 }
131
132 const Opcode& opcode = table.table_[current_byte];
133 if (IT_UNUSED == opcode.type_) {
134 // This instruction is not used by the IA-32 ISA, so we indicate
135 // this to the user. Probably means that we were pointed to
136 // a byte in memory that was not the start of an instruction.
137 instruction_type_ = IT_UNUSED;
138 return instruction_type_;
139 } else if (IT_REFERENCE == opcode.type_) {
140 // We are looking at an opcode that has more bytes (or is continued
141 // in the ModR/M byte). Recursively find the opcode definition in
142 // the table for the opcode's next byte.
143 (*size)++;
144 ProcessOpcode(start_byte + 1, opcode.table_index_, size);
145 return instruction_type_;
146 }
147
148 const SpecificOpcode* specific_opcode = reinterpret_cast<
149 const SpecificOpcode*>(&opcode);
150 if (opcode.is_prefix_dependent_) {
151 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
152 specific_opcode = &opcode.opcode_if_f2_prefix_;
153 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
154 specific_opcode = &opcode.opcode_if_f3_prefix_;
155 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
156 specific_opcode = &opcode.opcode_if_66_prefix_;
157 }
158 }
159
160 // Inv: The opcode type is known.
161 instruction_type_ = specific_opcode->type_;
162
163 // Let's process the operand types to see if we have any immediate
164 // operands, and/or a ModR/M byte.
165
166 ProcessOperand(specific_opcode->flag_dest_);
167 ProcessOperand(specific_opcode->flag_source_);
168 ProcessOperand(specific_opcode->flag_aux_);
169
170 // Inv: We have processed the opcode and incremented operand_bytes_
171 // by the number of bytes of any operands specified by the opcode
172 // that are stored in the instruction (not registers etc.). Now
173 // we need to return the total number of bytes for the opcode and
174 // for the ModR/M or SIB bytes if they are present.
175
176 if (table.mask_ != 0xff) {
177 if (have_modrm_) {
178 // we're looking at a ModR/M byte so we're not going to
179 // count that into the opcode size
180 ProcessModrm(start_byte, size);
181 return IT_GENERIC;
182 } else {
183 // need to count the ModR/M byte even if it's just being
184 // used for opcode extension
185 (*size)++;
186 return IT_GENERIC;
187 }
188 } else {
189 if (have_modrm_) {
190 // The ModR/M byte is the next byte.
191 (*size)++;
192 ProcessModrm(start_byte + 1, size);
193 return IT_GENERIC;
194 } else {
195 (*size)++;
196 return IT_GENERIC;
197 }
198 }
199 }
200
ProcessOperand(int flag_operand)201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
202 bool succeeded = true;
203 if (AM_NOT_USED == flag_operand)
204 return succeeded;
205
206 // Decide what to do based on the addressing mode.
207 switch (flag_operand & AM_MASK) {
208 // No ModR/M byte indicated by these addressing modes, and no
209 // additional (e.g. immediate) parameters.
210 case AM_A: // Direct address
211 case AM_F: // EFLAGS register
212 case AM_X: // Memory addressed by the DS:SI register pair
213 case AM_Y: // Memory addressed by the ES:DI register pair
214 case AM_IMPLICIT: // Parameter is implicit, occupies no space in
215 // instruction
216 break;
217
218 // There is a ModR/M byte but it does not necessarily need
219 // to be decoded.
220 case AM_C: // reg field of ModR/M selects a control register
221 case AM_D: // reg field of ModR/M selects a debug register
222 case AM_G: // reg field of ModR/M selects a general register
223 case AM_P: // reg field of ModR/M selects an MMX register
224 case AM_R: // mod field of ModR/M may refer only to a general register
225 case AM_S: // reg field of ModR/M selects a segment register
226 case AM_T: // reg field of ModR/M selects a test register
227 case AM_V: // reg field of ModR/M selects a 128-bit XMM register
228 have_modrm_ = true;
229 break;
230
231 // In these addressing modes, there is a ModR/M byte and it needs to be
232 // decoded. No other (e.g. immediate) params than indicated in ModR/M.
233 case AM_E: // Operand is either a general-purpose register or memory,
234 // specified by ModR/M byte
235 case AM_M: // ModR/M byte will refer only to memory
236 case AM_Q: // Operand is either an MMX register or memory (complex
237 // evaluation), specified by ModR/M byte
238 case AM_W: // Operand is either a 128-bit XMM register or memory (complex
239 // eval), specified by ModR/M byte
240 have_modrm_ = true;
241 should_decode_modrm_ = true;
242 break;
243
244 // These addressing modes specify an immediate or an offset value
245 // directly, so we need to look at the operand type to see how many
246 // bytes.
247 case AM_I: // Immediate data.
248 case AM_J: // Jump to offset.
249 case AM_O: // Operand is at offset.
250 switch (flag_operand & OT_MASK) {
251 case OT_B: // Byte regardless of operand-size attribute.
252 operand_bytes_ += OS_BYTE;
253 break;
254 case OT_C: // Byte or word, depending on operand-size attribute.
255 if (operand_is_32_bits_)
256 operand_bytes_ += OS_WORD;
257 else
258 operand_bytes_ += OS_BYTE;
259 break;
260 case OT_D: // Doubleword, regardless of operand-size attribute.
261 operand_bytes_ += OS_DOUBLE_WORD;
262 break;
263 case OT_DQ: // Double-quadword, regardless of operand-size attribute.
264 operand_bytes_ += OS_DOUBLE_QUAD_WORD;
265 break;
266 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
267 // attribute.
268 if (operand_is_32_bits_)
269 operand_bytes_ += OS_48_BIT_POINTER;
270 else
271 operand_bytes_ += OS_32_BIT_POINTER;
272 break;
273 case OT_PS: // 128-bit packed single-precision floating-point data.
274 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
275 break;
276 case OT_Q: // Quadword, regardless of operand-size attribute.
277 operand_bytes_ += OS_QUAD_WORD;
278 break;
279 case OT_S: // 6-byte pseudo-descriptor.
280 operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
281 break;
282 case OT_SD: // Scalar Double-Precision Floating-Point Value
283 case OT_PD: // Unaligned packed double-precision floating point value
284 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
285 break;
286 case OT_SS:
287 // Scalar element of a 128-bit packed single-precision
288 // floating data.
289 // We simply return enItUnknown since we don't have to support
290 // floating point
291 succeeded = false;
292 break;
293 case OT_V: // Word or doubleword, depending on operand-size attribute.
294 if (operand_is_32_bits_)
295 operand_bytes_ += OS_DOUBLE_WORD;
296 else
297 operand_bytes_ += OS_WORD;
298 break;
299 case OT_W: // Word, regardless of operand-size attribute.
300 operand_bytes_ += OS_WORD;
301 break;
302
303 // Can safely ignore these.
304 case OT_A: // Two one-word operands in memory or two double-word
305 // operands in memory
306 case OT_PI: // Quadword MMX technology register (e.g. mm0)
307 case OT_SI: // Doubleword integer register (e.g., eax)
308 break;
309
310 default:
311 break;
312 }
313 break;
314
315 default:
316 break;
317 }
318
319 return succeeded;
320 }
321
ProcessModrm(unsigned char * start_byte,unsigned int * size)322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
323 unsigned int* size) {
324 // If we don't need to decode, we just return the size of the ModR/M
325 // byte (there is never a SIB byte in this case).
326 if (!should_decode_modrm_) {
327 (*size)++;
328 return true;
329 }
330
331 // We never care about the reg field, only the combination of the mod
332 // and r/m fields, so let's start by packing those fields together into
333 // 5 bits.
334 unsigned char modrm = (*start_byte);
335 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
336 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
337 mod = mod >> 3; // shift the mod field to the right place
338 modrm = mod | modrm; // combine the r/m and mod fields as discussed
339 mod = mod >> 3; // shift the mod field to bits 2..0
340
341 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
342 // in bits 2..0, and mod contains the mod field in bits 2..0
343
344 const ModrmEntry* modrm_entry = 0;
345 if (address_is_32_bits_)
346 modrm_entry = &s_ia32_modrm_map_[modrm];
347 else
348 modrm_entry = &s_ia16_modrm_map_[modrm];
349
350 // Invariant: modrm_entry points to information that we need to decode
351 // the ModR/M byte.
352
353 // Add to the count of operand bytes, if the ModR/M byte indicates
354 // that some operands are encoded in the instruction.
355 if (modrm_entry->is_encoded_in_instruction_)
356 operand_bytes_ += modrm_entry->operand_size_;
357
358 // Process the SIB byte if necessary, and return the count
359 // of ModR/M and SIB bytes.
360 if (modrm_entry->use_sib_byte_) {
361 (*size)++;
362 return ProcessSib(start_byte + 1, mod, size);
363 } else {
364 (*size)++;
365 return true;
366 }
367 }
368
ProcessSib(unsigned char * start_byte,unsigned char mod,unsigned int * size)369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
370 unsigned char mod,
371 unsigned int* size) {
372 // get the mod field from the 2..0 bits of the SIB byte
373 unsigned char sib_base = (*start_byte) & 0x07;
374 if (0x05 == sib_base) {
375 switch (mod) {
376 case 0x00: // mod == 00
377 case 0x02: // mod == 10
378 operand_bytes_ += OS_DOUBLE_WORD;
379 break;
380 case 0x01: // mod == 01
381 operand_bytes_ += OS_BYTE;
382 break;
383 case 0x03: // mod == 11
384 // According to the IA-32 docs, there does not seem to be a disp
385 // value for this value of mod
386 default:
387 break;
388 }
389 }
390
391 (*size)++;
392 return true;
393 }
394
395 }; // namespace sidestep
396