1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the X86 Disassembler.
11 // It contains code to translate the data produced by the decoder into
12 // MCInsts.
13 // Documentation for the disassembler can be found in X86Disassembler.h.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "X86Disassembler.h"
18 #include "X86DisassemblerDecoder.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDisassembler.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/MemoryObject.h"
27 #include "llvm/Support/TargetRegistry.h"
28 #include "llvm/Support/raw_ostream.h"
29
30 using namespace llvm;
31 using namespace llvm::X86Disassembler;
32
33 #define DEBUG_TYPE "x86-disassembler"
34
35 #define GET_REGINFO_ENUM
36 #include "X86GenRegisterInfo.inc"
37 #define GET_INSTRINFO_ENUM
38 #include "X86GenInstrInfo.inc"
39 #define GET_SUBTARGETINFO_ENUM
40 #include "X86GenSubtargetInfo.inc"
41
Debug(const char * file,unsigned line,const char * s)42 void llvm::X86Disassembler::Debug(const char *file, unsigned line,
43 const char *s) {
44 dbgs() << file << ":" << line << ": " << s;
45 }
46
GetInstrName(unsigned Opcode,const void * mii)47 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
48 const void *mii) {
49 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
50 return MII->getName(Opcode);
51 }
52
// debug - Reports message s, tagged with the current file and line, through
// Debug().  The call is wrapped in LLVM's DEBUG() macro.
//
// The expansion intentionally carries no trailing semicolon: the caller's own
// `;` terminates the statement, so `debug("...");` is exactly one statement
// and can be used safely as the body of an unbraced if/else.
#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s))
54
namespace llvm {

// Fill-ins to make the compiler happy.  These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
//
// The X-macro expansions later in this file paste each EA base name after
// "X86::" (see the ENTRY definitions in translateRMMemory), so every base
// name, including the 16-bit register pairs and the SIB markers, must
// resolve to some X86:: identifier even though those cases are handled
// separately.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
} // end namespace X86

// Target objects defined elsewhere; the disassembler factory is registered
// against both of them in LLVMInitializeX86Disassembler().
extern Target TheX86_32Target, TheX86_64Target;

} // end namespace llvm
74
// Forward declaration: getInstruction() calls translateInstruction() before
// its definition appears further down in this file.  Returns false on success
// and true on failure.
static bool translateInstruction(MCInst &target,
                                 InternalInstruction &source,
                                 const MCDisassembler *Dis);
78
X86GenericDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MII)79 X86GenericDisassembler::X86GenericDisassembler(
80 const MCSubtargetInfo &STI,
81 MCContext &Ctx,
82 std::unique_ptr<const MCInstrInfo> MII)
83 : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
84 switch (STI.getFeatureBits() &
85 (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) {
86 case X86::Mode16Bit:
87 fMode = MODE_16BIT;
88 break;
89 case X86::Mode32Bit:
90 fMode = MODE_32BIT;
91 break;
92 case X86::Mode64Bit:
93 fMode = MODE_64BIT;
94 break;
95 default:
96 llvm_unreachable("Invalid CPU mode");
97 }
98 }
99
100 /// regionReader - a callback function that wraps the readByte method from
101 /// MemoryObject.
102 ///
103 /// @param arg - The generic callback parameter. In this case, this should
104 /// be a pointer to a MemoryObject.
105 /// @param byte - A pointer to the byte to be read.
106 /// @param address - The address to be read.
regionReader(const void * arg,uint8_t * byte,uint64_t address)107 static int regionReader(const void* arg, uint8_t* byte, uint64_t address) {
108 const MemoryObject* region = static_cast<const MemoryObject*>(arg);
109 return region->readByte(address, byte);
110 }
111
112 /// logger - a callback function that wraps the operator<< method from
113 /// raw_ostream.
114 ///
115 /// @param arg - The generic callback parameter. This should be a pointe
116 /// to a raw_ostream.
117 /// @param log - A string to be logged. logger() adds a newline.
logger(void * arg,const char * log)118 static void logger(void* arg, const char* log) {
119 if (!arg)
120 return;
121
122 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
123 vStream << log << "\n";
124 }
125
126 //
127 // Public interface for the disassembler
128 //
129
130 MCDisassembler::DecodeStatus
getInstruction(MCInst & instr,uint64_t & size,const MemoryObject & region,uint64_t address,raw_ostream & vStream,raw_ostream & cStream) const131 X86GenericDisassembler::getInstruction(MCInst &instr,
132 uint64_t &size,
133 const MemoryObject ®ion,
134 uint64_t address,
135 raw_ostream &vStream,
136 raw_ostream &cStream) const {
137 CommentStream = &cStream;
138
139 InternalInstruction internalInstr;
140
141 dlog_t loggerFn = logger;
142 if (&vStream == &nulls())
143 loggerFn = nullptr; // Disable logging completely if it's going to nulls().
144
145 int ret = decodeInstruction(&internalInstr,
146 regionReader,
147 (const void*)®ion,
148 loggerFn,
149 (void*)&vStream,
150 (const void*)MII.get(),
151 address,
152 fMode);
153
154 if (ret) {
155 size = internalInstr.readerCursor - address;
156 return Fail;
157 }
158 else {
159 size = internalInstr.length;
160 return (!translateInstruction(instr, internalInstr, this)) ?
161 Success : Fail;
162 }
163 }
164
165 //
166 // Private code that translates from struct InternalInstructions to MCInsts.
167 //
168
169 /// translateRegister - Translates an internal register to the appropriate LLVM
170 /// register, and appends it as an operand to an MCInst.
171 ///
172 /// @param mcInst - The MCInst to append to.
173 /// @param reg - The Reg to append.
translateRegister(MCInst & mcInst,Reg reg)174 static void translateRegister(MCInst &mcInst, Reg reg) {
175 #define ENTRY(x) X86::x,
176 uint8_t llvmRegnums[] = {
177 ALL_REGS
178 0
179 };
180 #undef ENTRY
181
182 uint8_t llvmRegnum = llvmRegnums[reg];
183 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
184 }
185
186 /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
187 /// immediate Value in the MCInst.
188 ///
189 /// @param Value - The immediate Value, has had any PC adjustment made by
190 /// the caller.
191 /// @param isBranch - If the instruction is a branch instruction
192 /// @param Address - The starting address of the instruction
193 /// @param Offset - The byte offset to this immediate in the instruction
194 /// @param Width - The byte width of this immediate in the instruction
195 ///
196 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
197 /// called then that function is called to get any symbolic information for the
198 /// immediate in the instruction using the Address, Offset and Width. If that
199 /// returns non-zero then the symbolic information it returns is used to create
200 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
201 /// returns zero and isBranch is true then a symbol look up for immediate Value
202 /// is done and if a symbol is found an MCExpr is created with that, else
203 /// an MCExpr with the immediate Value is created. This function returns true
204 /// if it adds an operand to the MCInst and false otherwise.
tryAddingSymbolicOperand(int64_t Value,bool isBranch,uint64_t Address,uint64_t Offset,uint64_t Width,MCInst & MI,const MCDisassembler * Dis)205 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
206 uint64_t Address, uint64_t Offset,
207 uint64_t Width, MCInst &MI,
208 const MCDisassembler *Dis) {
209 return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
210 Offset, Width);
211 }
212
213 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
214 /// referenced by a load instruction with the base register that is the rip.
215 /// These can often be addresses in a literal pool. The Address of the
216 /// instruction and its immediate Value are used to determine the address
217 /// being referenced in the literal pool entry. The SymbolLookUp call back will
218 /// return a pointer to a literal 'C' string if the referenced address is an
219 /// address into a section with 'C' string literals.
tryAddingPcLoadReferenceComment(uint64_t Address,uint64_t Value,const void * Decoder)220 static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
221 const void *Decoder) {
222 const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
223 Dis->tryAddingPcLoadReferenceComment(Value, Address);
224 }
225
// segmentRegnums - Maps a segment-override value to the LLVM register number
// of the corresponding segment register.  The translation routines in this
// file index it with insn.segmentOverride; the first entry (no override) maps
// to register number 0.
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
  0,        // SEG_OVERRIDE_NONE
  X86::CS,
  X86::SS,
  X86::DS,
  X86::ES,
  X86::FS,
  X86::GS
};
235
236 /// translateSrcIndex - Appends a source index operand to an MCInst.
237 ///
238 /// @param mcInst - The MCInst to append to.
239 /// @param insn - The internal instruction.
translateSrcIndex(MCInst & mcInst,InternalInstruction & insn)240 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
241 unsigned baseRegNo;
242
243 if (insn.mode == MODE_64BIT)
244 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI;
245 else if (insn.mode == MODE_32BIT)
246 baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI;
247 else {
248 assert(insn.mode == MODE_16BIT);
249 baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI;
250 }
251 MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
252 mcInst.addOperand(baseReg);
253
254 MCOperand segmentReg;
255 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
256 mcInst.addOperand(segmentReg);
257 return false;
258 }
259
260 /// translateDstIndex - Appends a destination index operand to an MCInst.
261 ///
262 /// @param mcInst - The MCInst to append to.
263 /// @param insn - The internal instruction.
264
translateDstIndex(MCInst & mcInst,InternalInstruction & insn)265 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
266 unsigned baseRegNo;
267
268 if (insn.mode == MODE_64BIT)
269 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI;
270 else if (insn.mode == MODE_32BIT)
271 baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI;
272 else {
273 assert(insn.mode == MODE_16BIT);
274 baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI;
275 }
276 MCOperand baseReg = MCOperand::CreateReg(baseRegNo);
277 mcInst.addOperand(baseReg);
278 return false;
279 }
280
281 /// translateImmediate - Appends an immediate operand to an MCInst.
282 ///
283 /// @param mcInst - The MCInst to append to.
284 /// @param immediate - The immediate value to append.
285 /// @param operand - The operand, as stored in the descriptor table.
286 /// @param insn - The internal instruction.
translateImmediate(MCInst & mcInst,uint64_t immediate,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)287 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
288 const OperandSpecifier &operand,
289 InternalInstruction &insn,
290 const MCDisassembler *Dis) {
291 // Sign-extend the immediate if necessary.
292
293 OperandType type = (OperandType)operand.type;
294
295 bool isBranch = false;
296 uint64_t pcrel = 0;
297 if (type == TYPE_RELv) {
298 isBranch = true;
299 pcrel = insn.startLocation +
300 insn.immediateOffset + insn.immediateSize;
301 switch (insn.displacementSize) {
302 default:
303 break;
304 case 1:
305 if(immediate & 0x80)
306 immediate |= ~(0xffull);
307 break;
308 case 2:
309 if(immediate & 0x8000)
310 immediate |= ~(0xffffull);
311 break;
312 case 4:
313 if(immediate & 0x80000000)
314 immediate |= ~(0xffffffffull);
315 break;
316 case 8:
317 break;
318 }
319 }
320 // By default sign-extend all X86 immediates based on their encoding.
321 else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
322 type == TYPE_IMM64 || type == TYPE_IMMv) {
323 uint32_t Opcode = mcInst.getOpcode();
324 switch (operand.encoding) {
325 default:
326 break;
327 case ENCODING_IB:
328 // Special case those X86 instructions that use the imm8 as a set of
329 // bits, bit count, etc. and are not sign-extend.
330 if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri &&
331 Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri &&
332 Opcode != X86::DPPSrri && Opcode != X86::DPPDrri &&
333 Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri &&
334 Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri &&
335 Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri &&
336 Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
337 Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
338 Opcode != X86::VINSERTPSrr)
339 if(immediate & 0x80)
340 immediate |= ~(0xffull);
341 break;
342 case ENCODING_IW:
343 if(immediate & 0x8000)
344 immediate |= ~(0xffffull);
345 break;
346 case ENCODING_ID:
347 if(immediate & 0x80000000)
348 immediate |= ~(0xffffffffull);
349 break;
350 case ENCODING_IO:
351 break;
352 }
353 }
354
355 switch (type) {
356 case TYPE_XMM32:
357 case TYPE_XMM64:
358 case TYPE_XMM128:
359 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
360 return;
361 case TYPE_XMM256:
362 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
363 return;
364 case TYPE_XMM512:
365 mcInst.addOperand(MCOperand::CreateReg(X86::ZMM0 + (immediate >> 4)));
366 return;
367 case TYPE_REL8:
368 isBranch = true;
369 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
370 if(immediate & 0x80)
371 immediate |= ~(0xffull);
372 break;
373 case TYPE_REL32:
374 case TYPE_REL64:
375 isBranch = true;
376 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
377 if(immediate & 0x80000000)
378 immediate |= ~(0xffffffffull);
379 break;
380 default:
381 // operand is 64 bits wide. Do nothing.
382 break;
383 }
384
385 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
386 insn.immediateOffset, insn.immediateSize,
387 mcInst, Dis))
388 mcInst.addOperand(MCOperand::CreateImm(immediate));
389
390 if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
391 type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
392 MCOperand segmentReg;
393 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
394 mcInst.addOperand(segmentReg);
395 }
396 }
397
/// translateRMRegister - Translates a register stored in the R/M field of the
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The internal instruction to extract the R/M field
///                       from.
/// @return             - false on success; true otherwise.  Every failure
///                       path reports a message through debug() first.
static bool translateRMRegister(MCInst &mcInst,
                                InternalInstruction &insn) {
  // A SIB byte implies a memory operand, which is not valid here.
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    debug("A R/M register operand may not have a SIB byte");
    return true;
  }

  switch (insn.eaBase) {
  default:
    debug("Unexpected EA base register");
    return true;
  case EA_BASE_NONE:
    debug("EA_BASE_NONE for ModR/M base");
    return true;
  // Every EA_BASE_* value denotes a memory base, so all of them share the
  // single error path that follows the expanded case labels.
#define ENTRY(x) case EA_BASE_##x:
    ALL_EA_BASES
#undef ENTRY
    debug("A R/M register operand may not have a base; "
          "the operand must be a register.");
    return true;
  // Each EA_REG_* value appends the identically named X86:: register.
#define ENTRY(x)                                                      \
  case EA_REG_##x:                                                    \
    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
    ALL_REGS
#undef ENTRY
  }

  return false;
}
433
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
///   fields of an internal instruction (and possibly its SIB byte) to a memory
///   operand in LLVM's format, and appends it to an MCInst.
///
/// @param mcInst       - The MCInst to append to.
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
///                       from.  For the gather opcodes listed in the body,
///                       insn.sibIndex is rewritten in place to the matching
///                       vector index register.
/// @param Dis          - The disassembler, used for the symbolic-operand and
///                       PC-relative reference-comment callbacks.
/// @return             - false on success; true otherwise.  Every failure
///                       path reports a message through debug() first and
///                       appends no operands.
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
                              const MCDisassembler *Dis) {
  // Addresses in an MCInst are represented as five operands:
  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
  //                                SIB base
  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
  //                                scale amount
  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
  //                                the index (which is multiplied by the
  //                                scale amount)
  //   4. displacement  (immediate) 0, or the displacement if there is one
  //   5. segmentreg    (register)  the register selected by the segment
  //                                override, or register 0 when there is none

  MCOperand baseReg;
  MCOperand scaleAmount;
  MCOperand indexReg;
  MCOperand displacement;
  MCOperand segmentReg;
  // Stays 0 unless the operand turns out to be RIP-relative (64-bit mode with
  // no base register), in which case it is the location just past the
  // displacement.
  uint64_t pcrel = 0;

  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    // SIB form: base, index and scale all come from the SIB byte.
    if (insn.sibBase != SIB_BASE_NONE) {
      switch (insn.sibBase) {
      default:
        debug("Unexpected sibBase");
        return true;
#define ENTRY(x)                                          \
      case SIB_BASE_##x:                                  \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_SIB_BASES
#undef ENTRY
      }
    } else {
      baseReg = MCOperand::CreateReg(0);
    }

    // Check whether we are handling VSIB addressing mode for GATHER.
    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
    // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
    // I don't see a way to get the correct IndexReg in readSIB:
    // We can tell whether it is VSIB or SIB after instruction ID is decoded,
    // but instruction ID may not be decoded yet when calling readSIB.
    uint32_t Opcode = mcInst.getOpcode();
    bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
                       Opcode == X86::VGATHERDPDYrm ||
                       Opcode == X86::VGATHERQPDrm ||
                       Opcode == X86::VGATHERDPSrm ||
                       Opcode == X86::VGATHERQPSrm ||
                       Opcode == X86::VPGATHERDQrm ||
                       Opcode == X86::VPGATHERDQYrm ||
                       Opcode == X86::VPGATHERQQrm ||
                       Opcode == X86::VPGATHERDDrm ||
                       Opcode == X86::VPGATHERQDrm);
    bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
                       Opcode == X86::VGATHERDPSYrm ||
                       Opcode == X86::VGATHERQPSYrm ||
                       Opcode == X86::VGATHERDPDZrm ||
                       Opcode == X86::VPGATHERDQZrm ||
                       Opcode == X86::VPGATHERQQYrm ||
                       Opcode == X86::VPGATHERDDYrm ||
                       Opcode == X86::VPGATHERQDYrm);
    bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm ||
                       Opcode == X86::VGATHERDPSZrm ||
                       Opcode == X86::VGATHERQPSZrm ||
                       Opcode == X86::VPGATHERQQZrm ||
                       Opcode == X86::VPGATHERDDZrm ||
                       Opcode == X86::VPGATHERQDZrm);
    if (IndexIs128 || IndexIs256 || IndexIs512) {
      // Distance of the decoded general-purpose index from the first index
      // register of the current address size.  It is only meaningful (and
      // only used) when sibIndex is not SIB_INDEX_NONE.
      unsigned IndexOffset = insn.sibIndex -
                         (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
      SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
                           IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
      // Re-base the index onto the vector register file of the right width.
      insn.sibIndex = (SIBIndex)(IndexBase +
                           (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
    }

    if (insn.sibIndex != SIB_INDEX_NONE) {
      switch (insn.sibIndex) {
      default:
        debug("Unexpected sibIndex");
        return true;
#define ENTRY(x)                                          \
      case SIB_INDEX_##x:                                 \
        indexReg = MCOperand::CreateReg(X86::x); break;
      EA_BASES_32BIT
      EA_BASES_64BIT
      REGS_XMM
      REGS_YMM
      REGS_ZMM
#undef ENTRY
      }
    } else {
      indexReg = MCOperand::CreateReg(0);
    }

    scaleAmount = MCOperand::CreateImm(insn.sibScale);
  } else {
    // No SIB byte: the base comes straight from the R/M field and the scale
    // is always 1.
    switch (insn.eaBase) {
    case EA_BASE_NONE:
      if (insn.eaDisplacement == EA_DISP_NONE) {
        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
        return true;
      }
      if (insn.mode == MODE_64BIT){
        // Displacement-only addressing is RIP-relative in 64-bit mode.
        pcrel = insn.startLocation +
                insn.displacementOffset + insn.displacementSize;
        tryAddingPcLoadReferenceComment(insn.startLocation +
                                        insn.displacementOffset,
                                        insn.displacement + pcrel, Dis);
        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
      }
      else
        baseReg = MCOperand::CreateReg(0);

      indexReg = MCOperand::CreateReg(0);
      break;
    // The four register-pair bases map to a base plus an index register.
    case EA_BASE_BX_SI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BX_DI:
      baseReg = MCOperand::CreateReg(X86::BX);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    case EA_BASE_BP_SI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::SI);
      break;
    case EA_BASE_BP_DI:
      baseReg = MCOperand::CreateReg(X86::BP);
      indexReg = MCOperand::CreateReg(X86::DI);
      break;
    default:
      indexReg = MCOperand::CreateReg(0);
      switch (insn.eaBase) {
      default:
        debug("Unexpected eaBase");
        return true;
        // Here, we will use the fill-ins defined above.  However,
        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
        //   sib and sib64 were handled in the top-level if, so they're only
        //   placeholders to keep the compiler happy.
#define ENTRY(x)                                        \
      case EA_BASE_##x:                                 \
        baseReg = MCOperand::CreateReg(X86::x); break;
      ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
      ALL_REGS
#undef ENTRY
        debug("A R/M memory operand may not be a register; "
              "the base field must be a base.");
        return true;
      }
    }

    scaleAmount = MCOperand::CreateImm(1);
  }

  displacement = MCOperand::CreateImm(insn.displacement);

  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);

  // Append the five operands in the order described at the top.  The
  // displacement slot is filled by a symbolic operand when one can be
  // produced, and by the plain immediate otherwise.
  mcInst.addOperand(baseReg);
  mcInst.addOperand(scaleAmount);
  mcInst.addOperand(indexReg);
  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
                               insn.startLocation, insn.displacementOffset,
                               insn.displacementSize, mcInst, Dis))
    mcInst.addOperand(displacement);
  mcInst.addOperand(segmentReg);
  return false;
}
616
617 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
618 /// byte of an instruction to LLVM form, and appends it to an MCInst.
619 ///
620 /// @param mcInst - The MCInst to append to.
621 /// @param operand - The operand, as stored in the descriptor table.
622 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
623 /// from.
624 /// @return - 0 on success; nonzero otherwise
translateRM(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)625 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
626 InternalInstruction &insn, const MCDisassembler *Dis) {
627 switch (operand.type) {
628 default:
629 debug("Unexpected type for a R/M operand");
630 return true;
631 case TYPE_R8:
632 case TYPE_R16:
633 case TYPE_R32:
634 case TYPE_R64:
635 case TYPE_Rv:
636 case TYPE_MM:
637 case TYPE_MM32:
638 case TYPE_MM64:
639 case TYPE_XMM:
640 case TYPE_XMM32:
641 case TYPE_XMM64:
642 case TYPE_XMM128:
643 case TYPE_XMM256:
644 case TYPE_XMM512:
645 case TYPE_VK1:
646 case TYPE_VK8:
647 case TYPE_VK16:
648 case TYPE_DEBUGREG:
649 case TYPE_CONTROLREG:
650 return translateRMRegister(mcInst, insn);
651 case TYPE_M:
652 case TYPE_M8:
653 case TYPE_M16:
654 case TYPE_M32:
655 case TYPE_M64:
656 case TYPE_M128:
657 case TYPE_M256:
658 case TYPE_M512:
659 case TYPE_Mv:
660 case TYPE_M32FP:
661 case TYPE_M64FP:
662 case TYPE_M80FP:
663 case TYPE_M16INT:
664 case TYPE_M32INT:
665 case TYPE_M64INT:
666 case TYPE_M1616:
667 case TYPE_M1632:
668 case TYPE_M1664:
669 case TYPE_LEA:
670 return translateRMMemory(mcInst, insn, Dis);
671 }
672 }
673
674 /// translateFPRegister - Translates a stack position on the FPU stack to its
675 /// LLVM form, and appends it to an MCInst.
676 ///
677 /// @param mcInst - The MCInst to append to.
678 /// @param stackPos - The stack position to translate.
translateFPRegister(MCInst & mcInst,uint8_t stackPos)679 static void translateFPRegister(MCInst &mcInst,
680 uint8_t stackPos) {
681 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
682 }
683
684 /// translateMaskRegister - Translates a 3-bit mask register number to
685 /// LLVM form, and appends it to an MCInst.
686 ///
687 /// @param mcInst - The MCInst to append to.
688 /// @param maskRegNum - Number of mask register from 0 to 7.
689 /// @return - false on success; true otherwise.
translateMaskRegister(MCInst & mcInst,uint8_t maskRegNum)690 static bool translateMaskRegister(MCInst &mcInst,
691 uint8_t maskRegNum) {
692 if (maskRegNum >= 8) {
693 debug("Invalid mask register number");
694 return true;
695 }
696
697 mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum));
698 return false;
699 }
700
701 /// translateOperand - Translates an operand stored in an internal instruction
702 /// to LLVM's format and appends it to an MCInst.
703 ///
704 /// @param mcInst - The MCInst to append to.
705 /// @param operand - The operand, as stored in the descriptor table.
706 /// @param insn - The internal instruction.
707 /// @return - false on success; true otherwise.
translateOperand(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)708 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
709 InternalInstruction &insn,
710 const MCDisassembler *Dis) {
711 switch (operand.encoding) {
712 default:
713 debug("Unhandled operand encoding during translation");
714 return true;
715 case ENCODING_REG:
716 translateRegister(mcInst, insn.reg);
717 return false;
718 case ENCODING_WRITEMASK:
719 return translateMaskRegister(mcInst, insn.writemask);
720 case ENCODING_RM:
721 return translateRM(mcInst, operand, insn, Dis);
722 case ENCODING_CB:
723 case ENCODING_CW:
724 case ENCODING_CD:
725 case ENCODING_CP:
726 case ENCODING_CO:
727 case ENCODING_CT:
728 debug("Translation of code offsets isn't supported.");
729 return true;
730 case ENCODING_IB:
731 case ENCODING_IW:
732 case ENCODING_ID:
733 case ENCODING_IO:
734 case ENCODING_Iv:
735 case ENCODING_Ia:
736 translateImmediate(mcInst,
737 insn.immediates[insn.numImmediatesTranslated++],
738 operand,
739 insn,
740 Dis);
741 return false;
742 case ENCODING_SI:
743 return translateSrcIndex(mcInst, insn);
744 case ENCODING_DI:
745 return translateDstIndex(mcInst, insn);
746 case ENCODING_RB:
747 case ENCODING_RW:
748 case ENCODING_RD:
749 case ENCODING_RO:
750 case ENCODING_Rv:
751 translateRegister(mcInst, insn.opcodeRegister);
752 return false;
753 case ENCODING_FP:
754 translateFPRegister(mcInst, insn.modRM & 7);
755 return false;
756 case ENCODING_VVVV:
757 translateRegister(mcInst, insn.vvvv);
758 return false;
759 case ENCODING_DUP:
760 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
761 insn, Dis);
762 }
763 }
764
765 /// translateInstruction - Translates an internal instruction and all its
766 /// operands to an MCInst.
767 ///
768 /// @param mcInst - The MCInst to populate with the instruction's data.
769 /// @param insn - The internal instruction.
770 /// @return - false on success; true otherwise.
translateInstruction(MCInst & mcInst,InternalInstruction & insn,const MCDisassembler * Dis)771 static bool translateInstruction(MCInst &mcInst,
772 InternalInstruction &insn,
773 const MCDisassembler *Dis) {
774 if (!insn.spec) {
775 debug("Instruction has no specification");
776 return true;
777 }
778
779 mcInst.setOpcode(insn.instructionID);
780 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
781 // prefix bytes should be disassembled as xrelease and xacquire then set the
782 // opcode to those instead of the rep and repne opcodes.
783 if (insn.xAcquireRelease) {
784 if(mcInst.getOpcode() == X86::REP_PREFIX)
785 mcInst.setOpcode(X86::XRELEASE_PREFIX);
786 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
787 mcInst.setOpcode(X86::XACQUIRE_PREFIX);
788 }
789
790 insn.numImmediatesTranslated = 0;
791
792 for (const auto &Op : insn.operands) {
793 if (Op.encoding != ENCODING_NONE) {
794 if (translateOperand(mcInst, Op, insn, Dis)) {
795 return true;
796 }
797 }
798 }
799
800 return false;
801 }
802
createX86Disassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)803 static MCDisassembler *createX86Disassembler(const Target &T,
804 const MCSubtargetInfo &STI,
805 MCContext &Ctx) {
806 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
807 return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
808 }
809
LLVMInitializeX86Disassembler()810 extern "C" void LLVMInitializeX86Disassembler() {
811 // Register the disassembler.
812 TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
813 createX86Disassembler);
814 TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
815 createX86Disassembler);
816 }
817