1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_defs.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2013 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_x86_defs.h"
43
44
45 /* --------- Registers. --------- */
46
47 void ppHRegX86 ( HReg reg )
48 {
49 Int r;
50 static const HChar* ireg32_names[8]
51 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
52 /* Be generic for all virtual regs. */
53 if (hregIsVirtual(reg)) {
54 ppHReg(reg);
55 return;
56 }
57 /* But specific for real regs. */
58 switch (hregClass(reg)) {
59 case HRcInt32:
60 r = hregNumber(reg);
61 vassert(r >= 0 && r < 8);
62 vex_printf("%s", ireg32_names[r]);
63 return;
64 case HRcFlt64:
65 r = hregNumber(reg);
66 vassert(r >= 0 && r < 6);
67 vex_printf("%%fake%d", r);
68 return;
69 case HRcVec128:
70 r = hregNumber(reg);
71 vassert(r >= 0 && r < 8);
72 vex_printf("%%xmm%d", r);
73 return;
74 default:
75 vpanic("ppHRegX86");
76 }
77 }
78
79 HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
80 HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
81 HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
82 HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
83 HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
84 HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
85 HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
86 HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }
87
88 HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
89 HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
90 HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
91 HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
92 HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
93 HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
94
95 HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
96 HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
97 HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
98 HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
99 HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
100 HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
101 HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
102 HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }
103
104
105 void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
106 {
107 *nregs = 20;
108 *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
109 (*arr)[0] = hregX86_EAX();
110 (*arr)[1] = hregX86_EBX();
111 (*arr)[2] = hregX86_ECX();
112 (*arr)[3] = hregX86_EDX();
113 (*arr)[4] = hregX86_ESI();
114 (*arr)[5] = hregX86_EDI();
115 (*arr)[6] = hregX86_FAKE0();
116 (*arr)[7] = hregX86_FAKE1();
117 (*arr)[8] = hregX86_FAKE2();
118 (*arr)[9] = hregX86_FAKE3();
119 (*arr)[10] = hregX86_FAKE4();
120 (*arr)[11] = hregX86_FAKE5();
121 (*arr)[12] = hregX86_XMM0();
122 (*arr)[13] = hregX86_XMM1();
123 (*arr)[14] = hregX86_XMM2();
124 (*arr)[15] = hregX86_XMM3();
125 (*arr)[16] = hregX86_XMM4();
126 (*arr)[17] = hregX86_XMM5();
127 (*arr)[18] = hregX86_XMM6();
128 (*arr)[19] = hregX86_XMM7();
129 }
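/* Commentary added for clarity (not in the original source): %esp and
   %ebp are deliberately absent from the allocable set built above --
   %esp is the host stack pointer, and spill slots are addressed
   relative to %ebp (see genSpill_X86/genReload_X86 below) -- so
   neither register is handed to the allocator. */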
130
131
132 /* --------- Condition codes, Intel encoding. --------- */
133
134 const HChar* showX86CondCode ( X86CondCode cond )
135 {
136 switch (cond) {
137 case Xcc_O: return "o";
138 case Xcc_NO: return "no";
139 case Xcc_B: return "b";
140 case Xcc_NB: return "nb";
141 case Xcc_Z: return "z";
142 case Xcc_NZ: return "nz";
143 case Xcc_BE: return "be";
144 case Xcc_NBE: return "nbe";
145 case Xcc_S: return "s";
146 case Xcc_NS: return "ns";
147 case Xcc_P: return "p";
148 case Xcc_NP: return "np";
149 case Xcc_L: return "l";
150 case Xcc_NL: return "nl";
151 case Xcc_LE: return "le";
152 case Xcc_NLE: return "nle";
153 case Xcc_ALWAYS: return "ALWAYS";
154 default: vpanic("ppX86CondCode");
155 }
156 }
157
158
159 /* --------- X86AMode: memory address expressions. --------- */
160
161 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
162 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
163 am->tag = Xam_IR;
164 am->Xam.IR.imm = imm32;
165 am->Xam.IR.reg = reg;
166 return am;
167 }
168 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
169 X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
170 am->tag = Xam_IRRS;
171 am->Xam.IRRS.imm = imm32;
172 am->Xam.IRRS.base = base;
173 am->Xam.IRRS.index = indEx;
174 am->Xam.IRRS.shift = shift;
175 vassert(shift >= 0 && shift <= 3);
176 return am;
177 }
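/* Illustrative sketch, added commentary only: the two amode forms map
   onto the usual x86 assembly syntax that ppX86AMode prints below, e.g.

      X86AMode_IR(0x18, hregX86_EBP())    denotes  0x18(%ebp)
      X86AMode_IRRS(0, rB, rI, 2)         denotes  0x0(rB,rI,4)

   where 'rB' and 'rI' stand for arbitrary 32-bit integer registers and
   the printed scale is 1 << shift. */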
178
179 X86AMode* dopyX86AMode ( X86AMode* am ) {
180 switch (am->tag) {
181 case Xam_IR:
182 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
183 case Xam_IRRS:
184 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
185 am->Xam.IRRS.index, am->Xam.IRRS.shift );
186 default:
187 vpanic("dopyX86AMode");
188 }
189 }
190
191 void ppX86AMode ( X86AMode* am ) {
192 switch (am->tag) {
193 case Xam_IR:
194 if (am->Xam.IR.imm == 0)
195 vex_printf("(");
196 else
197 vex_printf("0x%x(", am->Xam.IR.imm);
198 ppHRegX86(am->Xam.IR.reg);
199 vex_printf(")");
200 return;
201 case Xam_IRRS:
202 vex_printf("0x%x(", am->Xam.IRRS.imm);
203 ppHRegX86(am->Xam.IRRS.base);
204 vex_printf(",");
205 ppHRegX86(am->Xam.IRRS.index);
206 vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
207 return;
208 default:
209 vpanic("ppX86AMode");
210 }
211 }
212
213 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
214 switch (am->tag) {
215 case Xam_IR:
216 addHRegUse(u, HRmRead, am->Xam.IR.reg);
217 return;
218 case Xam_IRRS:
219 addHRegUse(u, HRmRead, am->Xam.IRRS.base);
220 addHRegUse(u, HRmRead, am->Xam.IRRS.index);
221 return;
222 default:
223 vpanic("addRegUsage_X86AMode");
224 }
225 }
226
227 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
228 switch (am->tag) {
229 case Xam_IR:
230 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
231 return;
232 case Xam_IRRS:
233 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
234 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
235 return;
236 default:
237 vpanic("mapRegs_X86AMode");
238 }
239 }
240
241 /* --------- Operand, which can be reg, immediate or memory. --------- */
242
243 X86RMI* X86RMI_Imm ( UInt imm32 ) {
244 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
245 op->tag = Xrmi_Imm;
246 op->Xrmi.Imm.imm32 = imm32;
247 return op;
248 }
249 X86RMI* X86RMI_Reg ( HReg reg ) {
250 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
251 op->tag = Xrmi_Reg;
252 op->Xrmi.Reg.reg = reg;
253 return op;
254 }
255 X86RMI* X86RMI_Mem ( X86AMode* am ) {
256 X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
257 op->tag = Xrmi_Mem;
258 op->Xrmi.Mem.am = am;
259 return op;
260 }
261
262 void ppX86RMI ( X86RMI* op ) {
263 switch (op->tag) {
264 case Xrmi_Imm:
265 vex_printf("$0x%x", op->Xrmi.Imm.imm32);
266 return;
267 case Xrmi_Reg:
268 ppHRegX86(op->Xrmi.Reg.reg);
269 return;
270 case Xrmi_Mem:
271 ppX86AMode(op->Xrmi.Mem.am);
272 return;
273 default:
274 vpanic("ppX86RMI");
275 }
276 }
277
278 /* An X86RMI can only be used in a "read" context (what would it mean
279 to write or modify a literal?) and so we enumerate its registers
280 accordingly. */
281 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
282 switch (op->tag) {
283 case Xrmi_Imm:
284 return;
285 case Xrmi_Reg:
286 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
287 return;
288 case Xrmi_Mem:
289 addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
290 return;
291 default:
292 vpanic("addRegUsage_X86RMI");
293 }
294 }
295
296 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
297 switch (op->tag) {
298 case Xrmi_Imm:
299 return;
300 case Xrmi_Reg:
301 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
302 return;
303 case Xrmi_Mem:
304 mapRegs_X86AMode(m, op->Xrmi.Mem.am);
305 return;
306 default:
307 vpanic("mapRegs_X86RMI");
308 }
309 }
310
311
312 /* --------- Operand, which can be reg or immediate only. --------- */
313
314 X86RI* X86RI_Imm ( UInt imm32 ) {
315 X86RI* op = LibVEX_Alloc(sizeof(X86RI));
316 op->tag = Xri_Imm;
317 op->Xri.Imm.imm32 = imm32;
318 return op;
319 }
320 X86RI* X86RI_Reg ( HReg reg ) {
321 X86RI* op = LibVEX_Alloc(sizeof(X86RI));
322 op->tag = Xri_Reg;
323 op->Xri.Reg.reg = reg;
324 return op;
325 }
326
327 void ppX86RI ( X86RI* op ) {
328 switch (op->tag) {
329 case Xri_Imm:
330 vex_printf("$0x%x", op->Xri.Imm.imm32);
331 return;
332 case Xri_Reg:
333 ppHRegX86(op->Xri.Reg.reg);
334 return;
335 default:
336 vpanic("ppX86RI");
337 }
338 }
339
340 /* An X86RI can only be used in a "read" context (what would it mean
341 to write or modify a literal?) and so we enumerate its registers
342 accordingly. */
343 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
344 switch (op->tag) {
345 case Xri_Imm:
346 return;
347 case Xri_Reg:
348 addHRegUse(u, HRmRead, op->Xri.Reg.reg);
349 return;
350 default:
351 vpanic("addRegUsage_X86RI");
352 }
353 }
354
355 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
356 switch (op->tag) {
357 case Xri_Imm:
358 return;
359 case Xri_Reg:
360 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
361 return;
362 default:
363 vpanic("mapRegs_X86RI");
364 }
365 }
366
367
368 /* --------- Operand, which can be reg or memory only. --------- */
369
370 X86RM* X86RM_Reg ( HReg reg ) {
371 X86RM* op = LibVEX_Alloc(sizeof(X86RM));
372 op->tag = Xrm_Reg;
373 op->Xrm.Reg.reg = reg;
374 return op;
375 }
376 X86RM* X86RM_Mem ( X86AMode* am ) {
377 X86RM* op = LibVEX_Alloc(sizeof(X86RM));
378 op->tag = Xrm_Mem;
379 op->Xrm.Mem.am = am;
380 return op;
381 }
382
383 void ppX86RM ( X86RM* op ) {
384 switch (op->tag) {
385 case Xrm_Mem:
386 ppX86AMode(op->Xrm.Mem.am);
387 return;
388 case Xrm_Reg:
389 ppHRegX86(op->Xrm.Reg.reg);
390 return;
391 default:
392 vpanic("ppX86RM");
393 }
394 }
395
396 /* Because an X86RM can be both a source or destination operand, we
397 have to supply a mode -- pertaining to the operand as a whole --
398 indicating how it's being used. */
399 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
400 switch (op->tag) {
401 case Xrm_Mem:
402 /* Memory is read, written or modified. So we just want to
403 know the regs read by the amode. */
404 addRegUsage_X86AMode(u, op->Xrm.Mem.am);
405 return;
406 case Xrm_Reg:
407 /* reg is read, written or modified. Add it in the
408 appropriate way. */
409 addHRegUse(u, mode, op->Xrm.Reg.reg);
410 return;
411 default:
412 vpanic("addRegUsage_X86RM");
413 }
414 }
415
416 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
417 {
418 switch (op->tag) {
419 case Xrm_Mem:
420 mapRegs_X86AMode(m, op->Xrm.Mem.am);
421 return;
422 case Xrm_Reg:
423 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
424 return;
425 default:
426 vpanic("mapRegs_X86RM");
427 }
428 }
429
430
431 /* --------- Instructions. --------- */
432
433 const HChar* showX86UnaryOp ( X86UnaryOp op ) {
434 switch (op) {
435 case Xun_NOT: return "not";
436 case Xun_NEG: return "neg";
437 default: vpanic("showX86UnaryOp");
438 }
439 }
440
441 const HChar* showX86AluOp ( X86AluOp op ) {
442 switch (op) {
443 case Xalu_MOV: return "mov";
444 case Xalu_CMP: return "cmp";
445 case Xalu_ADD: return "add";
446 case Xalu_SUB: return "sub";
447 case Xalu_ADC: return "adc";
448 case Xalu_SBB: return "sbb";
449 case Xalu_AND: return "and";
450 case Xalu_OR: return "or";
451 case Xalu_XOR: return "xor";
452 case Xalu_MUL: return "mul";
453 default: vpanic("showX86AluOp");
454 }
455 }
456
457 const HChar* showX86ShiftOp ( X86ShiftOp op ) {
458 switch (op) {
459 case Xsh_SHL: return "shl";
460 case Xsh_SHR: return "shr";
461 case Xsh_SAR: return "sar";
462 default: vpanic("showX86ShiftOp");
463 }
464 }
465
466 const HChar* showX86FpOp ( X86FpOp op ) {
467 switch (op) {
468 case Xfp_ADD: return "add";
469 case Xfp_SUB: return "sub";
470 case Xfp_MUL: return "mul";
471 case Xfp_DIV: return "div";
472 case Xfp_SCALE: return "scale";
473 case Xfp_ATAN: return "atan";
474 case Xfp_YL2X: return "yl2x";
475 case Xfp_YL2XP1: return "yl2xp1";
476 case Xfp_PREM: return "prem";
477 case Xfp_PREM1: return "prem1";
478 case Xfp_SQRT: return "sqrt";
479 case Xfp_ABS: return "abs";
480 case Xfp_NEG: return "chs";
481 case Xfp_MOV: return "mov";
482 case Xfp_SIN: return "sin";
483 case Xfp_COS: return "cos";
484 case Xfp_TAN: return "tan";
485 case Xfp_ROUND: return "round";
486 case Xfp_2XM1: return "2xm1";
487 default: vpanic("showX86FpOp");
488 }
489 }
490
491 const HChar* showX86SseOp ( X86SseOp op ) {
492 switch (op) {
493 case Xsse_MOV: return "mov(?!)";
494 case Xsse_ADDF: return "add";
495 case Xsse_SUBF: return "sub";
496 case Xsse_MULF: return "mul";
497 case Xsse_DIVF: return "div";
498 case Xsse_MAXF: return "max";
499 case Xsse_MINF: return "min";
500 case Xsse_CMPEQF: return "cmpFeq";
501 case Xsse_CMPLTF: return "cmpFlt";
502 case Xsse_CMPLEF: return "cmpFle";
503 case Xsse_CMPUNF: return "cmpFun";
504 case Xsse_RCPF: return "rcp";
505 case Xsse_RSQRTF: return "rsqrt";
506 case Xsse_SQRTF: return "sqrt";
507 case Xsse_AND: return "and";
508 case Xsse_OR: return "or";
509 case Xsse_XOR: return "xor";
510 case Xsse_ANDN: return "andn";
511 case Xsse_ADD8: return "paddb";
512 case Xsse_ADD16: return "paddw";
513 case Xsse_ADD32: return "paddd";
514 case Xsse_ADD64: return "paddq";
515 case Xsse_QADD8U: return "paddusb";
516 case Xsse_QADD16U: return "paddusw";
517 case Xsse_QADD8S: return "paddsb";
518 case Xsse_QADD16S: return "paddsw";
519 case Xsse_SUB8: return "psubb";
520 case Xsse_SUB16: return "psubw";
521 case Xsse_SUB32: return "psubd";
522 case Xsse_SUB64: return "psubq";
523 case Xsse_QSUB8U: return "psubusb";
524 case Xsse_QSUB16U: return "psubusw";
525 case Xsse_QSUB8S: return "psubsb";
526 case Xsse_QSUB16S: return "psubsw";
527 case Xsse_MUL16: return "pmullw";
528 case Xsse_MULHI16U: return "pmulhuw";
529 case Xsse_MULHI16S: return "pmulhw";
530 case Xsse_AVG8U: return "pavgb";
531 case Xsse_AVG16U: return "pavgw";
532 case Xsse_MAX16S: return "pmaxw";
533 case Xsse_MAX8U: return "pmaxub";
534 case Xsse_MIN16S: return "pminw";
535 case Xsse_MIN8U: return "pminub";
536 case Xsse_CMPEQ8: return "pcmpeqb";
537 case Xsse_CMPEQ16: return "pcmpeqw";
538 case Xsse_CMPEQ32: return "pcmpeqd";
539 case Xsse_CMPGT8S: return "pcmpgtb";
540 case Xsse_CMPGT16S: return "pcmpgtw";
541 case Xsse_CMPGT32S: return "pcmpgtd";
542 case Xsse_SHL16: return "psllw";
543 case Xsse_SHL32: return "pslld";
544 case Xsse_SHL64: return "psllq";
545 case Xsse_SHR16: return "psrlw";
546 case Xsse_SHR32: return "psrld";
547 case Xsse_SHR64: return "psrlq";
548 case Xsse_SAR16: return "psraw";
549 case Xsse_SAR32: return "psrad";
550 case Xsse_PACKSSD: return "packssdw";
551 case Xsse_PACKSSW: return "packsswb";
552 case Xsse_PACKUSW: return "packuswb";
553 case Xsse_UNPCKHB: return "punpckhb";
554 case Xsse_UNPCKHW: return "punpckhw";
555 case Xsse_UNPCKHD: return "punpckhd";
556 case Xsse_UNPCKHQ: return "punpckhq";
557 case Xsse_UNPCKLB: return "punpcklb";
558 case Xsse_UNPCKLW: return "punpcklw";
559 case Xsse_UNPCKLD: return "punpckld";
560 case Xsse_UNPCKLQ: return "punpcklq";
561 default: vpanic("showX86SseOp");
562 }
563 }
564
565 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
566 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
567 i->tag = Xin_Alu32R;
568 i->Xin.Alu32R.op = op;
569 i->Xin.Alu32R.src = src;
570 i->Xin.Alu32R.dst = dst;
571 return i;
572 }
573 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
574 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
575 i->tag = Xin_Alu32M;
576 i->Xin.Alu32M.op = op;
577 i->Xin.Alu32M.src = src;
578 i->Xin.Alu32M.dst = dst;
579 vassert(op != Xalu_MUL);
580 return i;
581 }
582 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
583 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
584 i->tag = Xin_Sh32;
585 i->Xin.Sh32.op = op;
586 i->Xin.Sh32.src = src;
587 i->Xin.Sh32.dst = dst;
588 return i;
589 }
590 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
591 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
592 i->tag = Xin_Test32;
593 i->Xin.Test32.imm32 = imm32;
594 i->Xin.Test32.dst = dst;
595 return i;
596 }
597 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
598 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
599 i->tag = Xin_Unary32;
600 i->Xin.Unary32.op = op;
601 i->Xin.Unary32.dst = dst;
602 return i;
603 }
604 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
605 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
606 i->tag = Xin_Lea32;
607 i->Xin.Lea32.am = am;
608 i->Xin.Lea32.dst = dst;
609 return i;
610 }
611 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
612 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
613 i->tag = Xin_MulL;
614 i->Xin.MulL.syned = syned;
615 i->Xin.MulL.src = src;
616 return i;
617 }
618 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
619 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
620 i->tag = Xin_Div;
621 i->Xin.Div.syned = syned;
622 i->Xin.Div.src = src;
623 return i;
624 }
625 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
626 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
627 i->tag = Xin_Sh3232;
628 i->Xin.Sh3232.op = op;
629 i->Xin.Sh3232.amt = amt;
630 i->Xin.Sh3232.src = src;
631 i->Xin.Sh3232.dst = dst;
632 vassert(op == Xsh_SHL || op == Xsh_SHR);
633 return i;
634 }
635 X86Instr* X86Instr_Push( X86RMI* src ) {
636 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
637 i->tag = Xin_Push;
638 i->Xin.Push.src = src;
639 return i;
640 }
641 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
642 RetLoc rloc ) {
643 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
644 i->tag = Xin_Call;
645 i->Xin.Call.cond = cond;
646 i->Xin.Call.target = target;
647 i->Xin.Call.regparms = regparms;
648 i->Xin.Call.rloc = rloc;
649 vassert(regparms >= 0 && regparms <= 3);
650 vassert(is_sane_RetLoc(rloc));
651 return i;
652 }
653 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
654 X86CondCode cond, Bool toFastEP ) {
655 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
656 i->tag = Xin_XDirect;
657 i->Xin.XDirect.dstGA = dstGA;
658 i->Xin.XDirect.amEIP = amEIP;
659 i->Xin.XDirect.cond = cond;
660 i->Xin.XDirect.toFastEP = toFastEP;
661 return i;
662 }
663 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
664 X86CondCode cond ) {
665 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
666 i->tag = Xin_XIndir;
667 i->Xin.XIndir.dstGA = dstGA;
668 i->Xin.XIndir.amEIP = amEIP;
669 i->Xin.XIndir.cond = cond;
670 return i;
671 }
672 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
673 X86CondCode cond, IRJumpKind jk ) {
674 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
675 i->tag = Xin_XAssisted;
676 i->Xin.XAssisted.dstGA = dstGA;
677 i->Xin.XAssisted.amEIP = amEIP;
678 i->Xin.XAssisted.cond = cond;
679 i->Xin.XAssisted.jk = jk;
680 return i;
681 }
682 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
683 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
684 i->tag = Xin_CMov32;
685 i->Xin.CMov32.cond = cond;
686 i->Xin.CMov32.src = src;
687 i->Xin.CMov32.dst = dst;
688 vassert(cond != Xcc_ALWAYS);
689 return i;
690 }
691 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
692 X86AMode* src, HReg dst ) {
693 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
694 i->tag = Xin_LoadEX;
695 i->Xin.LoadEX.szSmall = szSmall;
696 i->Xin.LoadEX.syned = syned;
697 i->Xin.LoadEX.src = src;
698 i->Xin.LoadEX.dst = dst;
699 vassert(szSmall == 1 || szSmall == 2);
700 return i;
701 }
702 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
703 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
704 i->tag = Xin_Store;
705 i->Xin.Store.sz = sz;
706 i->Xin.Store.src = src;
707 i->Xin.Store.dst = dst;
708 vassert(sz == 1 || sz == 2);
709 return i;
710 }
711 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
712 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
713 i->tag = Xin_Set32;
714 i->Xin.Set32.cond = cond;
715 i->Xin.Set32.dst = dst;
716 return i;
717 }
718 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
719 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
720 i->tag = Xin_Bsfr32;
721 i->Xin.Bsfr32.isFwds = isFwds;
722 i->Xin.Bsfr32.src = src;
723 i->Xin.Bsfr32.dst = dst;
724 return i;
725 }
726 X86Instr* X86Instr_MFence ( UInt hwcaps ) {
727 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
728 i->tag = Xin_MFence;
729 i->Xin.MFence.hwcaps = hwcaps;
730 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
731 |VEX_HWCAPS_X86_SSE1
732 |VEX_HWCAPS_X86_SSE2
733 |VEX_HWCAPS_X86_SSE3
734 |VEX_HWCAPS_X86_LZCNT)));
735 return i;
736 }
737 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
738 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
739 i->tag = Xin_ACAS;
740 i->Xin.ACAS.addr = addr;
741 i->Xin.ACAS.sz = sz;
742 vassert(sz == 4 || sz == 2 || sz == 1);
743 return i;
744 }
745 X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
746 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
747 i->tag = Xin_DACAS;
748 i->Xin.DACAS.addr = addr;
749 return i;
750 }
751
752 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
753 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
754 i->tag = Xin_FpUnary;
755 i->Xin.FpUnary.op = op;
756 i->Xin.FpUnary.src = src;
757 i->Xin.FpUnary.dst = dst;
758 return i;
759 }
760 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
761 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
762 i->tag = Xin_FpBinary;
763 i->Xin.FpBinary.op = op;
764 i->Xin.FpBinary.srcL = srcL;
765 i->Xin.FpBinary.srcR = srcR;
766 i->Xin.FpBinary.dst = dst;
767 return i;
768 }
769 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
770 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
771 i->tag = Xin_FpLdSt;
772 i->Xin.FpLdSt.isLoad = isLoad;
773 i->Xin.FpLdSt.sz = sz;
774 i->Xin.FpLdSt.reg = reg;
775 i->Xin.FpLdSt.addr = addr;
776 vassert(sz == 4 || sz == 8 || sz == 10);
777 return i;
778 }
779 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
780 HReg reg, X86AMode* addr ) {
781 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
782 i->tag = Xin_FpLdStI;
783 i->Xin.FpLdStI.isLoad = isLoad;
784 i->Xin.FpLdStI.sz = sz;
785 i->Xin.FpLdStI.reg = reg;
786 i->Xin.FpLdStI.addr = addr;
787 vassert(sz == 2 || sz == 4 || sz == 8);
788 return i;
789 }
790 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
791 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
792 i->tag = Xin_Fp64to32;
793 i->Xin.Fp64to32.src = src;
794 i->Xin.Fp64to32.dst = dst;
795 return i;
796 }
797 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
798 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
799 i->tag = Xin_FpCMov;
800 i->Xin.FpCMov.cond = cond;
801 i->Xin.FpCMov.src = src;
802 i->Xin.FpCMov.dst = dst;
803 vassert(cond != Xcc_ALWAYS);
804 return i;
805 }
806 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
807 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
808 i->tag = Xin_FpLdCW;
809 i->Xin.FpLdCW.addr = addr;
810 return i;
811 }
812 X86Instr* X86Instr_FpStSW_AX ( void ) {
813 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
814 i->tag = Xin_FpStSW_AX;
815 return i;
816 }
817 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
818 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
819 i->tag = Xin_FpCmp;
820 i->Xin.FpCmp.srcL = srcL;
821 i->Xin.FpCmp.srcR = srcR;
822 i->Xin.FpCmp.dst = dst;
823 return i;
824 }
825 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
826 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
827 i->tag = Xin_SseConst;
828 i->Xin.SseConst.con = con;
829 i->Xin.SseConst.dst = dst;
830 vassert(hregClass(dst) == HRcVec128);
831 return i;
832 }
833 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
834 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
835 i->tag = Xin_SseLdSt;
836 i->Xin.SseLdSt.isLoad = isLoad;
837 i->Xin.SseLdSt.reg = reg;
838 i->Xin.SseLdSt.addr = addr;
839 return i;
840 }
841 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
842 {
843 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
844 i->tag = Xin_SseLdzLO;
845 i->Xin.SseLdzLO.sz = toUChar(sz);
846 i->Xin.SseLdzLO.reg = reg;
847 i->Xin.SseLdzLO.addr = addr;
848 vassert(sz == 4 || sz == 8);
849 return i;
850 }
851 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
852 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
853 i->tag = Xin_Sse32Fx4;
854 i->Xin.Sse32Fx4.op = op;
855 i->Xin.Sse32Fx4.src = src;
856 i->Xin.Sse32Fx4.dst = dst;
857 vassert(op != Xsse_MOV);
858 return i;
859 }
860 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
861 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
862 i->tag = Xin_Sse32FLo;
863 i->Xin.Sse32FLo.op = op;
864 i->Xin.Sse32FLo.src = src;
865 i->Xin.Sse32FLo.dst = dst;
866 vassert(op != Xsse_MOV);
867 return i;
868 }
869 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
870 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
871 i->tag = Xin_Sse64Fx2;
872 i->Xin.Sse64Fx2.op = op;
873 i->Xin.Sse64Fx2.src = src;
874 i->Xin.Sse64Fx2.dst = dst;
875 vassert(op != Xsse_MOV);
876 return i;
877 }
878 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
879 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
880 i->tag = Xin_Sse64FLo;
881 i->Xin.Sse64FLo.op = op;
882 i->Xin.Sse64FLo.src = src;
883 i->Xin.Sse64FLo.dst = dst;
884 vassert(op != Xsse_MOV);
885 return i;
886 }
887 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
888 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
889 i->tag = Xin_SseReRg;
890 i->Xin.SseReRg.op = op;
891 i->Xin.SseReRg.src = re;
892 i->Xin.SseReRg.dst = rg;
893 return i;
894 }
895 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
896 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
897 i->tag = Xin_SseCMov;
898 i->Xin.SseCMov.cond = cond;
899 i->Xin.SseCMov.src = src;
900 i->Xin.SseCMov.dst = dst;
901 vassert(cond != Xcc_ALWAYS);
902 return i;
903 }
904 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
905 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
906 i->tag = Xin_SseShuf;
907 i->Xin.SseShuf.order = order;
908 i->Xin.SseShuf.src = src;
909 i->Xin.SseShuf.dst = dst;
910 vassert(order >= 0 && order <= 0xFF);
911 return i;
912 }
913 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
914 X86AMode* amFailAddr ) {
915 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
916 i->tag = Xin_EvCheck;
917 i->Xin.EvCheck.amCounter = amCounter;
918 i->Xin.EvCheck.amFailAddr = amFailAddr;
919 return i;
920 }
921 X86Instr* X86Instr_ProfInc ( void ) {
922 X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
923 i->tag = Xin_ProfInc;
924 return i;
925 }
926
927 void ppX86Instr ( X86Instr* i, Bool mode64 ) {
928 vassert(mode64 == False);
929 switch (i->tag) {
930 case Xin_Alu32R:
931 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
932 ppX86RMI(i->Xin.Alu32R.src);
933 vex_printf(",");
934 ppHRegX86(i->Xin.Alu32R.dst);
935 return;
936 case Xin_Alu32M:
937 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
938 ppX86RI(i->Xin.Alu32M.src);
939 vex_printf(",");
940 ppX86AMode(i->Xin.Alu32M.dst);
941 return;
942 case Xin_Sh32:
943 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
944 if (i->Xin.Sh32.src == 0)
945 vex_printf("%%cl,");
946 else
947 vex_printf("$%d,", (Int)i->Xin.Sh32.src);
948 ppHRegX86(i->Xin.Sh32.dst);
949 return;
950 case Xin_Test32:
951 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
952 ppX86RM(i->Xin.Test32.dst);
953 return;
954 case Xin_Unary32:
955 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
956 ppHRegX86(i->Xin.Unary32.dst);
957 return;
958 case Xin_Lea32:
959 vex_printf("leal ");
960 ppX86AMode(i->Xin.Lea32.am);
961 vex_printf(",");
962 ppHRegX86(i->Xin.Lea32.dst);
963 return;
964 case Xin_MulL:
965 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
966 ppX86RM(i->Xin.MulL.src);
967 return;
968 case Xin_Div:
969 vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
970 ppX86RM(i->Xin.Div.src);
971 return;
972 case Xin_Sh3232:
973 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
974 if (i->Xin.Sh3232.amt == 0)
975 vex_printf(" %%cl,");
976 else
977 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
978 ppHRegX86(i->Xin.Sh3232.src);
979 vex_printf(",");
980 ppHRegX86(i->Xin.Sh3232.dst);
981 return;
982 case Xin_Push:
983 vex_printf("pushl ");
984 ppX86RMI(i->Xin.Push.src);
985 return;
986 case Xin_Call:
987 vex_printf("call%s[%d,",
988 i->Xin.Call.cond==Xcc_ALWAYS
989 ? "" : showX86CondCode(i->Xin.Call.cond),
990 i->Xin.Call.regparms);
991 ppRetLoc(i->Xin.Call.rloc);
992 vex_printf("] 0x%x", i->Xin.Call.target);
993 break;
994 case Xin_XDirect:
995 vex_printf("(xDirect) ");
996 vex_printf("if (%%eflags.%s) { ",
997 showX86CondCode(i->Xin.XDirect.cond));
998 vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
999 ppX86AMode(i->Xin.XDirect.amEIP);
1000 vex_printf("; ");
1001 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
1002 i->Xin.XDirect.toFastEP ? "fast" : "slow");
1003 return;
1004 case Xin_XIndir:
1005 vex_printf("(xIndir) ");
1006 vex_printf("if (%%eflags.%s) { movl ",
1007 showX86CondCode(i->Xin.XIndir.cond));
1008 ppHRegX86(i->Xin.XIndir.dstGA);
1009 vex_printf(",");
1010 ppX86AMode(i->Xin.XIndir.amEIP);
1011 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
1012 return;
1013 case Xin_XAssisted:
1014 vex_printf("(xAssisted) ");
1015 vex_printf("if (%%eflags.%s) { ",
1016 showX86CondCode(i->Xin.XAssisted.cond));
1017 vex_printf("movl ");
1018 ppHRegX86(i->Xin.XAssisted.dstGA);
1019 vex_printf(",");
1020 ppX86AMode(i->Xin.XAssisted.amEIP);
1021 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
1022 (Int)i->Xin.XAssisted.jk);
1023 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
1024 return;
1025 case Xin_CMov32:
1026 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
1027 ppX86RM(i->Xin.CMov32.src);
1028 vex_printf(",");
1029 ppHRegX86(i->Xin.CMov32.dst);
1030 return;
1031 case Xin_LoadEX:
1032 vex_printf("mov%c%cl ",
1033 i->Xin.LoadEX.syned ? 's' : 'z',
1034 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
1035 ppX86AMode(i->Xin.LoadEX.src);
1036 vex_printf(",");
1037 ppHRegX86(i->Xin.LoadEX.dst);
1038 return;
1039 case Xin_Store:
1040 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
1041 ppHRegX86(i->Xin.Store.src);
1042 vex_printf(",");
1043 ppX86AMode(i->Xin.Store.dst);
1044 return;
1045 case Xin_Set32:
1046 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
1047 ppHRegX86(i->Xin.Set32.dst);
1048 return;
1049 case Xin_Bsfr32:
1050 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
1051 ppHRegX86(i->Xin.Bsfr32.src);
1052 vex_printf(",");
1053 ppHRegX86(i->Xin.Bsfr32.dst);
1054 return;
1055 case Xin_MFence:
1056 vex_printf("mfence(%s)",
1057 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
1058 return;
1059 case Xin_ACAS:
1060 vex_printf("lock cmpxchg%c ",
1061 i->Xin.ACAS.sz==1 ? 'b'
1062 : i->Xin.ACAS.sz==2 ? 'w' : 'l');
1063 vex_printf("{%%eax->%%ebx},");
1064 ppX86AMode(i->Xin.ACAS.addr);
1065 return;
1066 case Xin_DACAS:
1067 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
1068 ppX86AMode(i->Xin.DACAS.addr);
1069 return;
1070 case Xin_FpUnary:
1071 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
1072 ppHRegX86(i->Xin.FpUnary.src);
1073 vex_printf(",");
1074 ppHRegX86(i->Xin.FpUnary.dst);
1075 break;
1076 case Xin_FpBinary:
1077 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
1078 ppHRegX86(i->Xin.FpBinary.srcL);
1079 vex_printf(",");
1080 ppHRegX86(i->Xin.FpBinary.srcR);
1081 vex_printf(",");
1082 ppHRegX86(i->Xin.FpBinary.dst);
1083 break;
1084 case Xin_FpLdSt:
1085 if (i->Xin.FpLdSt.isLoad) {
1086 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1087 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1088 ppX86AMode(i->Xin.FpLdSt.addr);
1089 vex_printf(", ");
1090 ppHRegX86(i->Xin.FpLdSt.reg);
1091 } else {
1092 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1093 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1094 ppHRegX86(i->Xin.FpLdSt.reg);
1095 vex_printf(", ");
1096 ppX86AMode(i->Xin.FpLdSt.addr);
1097 }
1098 return;
1099 case Xin_FpLdStI:
1100 if (i->Xin.FpLdStI.isLoad) {
1101 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1102 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1103 ppX86AMode(i->Xin.FpLdStI.addr);
1104 vex_printf(", ");
1105 ppHRegX86(i->Xin.FpLdStI.reg);
1106 } else {
1107 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1108 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1109 ppHRegX86(i->Xin.FpLdStI.reg);
1110 vex_printf(", ");
1111 ppX86AMode(i->Xin.FpLdStI.addr);
1112 }
1113 return;
1114 case Xin_Fp64to32:
1115 vex_printf("gdtof ");
1116 ppHRegX86(i->Xin.Fp64to32.src);
1117 vex_printf(",");
1118 ppHRegX86(i->Xin.Fp64to32.dst);
1119 return;
1120 case Xin_FpCMov:
1121 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
1122 ppHRegX86(i->Xin.FpCMov.src);
1123 vex_printf(",");
1124 ppHRegX86(i->Xin.FpCMov.dst);
1125 return;
1126 case Xin_FpLdCW:
1127 vex_printf("fldcw ");
1128 ppX86AMode(i->Xin.FpLdCW.addr);
1129 return;
1130 case Xin_FpStSW_AX:
1131 vex_printf("fstsw %%ax");
1132 return;
1133 case Xin_FpCmp:
1134 vex_printf("gcmp ");
1135 ppHRegX86(i->Xin.FpCmp.srcL);
1136 vex_printf(",");
1137 ppHRegX86(i->Xin.FpCmp.srcR);
1138 vex_printf(",");
1139 ppHRegX86(i->Xin.FpCmp.dst);
1140 break;
1141 case Xin_SseConst:
1142 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
1143 ppHRegX86(i->Xin.SseConst.dst);
1144 break;
1145 case Xin_SseLdSt:
1146 vex_printf("movups ");
1147 if (i->Xin.SseLdSt.isLoad) {
1148 ppX86AMode(i->Xin.SseLdSt.addr);
1149 vex_printf(",");
1150 ppHRegX86(i->Xin.SseLdSt.reg);
1151 } else {
1152 ppHRegX86(i->Xin.SseLdSt.reg);
1153 vex_printf(",");
1154 ppX86AMode(i->Xin.SseLdSt.addr);
1155 }
1156 return;
1157 case Xin_SseLdzLO:
1158 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
1159 ppX86AMode(i->Xin.SseLdzLO.addr);
1160 vex_printf(",");
1161 ppHRegX86(i->Xin.SseLdzLO.reg);
1162 return;
1163 case Xin_Sse32Fx4:
1164 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
1165 ppHRegX86(i->Xin.Sse32Fx4.src);
1166 vex_printf(",");
1167 ppHRegX86(i->Xin.Sse32Fx4.dst);
1168 return;
1169 case Xin_Sse32FLo:
1170 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
1171 ppHRegX86(i->Xin.Sse32FLo.src);
1172 vex_printf(",");
1173 ppHRegX86(i->Xin.Sse32FLo.dst);
1174 return;
1175 case Xin_Sse64Fx2:
1176 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
1177 ppHRegX86(i->Xin.Sse64Fx2.src);
1178 vex_printf(",");
1179 ppHRegX86(i->Xin.Sse64Fx2.dst);
1180 return;
1181 case Xin_Sse64FLo:
1182 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
1183 ppHRegX86(i->Xin.Sse64FLo.src);
1184 vex_printf(",");
1185 ppHRegX86(i->Xin.Sse64FLo.dst);
1186 return;
1187 case Xin_SseReRg:
1188 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
1189 ppHRegX86(i->Xin.SseReRg.src);
1190 vex_printf(",");
1191 ppHRegX86(i->Xin.SseReRg.dst);
1192 return;
1193 case Xin_SseCMov:
1194 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
1195 ppHRegX86(i->Xin.SseCMov.src);
1196 vex_printf(",");
1197 ppHRegX86(i->Xin.SseCMov.dst);
1198 return;
1199 case Xin_SseShuf:
1200 vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
1201 ppHRegX86(i->Xin.SseShuf.src);
1202 vex_printf(",");
1203 ppHRegX86(i->Xin.SseShuf.dst);
1204 return;
1205 case Xin_EvCheck:
1206 vex_printf("(evCheck) decl ");
1207 ppX86AMode(i->Xin.EvCheck.amCounter);
1208 vex_printf("; jns nofail; jmp *");
1209 ppX86AMode(i->Xin.EvCheck.amFailAddr);
1210 vex_printf("; nofail:");
1211 return;
1212 case Xin_ProfInc:
1213 vex_printf("(profInc) addl $1,NotKnownYet; "
1214 "adcl $0,NotKnownYet+4");
1215 return;
1216 default:
1217 vpanic("ppX86Instr");
1218 }
1219 }
1220
1221 /* --------- Helpers for register allocation. --------- */
1222
1223 void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
1224 {
1225 Bool unary;
1226 vassert(mode64 == False);
1227 initHRegUsage(u);
1228 switch (i->tag) {
1229 case Xin_Alu32R:
1230 addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
1231 if (i->Xin.Alu32R.op == Xalu_MOV) {
1232 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
1233 return;
1234 }
1235 if (i->Xin.Alu32R.op == Xalu_CMP) {
1236 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
1237 return;
1238 }
1239 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
1240 return;
1241 case Xin_Alu32M:
1242 addRegUsage_X86RI(u, i->Xin.Alu32M.src);
1243 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
1244 return;
1245 case Xin_Sh32:
1246 addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
1247 if (i->Xin.Sh32.src == 0)
1248 addHRegUse(u, HRmRead, hregX86_ECX());
1249 return;
1250 case Xin_Test32:
1251 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
1252 return;
1253 case Xin_Unary32:
1254 addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
1255 return;
1256 case Xin_Lea32:
1257 addRegUsage_X86AMode(u, i->Xin.Lea32.am);
1258 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
1259 return;
1260 case Xin_MulL:
1261 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
1262 addHRegUse(u, HRmModify, hregX86_EAX());
1263 addHRegUse(u, HRmWrite, hregX86_EDX());
1264 return;
1265 case Xin_Div:
1266 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
1267 addHRegUse(u, HRmModify, hregX86_EAX());
1268 addHRegUse(u, HRmModify, hregX86_EDX());
1269 return;
1270 case Xin_Sh3232:
1271 addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
1272 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
1273 if (i->Xin.Sh3232.amt == 0)
1274 addHRegUse(u, HRmRead, hregX86_ECX());
1275 return;
1276 case Xin_Push:
1277 addRegUsage_X86RMI(u, i->Xin.Push.src);
1278 addHRegUse(u, HRmModify, hregX86_ESP());
1279 return;
1280 case Xin_Call:
1281 /* This is a bit subtle. */
1282 /* First off, claim it trashes all the caller-saved regs
1283 which fall within the register allocator's jurisdiction.
1284 These I believe to be %eax %ecx %edx and all the xmm
1285 registers. */
1286 addHRegUse(u, HRmWrite, hregX86_EAX());
1287 addHRegUse(u, HRmWrite, hregX86_ECX());
1288 addHRegUse(u, HRmWrite, hregX86_EDX());
1289 addHRegUse(u, HRmWrite, hregX86_XMM0());
1290 addHRegUse(u, HRmWrite, hregX86_XMM1());
1291 addHRegUse(u, HRmWrite, hregX86_XMM2());
1292 addHRegUse(u, HRmWrite, hregX86_XMM3());
1293 addHRegUse(u, HRmWrite, hregX86_XMM4());
1294 addHRegUse(u, HRmWrite, hregX86_XMM5());
1295 addHRegUse(u, HRmWrite, hregX86_XMM6());
1296 addHRegUse(u, HRmWrite, hregX86_XMM7());
1297 /* Now we have to state any parameter-carrying registers
1298 which might be read. This depends on the regparmness. */
1299 switch (i->Xin.Call.regparms) {
1300 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
1301 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
1302 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
1303 case 0: break;
1304 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1305 }
1306 /* Finally, there is the issue that the insn trashes a
1307 register because the literal target address has to be
1308 loaded into a register. Fortunately, for the 0/1/2
1309 regparm case, we can use EAX, EDX and ECX respectively, so
1310 this does not cause any further damage. For the 3-regparm
1311 case, we'll have to choose another register arbitrarily --
1312 since A, D and C are used for parameters -- and so we might
1313 as well choose EDI. */
1314 if (i->Xin.Call.regparms == 3)
1315 addHRegUse(u, HRmWrite, hregX86_EDI());
1316 /* Upshot of this is that the assembler really must observe
1317 the here-stated convention of which register to use as an
1318 address temporary, depending on the regparmness: 0==EAX,
1319 1==EDX, 2==ECX, 3==EDI. */
1320 return;
1321 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1322 conditionally exit the block. Hence we only need to list (1)
1323 the registers that they read, and (2) the registers that they
1324 write in the case where the block is not exited. (2) is
1325 empty, hence only (1) is relevant here. */
1326 case Xin_XDirect:
1327 addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
1328 return;
1329 case Xin_XIndir:
1330 addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
1331 addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
1332 return;
1333 case Xin_XAssisted:
1334 addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
1335 addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
1336 return;
1337 case Xin_CMov32:
1338 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
1339 addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
1340 return;
1341 case Xin_LoadEX:
1342 addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
1343 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
1344 return;
1345 case Xin_Store:
1346 addHRegUse(u, HRmRead, i->Xin.Store.src);
1347 addRegUsage_X86AMode(u, i->Xin.Store.dst);
1348 return;
1349 case Xin_Set32:
1350 addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
1351 return;
1352 case Xin_Bsfr32:
1353 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
1354 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
1355 return;
1356 case Xin_MFence:
1357 return;
1358 case Xin_ACAS:
1359 addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
1360 addHRegUse(u, HRmRead, hregX86_EBX());
1361 addHRegUse(u, HRmModify, hregX86_EAX());
1362 return;
1363 case Xin_DACAS:
1364 addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
1365 addHRegUse(u, HRmRead, hregX86_ECX());
1366 addHRegUse(u, HRmRead, hregX86_EBX());
1367 addHRegUse(u, HRmModify, hregX86_EDX());
1368 addHRegUse(u, HRmModify, hregX86_EAX());
1369 return;
1370 case Xin_FpUnary:
1371 addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
1372 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
1373 return;
1374 case Xin_FpBinary:
1375 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
1376 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
1377 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
1378 return;
1379 case Xin_FpLdSt:
1380 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
1381 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
1382 i->Xin.FpLdSt.reg);
1383 return;
1384 case Xin_FpLdStI:
1385 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
1386 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
1387 i->Xin.FpLdStI.reg);
1388 return;
1389 case Xin_Fp64to32:
1390 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
1391 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
1392 return;
1393 case Xin_FpCMov:
1394 addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
1395 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
1396 return;
1397 case Xin_FpLdCW:
1398 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
1399 return;
1400 case Xin_FpStSW_AX:
1401 addHRegUse(u, HRmWrite, hregX86_EAX());
1402 return;
1403 case Xin_FpCmp:
1404 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
1405 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
1406 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
1407 addHRegUse(u, HRmWrite, hregX86_EAX());
1408 return;
1409 case Xin_SseLdSt:
1410 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
1411 addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
1412 i->Xin.SseLdSt.reg);
1413 return;
1414 case Xin_SseLdzLO:
1415 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
1416 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
1417 return;
1418 case Xin_SseConst:
1419 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
1420 return;
1421 case Xin_Sse32Fx4:
1422 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
1423 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
1424 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
1425 || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
1426 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
1427 addHRegUse(u, unary ? HRmWrite : HRmModify,
1428 i->Xin.Sse32Fx4.dst);
1429 return;
1430 case Xin_Sse32FLo:
1431 vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
1432 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
1433 || i->Xin.Sse32FLo.op == Xsse_RSQRTF
1434 || i->Xin.Sse32FLo.op == Xsse_SQRTF );
1435 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
1436 addHRegUse(u, unary ? HRmWrite : HRmModify,
1437 i->Xin.Sse32FLo.dst);
1438 return;
1439 case Xin_Sse64Fx2:
1440 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
1441 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
1442 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
1443 || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
1444 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
1445 addHRegUse(u, unary ? HRmWrite : HRmModify,
1446 i->Xin.Sse64Fx2.dst);
1447 return;
1448 case Xin_Sse64FLo:
1449 vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
1450 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
1451 || i->Xin.Sse64FLo.op == Xsse_RSQRTF
1452 || i->Xin.Sse64FLo.op == Xsse_SQRTF );
1453 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
1454 addHRegUse(u, unary ? HRmWrite : HRmModify,
1455 i->Xin.Sse64FLo.dst);
1456 return;
1457 case Xin_SseReRg:
1458 if (i->Xin.SseReRg.op == Xsse_XOR
1459 && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
1460 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1461 /* (as opposed to a rite of passage :-) */
1462 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
1463 } else {
1464 addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
1465 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
1466 ? HRmWrite : HRmModify,
1467 i->Xin.SseReRg.dst);
1468 }
1469 return;
1470 case Xin_SseCMov:
1471 addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
1472 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
1473 return;
1474 case Xin_SseShuf:
1475 addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
1476 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1477 return;
1478 case Xin_EvCheck:
1479 /* We expect both amodes only to mention %ebp, so this is in
1480 fact pointless, since %ebp isn't allocatable, but anyway.. */
1481 addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
1482 addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
1483 return;
1484 case Xin_ProfInc:
1485 /* does not use any registers. */
1486 return;
1487 default:
1488 ppX86Instr(i, False);
1489 vpanic("getRegUsage_X86Instr");
1490 }
1491 }
1492
1493 /* local helper */
1494 static void mapReg( HRegRemap* m, HReg* r )
1495 {
1496 *r = lookupHRegRemap(m, *r);
1497 }
1498
1499 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
1500 {
1501 vassert(mode64 == False);
1502 switch (i->tag) {
1503 case Xin_Alu32R:
1504 mapRegs_X86RMI(m, i->Xin.Alu32R.src);
1505 mapReg(m, &i->Xin.Alu32R.dst);
1506 return;
1507 case Xin_Alu32M:
1508 mapRegs_X86RI(m, i->Xin.Alu32M.src);
1509 mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
1510 return;
1511 case Xin_Sh32:
1512 mapReg(m, &i->Xin.Sh32.dst);
1513 return;
1514 case Xin_Test32:
1515 mapRegs_X86RM(m, i->Xin.Test32.dst);
1516 return;
1517 case Xin_Unary32:
1518 mapReg(m, &i->Xin.Unary32.dst);
1519 return;
1520 case Xin_Lea32:
1521 mapRegs_X86AMode(m, i->Xin.Lea32.am);
1522 mapReg(m, &i->Xin.Lea32.dst);
1523 return;
1524 case Xin_MulL:
1525 mapRegs_X86RM(m, i->Xin.MulL.src);
1526 return;
1527 case Xin_Div:
1528 mapRegs_X86RM(m, i->Xin.Div.src);
1529 return;
1530 case Xin_Sh3232:
1531 mapReg(m, &i->Xin.Sh3232.src);
1532 mapReg(m, &i->Xin.Sh3232.dst);
1533 return;
1534 case Xin_Push:
1535 mapRegs_X86RMI(m, i->Xin.Push.src);
1536 return;
1537 case Xin_Call:
1538 return;
1539 case Xin_XDirect:
1540 mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
1541 return;
1542 case Xin_XIndir:
1543 mapReg(m, &i->Xin.XIndir.dstGA);
1544 mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
1545 return;
1546 case Xin_XAssisted:
1547 mapReg(m, &i->Xin.XAssisted.dstGA);
1548 mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
1549 return;
1550 case Xin_CMov32:
1551 mapRegs_X86RM(m, i->Xin.CMov32.src);
1552 mapReg(m, &i->Xin.CMov32.dst);
1553 return;
1554 case Xin_LoadEX:
1555 mapRegs_X86AMode(m, i->Xin.LoadEX.src);
1556 mapReg(m, &i->Xin.LoadEX.dst);
1557 return;
1558 case Xin_Store:
1559 mapReg(m, &i->Xin.Store.src);
1560 mapRegs_X86AMode(m, i->Xin.Store.dst);
1561 return;
1562 case Xin_Set32:
1563 mapReg(m, &i->Xin.Set32.dst);
1564 return;
1565 case Xin_Bsfr32:
1566 mapReg(m, &i->Xin.Bsfr32.src);
1567 mapReg(m, &i->Xin.Bsfr32.dst);
1568 return;
1569 case Xin_MFence:
1570 return;
1571 case Xin_ACAS:
1572 mapRegs_X86AMode(m, i->Xin.ACAS.addr);
1573 return;
1574 case Xin_DACAS:
1575 mapRegs_X86AMode(m, i->Xin.DACAS.addr);
1576 return;
1577 case Xin_FpUnary:
1578 mapReg(m, &i->Xin.FpUnary.src);
1579 mapReg(m, &i->Xin.FpUnary.dst);
1580 return;
1581 case Xin_FpBinary:
1582 mapReg(m, &i->Xin.FpBinary.srcL);
1583 mapReg(m, &i->Xin.FpBinary.srcR);
1584 mapReg(m, &i->Xin.FpBinary.dst);
1585 return;
1586 case Xin_FpLdSt:
1587 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
1588 mapReg(m, &i->Xin.FpLdSt.reg);
1589 return;
1590 case Xin_FpLdStI:
1591 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
1592 mapReg(m, &i->Xin.FpLdStI.reg);
1593 return;
1594 case Xin_Fp64to32:
1595 mapReg(m, &i->Xin.Fp64to32.src);
1596 mapReg(m, &i->Xin.Fp64to32.dst);
1597 return;
1598 case Xin_FpCMov:
1599 mapReg(m, &i->Xin.FpCMov.src);
1600 mapReg(m, &i->Xin.FpCMov.dst);
1601 return;
1602 case Xin_FpLdCW:
1603 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
1604 return;
1605 case Xin_FpStSW_AX:
1606 return;
1607 case Xin_FpCmp:
1608 mapReg(m, &i->Xin.FpCmp.srcL);
1609 mapReg(m, &i->Xin.FpCmp.srcR);
1610 mapReg(m, &i->Xin.FpCmp.dst);
1611 return;
1612 case Xin_SseConst:
1613 mapReg(m, &i->Xin.SseConst.dst);
1614 return;
1615 case Xin_SseLdSt:
1616 mapReg(m, &i->Xin.SseLdSt.reg);
1617 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
1618 break;
1619 case Xin_SseLdzLO:
1620 mapReg(m, &i->Xin.SseLdzLO.reg);
1621 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
1622 break;
1623 case Xin_Sse32Fx4:
1624 mapReg(m, &i->Xin.Sse32Fx4.src);
1625 mapReg(m, &i->Xin.Sse32Fx4.dst);
1626 return;
1627 case Xin_Sse32FLo:
1628 mapReg(m, &i->Xin.Sse32FLo.src);
1629 mapReg(m, &i->Xin.Sse32FLo.dst);
1630 return;
1631 case Xin_Sse64Fx2:
1632 mapReg(m, &i->Xin.Sse64Fx2.src);
1633 mapReg(m, &i->Xin.Sse64Fx2.dst);
1634 return;
1635 case Xin_Sse64FLo:
1636 mapReg(m, &i->Xin.Sse64FLo.src);
1637 mapReg(m, &i->Xin.Sse64FLo.dst);
1638 return;
1639 case Xin_SseReRg:
1640 mapReg(m, &i->Xin.SseReRg.src);
1641 mapReg(m, &i->Xin.SseReRg.dst);
1642 return;
1643 case Xin_SseCMov:
1644 mapReg(m, &i->Xin.SseCMov.src);
1645 mapReg(m, &i->Xin.SseCMov.dst);
1646 return;
1647 case Xin_SseShuf:
1648 mapReg(m, &i->Xin.SseShuf.src);
1649 mapReg(m, &i->Xin.SseShuf.dst);
1650 return;
1651 case Xin_EvCheck:
1652 /* We expect both amodes only to mention %ebp, so this is in
1653 fact pointless, since %ebp isn't allocatable, but anyway.. */
1654 mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
1655 mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
1656 return;
1657 case Xin_ProfInc:
1658 /* does not use any registers. */
1659 return;
1660
1661 default:
1662 ppX86Instr(i, mode64);
1663 vpanic("mapRegs_X86Instr");
1664 }
1665 }
1666
1667 /* Figure out if i represents a reg-reg move, and if so assign the
1668 source and destination to *src and *dst. If in doubt say No. Used
1669 by the register allocator to do move coalescing.
1670 */
1671 Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
1672 {
1673 /* Moves between integer regs */
1674 if (i->tag == Xin_Alu32R) {
1675 if (i->Xin.Alu32R.op != Xalu_MOV)
1676 return False;
1677 if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
1678 return False;
1679 *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
1680 *dst = i->Xin.Alu32R.dst;
1681 return True;
1682 }
1683 /* Moves between FP regs */
1684 if (i->tag == Xin_FpUnary) {
1685 if (i->Xin.FpUnary.op != Xfp_MOV)
1686 return False;
1687 *src = i->Xin.FpUnary.src;
1688 *dst = i->Xin.FpUnary.dst;
1689 return True;
1690 }
1691 if (i->tag == Xin_SseReRg) {
1692 if (i->Xin.SseReRg.op != Xsse_MOV)
1693 return False;
1694 *src = i->Xin.SseReRg.src;
1695 *dst = i->Xin.SseReRg.dst;
1696 return True;
1697 }
1698 return False;
1699 }
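/* Added example for clarity: a register-to-register copy such as
   "movl %vr1,%vr2" reaches this function as
   Xin_Alu32R(Xalu_MOV, X86RMI_Reg(vr1), vr2) and is reported as a
   coalescable move; a MOV whose source is an immediate or a memory
   operand is (rightly) not. */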
1700
1701
1702 /* Generate x86 spill/reload instructions under the direction of the
1703 register allocator. Note it's critical these don't write the
1704 condition codes. */
1705
1706 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1707 HReg rreg, Int offsetB, Bool mode64 )
1708 {
1709 X86AMode* am;
1710 vassert(offsetB >= 0);
1711 vassert(!hregIsVirtual(rreg));
1712 vassert(mode64 == False);
1713 *i1 = *i2 = NULL;
1714 am = X86AMode_IR(offsetB, hregX86_EBP());
1715 switch (hregClass(rreg)) {
1716 case HRcInt32:
1717 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
1718 return;
1719 case HRcFlt64:
1720 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
1721 return;
1722 case HRcVec128:
1723 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
1724 return;
1725 default:
1726 ppHRegClass(hregClass(rreg));
1727 vpanic("genSpill_X86: unimplemented regclass");
1728 }
1729 }
1730
1731 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1732 HReg rreg, Int offsetB, Bool mode64 )
1733 {
1734 X86AMode* am;
1735 vassert(offsetB >= 0);
1736 vassert(!hregIsVirtual(rreg));
1737 vassert(mode64 == False);
1738 *i1 = *i2 = NULL;
1739 am = X86AMode_IR(offsetB, hregX86_EBP());
1740 switch (hregClass(rreg)) {
1741 case HRcInt32:
1742 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
1743 return;
1744 case HRcFlt64:
1745 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
1746 return;
1747 case HRcVec128:
1748 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
1749 return;
1750 default:
1751 ppHRegClass(hregClass(rreg));
1752 vpanic("genReload_X86: unimplemented regclass");
1753 }
1754 }
1755
1756 /* The given instruction reads the specified vreg exactly once, and
1757 that vreg is currently located at the given spill offset. If
1758 possible, return a variant of the instruction which instead
1759 references the spill slot directly. */
1760
1761 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
1762 {
1763 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
1764
1765 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1766 Convert to: src=RMI_Mem, dst=Reg
1767 */
1768 if (i->tag == Xin_Alu32R
1769 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
1770 || i->Xin.Alu32R.op == Xalu_XOR)
1771 && i->Xin.Alu32R.src->tag == Xrmi_Reg
1772 && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
1773 vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
1774 return X86Instr_Alu32R(
1775 i->Xin.Alu32R.op,
1776 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
1777 i->Xin.Alu32R.dst
1778 );
1779 }
1780
1781 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1782 Convert to: src=RI_Imm, dst=Mem
1783 */
1784 if (i->tag == Xin_Alu32R
1785 && (i->Xin.Alu32R.op == Xalu_CMP)
1786 && i->Xin.Alu32R.src->tag == Xrmi_Imm
1787 && sameHReg(i->Xin.Alu32R.dst, vreg)) {
1788 return X86Instr_Alu32M(
1789 i->Xin.Alu32R.op,
1790 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
1791 X86AMode_IR( spill_off, hregX86_EBP())
1792 );
1793 }
1794
1795 /* Deal with form: Push(RMI_Reg)
1796 Convert to: Push(RMI_Mem)
1797 */
1798 if (i->tag == Xin_Push
1799 && i->Xin.Push.src->tag == Xrmi_Reg
1800 && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
1801 return X86Instr_Push(
1802 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
1803 );
1804 }
1805
1806 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1807 Convert to CMov32(RM_Mem, dst) */
1808 if (i->tag == Xin_CMov32
1809 && i->Xin.CMov32.src->tag == Xrm_Reg
1810 && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
1811 vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
1812 return X86Instr_CMov32(
1813 i->Xin.CMov32.cond,
1814 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
1815 i->Xin.CMov32.dst
1816 );
1817 }
1818
1819 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1820 if (i->tag == Xin_Test32
1821 && i->Xin.Test32.dst->tag == Xrm_Reg
1822 && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
1823 return X86Instr_Test32(
1824 i->Xin.Test32.imm32,
1825 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
1826 );
1827 }
1828
1829 return NULL;
1830 }
1831
1832
1833 /* --------- The x86 assembler (bleh.) --------- */
1834
1835 static UChar iregNo ( HReg r )
1836 {
1837 UInt n;
1838 vassert(hregClass(r) == HRcInt32);
1839 vassert(!hregIsVirtual(r));
1840 n = hregNumber(r);
1841 vassert(n <= 7);
1842 return toUChar(n);
1843 }
1844
1845 static UInt fregNo ( HReg r )
1846 {
1847 UInt n;
1848 vassert(hregClass(r) == HRcFlt64);
1849 vassert(!hregIsVirtual(r));
1850 n = hregNumber(r);
1851 vassert(n <= 5);
1852 return n;
1853 }
1854
1855 static UInt vregNo ( HReg r )
1856 {
1857 UInt n;
1858 vassert(hregClass(r) == HRcVec128);
1859 vassert(!hregIsVirtual(r));
1860 n = hregNumber(r);
1861 vassert(n <= 7);
1862 return n;
1863 }
1864
1865 static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1866 {
1867 vassert(mod < 4);
1868 vassert((reg|regmem) < 8);
1869 return toUChar( ((mod & 3) << 6)
1870 | ((reg & 7) << 3)
1871 | (regmem & 7) );
1872 }
1873
1874 static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1875 {
1876 vassert(shift < 4);
1877 vassert((regindex|regbase) < 8);
1878 return toUChar( ((shift & 3) << 6)
1879 | ((regindex & 7) << 3)
1880 | (regbase & 7) );
1881 }
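/* Worked example (editor's illustration, not from the original source;
   cross-check with any x86 assembler):
     mkModRegRM(3, 0 (%eax), 1 (%ecx)) == 0xC1, so the byte pair
     0x89 0xC1 decodes as "movl %eax,%ecx" (opcode 89 /r, mod=11);
     mkSIB(0, 4 (no index), 4 (%esp)) == 0x24, which is the SIB byte
     used below for plain %esp-relative addressing. */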
1882
1883 static UChar* emit32 ( UChar* p, UInt w32 )
1884 {
1885 *p++ = toUChar( w32 & 0x000000FF);
1886 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1887 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1888 *p++ = toUChar((w32 >> 24) & 0x000000FF);
1889 return p;
1890 }
1891
1892 /* Does a sign-extend of the lowest 8 bits give
1893 the original number? */
1894 static Bool fits8bits ( UInt w32 )
1895 {
1896 Int i32 = (Int)w32;
1897 return toBool(i32 == ((i32 << 24) >> 24));
1898 }
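/* Illustrative values (editor's note): fits8bits is True exactly when
   the word, viewed as signed, lies in -128 .. +127.  So
   fits8bits(0x0000007F) and fits8bits(0xFFFFFF80) hold, while
   fits8bits(0x00000080) and fits8bits(0xFFFFFF7F) do not. */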
1899
1900
1901 /* Forming mod-reg-rm bytes and scale-index-base bytes.
1902
1903 greg, 0(ereg) | ereg != ESP && ereg != EBP
1904 = 00 greg ereg
1905
1906 greg, d8(ereg) | ereg != ESP
1907 = 01 greg ereg, d8
1908
1909 greg, d32(ereg) | ereg != ESP
1910 = 10 greg ereg, d32
1911
1912 greg, d8(%esp) = 01 greg 100, 0x24, d8
1913
1914 -----------------------------------------------
1915
1916 greg, d8(base,index,scale)
1917 | index != ESP
1918 = 01 greg 100, scale index base, d8
1919
1920 greg, d32(base,index,scale)
1921 | index != ESP
1922 = 10 greg 100, scale index base, d32
1923 */
1924 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
1925 {
1926 if (am->tag == Xam_IR) {
1927 if (am->Xam.IR.imm == 0
1928 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
1929 && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
1930 *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
1931 return p;
1932 }
1933 if (fits8bits(am->Xam.IR.imm)
1934 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1935 *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
1936 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1937 return p;
1938 }
1939 if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1940 *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
1941 p = emit32(p, am->Xam.IR.imm);
1942 return p;
1943 }
1944 if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
1945 && fits8bits(am->Xam.IR.imm)) {
1946 *p++ = mkModRegRM(1, iregNo(greg), 4);
1947 *p++ = 0x24;
1948 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1949 return p;
1950 }
1951 ppX86AMode(am);
1952 vpanic("doAMode_M: can't emit amode IR");
1953 /*NOTREACHED*/
1954 }
1955 if (am->tag == Xam_IRRS) {
1956 if (fits8bits(am->Xam.IRRS.imm)
1957 && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1958 *p++ = mkModRegRM(1, iregNo(greg), 4);
1959 *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
1960 iregNo(am->Xam.IRRS.base));
1961 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
1962 return p;
1963 }
1964 if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1965 *p++ = mkModRegRM(2, iregNo(greg), 4);
1966 *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
1967 iregNo(am->Xam.IRRS.base));
1968 p = emit32(p, am->Xam.IRRS.imm);
1969 return p;
1970 }
1971 ppX86AMode(am);
1972 vpanic("doAMode_M: can't emit amode IRRS");
1973 /*NOTREACHED*/
1974 }
1975 vpanic("doAMode_M: unknown amode");
1976 /*NOTREACHED*/
1977 }
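/* Worked examples (editor's sketch, not from the original source):
   with greg = %eax,
     8(%ebp)  encodes as 45 08      (mod=01, rm=%ebp, disp8=0x08)
     4(%esp)  encodes as 44 24 04   (mod=01, rm=100, SIB=0x24, disp8=0x04)
   so, prefixed with the 8B opcode byte used for loads below,
   "movl 8(%ebp),%eax" assembles to 8B 45 08. */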
1978
1979
1980 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
1981 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
1982 {
1983 *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
1984 return p;
1985 }
1986
1987
1988 /* Emit ffree %st(7) */
1989 static UChar* do_ffree_st7 ( UChar* p )
1990 {
1991 *p++ = 0xDD;
1992 *p++ = 0xC7;
1993 return p;
1994 }
1995
1996 /* Emit fstp %st(i), 1 <= i <= 7 */
1997 static UChar* do_fstp_st ( UChar* p, Int i )
1998 {
1999 vassert(1 <= i && i <= 7);
2000 *p++ = 0xDD;
2001 *p++ = toUChar(0xD8+i);
2002 return p;
2003 }
2004
2005 /* Emit fld %st(i), 0 <= i <= 6 */
2006 static UChar* do_fld_st ( UChar* p, Int i )
2007 {
2008 vassert(0 <= i && i <= 6);
2009 *p++ = 0xD9;
2010 *p++ = toUChar(0xC0+i);
2011 return p;
2012 }
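/* Editor's note on the helpers above: do_ffree_st7 emits DD C7
   (ffree %st(7)), do_fld_st(p,0) emits D9 C0 (fld %st(0)) and
   do_fstp_st(p,1) emits DD D9 (fstp %st(1)) -- the basic
   "ffree ; fld ; ... ; fstp" idiom used by the FP cases in
   emit_X86Instr below. */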
2013
2014 /* Emit f<op> %st(0) */
2015 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
2016 {
2017 switch (op) {
2018 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
2019 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
2020 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
2021 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
2022 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
2023 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
2024 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
2025 case Xfp_MOV: break;
2026 case Xfp_TAN:
2027 /* fptan pushes 1.0 on the FP stack, except when the argument
2028 is out of range. Hence we have to do the instruction,
2029 then inspect C2 to see if there is an out of range
2030 condition. If there is, we skip the fincstp that is used
2031 by the in-range case to get rid of this extra 1.0
2032 value. */
2033 p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
2034 *p++ = 0xD9; *p++ = 0xF2; // fptan
2035 *p++ = 0x50; // pushl %eax
2036 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
2037 *p++ = 0x66; *p++ = 0xA9;
2038 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
2039 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
2040 *p++ = 0xD9; *p++ = 0xF7; // fincstp
2041 *p++ = 0x58; // after_fincstp: popl %eax
2042 break;
2043 default:
2044 vpanic("do_fop1_st: unknown op");
2045 }
2046 return p;
2047 }
2048
2049 /* Emit f<op> %st(i), 1 <= i <= 5 */
2050 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
2051 {
2052 # define fake(_n) mkHReg((_n), HRcInt32, False)
2053 Int subopc;
2054 switch (op) {
2055 case Xfp_ADD: subopc = 0; break;
2056 case Xfp_SUB: subopc = 4; break;
2057 case Xfp_MUL: subopc = 1; break;
2058 case Xfp_DIV: subopc = 6; break;
2059 default: vpanic("do_fop2_st: unknown op");
2060 }
2061 *p++ = 0xD8;
2062 p = doAMode_R(p, fake(subopc), fake(i));
2063 return p;
2064 # undef fake
2065 }
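/* For instance (editor's illustration): do_fop2_st(p, Xfp_ADD, 1)
   emits D8 C1, i.e. "fadd %st(1),%st(0)" -- sub-opcode 0 in the reg
   field, register number 1 in the r/m field. */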
2066
2067 /* Push a 32-bit word on the stack. The word depends on tags[3:0];
2068 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
2069 */
2070 static UChar* push_word_from_tags ( UChar* p, UShort tags )
2071 {
2072 UInt w;
2073 vassert(0 == (tags & ~0xF));
2074 if (tags == 0) {
2075 /* pushl $0x00000000 */
2076 *p++ = 0x6A;
2077 *p++ = 0x00;
2078 }
2079 else
2080 /* pushl $0xFFFFFFFF */
2081 if (tags == 0xF) {
2082 *p++ = 0x6A;
2083 *p++ = 0xFF;
2084 } else {
2085 vassert(0); /* awaiting test case */
2086 w = 0;
2087 if (tags & 1) w |= 0x000000FF;
2088 if (tags & 2) w |= 0x0000FF00;
2089 if (tags & 4) w |= 0x00FF0000;
2090 if (tags & 8) w |= 0xFF000000;
2091 *p++ = 0x68;
2092 p = emit32(p, w);
2093 }
2094 return p;
2095 }
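/* So (editor's note) tags == 0x0 becomes 6A 00 (pushl $0) and
   tags == 0xF becomes 6A FF (pushl $-1, which sign-extends to
   0xFFFFFFFF on the stack). */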
2096
2097 /* Emit an instruction into buf and return the number of bytes used.
2098 Note that buf is not the insn's final place, and therefore it is
2099 imperative to emit position-independent code. If the emitted
2100 instruction was a profiler inc, set *is_profInc to True, else
2101 leave it unchanged. */
2102
2103 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
2104 UChar* buf, Int nbuf, X86Instr* i,
2105 Bool mode64,
2106 void* disp_cp_chain_me_to_slowEP,
2107 void* disp_cp_chain_me_to_fastEP,
2108 void* disp_cp_xindir,
2109 void* disp_cp_xassisted )
2110 {
2111 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2112
2113 UInt xtra;
2114 UChar* p = &buf[0];
2115 UChar* ptmp;
2116 vassert(nbuf >= 32);
2117 vassert(mode64 == False);
2118
2119 /* Wrap an integer as an int register, for use assembling
2120 GrpN insns, in which the greg field is used as a sub-opcode
2121 and does not really contain a register. */
2122 # define fake(_n) mkHReg((_n), HRcInt32, False)
2123
2124 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
2125
2126 switch (i->tag) {
2127
2128 case Xin_Alu32R:
2129 /* Deal specially with MOV */
2130 if (i->Xin.Alu32R.op == Xalu_MOV) {
2131 switch (i->Xin.Alu32R.src->tag) {
2132 case Xrmi_Imm:
2133 *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
2134 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2135 goto done;
2136 case Xrmi_Reg:
2137 *p++ = 0x89;
2138 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2139 i->Xin.Alu32R.dst);
2140 goto done;
2141 case Xrmi_Mem:
2142 *p++ = 0x8B;
2143 p = doAMode_M(p, i->Xin.Alu32R.dst,
2144 i->Xin.Alu32R.src->Xrmi.Mem.am);
2145 goto done;
2146 default:
2147 goto bad;
2148 }
2149 }
2150 /* MUL */
2151 if (i->Xin.Alu32R.op == Xalu_MUL) {
2152 switch (i->Xin.Alu32R.src->tag) {
2153 case Xrmi_Reg:
2154 *p++ = 0x0F;
2155 *p++ = 0xAF;
2156 p = doAMode_R(p, i->Xin.Alu32R.dst,
2157 i->Xin.Alu32R.src->Xrmi.Reg.reg);
2158 goto done;
2159 case Xrmi_Mem:
2160 *p++ = 0x0F;
2161 *p++ = 0xAF;
2162 p = doAMode_M(p, i->Xin.Alu32R.dst,
2163 i->Xin.Alu32R.src->Xrmi.Mem.am);
2164 goto done;
2165 case Xrmi_Imm:
2166 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2167 *p++ = 0x6B;
2168 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2169 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2170 } else {
2171 *p++ = 0x69;
2172 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2173 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2174 }
2175 goto done;
2176 default:
2177 goto bad;
2178 }
2179 }
2180 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2181 opc = opc_rr = subopc_imm = opc_imma = 0;
2182 switch (i->Xin.Alu32R.op) {
2183 case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
2184 subopc_imm = 2; opc_imma = 0x15; break;
2185 case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
2186 subopc_imm = 0; opc_imma = 0x05; break;
2187 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
2188 subopc_imm = 5; opc_imma = 0x2D; break;
2189 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
2190 subopc_imm = 3; opc_imma = 0x1D; break;
2191 case Xalu_AND: opc = 0x23; opc_rr = 0x21;
2192 subopc_imm = 4; opc_imma = 0x25; break;
2193 case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
2194 subopc_imm = 6; opc_imma = 0x35; break;
2195 case Xalu_OR: opc = 0x0B; opc_rr = 0x09;
2196 subopc_imm = 1; opc_imma = 0x0D; break;
2197 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
2198 subopc_imm = 7; opc_imma = 0x3D; break;
2199 default: goto bad;
2200 }
2201 switch (i->Xin.Alu32R.src->tag) {
2202 case Xrmi_Imm:
2203 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
2204 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2205 *p++ = toUChar(opc_imma);
2206 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2207 } else
2208 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2209 *p++ = 0x83;
2210 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2211 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2212 } else {
2213 *p++ = 0x81;
2214 p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2215 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2216 }
2217 goto done;
2218 case Xrmi_Reg:
2219 *p++ = toUChar(opc_rr);
2220 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2221 i->Xin.Alu32R.dst);
2222 goto done;
2223 case Xrmi_Mem:
2224 *p++ = toUChar(opc);
2225 p = doAMode_M(p, i->Xin.Alu32R.dst,
2226 i->Xin.Alu32R.src->Xrmi.Mem.am);
2227 goto done;
2228 default:
2229 goto bad;
2230 }
2231 break;
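/* Example bytes for the group above (editor's illustration):
   "addl %eax,%ecx" is 01 C1 (opc_rr 0x01, modrm C1) and
   "addl $1,%ecx" is 83 C1 01 (imm8 form, subopc_imm 0 in the
   reg field). */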
2232
2233 case Xin_Alu32M:
2234 /* Deal specially with MOV */
2235 if (i->Xin.Alu32M.op == Xalu_MOV) {
2236 switch (i->Xin.Alu32M.src->tag) {
2237 case Xri_Reg:
2238 *p++ = 0x89;
2239 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2240 i->Xin.Alu32M.dst);
2241 goto done;
2242 case Xri_Imm:
2243 *p++ = 0xC7;
2244 p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
2245 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2246 goto done;
2247 default:
2248 goto bad;
2249 }
2250 }
2251 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2252 allowed here. */
2253 opc = subopc_imm = opc_imma = 0;
2254 switch (i->Xin.Alu32M.op) {
2255 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2256 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2257 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
2258 default: goto bad;
2259 }
2260 switch (i->Xin.Alu32M.src->tag) {
2261 case Xri_Reg:
2262 *p++ = toUChar(opc);
2263 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2264 i->Xin.Alu32M.dst);
2265 goto done;
2266 case Xri_Imm:
2267 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2268 *p++ = 0x83;
2269 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2270 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
2271 goto done;
2272 } else {
2273 *p++ = 0x81;
2274 p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2275 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2276 goto done;
2277 }
2278 default:
2279 goto bad;
2280 }
2281 break;
2282
2283 case Xin_Sh32:
2284 opc_cl = opc_imm = subopc = 0;
2285 switch (i->Xin.Sh32.op) {
2286 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2287 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2288 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2289 default: goto bad;
2290 }
2291 if (i->Xin.Sh32.src == 0) {
2292 *p++ = toUChar(opc_cl);
2293 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2294 } else {
2295 *p++ = toUChar(opc_imm);
2296 p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2297 *p++ = (UChar)(i->Xin.Sh32.src);
2298 }
2299 goto done;
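/* E.g. (editor's illustration) with dst = %eax this produces
   D3 E0 for "shll %cl,%eax" and C1 E0 02 for "shll $2,%eax". */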
2300
2301 case Xin_Test32:
2302 if (i->Xin.Test32.dst->tag == Xrm_Reg) {
2303 /* testl $imm32, %reg */
2304 *p++ = 0xF7;
2305 p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
2306 p = emit32(p, i->Xin.Test32.imm32);
2307 goto done;
2308 } else {
2309 /* testl $imm32, amode */
2310 *p++ = 0xF7;
2311 p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
2312 p = emit32(p, i->Xin.Test32.imm32);
2313 goto done;
2314 }
2315
2316 case Xin_Unary32:
2317 if (i->Xin.Unary32.op == Xun_NOT) {
2318 *p++ = 0xF7;
2319 p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
2320 goto done;
2321 }
2322 if (i->Xin.Unary32.op == Xun_NEG) {
2323 *p++ = 0xF7;
2324 p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
2325 goto done;
2326 }
2327 break;
2328
2329 case Xin_Lea32:
2330 *p++ = 0x8D;
2331 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
2332 goto done;
2333
2334 case Xin_MulL:
2335 subopc = i->Xin.MulL.syned ? 5 : 4;
2336 *p++ = 0xF7;
2337 switch (i->Xin.MulL.src->tag) {
2338 case Xrm_Mem:
2339 p = doAMode_M(p, fake(subopc),
2340 i->Xin.MulL.src->Xrm.Mem.am);
2341 goto done;
2342 case Xrm_Reg:
2343 p = doAMode_R(p, fake(subopc),
2344 i->Xin.MulL.src->Xrm.Reg.reg);
2345 goto done;
2346 default:
2347 goto bad;
2348 }
2349 break;
2350
2351 case Xin_Div:
2352 subopc = i->Xin.Div.syned ? 7 : 6;
2353 *p++ = 0xF7;
2354 switch (i->Xin.Div.src->tag) {
2355 case Xrm_Mem:
2356 p = doAMode_M(p, fake(subopc),
2357 i->Xin.Div.src->Xrm.Mem.am);
2358 goto done;
2359 case Xrm_Reg:
2360 p = doAMode_R(p, fake(subopc),
2361 i->Xin.Div.src->Xrm.Reg.reg);
2362 goto done;
2363 default:
2364 goto bad;
2365 }
2366 break;
2367
2368 case Xin_Sh3232:
2369 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2370 if (i->Xin.Sh3232.amt == 0) {
2371 /* shldl/shrdl by %cl */
2372 *p++ = 0x0F;
2373 if (i->Xin.Sh3232.op == Xsh_SHL) {
2374 *p++ = 0xA5;
2375 } else {
2376 *p++ = 0xAD;
2377 }
2378 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2379 goto done;
2380 }
2381 break;
2382
2383 case Xin_Push:
2384 switch (i->Xin.Push.src->tag) {
2385 case Xrmi_Mem:
2386 *p++ = 0xFF;
2387 p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
2388 goto done;
2389 case Xrmi_Imm:
2390 *p++ = 0x68;
2391 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
2392 goto done;
2393 case Xrmi_Reg:
2394 *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
2395 goto done;
2396 default:
2397 goto bad;
2398 }
2399
2400 case Xin_Call:
2401 if (i->Xin.Call.cond != Xcc_ALWAYS
2402 && i->Xin.Call.rloc.pri != RLPri_None) {
2403 /* The call might not happen (it isn't unconditional) and it
2404 returns a result. In this case we will need to generate a
2405 control flow diamond to put 0x555..555 in the return
2406 register(s) in the case where the call doesn't happen. If
2407 this ever becomes necessary, maybe copy code from the ARM
2408 equivalent. Until that day, just give up. */
2409 goto bad;
2410 }
2411 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
2412 for explanation of this. */
2413 switch (i->Xin.Call.regparms) {
2414 case 0: irno = iregNo(hregX86_EAX()); break;
2415 case 1: irno = iregNo(hregX86_EDX()); break;
2416 case 2: irno = iregNo(hregX86_ECX()); break;
2417 case 3: irno = iregNo(hregX86_EDI()); break;
2418 default: vpanic(" emit_X86Instr:call:regparms");
2419 }
2420 /* jump over the following two insns if the condition does not
2421 hold */
2422 if (i->Xin.Call.cond != Xcc_ALWAYS) {
2423 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
2424 *p++ = 0x07; /* 7 bytes in the next two insns */
2425 }
2426 /* movl $target, %tmp */
2427 *p++ = toUChar(0xB8 + irno);
2428 p = emit32(p, i->Xin.Call.target);
2429 /* call *%tmp */
2430 *p++ = 0xFF;
2431 *p++ = toUChar(0xD0 + irno);
2432 goto done;
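/* Editor's note: the unconditional tail is always exactly 7 bytes,
   e.g. for the 0-regparm case B8 <imm32> (movl $target,%eax) followed
   by FF D0 (call *%eax), which is why the conditional skip above
   jumps over precisely 0x07 bytes. */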
2433
2434 case Xin_XDirect: {
2435 /* NB: what goes on here has to be very closely coordinated with the
2436 chainXDirect_X86 and unchainXDirect_X86 below. */
2437 /* We're generating chain-me requests here, so we need to be
2438 sure this is actually allowed -- no-redir translations can't
2439 use chain-me's. Hence: */
2440 vassert(disp_cp_chain_me_to_slowEP != NULL);
2441 vassert(disp_cp_chain_me_to_fastEP != NULL);
2442
2443 /* Use ptmp for backpatching conditional jumps. */
2444 ptmp = NULL;
2445
2446 /* First off, if this is conditional, create a conditional
2447 jump over the rest of it. */
2448 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2449 /* jmp fwds if !condition */
2450 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
2451 ptmp = p; /* fill in this bit later */
2452 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2453 }
2454
2455 /* Update the guest EIP. */
2456 /* movl $dstGA, amEIP */
2457 *p++ = 0xC7;
2458 p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
2459 p = emit32(p, i->Xin.XDirect.dstGA);
2460
2461 /* --- FIRST PATCHABLE BYTE follows --- */
2462 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2463 to) backs up the return address, so as to find the address of
2464 the first patchable byte. So: don't change the length of the
2465 two instructions below. */
2466 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
2467 *p++ = 0xBA;
2468 void* disp_cp_chain_me
2469 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2470 : disp_cp_chain_me_to_slowEP;
2471 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
2472 /* call *%edx */
2473 *p++ = 0xFF;
2474 *p++ = 0xD2;
2475 /* --- END of PATCHABLE BYTES --- */
2476
2477 /* Fix up the conditional jump, if there was one. */
2478 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2479 Int delta = p - ptmp;
2480 vassert(delta > 0 && delta < 40);
2481 *ptmp = toUChar(delta-1);
2482 }
2483 goto done;
2484 }
2485
2486 case Xin_XIndir: {
2487 /* We're generating transfers that could lead indirectly to a
2488 chain-me, so we need to be sure this is actually allowed --
2489 no-redir translations are not allowed to reach normal
2490 translations without going through the scheduler. That means
2491 no XDirects or XIndirs out from no-redir translations.
2492 Hence: */
2493 vassert(disp_cp_xindir != NULL);
2494
2495 /* Use ptmp for backpatching conditional jumps. */
2496 ptmp = NULL;
2497
2498 /* First off, if this is conditional, create a conditional
2499 jump over the rest of it. */
2500 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2501 /* jmp fwds if !condition */
2502 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
2503 ptmp = p; /* fill in this bit later */
2504 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2505 }
2506
2507 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2508 *p++ = 0x89;
2509 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2510
2511 /* movl $disp_indir, %edx */
2512 *p++ = 0xBA;
2513 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
2514 /* jmp *%edx */
2515 *p++ = 0xFF;
2516 *p++ = 0xE2;
2517
2518 /* Fix up the conditional jump, if there was one. */
2519 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2520 Int delta = p - ptmp;
2521 vassert(delta > 0 && delta < 40);
2522 *ptmp = toUChar(delta-1);
2523 }
2524 goto done;
2525 }
2526
2527 case Xin_XAssisted: {
2528 /* Use ptmp for backpatching conditional jumps. */
2529 ptmp = NULL;
2530
2531 /* First off, if this is conditional, create a conditional
2532 jump over the rest of it. */
2533 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2534 /* jmp fwds if !condition */
2535 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
2536 ptmp = p; /* fill in this bit later */
2537 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2538 }
2539
2540 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2541 *p++ = 0x89;
2542 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2543 /* movl $magic_number, %ebp. */
2544 UInt trcval = 0;
2545 switch (i->Xin.XAssisted.jk) {
2546 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2547 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2548 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
2549 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break;
2550 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break;
2551 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
2552 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2553 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2554 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2555 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
2556 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
2557 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2558 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2559 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2560 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2561 /* We don't expect to see the following being assisted. */
2562 case Ijk_Ret:
2563 case Ijk_Call:
2564 /* fallthrough */
2565 default:
2566 ppIRJumpKind(i->Xin.XAssisted.jk);
2567 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
2568 }
2569 vassert(trcval != 0);
2570 *p++ = 0xBD;
2571 p = emit32(p, trcval);
2572
2573 /* movl $disp_indir, %edx */
2574 *p++ = 0xBA;
2575 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
2576 /* jmp *%edx */
2577 *p++ = 0xFF;
2578 *p++ = 0xE2;
2579
2580 /* Fix up the conditional jump, if there was one. */
2581 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2582 Int delta = p - ptmp;
2583 vassert(delta > 0 && delta < 40);
2584 *ptmp = toUChar(delta-1);
2585 }
2586 goto done;
2587 }
2588
2589 case Xin_CMov32:
2590 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
2591
2592 /* This generates cmov, which is illegal on P54/P55. */
2593 /*
2594 *p++ = 0x0F;
2595 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
2596 if (i->Xin.CMov32.src->tag == Xrm_Reg) {
2597 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
2598 goto done;
2599 }
2600 if (i->Xin.CMov32.src->tag == Xrm_Mem) {
2601 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
2602 goto done;
2603 }
2604 */
2605
2606 /* Alternative version which works on any x86 variant. */
2607 /* jmp fwds if !condition */
2608 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
2609 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2610 ptmp = p;
2611
2612 switch (i->Xin.CMov32.src->tag) {
2613 case Xrm_Reg:
2614 /* Big sigh. This is movl E -> G ... */
2615 *p++ = 0x89;
2616 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
2617 i->Xin.CMov32.dst);
2618
2619 break;
2620 case Xrm_Mem:
2621 /* ... whereas this is movl G -> E. That's why the args
2622 to doAMode_R appear to be the wrong way round in the
2623 Xrm_Reg case. */
2624 *p++ = 0x8B;
2625 p = doAMode_M(p, i->Xin.CMov32.dst,
2626 i->Xin.CMov32.src->Xrm.Mem.am);
2627 break;
2628 default:
2629 goto bad;
2630 }
2631 /* Fill in the jump offset. */
2632 *(ptmp-1) = toUChar(p - ptmp);
2633 goto done;
2634
2635 break;
2636
2637 case Xin_LoadEX:
2638 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2639 /* movzbl */
2640 *p++ = 0x0F;
2641 *p++ = 0xB6;
2642 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2643 goto done;
2644 }
2645 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2646 /* movzwl */
2647 *p++ = 0x0F;
2648 *p++ = 0xB7;
2649 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2650 goto done;
2651 }
2652 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2653 /* movsbl */
2654 *p++ = 0x0F;
2655 *p++ = 0xBE;
2656 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2657 goto done;
2658 }
2659 break;
2660
2661 case Xin_Set32:
2662 /* Make the destination register be 1 or 0, depending on whether
2663 the relevant condition holds. We have to dodge and weave
2664 when the destination is %esi or %edi as we cannot directly
2665 emit the native 'setb %reg' for those. Further complication:
2666 the top 24 bits of the destination should be forced to zero,
2667 but doing 'xor %r,%r' kills the flag(s) we are about to read.
2668 Sigh. So start off by moving $0 into the dest. */
2669
2670 /* Do we need to swap in %eax? */
2671 if (iregNo(i->Xin.Set32.dst) >= 4) {
2672 /* xchg %eax, %dst */
2673 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2674 /* movl $0, %eax */
2675 *p++ =toUChar(0xB8 + iregNo(hregX86_EAX()));
2676 p = emit32(p, 0);
2677 /* setb lo8(%eax) */
2678 *p++ = 0x0F;
2679 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2680 p = doAMode_R(p, fake(0), hregX86_EAX());
2681 /* xchg %eax, %dst */
2682 *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2683 } else {
2684 /* movl $0, %dst */
2685 *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
2686 p = emit32(p, 0);
2687 /* setb lo8(%dst) */
2688 *p++ = 0x0F;
2689 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2690 p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
2691 }
2692 goto done;
2693
2694 case Xin_Bsfr32:
2695 *p++ = 0x0F;
2696 if (i->Xin.Bsfr32.isFwds) {
2697 *p++ = 0xBC;
2698 } else {
2699 *p++ = 0xBD;
2700 }
2701 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2702 goto done;
2703
2704 case Xin_MFence:
2705 /* see comment in hdefs.h re this insn */
2706 if (0) vex_printf("EMIT FENCE\n");
2707 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2708 |VEX_HWCAPS_X86_SSE2)) {
2709 /* mfence */
2710 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2711 goto done;
2712 }
2713 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
2714 /* sfence */
2715 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2716 /* lock addl $0,0(%esp) */
2717 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2718 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2719 goto done;
2720 }
2721 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2722 /* lock addl $0,0(%esp) */
2723 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2724 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2725 goto done;
2726 }
2727 vpanic("emit_X86Instr:mfence:hwcaps");
2728 /*NOTREACHED*/
2729 break;
2730
2731 case Xin_ACAS:
2732 /* lock */
2733 *p++ = 0xF0;
2734 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
2735 in %ebx. The new-value register is hardwired to be %ebx
2736 since letting it be any integer register gives the problem
2737 that %sil and %dil are unaddressable on x86 and hence we
2738 would have to resort to the same kind of trickery as with
2739 byte-sized Xin.Store, just below. Given that this isn't
2740 performance critical, it is simpler just to force the
2741 register operand to %ebx (could equally be %ecx or %edx).
2742 (Although %ebx is more consistent with cmpxchg8b.) */
2743 if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2744 *p++ = 0x0F;
2745 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2746 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2747 goto done;
2748
2749 case Xin_DACAS:
2750 /* lock */
2751 *p++ = 0xF0;
2752 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
2753 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
2754 aren't encoded in the insn. */
2755 *p++ = 0x0F;
2756 *p++ = 0xC7;
2757 p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
2758 goto done;
2759
2760 case Xin_Store:
2761 if (i->Xin.Store.sz == 2) {
2762 /* This case, at least, is simple, given that we can
2763 reference the low 16 bits of any integer register. */
2764 *p++ = 0x66;
2765 *p++ = 0x89;
2766 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2767 goto done;
2768 }
2769
2770 if (i->Xin.Store.sz == 1) {
2771 /* We have to do complex dodging and weaving if src is not
2772 the low 8 bits of %eax/%ebx/%ecx/%edx. */
2773 if (iregNo(i->Xin.Store.src) < 4) {
2774 /* we're OK, can do it directly */
2775 *p++ = 0x88;
2776 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2777 goto done;
2778 } else {
2779 /* Bleh. This means the source is %edi or %esi. Since
2780 the address mode can only mention three registers, at
2781 least one of %eax/%ebx/%ecx/%edx must be available to
2782 temporarily swap the source into, so the store can
2783 happen. So we have to look at the regs mentioned
2784 in the amode. */
2785 HReg swap = INVALID_HREG;
2786 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
2787 ecx = hregX86_ECX(), edx = hregX86_EDX();
2788 Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
2789 HRegUsage u;
2790 Int j;
2791 initHRegUsage(&u);
2792 addRegUsage_X86AMode(&u, i->Xin.Store.dst);
2793 for (j = 0; j < u.n_used; j++) {
2794 HReg r = u.hreg[j];
2795 if (sameHReg(r, eax)) a_ok = False;
2796 if (sameHReg(r, ebx)) b_ok = False;
2797 if (sameHReg(r, ecx)) c_ok = False;
2798 if (sameHReg(r, edx)) d_ok = False;
2799 }
2800 if (a_ok) swap = eax;
2801 if (b_ok) swap = ebx;
2802 if (c_ok) swap = ecx;
2803 if (d_ok) swap = edx;
2804 vassert(! hregIsInvalid(swap));
2805 /* xchgl %source, %swap. Could do better if swap is %eax. */
2806 *p++ = 0x87;
2807 p = doAMode_R(p, i->Xin.Store.src, swap);
2808 /* movb lo8{%swap}, (dst) */
2809 *p++ = 0x88;
2810 p = doAMode_M(p, swap, i->Xin.Store.dst);
2811 /* xchgl %source, %swap. Could do better if swap is %eax. */
2812 *p++ = 0x87;
2813 p = doAMode_R(p, i->Xin.Store.src, swap);
2814 goto done;
2815 }
2816 } /* if (i->Xin.Store.sz == 1) */
2817 break;
2818
2819 case Xin_FpUnary:
2820 /* gop %src, %dst
2821 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2822 */
2823 p = do_ffree_st7(p);
2824 p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
2825 p = do_fop1_st(p, i->Xin.FpUnary.op);
2826 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
2827 goto done;
2828
2829 case Xin_FpBinary:
2830 if (i->Xin.FpBinary.op == Xfp_YL2X
2831 || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2832 /* Have to do this specially. */
2833 /* ffree %st7 ; fld %st(srcL) ;
2834 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2835 p = do_ffree_st7(p);
2836 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2837 p = do_ffree_st7(p);
2838 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2839 *p++ = 0xD9;
2840 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2841 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2842 goto done;
2843 }
2844 if (i->Xin.FpBinary.op == Xfp_ATAN) {
2845 /* Have to do this specially. */
2846 /* ffree %st7 ; fld %st(srcL) ;
2847 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2848 p = do_ffree_st7(p);
2849 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2850 p = do_ffree_st7(p);
2851 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2852 *p++ = 0xD9; *p++ = 0xF3;
2853 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2854 goto done;
2855 }
2856 if (i->Xin.FpBinary.op == Xfp_PREM
2857 || i->Xin.FpBinary.op == Xfp_PREM1
2858 || i->Xin.FpBinary.op == Xfp_SCALE) {
2859 /* Have to do this specially. */
2860 /* ffree %st7 ; fld %st(srcR) ;
2861 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2862 fincstp ; ffree %st7 */
2863 p = do_ffree_st7(p);
2864 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
2865 p = do_ffree_st7(p);
2866 p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
2867 *p++ = 0xD9;
2868 switch (i->Xin.FpBinary.op) {
2869 case Xfp_PREM: *p++ = 0xF8; break;
2870 case Xfp_PREM1: *p++ = 0xF5; break;
2871 case Xfp_SCALE: *p++ = 0xFD; break;
2872 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2873 }
2874 p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
2875 *p++ = 0xD9; *p++ = 0xF7;
2876 p = do_ffree_st7(p);
2877 goto done;
2878 }
2879 /* General case */
2880 /* gop %srcL, %srcR, %dst
2881 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2882 */
2883 p = do_ffree_st7(p);
2884 p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2885 p = do_fop2_st(p, i->Xin.FpBinary.op,
2886 1+hregNumber(i->Xin.FpBinary.srcR));
2887 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2888 goto done;
2889
2890 case Xin_FpLdSt:
2891 if (i->Xin.FpLdSt.isLoad) {
2892 /* Load from memory into %fakeN.
2893 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2894 */
2895 p = do_ffree_st7(p);
2896 switch (i->Xin.FpLdSt.sz) {
2897 case 4:
2898 *p++ = 0xD9;
2899 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2900 break;
2901 case 8:
2902 *p++ = 0xDD;
2903 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2904 break;
2905 case 10:
2906 *p++ = 0xDB;
2907 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
2908 break;
2909 default:
2910 vpanic("emitX86Instr(FpLdSt,load)");
2911 }
2912 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
2913 goto done;
2914 } else {
2915 /* Store from %fakeN into memory.
2916 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2917 */
2918 p = do_ffree_st7(p);
2919 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
2920 switch (i->Xin.FpLdSt.sz) {
2921 case 4:
2922 *p++ = 0xD9;
2923 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2924 break;
2925 case 8:
2926 *p++ = 0xDD;
2927 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2928 break;
2929 case 10:
2930 *p++ = 0xDB;
2931 p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
2932 break;
2933 default:
2934 vpanic("emitX86Instr(FpLdSt,store)");
2935 }
2936 goto done;
2937 }
2938 break;
2939
2940 case Xin_FpLdStI:
2941 if (i->Xin.FpLdStI.isLoad) {
2942 /* Load from memory into %fakeN, converting from an int.
2943 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2944 */
2945 switch (i->Xin.FpLdStI.sz) {
2946 case 8: opc = 0xDF; subopc_imm = 5; break;
2947 case 4: opc = 0xDB; subopc_imm = 0; break;
2948 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
2949 default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2950 }
2951 p = do_ffree_st7(p);
2952 *p++ = toUChar(opc);
2953 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2954 p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
2955 goto done;
2956 } else {
2957 /* Store from %fakeN into memory, converting to an int.
2958 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2959 */
2960 switch (i->Xin.FpLdStI.sz) {
2961 case 8: opc = 0xDF; subopc_imm = 7; break;
2962 case 4: opc = 0xDB; subopc_imm = 3; break;
2963 case 2: opc = 0xDF; subopc_imm = 3; break;
2964 default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2965 }
2966 p = do_ffree_st7(p);
2967 p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
2968 *p++ = toUChar(opc);
2969 p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2970 goto done;
2971 }
2972 break;
2973
2974 case Xin_Fp64to32:
2975 /* ffree %st7 ; fld %st(src) */
2976 p = do_ffree_st7(p);
2977 p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
2978 /* subl $4, %esp */
2979 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2980 /* fstps (%esp) */
2981 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2982 /* flds (%esp) */
2983 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2984 /* addl $4, %esp */
2985 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2986 /* fstp %st(1+dst) */
2987 p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
2988 goto done;
2989
2990 case Xin_FpCMov:
2991 /* jmp fwds if !condition */
2992 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2993 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2994 ptmp = p;
2995
2996 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
2997 p = do_ffree_st7(p);
2998 p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
2999 p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
3000
3001 /* Fill in the jump offset. */
3002 *(ptmp-1) = toUChar(p - ptmp);
3003 goto done;
3004
3005 case Xin_FpLdCW:
3006 *p++ = 0xD9;
3007 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
3008 goto done;
3009
3010 case Xin_FpStSW_AX:
3011 /* note, this emits fnstsw %ax, not fstsw %ax */
3012 *p++ = 0xDF;
3013 *p++ = 0xE0;
3014 goto done;
3015
3016 case Xin_FpCmp:
3017 /* gcmp %fL, %fR, %dst
3018 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3019 fnstsw %ax ; movl %eax, %dst
3020 */
3021 /* ffree %st7 */
3022 p = do_ffree_st7(p);
3023 /* fpush %fL */
3024 p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
3025 /* fucomp %(fR+1) */
3026 *p++ = 0xDD;
3027 *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
3028 /* fnstsw %ax */
3029 *p++ = 0xDF;
3030 *p++ = 0xE0;
3031 /* movl %eax, %dst */
3032 *p++ = 0x89;
3033 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
3034 goto done;
3035
3036 case Xin_SseConst: {
3037 UShort con = i->Xin.SseConst.con;
3038 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
3039 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
3040 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
3041 p = push_word_from_tags(p, toUShort(con & 0xF));
3042 /* movl (%esp), %xmm-dst */
3043 *p++ = 0x0F;
3044 *p++ = 0x10;
3045 *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
3046 *p++ = 0x24;
3047 /* addl $16, %esp */
3048 *p++ = 0x83;
3049 *p++ = 0xC4;
3050 *p++ = 0x10;
3051 goto done;
3052 }
3053
3054 case Xin_SseLdSt:
3055 *p++ = 0x0F;
3056 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
3057 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
3058 goto done;
3059
3060 case Xin_SseLdzLO:
3061 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
3062 /* movs[sd] amode, %xmm-dst */
3063 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3064 *p++ = 0x0F;
3065 *p++ = 0x10;
3066 p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
3067 i->Xin.SseLdzLO.addr);
3068 goto done;
3069
3070 case Xin_Sse32Fx4:
3071 xtra = 0;
3072 *p++ = 0x0F;
3073 switch (i->Xin.Sse32Fx4.op) {
3074 case Xsse_ADDF: *p++ = 0x58; break;
3075 case Xsse_DIVF: *p++ = 0x5E; break;
3076 case Xsse_MAXF: *p++ = 0x5F; break;
3077 case Xsse_MINF: *p++ = 0x5D; break;
3078 case Xsse_MULF: *p++ = 0x59; break;
3079 case Xsse_RCPF: *p++ = 0x53; break;
3080 case Xsse_RSQRTF: *p++ = 0x52; break;
3081 case Xsse_SQRTF: *p++ = 0x51; break;
3082 case Xsse_SUBF: *p++ = 0x5C; break;
3083 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3084 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3085 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3086 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3087 default: goto bad;
3088 }
3089 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
3090 fake(vregNo(i->Xin.Sse32Fx4.src)) );
3091 if (xtra & 0x100)
3092 *p++ = toUChar(xtra & 0xFF);
3093 goto done;
3094
3095 case Xin_Sse64Fx2:
3096 xtra = 0;
3097 *p++ = 0x66;
3098 *p++ = 0x0F;
3099 switch (i->Xin.Sse64Fx2.op) {
3100 case Xsse_ADDF: *p++ = 0x58; break;
3101 case Xsse_DIVF: *p++ = 0x5E; break;
3102 case Xsse_MAXF: *p++ = 0x5F; break;
3103 case Xsse_MINF: *p++ = 0x5D; break;
3104 case Xsse_MULF: *p++ = 0x59; break;
3105 case Xsse_RCPF: *p++ = 0x53; break;
3106 case Xsse_RSQRTF: *p++ = 0x52; break;
3107 case Xsse_SQRTF: *p++ = 0x51; break;
3108 case Xsse_SUBF: *p++ = 0x5C; break;
3109 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3110 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3111 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3112 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3113 default: goto bad;
3114 }
3115 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
3116 fake(vregNo(i->Xin.Sse64Fx2.src)) );
3117 if (xtra & 0x100)
3118 *p++ = toUChar(xtra & 0xFF);
3119 goto done;
3120
3121 case Xin_Sse32FLo:
3122 xtra = 0;
3123 *p++ = 0xF3;
3124 *p++ = 0x0F;
3125 switch (i->Xin.Sse32FLo.op) {
3126 case Xsse_ADDF: *p++ = 0x58; break;
3127 case Xsse_DIVF: *p++ = 0x5E; break;
3128 case Xsse_MAXF: *p++ = 0x5F; break;
3129 case Xsse_MINF: *p++ = 0x5D; break;
3130 case Xsse_MULF: *p++ = 0x59; break;
3131 case Xsse_RCPF: *p++ = 0x53; break;
3132 case Xsse_RSQRTF: *p++ = 0x52; break;
3133 case Xsse_SQRTF: *p++ = 0x51; break;
3134 case Xsse_SUBF: *p++ = 0x5C; break;
3135 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3136 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3137 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3138 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3139 default: goto bad;
3140 }
3141 p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
3142 fake(vregNo(i->Xin.Sse32FLo.src)) );
3143 if (xtra & 0x100)
3144 *p++ = toUChar(xtra & 0xFF);
3145 goto done;
3146
3147 case Xin_Sse64FLo:
3148 xtra = 0;
3149 *p++ = 0xF2;
3150 *p++ = 0x0F;
3151 switch (i->Xin.Sse64FLo.op) {
3152 case Xsse_ADDF: *p++ = 0x58; break;
3153 case Xsse_DIVF: *p++ = 0x5E; break;
3154 case Xsse_MAXF: *p++ = 0x5F; break;
3155 case Xsse_MINF: *p++ = 0x5D; break;
3156 case Xsse_MULF: *p++ = 0x59; break;
3157 case Xsse_RCPF: *p++ = 0x53; break;
3158 case Xsse_RSQRTF: *p++ = 0x52; break;
3159 case Xsse_SQRTF: *p++ = 0x51; break;
3160 case Xsse_SUBF: *p++ = 0x5C; break;
3161 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3162 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3163 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3164 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3165 default: goto bad;
3166 }
3167 p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
3168 fake(vregNo(i->Xin.Sse64FLo.src)) );
3169 if (xtra & 0x100)
3170 *p++ = toUChar(xtra & 0xFF);
3171 goto done;
3172
3173 case Xin_SseReRg:
3174 # define XX(_n) *p++ = (_n)
3175 switch (i->Xin.SseReRg.op) {
3176 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break;
3177 case Xsse_OR: XX(0x0F); XX(0x56); break;
3178 case Xsse_XOR: XX(0x0F); XX(0x57); break;
3179 case Xsse_AND: XX(0x0F); XX(0x54); break;
3180 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break;
3181 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break;
3182 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break;
3183 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break;
3184 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break;
3185 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break;
3186 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break;
3187 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break;
3188 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break;
3189 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break;
3190 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break;
3191 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break;
3192 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break;
3193 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break;
3194 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break;
3195 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break;
3196 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break;
3197 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3198 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3199 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break;
3200 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break;
3201 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break;
3202 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break;
3203 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3204 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3205 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break;
3206 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break;
3207 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break;
3208 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break;
3209 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break;
3210 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break;
3211 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break;
3212 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break;
3213 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break;
3214 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break;
3215 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break;
3216 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break;
3217 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break;
3218 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break;
3219 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break;
3220 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break;
3221 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break;
3222 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break;
3223 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break;
3224 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break;
3225 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break;
3226 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break;
3227 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break;
3228 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break;
3229 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break;
3230 default: goto bad;
3231 }
3232 p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
3233 fake(vregNo(i->Xin.SseReRg.src)) );
3234 # undef XX
3235 goto done;
3236
3237 case Xin_SseCMov:
3238 /* jmp fwds if !condition */
3239 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3240 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3241 ptmp = p;
3242
3243 /* movaps %src, %dst */
3244 *p++ = 0x0F;
3245 *p++ = 0x28;
3246 p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
3247 fake(vregNo(i->Xin.SseCMov.src)) );
3248
3249 /* Fill in the jump offset. */
3250 *(ptmp-1) = toUChar(p - ptmp);
3251 goto done;
3252
3253 case Xin_SseShuf:
3254 *p++ = 0x66;
3255 *p++ = 0x0F;
3256 *p++ = 0x70;
3257 p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
3258 fake(vregNo(i->Xin.SseShuf.src)) );
3259 *p++ = (UChar)(i->Xin.SseShuf.order);
3260 goto done;
3261
3262 case Xin_EvCheck: {
3263 /* We generate:
3264 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
3265 (2 bytes) jns nofail expected taken
3266 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
3267 nofail:
3268 */
3269 /* This is heavily asserted re instruction lengths. It needs to
3270 be. If we get given unexpected forms of .amCounter or
3271 .amFailAddr -- basically, anything that's not of the form
3272 uimm7(%ebp) -- the assertions here are likely to fail. */
3273 /* Note also that after the decl we must be very careful not to
3274 read the carry flag, else we get a partial flags stall.
3275 js/jns avoids that, though. */
3276 UChar* p0 = p;
3277 /* --- decl amCounter, expected to be 4(%ebp) --- */
3278 /* "fake(1)" because there's no register in this encoding;
3279 instead the register field is used as a sub opcode. The
3280 encoding for "decl r/m32" is FF /1, hence the fake(1). */
3281 *p++ = 0xFF;
3282 p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
3283 vassert(p - p0 == 3);
3284 /* --- jns nofail --- */
3285 *p++ = 0x79;
3286 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3287 vassert(p - p0 == 5);
3288 /* --- jmp* 0(%ebp) --- */
3289 /* The encoding is FF /4. */
3290 *p++ = 0xFF;
3291 p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
3292 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3293 /* And crosscheck .. */
3294 vassert(evCheckSzB_X86() == 8);
3295 goto done;
3296 }
3297
3298 case Xin_ProfInc: {
3299 /* We generate addl $1,NotKnownYet
3300 adcl $0,NotKnownYet+4
3301 in the expectation that a later call to LibVEX_patchProfCtr
3302 will be used to fill in the immediate fields once the right
3303 value is known.
3304 83 05 00 00 00 00 01
3305 83 15 00 00 00 00 00
3306 */
3307 *p++ = 0x83; *p++ = 0x05;
3308 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3309 *p++ = 0x01;
3310 *p++ = 0x83; *p++ = 0x15;
3311 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3312 *p++ = 0x00;
3313 /* Tell the caller .. */
3314 vassert(!(*is_profInc));
3315 *is_profInc = True;
3316 goto done;
3317 }
3318
3319 default:
3320 goto bad;
3321 }
3322
3323 bad:
3324 ppX86Instr(i, mode64);
3325 vpanic("emit_X86Instr");
3326 /*NOTREACHED*/
3327
3328 done:
3329 vassert(p - &buf[0] <= 32);
3330 return p - &buf[0];
3331
3332 # undef fake
3333 }
3334
3335
3336 /* How big is an event check? See case for Xin_EvCheck in
3337 emit_X86Instr just above. That crosschecks what this returns, so
3338 we can tell if we're inconsistent. */
3339 Int evCheckSzB_X86 ( void )
3340 {
3341 return 8;
3342 }
3343
3344
3345 /* NB: what goes on here has to be very closely coordinated with the
3346 emitInstr case for XDirect, above. */
3347 VexInvalRange chainXDirect_X86 ( void* place_to_chain,
3348 void* disp_cp_chain_me_EXPECTED,
3349 void* place_to_jump_to )
3350 {
3351 /* What we're expecting to see is:
3352 movl $disp_cp_chain_me_EXPECTED, %edx
3353 call *%edx
3354 viz
3355 BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3356 FF D2
3357 */
3358 UChar* p = (UChar*)place_to_chain;
3359 vassert(p[0] == 0xBA);
3360 vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
3361 vassert(p[5] == 0xFF);
3362 vassert(p[6] == 0xD2);
3363 /* And what we want to change it to is:
3364 jmp disp32 where disp32 is relative to the next insn
3365 ud2;
3366 viz
3367 E9 <4 bytes == disp32>
3368 0F 0B
3369 The replacement has the same length as the original.
3370 */
3371 /* This is the delta we need to put into a JMP d32 insn. It's
3372 relative to the start of the next insn, hence the -5. */
3373 Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
3374
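/* Worked example (editor's illustration): if place_to_chain is at
   0x08000000 and place_to_jump_to is at 0x08000100 then
   delta = 0x100 - 5 = 0xFB, and the patched sequence becomes
   E9 FB 00 00 00 0F 0B. */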
3375 /* And make the modifications. */
3376 p[0] = 0xE9;
3377 p[1] = (delta >> 0) & 0xFF;
3378 p[2] = (delta >> 8) & 0xFF;
3379 p[3] = (delta >> 16) & 0xFF;
3380 p[4] = (delta >> 24) & 0xFF;
3381 p[5] = 0x0F; p[6] = 0x0B;
3382 /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
3383 delta >>= 32;
3384 vassert(delta == 0LL || delta == -1LL);
3385 VexInvalRange vir = { (HWord)place_to_chain, 7 };
3386 return vir;
3387 }
3388
3389
3390 /* NB: what goes on here has to be very closely coordinated with the
3391 emitInstr case for XDirect, above. */
3392 VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
3393 void* place_to_jump_to_EXPECTED,
3394 void* disp_cp_chain_me )
3395 {
3396 /* What we're expecting to see is:
3397 jmp d32
3398 ud2;
3399 viz
3400 E9 <4 bytes == disp32>
3401 0F 0B
3402 */
3403 UChar* p = (UChar*)place_to_unchain;
3404 Bool valid = False;
3405 if (p[0] == 0xE9
3406 && p[5] == 0x0F && p[6] == 0x0B) {
3407 /* Check the offset is right. */
3408 Int s32 = *(Int*)(&p[1]);
3409 if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
3410 valid = True;
3411 if (0)
3412 vex_printf("QQQ unchainXDirect_X86: found valid\n");
3413 }
3414 }
3415 vassert(valid);
3416 /* And what we want to change it to is:
3417 movl $disp_cp_chain_me, %edx
3418 call *%edx
3419 viz
3420 BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3421 FF D2
3422 So it's the same length (convenient, huh).
3423 */
3424 p[0] = 0xBA;
3425 *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
3426 p[5] = 0xFF;
3427 p[6] = 0xD2;
3428 VexInvalRange vir = { (HWord)place_to_unchain, 7 };
3429 return vir;
3430 }
3431
3432
3433 /* Patch the counter address into a profile inc point, as previously
3434 created by the Xin_ProfInc case for emit_X86Instr. */
3435 VexInvalRange patchProfInc_X86 ( void* place_to_patch,
3436 ULong* location_of_counter )
3437 {
3438 vassert(sizeof(ULong*) == 4);
3439 UChar* p = (UChar*)place_to_patch;
3440 vassert(p[0] == 0x83);
3441 vassert(p[1] == 0x05);
3442 vassert(p[2] == 0x00);
3443 vassert(p[3] == 0x00);
3444 vassert(p[4] == 0x00);
3445 vassert(p[5] == 0x00);
3446 vassert(p[6] == 0x01);
3447 vassert(p[7] == 0x83);
3448 vassert(p[8] == 0x15);
3449 vassert(p[9] == 0x00);
3450 vassert(p[10] == 0x00);
3451 vassert(p[11] == 0x00);
3452 vassert(p[12] == 0x00);
3453 vassert(p[13] == 0x00);
3454 UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
3455 p[2] = imm32 & 0xFF; imm32 >>= 8;
3456 p[3] = imm32 & 0xFF; imm32 >>= 8;
3457 p[4] = imm32 & 0xFF; imm32 >>= 8;
3458 p[5] = imm32 & 0xFF; imm32 >>= 8;
3459 imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
3460 p[9] = imm32 & 0xFF; imm32 >>= 8;
3461 p[10] = imm32 & 0xFF; imm32 >>= 8;
3462 p[11] = imm32 & 0xFF; imm32 >>= 8;
3463 p[12] = imm32 & 0xFF; imm32 >>= 8;
3464 VexInvalRange vir = { (HWord)place_to_patch, 14 };
3465 return vir;
3466 }
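/* Worked example (editor's illustration, with a made-up counter
   address): if location_of_counter is 0x0804A000, the immediate
   fields are patched little-endian so the two insns become
   "addl $1,0x0804A000 ; adcl $0,0x0804A004", i.e.
   83 05 00 A0 04 08 01  followed by  83 15 04 A0 04 08 00. */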
3467
3468
3469 /*---------------------------------------------------------------*/
3470 /*--- end host_x86_defs.c ---*/
3471 /*---------------------------------------------------------------*/
3472