1
/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/
5
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
35
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
39
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_amd64_defs.h"
43
44
/* --------- Registers. --------- */
46
/* Print the name of an AMD64 host register.
   Virtual registers are passed to the generic ppHReg.  Real registers
   are printed according to their class: HRcInt64 by its 64-bit name
   from the table below, HRcFlt64 as "%fakeN" and HRcVec128 as
   "%xmmN".  The register number is range-checked per class, and any
   other class causes a panic. */
void ppHRegAMD64 ( HReg reg )
{
   static HChar* ireg64_names[16]
      = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
          "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   Int regNo;

   /* Virtual registers all print the same way. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }

   /* Real registers: the format depends on the register class. */
   switch (hregClass(reg)) {
      case HRcInt64:
         regNo = hregNumber(reg);
         vassert(regNo >= 0 && regNo < 16);
         vex_printf("%s", ireg64_names[regNo]);
         break;
      case HRcFlt64:
         regNo = hregNumber(reg);
         vassert(regNo >= 0 && regNo < 6);
         vex_printf("%%fake%d", regNo);
         break;
      case HRcVec128:
         regNo = hregNumber(reg);
         vassert(regNo >= 0 && regNo < 16);
         vex_printf("%%xmm%d", regNo);
         break;
      default:
         vpanic("ppHRegAMD64");
   }
}
79
/* Print the 32-bit name of an integer register.
   A virtual register is printed by ppHReg followed by a "d" suffix.
   A real HRcInt64 register is printed using the 32-bit name table
   below, after its number is range-checked.  A real register of any
   other class causes a panic. */
static void ppHRegAMD64_lo32 ( HReg reg )
{
   static HChar* ireg32_names[16]
      = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
          "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d",
          "%r15d" };
   Int regNo;

   /* Virtual registers: generic name plus a 'd' marker. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      vex_printf("d");
      return;
   }

   /* Real registers: only the integer class has a 32-bit name. */
   if (hregClass(reg) == HRcInt64) {
      regNo = hregNumber(reg);
      vassert(regNo >= 0 && regNo < 16);
      vex_printf("%s", ireg32_names[regNo]);
   } else {
      vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}
103
hregAMD64_RAX(void)104 HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
hregAMD64_RCX(void)105 HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
hregAMD64_RDX(void)106 HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
hregAMD64_RBX(void)107 HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
hregAMD64_RSP(void)108 HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
hregAMD64_RBP(void)109 HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
hregAMD64_RSI(void)110 HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
hregAMD64_RDI(void)111 HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
hregAMD64_R8(void)112 HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); }
hregAMD64_R9(void)113 HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); }
hregAMD64_R10(void)114 HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
hregAMD64_R11(void)115 HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
hregAMD64_R12(void)116 HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
hregAMD64_R13(void)117 HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
hregAMD64_R14(void)118 HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
hregAMD64_R15(void)119 HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
120
hregAMD64_XMM0(void)121 HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
hregAMD64_XMM1(void)122 HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
hregAMD64_XMM3(void)123 HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); }
hregAMD64_XMM4(void)124 HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); }
hregAMD64_XMM5(void)125 HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); }
hregAMD64_XMM6(void)126 HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); }
hregAMD64_XMM7(void)127 HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); }
hregAMD64_XMM8(void)128 HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); }
hregAMD64_XMM9(void)129 HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); }
hregAMD64_XMM10(void)130 HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
hregAMD64_XMM11(void)131 HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
hregAMD64_XMM12(void)132 HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
133
134
/* Produce the set of registers made available for allocation.
   On return *nregs holds the number of entries and *arr points to a
   freshly LibVEX_Alloc'd array of that many HRegs.  The active
   configuration lists nine integer registers, ten vector registers
   and finally R10.  The "#if 0" section is a disabled, much smaller
   configuration of six registers. */
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
   HReg* regs;
   Int   k = 0;   /* next free slot in regs[] */
#if 0
   *nregs = 6;
   regs = LibVEX_Alloc(*nregs * sizeof(HReg));
   regs[k++] = hregAMD64_RSI();
   regs[k++] = hregAMD64_RDI();
   regs[k++] = hregAMD64_RBX();

   regs[k++] = hregAMD64_XMM7();
   regs[k++] = hregAMD64_XMM8();
   regs[k++] = hregAMD64_XMM9();
#endif
#if 1
   *nregs = 20;
   regs = LibVEX_Alloc(*nregs * sizeof(HReg));
   /* Integer registers. */
   regs[k++] = hregAMD64_RSI();
   regs[k++] = hregAMD64_RDI();
   regs[k++] = hregAMD64_R8();
   regs[k++] = hregAMD64_R9();
   regs[k++] = hregAMD64_R12();
   regs[k++] = hregAMD64_R13();
   regs[k++] = hregAMD64_R14();
   regs[k++] = hregAMD64_R15();
   regs[k++] = hregAMD64_RBX();

   /* Vector registers. */
   regs[k++] = hregAMD64_XMM3();
   regs[k++] = hregAMD64_XMM4();
   regs[k++] = hregAMD64_XMM5();
   regs[k++] = hregAMD64_XMM6();
   regs[k++] = hregAMD64_XMM7();
   regs[k++] = hregAMD64_XMM8();
   regs[k++] = hregAMD64_XMM9();
   regs[k++] = hregAMD64_XMM10();
   regs[k++] = hregAMD64_XMM11();
   regs[k++] = hregAMD64_XMM12();

   /* One more integer register, placed last. */
   regs[k++] = hregAMD64_R10();
#endif
   *arr = regs;
}
174
175
/* --------- Condition codes, Intel encoding. --------- */
177
showAMD64CondCode(AMD64CondCode cond)178 HChar* showAMD64CondCode ( AMD64CondCode cond )
179 {
180 switch (cond) {
181 case Acc_O: return "o";
182 case Acc_NO: return "no";
183 case Acc_B: return "b";
184 case Acc_NB: return "nb";
185 case Acc_Z: return "z";
186 case Acc_NZ: return "nz";
187 case Acc_BE: return "be";
188 case Acc_NBE: return "nbe";
189 case Acc_S: return "s";
190 case Acc_NS: return "ns";
191 case Acc_P: return "p";
192 case Acc_NP: return "np";
193 case Acc_L: return "l";
194 case Acc_NL: return "nl";
195 case Acc_LE: return "le";
196 case Acc_NLE: return "nle";
197 case Acc_ALWAYS: return "ALWAYS";
198 default: vpanic("ppAMD64CondCode");
199 }
200 }
201
202
/* --------- AMD64AMode: memory address expressions. --------- */
204
AMD64AMode_IR(UInt imm32,HReg reg)205 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
206 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
207 am->tag = Aam_IR;
208 am->Aam.IR.imm = imm32;
209 am->Aam.IR.reg = reg;
210 return am;
211 }
AMD64AMode_IRRS(UInt imm32,HReg base,HReg indEx,Int shift)212 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
213 AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
214 am->tag = Aam_IRRS;
215 am->Aam.IRRS.imm = imm32;
216 am->Aam.IRRS.base = base;
217 am->Aam.IRRS.index = indEx;
218 am->Aam.IRRS.shift = shift;
219 vassert(shift >= 0 && shift <= 3);
220 return am;
221 }
222
/* Print an address mode.
   Aam_IR prints as "0xIMM(reg)", with the immediate left out when it
   is zero.  Aam_IRRS prints as "0xIMM(base,index,scale)" where scale
   is 1 << shift; here the immediate is always printed.  Any other tag
   causes a panic. */
void ppAMD64AMode ( AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      /* Omit a zero displacement. */
      if (am->Aam.IR.imm != 0)
         vex_printf("0x%x(", am->Aam.IR.imm);
      else
         vex_printf("(");
      ppHRegAMD64(am->Aam.IR.reg);
      vex_printf(")");
   }
   else if (am->tag == Aam_IRRS) {
      vex_printf("0x%x(", am->Aam.IRRS.imm);
      ppHRegAMD64(am->Aam.IRRS.base);
      vex_printf(",");
      ppHRegAMD64(am->Aam.IRRS.index);
      vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
   }
   else {
      vpanic("ppAMD64AMode");
   }
}
244
/* Record in 'u' the registers mentioned by the address mode 'am'.
   All of them are added as reads (HRmRead): the single register of an
   Aam_IR, or the base followed by the index of an Aam_IRRS.  Any
   other tag causes a panic. */
static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      addHRegUse(u, HRmRead, am->Aam.IR.reg);
   }
   else if (am->tag == Aam_IRRS) {
      addHRegUse(u, HRmRead, am->Aam.IRRS.base);
      addHRegUse(u, HRmRead, am->Aam.IRRS.index);
   }
   else {
      vpanic("addRegUsage_AMD64AMode");
   }
}
258
/* Rewrite, in place, every register held in the address mode 'am'
   with the result of looking it up in the remapping 'm'.  Any
   unrecognised tag causes a panic. */
static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
   }
   else if (am->tag == Aam_IRRS) {
      am->Aam.IRRS.base  = lookupHRegRemap(m, am->Aam.IRRS.base);
      am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
   }
   else {
      vpanic("mapRegs_AMD64AMode");
   }
}
272
/* --------- Operand, which can be reg, immediate or memory. --------- */
274
AMD64RMI_Imm(UInt imm32)275 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
276 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
277 op->tag = Armi_Imm;
278 op->Armi.Imm.imm32 = imm32;
279 return op;
280 }
AMD64RMI_Reg(HReg reg)281 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
282 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
283 op->tag = Armi_Reg;
284 op->Armi.Reg.reg = reg;
285 return op;
286 }
AMD64RMI_Mem(AMD64AMode * am)287 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
288 AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
289 op->tag = Armi_Mem;
290 op->Armi.Mem.am = am;
291 return op;
292 }
293
/* Worker for printing an AMD64RMI.  An immediate prints as "$0xIMM",
   a memory operand via ppAMD64AMode, and a register via either
   ppHRegAMD64_lo32 (when 'lo32' is set) or ppHRegAMD64.  Any other
   tag causes a panic. */
static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 )
{
   if (op->tag == Armi_Imm) {
      vex_printf("$0x%x", op->Armi.Imm.imm32);
   }
   else if (op->tag == Armi_Reg) {
      /* 'lo32' only affects how a register is named. */
      if (lo32)
         ppHRegAMD64_lo32(op->Armi.Reg.reg);
      else
         ppHRegAMD64(op->Armi.Reg.reg);
   }
   else if (op->tag == Armi_Mem) {
      ppAMD64AMode(op->Armi.Mem.am);
   }
   else {
      vpanic("ppAMD64RMI");
   }
}
/* Print an AMD64RMI, naming registers with their full-width names. */
void ppAMD64RMI ( AMD64RMI* op )
{
   Bool lo32 = False;
   ppAMD64RMI_wrk(op, lo32);
}
/* Print an AMD64RMI, naming registers with their 32-bit names. */
void ppAMD64RMI_lo32 ( AMD64RMI* op )
{
   Bool lo32 = True;
   ppAMD64RMI_wrk(op, lo32);
}
318
319 /* An AMD64RMI can only be used in a "read" context (what would it mean
320 to write or modify a literal?) and so we enumerate its registers
321 accordingly. */
addRegUsage_AMD64RMI(HRegUsage * u,AMD64RMI * op)322 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
323 switch (op->tag) {
324 case Armi_Imm:
325 return;
326 case Armi_Reg:
327 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
328 return;
329 case Armi_Mem:
330 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
331 return;
332 default:
333 vpanic("addRegUsage_AMD64RMI");
334 }
335 }
336
/* Apply the register remapping 'm' to the operand 'op' in place.  An
   immediate is left untouched, a register is replaced by its lookup
   result, and a memory operand has its address mode remapped.  Any
   other tag causes a panic. */
static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op )
{
   if (op->tag == Armi_Imm) {
      /* Nothing to remap. */
   }
   else if (op->tag == Armi_Reg) {
      op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
   }
   else if (op->tag == Armi_Mem) {
      mapRegs_AMD64AMode(m, op->Armi.Mem.am);
   }
   else {
      vpanic("mapRegs_AMD64RMI");
   }
}
351
352
/* --------- Operand, which can be reg or immediate only. --------- */
354
AMD64RI_Imm(UInt imm32)355 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
356 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
357 op->tag = Ari_Imm;
358 op->Ari.Imm.imm32 = imm32;
359 return op;
360 }
AMD64RI_Reg(HReg reg)361 AMD64RI* AMD64RI_Reg ( HReg reg ) {
362 AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
363 op->tag = Ari_Reg;
364 op->Ari.Reg.reg = reg;
365 return op;
366 }
367
/* Print an AMD64RI: an immediate as "$0xIMM", a register via
   ppHRegAMD64.  Any other tag causes a panic. */
void ppAMD64RI ( AMD64RI* op )
{
   if (op->tag == Ari_Imm) {
      vex_printf("$0x%x", op->Ari.Imm.imm32);
   }
   else if (op->tag == Ari_Reg) {
      ppHRegAMD64(op->Ari.Reg.reg);
   }
   else {
      vpanic("ppAMD64RI");
   }
}
380
381 /* An AMD64RI can only be used in a "read" context (what would it mean
382 to write or modify a literal?) and so we enumerate its registers
383 accordingly. */
addRegUsage_AMD64RI(HRegUsage * u,AMD64RI * op)384 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
385 switch (op->tag) {
386 case Ari_Imm:
387 return;
388 case Ari_Reg:
389 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
390 return;
391 default:
392 vpanic("addRegUsage_AMD64RI");
393 }
394 }
395
/* Apply the register remapping 'm' to the operand 'op' in place.  An
   immediate is left untouched; a register is replaced by its lookup
   result.  Any other tag causes a panic. */
static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op )
{
   if (op->tag == Ari_Imm) {
      /* Nothing to remap. */
   }
   else if (op->tag == Ari_Reg) {
      op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
   }
   else {
      vpanic("mapRegs_AMD64RI");
   }
}
407
408
/* --------- Operand, which can be reg or memory only. --------- */
410
AMD64RM_Reg(HReg reg)411 AMD64RM* AMD64RM_Reg ( HReg reg ) {
412 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
413 op->tag = Arm_Reg;
414 op->Arm.Reg.reg = reg;
415 return op;
416 }
AMD64RM_Mem(AMD64AMode * am)417 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
418 AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
419 op->tag = Arm_Mem;
420 op->Arm.Mem.am = am;
421 return op;
422 }
423
/* Print an AMD64RM: a memory operand via ppAMD64AMode, a register
   via ppHRegAMD64.  Any other tag causes a panic. */
void ppAMD64RM ( AMD64RM* op )
{
   if (op->tag == Arm_Mem) {
      ppAMD64AMode(op->Arm.Mem.am);
   }
   else if (op->tag == Arm_Reg) {
      ppHRegAMD64(op->Arm.Reg.reg);
   }
   else {
      vpanic("ppAMD64RM");
   }
}
436
437 /* Because an AMD64RM can be both a source or destination operand, we
438 have to supply a mode -- pertaining to the operand as a whole --
439 indicating how it's being used. */
addRegUsage_AMD64RM(HRegUsage * u,AMD64RM * op,HRegMode mode)440 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
441 switch (op->tag) {
442 case Arm_Mem:
443 /* Memory is read, written or modified. So we just want to
444 know the regs read by the amode. */
445 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
446 return;
447 case Arm_Reg:
448 /* reg is read, written or modified. Add it in the
449 appropriate way. */
450 addHRegUse(u, mode, op->Arm.Reg.reg);
451 return;
452 default:
453 vpanic("addRegUsage_AMD64RM");
454 }
455 }
456
/* Apply the register remapping 'm' to the operand 'op' in place.  A
   memory operand has its address mode remapped; a register is
   replaced by its lookup result.  Any other tag causes a panic. */
static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   if (op->tag == Arm_Mem) {
      mapRegs_AMD64AMode(m, op->Arm.Mem.am);
   }
   else if (op->tag == Arm_Reg) {
      op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
   }
   else {
      vpanic("mapRegs_AMD64RM");
   }
}
470
471
/* --------- Instructions. --------- */
473
showAMD64ScalarSz(Int sz)474 static HChar* showAMD64ScalarSz ( Int sz ) {
475 switch (sz) {
476 case 2: return "w";
477 case 4: return "l";
478 case 8: return "q";
479 default: vpanic("showAMD64ScalarSz");
480 }
481 }
482
showAMD64UnaryOp(AMD64UnaryOp op)483 HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
484 switch (op) {
485 case Aun_NOT: return "not";
486 case Aun_NEG: return "neg";
487 default: vpanic("showAMD64UnaryOp");
488 }
489 }
490
showAMD64AluOp(AMD64AluOp op)491 HChar* showAMD64AluOp ( AMD64AluOp op ) {
492 switch (op) {
493 case Aalu_MOV: return "mov";
494 case Aalu_CMP: return "cmp";
495 case Aalu_ADD: return "add";
496 case Aalu_SUB: return "sub";
497 case Aalu_ADC: return "adc";
498 case Aalu_SBB: return "sbb";
499 case Aalu_AND: return "and";
500 case Aalu_OR: return "or";
501 case Aalu_XOR: return "xor";
502 case Aalu_MUL: return "imul";
503 default: vpanic("showAMD64AluOp");
504 }
505 }
506
showAMD64ShiftOp(AMD64ShiftOp op)507 HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
508 switch (op) {
509 case Ash_SHL: return "shl";
510 case Ash_SHR: return "shr";
511 case Ash_SAR: return "sar";
512 default: vpanic("showAMD64ShiftOp");
513 }
514 }
515
showA87FpOp(A87FpOp op)516 HChar* showA87FpOp ( A87FpOp op ) {
517 switch (op) {
518 case Afp_SCALE: return "scale";
519 case Afp_ATAN: return "atan";
520 case Afp_YL2X: return "yl2x";
521 case Afp_YL2XP1: return "yl2xp1";
522 case Afp_PREM: return "prem";
523 case Afp_PREM1: return "prem1";
524 case Afp_SQRT: return "sqrt";
525 case Afp_SIN: return "sin";
526 case Afp_COS: return "cos";
527 case Afp_TAN: return "tan";
528 case Afp_ROUND: return "round";
529 case Afp_2XM1: return "2xm1";
530 default: vpanic("showA87FpOp");
531 }
532 }
533
showAMD64SseOp(AMD64SseOp op)534 HChar* showAMD64SseOp ( AMD64SseOp op ) {
535 switch (op) {
536 case Asse_MOV: return "movups";
537 case Asse_ADDF: return "add";
538 case Asse_SUBF: return "sub";
539 case Asse_MULF: return "mul";
540 case Asse_DIVF: return "div";
541 case Asse_MAXF: return "max";
542 case Asse_MINF: return "min";
543 case Asse_CMPEQF: return "cmpFeq";
544 case Asse_CMPLTF: return "cmpFlt";
545 case Asse_CMPLEF: return "cmpFle";
546 case Asse_CMPUNF: return "cmpFun";
547 case Asse_RCPF: return "rcp";
548 case Asse_RSQRTF: return "rsqrt";
549 case Asse_SQRTF: return "sqrt";
550 case Asse_AND: return "and";
551 case Asse_OR: return "or";
552 case Asse_XOR: return "xor";
553 case Asse_ANDN: return "andn";
554 case Asse_ADD8: return "paddb";
555 case Asse_ADD16: return "paddw";
556 case Asse_ADD32: return "paddd";
557 case Asse_ADD64: return "paddq";
558 case Asse_QADD8U: return "paddusb";
559 case Asse_QADD16U: return "paddusw";
560 case Asse_QADD8S: return "paddsb";
561 case Asse_QADD16S: return "paddsw";
562 case Asse_SUB8: return "psubb";
563 case Asse_SUB16: return "psubw";
564 case Asse_SUB32: return "psubd";
565 case Asse_SUB64: return "psubq";
566 case Asse_QSUB8U: return "psubusb";
567 case Asse_QSUB16U: return "psubusw";
568 case Asse_QSUB8S: return "psubsb";
569 case Asse_QSUB16S: return "psubsw";
570 case Asse_MUL16: return "pmullw";
571 case Asse_MULHI16U: return "pmulhuw";
572 case Asse_MULHI16S: return "pmulhw";
573 case Asse_AVG8U: return "pavgb";
574 case Asse_AVG16U: return "pavgw";
575 case Asse_MAX16S: return "pmaxw";
576 case Asse_MAX8U: return "pmaxub";
577 case Asse_MIN16S: return "pminw";
578 case Asse_MIN8U: return "pminub";
579 case Asse_CMPEQ8: return "pcmpeqb";
580 case Asse_CMPEQ16: return "pcmpeqw";
581 case Asse_CMPEQ32: return "pcmpeqd";
582 case Asse_CMPGT8S: return "pcmpgtb";
583 case Asse_CMPGT16S: return "pcmpgtw";
584 case Asse_CMPGT32S: return "pcmpgtd";
585 case Asse_SHL16: return "psllw";
586 case Asse_SHL32: return "pslld";
587 case Asse_SHL64: return "psllq";
588 case Asse_SHR16: return "psrlw";
589 case Asse_SHR32: return "psrld";
590 case Asse_SHR64: return "psrlq";
591 case Asse_SAR16: return "psraw";
592 case Asse_SAR32: return "psrad";
593 case Asse_PACKSSD: return "packssdw";
594 case Asse_PACKSSW: return "packsswb";
595 case Asse_PACKUSW: return "packuswb";
596 case Asse_UNPCKHB: return "punpckhb";
597 case Asse_UNPCKHW: return "punpckhw";
598 case Asse_UNPCKHD: return "punpckhd";
599 case Asse_UNPCKHQ: return "punpckhq";
600 case Asse_UNPCKLB: return "punpcklb";
601 case Asse_UNPCKLW: return "punpcklw";
602 case Asse_UNPCKLD: return "punpckld";
603 case Asse_UNPCKLQ: return "punpcklq";
604 default: vpanic("showAMD64SseOp");
605 }
606 }
607
AMD64Instr_Imm64(ULong imm64,HReg dst)608 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
609 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
610 i->tag = Ain_Imm64;
611 i->Ain.Imm64.imm64 = imm64;
612 i->Ain.Imm64.dst = dst;
613 return i;
614 }
AMD64Instr_Alu64R(AMD64AluOp op,AMD64RMI * src,HReg dst)615 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
616 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
617 i->tag = Ain_Alu64R;
618 i->Ain.Alu64R.op = op;
619 i->Ain.Alu64R.src = src;
620 i->Ain.Alu64R.dst = dst;
621 return i;
622 }
AMD64Instr_Alu64M(AMD64AluOp op,AMD64RI * src,AMD64AMode * dst)623 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
624 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
625 i->tag = Ain_Alu64M;
626 i->Ain.Alu64M.op = op;
627 i->Ain.Alu64M.src = src;
628 i->Ain.Alu64M.dst = dst;
629 vassert(op != Aalu_MUL);
630 return i;
631 }
AMD64Instr_Sh64(AMD64ShiftOp op,UInt src,HReg dst)632 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
633 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
634 i->tag = Ain_Sh64;
635 i->Ain.Sh64.op = op;
636 i->Ain.Sh64.src = src;
637 i->Ain.Sh64.dst = dst;
638 return i;
639 }
AMD64Instr_Test64(UInt imm32,HReg dst)640 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
641 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
642 i->tag = Ain_Test64;
643 i->Ain.Test64.imm32 = imm32;
644 i->Ain.Test64.dst = dst;
645 return i;
646 }
AMD64Instr_Unary64(AMD64UnaryOp op,HReg dst)647 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
648 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
649 i->tag = Ain_Unary64;
650 i->Ain.Unary64.op = op;
651 i->Ain.Unary64.dst = dst;
652 return i;
653 }
AMD64Instr_Lea64(AMD64AMode * am,HReg dst)654 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
655 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
656 i->tag = Ain_Lea64;
657 i->Ain.Lea64.am = am;
658 i->Ain.Lea64.dst = dst;
659 return i;
660 }
AMD64Instr_Alu32R(AMD64AluOp op,AMD64RMI * src,HReg dst)661 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
662 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
663 i->tag = Ain_Alu32R;
664 i->Ain.Alu32R.op = op;
665 i->Ain.Alu32R.src = src;
666 i->Ain.Alu32R.dst = dst;
667 switch (op) {
668 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
669 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
670 default: vassert(0);
671 }
672 return i;
673 }
AMD64Instr_MulL(Bool syned,AMD64RM * src)674 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
675 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
676 i->tag = Ain_MulL;
677 i->Ain.MulL.syned = syned;
678 i->Ain.MulL.src = src;
679 return i;
680 }
AMD64Instr_Div(Bool syned,Int sz,AMD64RM * src)681 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
682 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
683 i->tag = Ain_Div;
684 i->Ain.Div.syned = syned;
685 i->Ain.Div.sz = sz;
686 i->Ain.Div.src = src;
687 vassert(sz == 4 || sz == 8);
688 return i;
689 }
AMD64Instr_Push(AMD64RMI * src)690 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
691 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
692 i->tag = Ain_Push;
693 i->Ain.Push.src = src;
694 return i;
695 }
AMD64Instr_Call(AMD64CondCode cond,Addr64 target,Int regparms)696 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) {
697 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
698 i->tag = Ain_Call;
699 i->Ain.Call.cond = cond;
700 i->Ain.Call.target = target;
701 i->Ain.Call.regparms = regparms;
702 vassert(regparms >= 0 && regparms <= 6);
703 return i;
704 }
705
AMD64Instr_XDirect(Addr64 dstGA,AMD64AMode * amRIP,AMD64CondCode cond,Bool toFastEP)706 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
707 AMD64CondCode cond, Bool toFastEP ) {
708 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
709 i->tag = Ain_XDirect;
710 i->Ain.XDirect.dstGA = dstGA;
711 i->Ain.XDirect.amRIP = amRIP;
712 i->Ain.XDirect.cond = cond;
713 i->Ain.XDirect.toFastEP = toFastEP;
714 return i;
715 }
AMD64Instr_XIndir(HReg dstGA,AMD64AMode * amRIP,AMD64CondCode cond)716 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
717 AMD64CondCode cond ) {
718 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
719 i->tag = Ain_XIndir;
720 i->Ain.XIndir.dstGA = dstGA;
721 i->Ain.XIndir.amRIP = amRIP;
722 i->Ain.XIndir.cond = cond;
723 return i;
724 }
AMD64Instr_XAssisted(HReg dstGA,AMD64AMode * amRIP,AMD64CondCode cond,IRJumpKind jk)725 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
726 AMD64CondCode cond, IRJumpKind jk ) {
727 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
728 i->tag = Ain_XAssisted;
729 i->Ain.XAssisted.dstGA = dstGA;
730 i->Ain.XAssisted.amRIP = amRIP;
731 i->Ain.XAssisted.cond = cond;
732 i->Ain.XAssisted.jk = jk;
733 return i;
734 }
735
AMD64Instr_CMov64(AMD64CondCode cond,AMD64RM * src,HReg dst)736 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
737 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
738 i->tag = Ain_CMov64;
739 i->Ain.CMov64.cond = cond;
740 i->Ain.CMov64.src = src;
741 i->Ain.CMov64.dst = dst;
742 vassert(cond != Acc_ALWAYS);
743 return i;
744 }
AMD64Instr_MovxLQ(Bool syned,HReg src,HReg dst)745 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
746 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
747 i->tag = Ain_MovxLQ;
748 i->Ain.MovxLQ.syned = syned;
749 i->Ain.MovxLQ.src = src;
750 i->Ain.MovxLQ.dst = dst;
751 return i;
752 }
AMD64Instr_LoadEX(UChar szSmall,Bool syned,AMD64AMode * src,HReg dst)753 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
754 AMD64AMode* src, HReg dst ) {
755 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
756 i->tag = Ain_LoadEX;
757 i->Ain.LoadEX.szSmall = szSmall;
758 i->Ain.LoadEX.syned = syned;
759 i->Ain.LoadEX.src = src;
760 i->Ain.LoadEX.dst = dst;
761 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
762 return i;
763 }
AMD64Instr_Store(UChar sz,HReg src,AMD64AMode * dst)764 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
765 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
766 i->tag = Ain_Store;
767 i->Ain.Store.sz = sz;
768 i->Ain.Store.src = src;
769 i->Ain.Store.dst = dst;
770 vassert(sz == 1 || sz == 2 || sz == 4);
771 return i;
772 }
AMD64Instr_Set64(AMD64CondCode cond,HReg dst)773 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
774 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
775 i->tag = Ain_Set64;
776 i->Ain.Set64.cond = cond;
777 i->Ain.Set64.dst = dst;
778 return i;
779 }
AMD64Instr_Bsfr64(Bool isFwds,HReg src,HReg dst)780 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
781 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
782 i->tag = Ain_Bsfr64;
783 i->Ain.Bsfr64.isFwds = isFwds;
784 i->Ain.Bsfr64.src = src;
785 i->Ain.Bsfr64.dst = dst;
786 return i;
787 }
AMD64Instr_MFence(void)788 AMD64Instr* AMD64Instr_MFence ( void ) {
789 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
790 i->tag = Ain_MFence;
791 return i;
792 }
AMD64Instr_ACAS(AMD64AMode * addr,UChar sz)793 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
794 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
795 i->tag = Ain_ACAS;
796 i->Ain.ACAS.addr = addr;
797 i->Ain.ACAS.sz = sz;
798 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
799 return i;
800 }
AMD64Instr_DACAS(AMD64AMode * addr,UChar sz)801 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
802 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
803 i->tag = Ain_DACAS;
804 i->Ain.DACAS.addr = addr;
805 i->Ain.DACAS.sz = sz;
806 vassert(sz == 8 || sz == 4);
807 return i;
808 }
809
AMD64Instr_A87Free(Int nregs)810 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
811 {
812 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
813 i->tag = Ain_A87Free;
814 i->Ain.A87Free.nregs = nregs;
815 vassert(nregs >= 1 && nregs <= 7);
816 return i;
817 }
AMD64Instr_A87PushPop(AMD64AMode * addr,Bool isPush,UChar szB)818 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
819 {
820 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
821 i->tag = Ain_A87PushPop;
822 i->Ain.A87PushPop.addr = addr;
823 i->Ain.A87PushPop.isPush = isPush;
824 i->Ain.A87PushPop.szB = szB;
825 vassert(szB == 8 || szB == 4);
826 return i;
827 }
AMD64Instr_A87FpOp(A87FpOp op)828 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
829 {
830 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
831 i->tag = Ain_A87FpOp;
832 i->Ain.A87FpOp.op = op;
833 return i;
834 }
AMD64Instr_A87LdCW(AMD64AMode * addr)835 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
836 {
837 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
838 i->tag = Ain_A87LdCW;
839 i->Ain.A87LdCW.addr = addr;
840 return i;
841 }
AMD64Instr_A87StSW(AMD64AMode * addr)842 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
843 {
844 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
845 i->tag = Ain_A87StSW;
846 i->Ain.A87StSW.addr = addr;
847 return i;
848 }
AMD64Instr_LdMXCSR(AMD64AMode * addr)849 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
850 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
851 i->tag = Ain_LdMXCSR;
852 i->Ain.LdMXCSR.addr = addr;
853 return i;
854 }
AMD64Instr_SseUComIS(Int sz,HReg srcL,HReg srcR,HReg dst)855 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
856 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
857 i->tag = Ain_SseUComIS;
858 i->Ain.SseUComIS.sz = toUChar(sz);
859 i->Ain.SseUComIS.srcL = srcL;
860 i->Ain.SseUComIS.srcR = srcR;
861 i->Ain.SseUComIS.dst = dst;
862 vassert(sz == 4 || sz == 8);
863 return i;
864 }
AMD64Instr_SseSI2SF(Int szS,Int szD,HReg src,HReg dst)865 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
866 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
867 i->tag = Ain_SseSI2SF;
868 i->Ain.SseSI2SF.szS = toUChar(szS);
869 i->Ain.SseSI2SF.szD = toUChar(szD);
870 i->Ain.SseSI2SF.src = src;
871 i->Ain.SseSI2SF.dst = dst;
872 vassert(szS == 4 || szS == 8);
873 vassert(szD == 4 || szD == 8);
874 return i;
875 }
AMD64Instr_SseSF2SI(Int szS,Int szD,HReg src,HReg dst)876 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
877 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
878 i->tag = Ain_SseSF2SI;
879 i->Ain.SseSF2SI.szS = toUChar(szS);
880 i->Ain.SseSF2SI.szD = toUChar(szD);
881 i->Ain.SseSF2SI.src = src;
882 i->Ain.SseSF2SI.dst = dst;
883 vassert(szS == 4 || szS == 8);
884 vassert(szD == 4 || szD == 8);
885 return i;
886 }
AMD64Instr_SseSDSS(Bool from64,HReg src,HReg dst)887 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
888 {
889 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
890 i->tag = Ain_SseSDSS;
891 i->Ain.SseSDSS.from64 = from64;
892 i->Ain.SseSDSS.src = src;
893 i->Ain.SseSDSS.dst = dst;
894 return i;
895 }
AMD64Instr_SseLdSt(Bool isLoad,Int sz,HReg reg,AMD64AMode * addr)896 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
897 HReg reg, AMD64AMode* addr ) {
898 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
899 i->tag = Ain_SseLdSt;
900 i->Ain.SseLdSt.isLoad = isLoad;
901 i->Ain.SseLdSt.sz = toUChar(sz);
902 i->Ain.SseLdSt.reg = reg;
903 i->Ain.SseLdSt.addr = addr;
904 vassert(sz == 4 || sz == 8 || sz == 16);
905 return i;
906 }
AMD64Instr_SseLdzLO(Int sz,HReg reg,AMD64AMode * addr)907 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
908 {
909 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
910 i->tag = Ain_SseLdzLO;
911 i->Ain.SseLdzLO.sz = sz;
912 i->Ain.SseLdzLO.reg = reg;
913 i->Ain.SseLdzLO.addr = addr;
914 vassert(sz == 4 || sz == 8);
915 return i;
916 }
AMD64Instr_Sse32Fx4(AMD64SseOp op,HReg src,HReg dst)917 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
918 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
919 i->tag = Ain_Sse32Fx4;
920 i->Ain.Sse32Fx4.op = op;
921 i->Ain.Sse32Fx4.src = src;
922 i->Ain.Sse32Fx4.dst = dst;
923 vassert(op != Asse_MOV);
924 return i;
925 }
AMD64Instr_Sse32FLo(AMD64SseOp op,HReg src,HReg dst)926 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
927 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
928 i->tag = Ain_Sse32FLo;
929 i->Ain.Sse32FLo.op = op;
930 i->Ain.Sse32FLo.src = src;
931 i->Ain.Sse32FLo.dst = dst;
932 vassert(op != Asse_MOV);
933 return i;
934 }
AMD64Instr_Sse64Fx2(AMD64SseOp op,HReg src,HReg dst)935 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
936 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
937 i->tag = Ain_Sse64Fx2;
938 i->Ain.Sse64Fx2.op = op;
939 i->Ain.Sse64Fx2.src = src;
940 i->Ain.Sse64Fx2.dst = dst;
941 vassert(op != Asse_MOV);
942 return i;
943 }
AMD64Instr_Sse64FLo(AMD64SseOp op,HReg src,HReg dst)944 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
945 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
946 i->tag = Ain_Sse64FLo;
947 i->Ain.Sse64FLo.op = op;
948 i->Ain.Sse64FLo.src = src;
949 i->Ain.Sse64FLo.dst = dst;
950 vassert(op != Asse_MOV);
951 return i;
952 }
AMD64Instr_SseReRg(AMD64SseOp op,HReg re,HReg rg)953 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
954 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
955 i->tag = Ain_SseReRg;
956 i->Ain.SseReRg.op = op;
957 i->Ain.SseReRg.src = re;
958 i->Ain.SseReRg.dst = rg;
959 return i;
960 }
AMD64Instr_SseCMov(AMD64CondCode cond,HReg src,HReg dst)961 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
962 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
963 i->tag = Ain_SseCMov;
964 i->Ain.SseCMov.cond = cond;
965 i->Ain.SseCMov.src = src;
966 i->Ain.SseCMov.dst = dst;
967 vassert(cond != Acc_ALWAYS);
968 return i;
969 }
AMD64Instr_SseShuf(Int order,HReg src,HReg dst)970 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
971 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
972 i->tag = Ain_SseShuf;
973 i->Ain.SseShuf.order = order;
974 i->Ain.SseShuf.src = src;
975 i->Ain.SseShuf.dst = dst;
976 vassert(order >= 0 && order <= 0xFF);
977 return i;
978 }
979 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
980 //uu HReg reg, AMD64AMode* addr ) {
981 //uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
982 //uu i->tag = Ain_AvxLdSt;
983 //uu i->Ain.AvxLdSt.isLoad = isLoad;
984 //uu i->Ain.AvxLdSt.reg = reg;
985 //uu i->Ain.AvxLdSt.addr = addr;
986 //uu return i;
987 //uu }
988 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
989 //uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
990 //uu i->tag = Ain_AvxReRg;
991 //uu i->Ain.AvxReRg.op = op;
992 //uu i->Ain.AvxReRg.src = re;
993 //uu i->Ain.AvxReRg.dst = rg;
994 //uu return i;
995 //uu }
AMD64Instr_EvCheck(AMD64AMode * amCounter,AMD64AMode * amFailAddr)996 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
997 AMD64AMode* amFailAddr ) {
998 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
999 i->tag = Ain_EvCheck;
1000 i->Ain.EvCheck.amCounter = amCounter;
1001 i->Ain.EvCheck.amFailAddr = amFailAddr;
1002 return i;
1003 }
AMD64Instr_ProfInc(void)1004 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1005 AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
1006 i->tag = Ain_ProfInc;
1007 return i;
1008 }
1009
ppAMD64Instr(AMD64Instr * i,Bool mode64)1010 void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
1011 {
1012 vassert(mode64 == True);
1013 switch (i->tag) {
1014 case Ain_Imm64:
1015 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1016 ppHRegAMD64(i->Ain.Imm64.dst);
1017 return;
1018 case Ain_Alu64R:
1019 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1020 ppAMD64RMI(i->Ain.Alu64R.src);
1021 vex_printf(",");
1022 ppHRegAMD64(i->Ain.Alu64R.dst);
1023 return;
1024 case Ain_Alu64M:
1025 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1026 ppAMD64RI(i->Ain.Alu64M.src);
1027 vex_printf(",");
1028 ppAMD64AMode(i->Ain.Alu64M.dst);
1029 return;
1030 case Ain_Sh64:
1031 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1032 if (i->Ain.Sh64.src == 0)
1033 vex_printf("%%cl,");
1034 else
1035 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1036 ppHRegAMD64(i->Ain.Sh64.dst);
1037 return;
1038 case Ain_Test64:
1039 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1040 ppHRegAMD64(i->Ain.Test64.dst);
1041 return;
1042 case Ain_Unary64:
1043 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1044 ppHRegAMD64(i->Ain.Unary64.dst);
1045 return;
1046 case Ain_Lea64:
1047 vex_printf("leaq ");
1048 ppAMD64AMode(i->Ain.Lea64.am);
1049 vex_printf(",");
1050 ppHRegAMD64(i->Ain.Lea64.dst);
1051 return;
1052 case Ain_Alu32R:
1053 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1054 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1055 vex_printf(",");
1056 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1057 return;
1058 case Ain_MulL:
1059 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1060 ppAMD64RM(i->Ain.MulL.src);
1061 return;
1062 case Ain_Div:
1063 vex_printf("%cdiv%s ",
1064 i->Ain.Div.syned ? 's' : 'u',
1065 showAMD64ScalarSz(i->Ain.Div.sz));
1066 ppAMD64RM(i->Ain.Div.src);
1067 return;
1068 case Ain_Push:
1069 vex_printf("pushq ");
1070 ppAMD64RMI(i->Ain.Push.src);
1071 return;
1072 case Ain_Call:
1073 vex_printf("call%s[%d] ",
1074 i->Ain.Call.cond==Acc_ALWAYS
1075 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1076 i->Ain.Call.regparms );
1077 vex_printf("0x%llx", i->Ain.Call.target);
1078 break;
1079
1080 case Ain_XDirect:
1081 vex_printf("(xDirect) ");
1082 vex_printf("if (%%rflags.%s) { ",
1083 showAMD64CondCode(i->Ain.XDirect.cond));
1084 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1085 vex_printf("movq %%r11,");
1086 ppAMD64AMode(i->Ain.XDirect.amRIP);
1087 vex_printf("; ");
1088 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1089 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1090 return;
1091 case Ain_XIndir:
1092 vex_printf("(xIndir) ");
1093 vex_printf("if (%%rflags.%s) { ",
1094 showAMD64CondCode(i->Ain.XIndir.cond));
1095 vex_printf("movq ");
1096 ppHRegAMD64(i->Ain.XIndir.dstGA);
1097 vex_printf(",");
1098 ppAMD64AMode(i->Ain.XIndir.amRIP);
1099 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1100 return;
1101 case Ain_XAssisted:
1102 vex_printf("(xAssisted) ");
1103 vex_printf("if (%%rflags.%s) { ",
1104 showAMD64CondCode(i->Ain.XAssisted.cond));
1105 vex_printf("movq ");
1106 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1107 vex_printf(",");
1108 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1109 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1110 (Int)i->Ain.XAssisted.jk);
1111 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1112 return;
1113
1114 case Ain_CMov64:
1115 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1116 ppAMD64RM(i->Ain.CMov64.src);
1117 vex_printf(",");
1118 ppHRegAMD64(i->Ain.CMov64.dst);
1119 return;
1120 case Ain_MovxLQ:
1121 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1122 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1123 vex_printf(",");
1124 ppHRegAMD64(i->Ain.MovxLQ.dst);
1125 return;
1126 case Ain_LoadEX:
1127 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1128 vex_printf("movl ");
1129 ppAMD64AMode(i->Ain.LoadEX.src);
1130 vex_printf(",");
1131 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1132 } else {
1133 vex_printf("mov%c%cq ",
1134 i->Ain.LoadEX.syned ? 's' : 'z',
1135 i->Ain.LoadEX.szSmall==1
1136 ? 'b'
1137 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1138 ppAMD64AMode(i->Ain.LoadEX.src);
1139 vex_printf(",");
1140 ppHRegAMD64(i->Ain.LoadEX.dst);
1141 }
1142 return;
1143 case Ain_Store:
1144 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1145 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1146 ppHRegAMD64(i->Ain.Store.src);
1147 vex_printf(",");
1148 ppAMD64AMode(i->Ain.Store.dst);
1149 return;
1150 case Ain_Set64:
1151 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1152 ppHRegAMD64(i->Ain.Set64.dst);
1153 return;
1154 case Ain_Bsfr64:
1155 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1156 ppHRegAMD64(i->Ain.Bsfr64.src);
1157 vex_printf(",");
1158 ppHRegAMD64(i->Ain.Bsfr64.dst);
1159 return;
1160 case Ain_MFence:
1161 vex_printf("mfence" );
1162 return;
1163 case Ain_ACAS:
1164 vex_printf("lock cmpxchg%c ",
1165 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1166 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1167 vex_printf("{%%rax->%%rbx},");
1168 ppAMD64AMode(i->Ain.ACAS.addr);
1169 return;
1170 case Ain_DACAS:
1171 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1172 (Int)(2 * i->Ain.DACAS.sz));
1173 ppAMD64AMode(i->Ain.DACAS.addr);
1174 return;
1175 case Ain_A87Free:
1176 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1177 break;
1178 case Ain_A87PushPop:
1179 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1180 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1181 ppAMD64AMode(i->Ain.A87PushPop.addr);
1182 break;
1183 case Ain_A87FpOp:
1184 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1185 break;
1186 case Ain_A87LdCW:
1187 vex_printf("fldcw ");
1188 ppAMD64AMode(i->Ain.A87LdCW.addr);
1189 break;
1190 case Ain_A87StSW:
1191 vex_printf("fstsw ");
1192 ppAMD64AMode(i->Ain.A87StSW.addr);
1193 break;
1194 case Ain_LdMXCSR:
1195 vex_printf("ldmxcsr ");
1196 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1197 break;
1198 case Ain_SseUComIS:
1199 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1200 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1201 vex_printf(",");
1202 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1203 vex_printf(" ; pushfq ; popq ");
1204 ppHRegAMD64(i->Ain.SseUComIS.dst);
1205 break;
1206 case Ain_SseSI2SF:
1207 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1208 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1209 (i->Ain.SseSI2SF.src);
1210 vex_printf(",");
1211 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1212 break;
1213 case Ain_SseSF2SI:
1214 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1215 ppHRegAMD64(i->Ain.SseSF2SI.src);
1216 vex_printf(",");
1217 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1218 (i->Ain.SseSF2SI.dst);
1219 break;
1220 case Ain_SseSDSS:
1221 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1222 ppHRegAMD64(i->Ain.SseSDSS.src);
1223 vex_printf(",");
1224 ppHRegAMD64(i->Ain.SseSDSS.dst);
1225 break;
1226 case Ain_SseLdSt:
1227 switch (i->Ain.SseLdSt.sz) {
1228 case 4: vex_printf("movss "); break;
1229 case 8: vex_printf("movsd "); break;
1230 case 16: vex_printf("movups "); break;
1231 default: vassert(0);
1232 }
1233 if (i->Ain.SseLdSt.isLoad) {
1234 ppAMD64AMode(i->Ain.SseLdSt.addr);
1235 vex_printf(",");
1236 ppHRegAMD64(i->Ain.SseLdSt.reg);
1237 } else {
1238 ppHRegAMD64(i->Ain.SseLdSt.reg);
1239 vex_printf(",");
1240 ppAMD64AMode(i->Ain.SseLdSt.addr);
1241 }
1242 return;
1243 case Ain_SseLdzLO:
1244 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1245 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1246 vex_printf(",");
1247 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1248 return;
1249 case Ain_Sse32Fx4:
1250 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1251 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1252 vex_printf(",");
1253 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1254 return;
1255 case Ain_Sse32FLo:
1256 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1257 ppHRegAMD64(i->Ain.Sse32FLo.src);
1258 vex_printf(",");
1259 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1260 return;
1261 case Ain_Sse64Fx2:
1262 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1263 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1264 vex_printf(",");
1265 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1266 return;
1267 case Ain_Sse64FLo:
1268 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1269 ppHRegAMD64(i->Ain.Sse64FLo.src);
1270 vex_printf(",");
1271 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1272 return;
1273 case Ain_SseReRg:
1274 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1275 ppHRegAMD64(i->Ain.SseReRg.src);
1276 vex_printf(",");
1277 ppHRegAMD64(i->Ain.SseReRg.dst);
1278 return;
1279 case Ain_SseCMov:
1280 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1281 ppHRegAMD64(i->Ain.SseCMov.src);
1282 vex_printf(",");
1283 ppHRegAMD64(i->Ain.SseCMov.dst);
1284 return;
1285 case Ain_SseShuf:
1286 vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
1287 ppHRegAMD64(i->Ain.SseShuf.src);
1288 vex_printf(",");
1289 ppHRegAMD64(i->Ain.SseShuf.dst);
1290 return;
1291 //uu case Ain_AvxLdSt:
1292 //uu vex_printf("vmovups ");
1293 //uu if (i->Ain.AvxLdSt.isLoad) {
1294 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1295 //uu vex_printf(",");
1296 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1297 //uu } else {
1298 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1299 //uu vex_printf(",");
1300 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1301 //uu }
1302 //uu return;
1303 //uu case Ain_AvxReRg:
1304 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1305 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1306 //uu vex_printf(",");
1307 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1308 //uu return;
1309 case Ain_EvCheck:
1310 vex_printf("(evCheck) decl ");
1311 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1312 vex_printf("; jns nofail; jmp *");
1313 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1314 vex_printf("; nofail:");
1315 return;
1316 case Ain_ProfInc:
1317 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1318 return;
1319 default:
1320 vpanic("ppAMD64Instr");
1321 }
1322 }
1323
1324 /* --------- Helpers for register allocation. --------- */
1325
getRegUsage_AMD64Instr(HRegUsage * u,AMD64Instr * i,Bool mode64)1326 void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
1327 {
1328 Bool unary;
1329 vassert(mode64 == True);
1330 initHRegUsage(u);
1331 switch (i->tag) {
1332 case Ain_Imm64:
1333 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1334 return;
1335 case Ain_Alu64R:
1336 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1337 if (i->Ain.Alu64R.op == Aalu_MOV) {
1338 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1339 return;
1340 }
1341 if (i->Ain.Alu64R.op == Aalu_CMP) {
1342 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1343 return;
1344 }
1345 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1346 return;
1347 case Ain_Alu64M:
1348 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1349 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1350 return;
1351 case Ain_Sh64:
1352 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1353 if (i->Ain.Sh64.src == 0)
1354 addHRegUse(u, HRmRead, hregAMD64_RCX());
1355 return;
1356 case Ain_Test64:
1357 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1358 return;
1359 case Ain_Unary64:
1360 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1361 return;
1362 case Ain_Lea64:
1363 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1364 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1365 return;
1366 case Ain_Alu32R:
1367 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1368 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1369 if (i->Ain.Alu32R.op == Aalu_CMP) {
1370 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1371 return;
1372 }
1373 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1374 return;
1375 case Ain_MulL:
1376 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1377 addHRegUse(u, HRmModify, hregAMD64_RAX());
1378 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1379 return;
1380 case Ain_Div:
1381 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1382 addHRegUse(u, HRmModify, hregAMD64_RAX());
1383 addHRegUse(u, HRmModify, hregAMD64_RDX());
1384 return;
1385 case Ain_Push:
1386 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1387 addHRegUse(u, HRmModify, hregAMD64_RSP());
1388 return;
1389 case Ain_Call:
1390 /* This is a bit subtle. */
1391 /* First off, claim it trashes all the caller-saved regs
1392 which fall within the register allocator's jurisdiction.
1393 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
1394 and all the xmm registers.
1395 */
1396 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1397 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1398 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1399 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1400 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1401 addHRegUse(u, HRmWrite, hregAMD64_R8());
1402 addHRegUse(u, HRmWrite, hregAMD64_R9());
1403 addHRegUse(u, HRmWrite, hregAMD64_R10());
1404 addHRegUse(u, HRmWrite, hregAMD64_R11());
1405 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1406 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1407 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1408 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1409 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1410 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1411 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1412 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1413 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1414 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1415 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1416 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1417
1418 /* Now we have to state any parameter-carrying registers
1419 which might be read. This depends on the regparmness. */
1420 switch (i->Ain.Call.regparms) {
1421 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1422 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1423 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1424 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1425 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1426 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1427 case 0: break;
1428 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1429 }
1430 /* Finally, there is the issue that the insn trashes a
1431 register because the literal target address has to be
1432 loaded into a register. Fortunately, r11 is stated in the
1433 ABI as a scratch register, and so seems a suitable victim. */
1434 addHRegUse(u, HRmWrite, hregAMD64_R11());
1435 /* Upshot of this is that the assembler really must use r11,
1436 and no other, as a destination temporary. */
1437 return;
1438 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1439 conditionally exit the block. Hence we only need to list (1)
1440 the registers that they read, and (2) the registers that they
1441 write in the case where the block is not exited. (2) is
1442 empty, hence only (1) is relevant here. */
1443 case Ain_XDirect:
1444 /* Don't bother to mention the write to %r11, since it is not
1445 available to the allocator. */
1446 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1447 return;
1448 case Ain_XIndir:
1449 /* Ditto re %r11 */
1450 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1451 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1452 return;
1453 case Ain_XAssisted:
1454 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1455 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1456 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1457 return;
1458 case Ain_CMov64:
1459 addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
1460 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1461 return;
1462 case Ain_MovxLQ:
1463 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1464 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1465 return;
1466 case Ain_LoadEX:
1467 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1468 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1469 return;
1470 case Ain_Store:
1471 addHRegUse(u, HRmRead, i->Ain.Store.src);
1472 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1473 return;
1474 case Ain_Set64:
1475 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1476 return;
1477 case Ain_Bsfr64:
1478 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1479 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1480 return;
1481 case Ain_MFence:
1482 return;
1483 case Ain_ACAS:
1484 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1485 addHRegUse(u, HRmRead, hregAMD64_RBX());
1486 addHRegUse(u, HRmModify, hregAMD64_RAX());
1487 return;
1488 case Ain_DACAS:
1489 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1490 addHRegUse(u, HRmRead, hregAMD64_RCX());
1491 addHRegUse(u, HRmRead, hregAMD64_RBX());
1492 addHRegUse(u, HRmModify, hregAMD64_RDX());
1493 addHRegUse(u, HRmModify, hregAMD64_RAX());
1494 return;
1495 case Ain_A87Free:
1496 return;
1497 case Ain_A87PushPop:
1498 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1499 return;
1500 case Ain_A87FpOp:
1501 return;
1502 case Ain_A87LdCW:
1503 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1504 return;
1505 case Ain_A87StSW:
1506 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1507 return;
1508 case Ain_LdMXCSR:
1509 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1510 return;
1511 case Ain_SseUComIS:
1512 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1513 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1514 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1515 return;
1516 case Ain_SseSI2SF:
1517 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1518 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1519 return;
1520 case Ain_SseSF2SI:
1521 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1522 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1523 return;
1524 case Ain_SseSDSS:
1525 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1526 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1527 return;
1528 case Ain_SseLdSt:
1529 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1530 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1531 i->Ain.SseLdSt.reg);
1532 return;
1533 case Ain_SseLdzLO:
1534 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1535 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1536 return;
1537 case Ain_Sse32Fx4:
1538 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1539 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1540 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1541 || i->Ain.Sse32Fx4.op == Asse_SQRTF );
1542 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1543 addHRegUse(u, unary ? HRmWrite : HRmModify,
1544 i->Ain.Sse32Fx4.dst);
1545 return;
1546 case Ain_Sse32FLo:
1547 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1548 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1549 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1550 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1551 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1552 addHRegUse(u, unary ? HRmWrite : HRmModify,
1553 i->Ain.Sse32FLo.dst);
1554 return;
1555 case Ain_Sse64Fx2:
1556 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1557 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1558 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1559 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1560 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1561 addHRegUse(u, unary ? HRmWrite : HRmModify,
1562 i->Ain.Sse64Fx2.dst);
1563 return;
1564 case Ain_Sse64FLo:
1565 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1566 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1567 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1568 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1569 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1570 addHRegUse(u, unary ? HRmWrite : HRmModify,
1571 i->Ain.Sse64FLo.dst);
1572 return;
1573 case Ain_SseReRg:
1574 if ( (i->Ain.SseReRg.op == Asse_XOR
1575 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1576 && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) {
1577 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1578 r,r' as a write of a value to r, and independent of any
1579 previous value in r */
1580 /* (as opposed to a rite of passage :-) */
1581 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1582 } else {
1583 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1584 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1585 ? HRmWrite : HRmModify,
1586 i->Ain.SseReRg.dst);
1587 }
1588 return;
1589 case Ain_SseCMov:
1590 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1591 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1592 return;
1593 case Ain_SseShuf:
1594 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1595 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1596 return;
1597 //uu case Ain_AvxLdSt:
1598 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1599 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1600 //uu i->Ain.AvxLdSt.reg);
1601 //uu return;
1602 //uu case Ain_AvxReRg:
1603 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1604 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1605 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1606 //uu /* See comments on the case for Ain_SseReRg. */
1607 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1608 //uu } else {
1609 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1610 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1611 //uu ? HRmWrite : HRmModify,
1612 //uu i->Ain.AvxReRg.dst);
1613 //uu }
1614 //uu return;
1615 case Ain_EvCheck:
1616 /* We expect both amodes only to mention %rbp, so this is in
1617 fact pointless, since %rbp isn't allocatable, but anyway.. */
1618 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1619 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1620 return;
1621 case Ain_ProfInc:
1622 addHRegUse(u, HRmWrite, hregAMD64_R11());
1623 return;
1624 default:
1625 ppAMD64Instr(i, mode64);
1626 vpanic("getRegUsage_AMD64Instr");
1627 }
1628 }
1629
1630 /* local helper */
/* Replace the register held in *r by whatever the remapping m maps
   it to. */
static inline void mapReg(HRegRemap* m, HReg* r)
{
   HReg mapped = lookupHRegRemap(m, *r);
   *r = mapped;
}
1635
/* Apply the register remapping m, in place, to every register
   operand field of instruction i (directly held registers via mapReg,
   compound operands via the matching mapRegs_AMD64* routine).  mode64
   must be True.  An unrecognised instruction is printed and then
   causes a panic. */
void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {

      /* Instructions for which there is nothing to remap.  (ProfInc
         hardwires r11.) */
      case Ain_Call:
      case Ain_MFence:
      case Ain_A87Free:
      case Ain_A87FpOp:
      case Ain_ProfInc:
         break;

      case Ain_Imm64:
         mapReg(m, &i->Ain.Imm64.dst);
         break;
      case Ain_Alu64R:
         mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
         mapReg(m, &i->Ain.Alu64R.dst);
         break;
      case Ain_Alu64M:
         mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
         mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
         break;
      case Ain_Sh64:
         mapReg(m, &i->Ain.Sh64.dst);
         break;
      case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         break;
      case Ain_Unary64:
         mapReg(m, &i->Ain.Unary64.dst);
         break;
      case Ain_Lea64:
         mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
         mapReg(m, &i->Ain.Lea64.dst);
         break;
      case Ain_Alu32R:
         mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
         mapReg(m, &i->Ain.Alu32R.dst);
         break;
      case Ain_MulL:
         mapRegs_AMD64RM(m, i->Ain.MulL.src);
         break;
      case Ain_Div:
         mapRegs_AMD64RM(m, i->Ain.Div.src);
         break;
      case Ain_Push:
         mapRegs_AMD64RMI(m, i->Ain.Push.src);
         break;
      case Ain_XDirect:
         mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
         break;
      case Ain_XIndir:
         mapReg(m, &i->Ain.XIndir.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
         break;
      case Ain_XAssisted:
         mapReg(m, &i->Ain.XAssisted.dstGA);
         mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
         break;
      case Ain_CMov64:
         mapRegs_AMD64RM(m, i->Ain.CMov64.src);
         mapReg(m, &i->Ain.CMov64.dst);
         break;
      case Ain_MovxLQ:
         mapReg(m, &i->Ain.MovxLQ.src);
         mapReg(m, &i->Ain.MovxLQ.dst);
         break;
      case Ain_LoadEX:
         mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
         mapReg(m, &i->Ain.LoadEX.dst);
         break;
      case Ain_Store:
         mapReg(m, &i->Ain.Store.src);
         mapRegs_AMD64AMode(m, i->Ain.Store.dst);
         break;
      case Ain_Set64:
         mapReg(m, &i->Ain.Set64.dst);
         break;
      case Ain_Bsfr64:
         mapReg(m, &i->Ain.Bsfr64.src);
         mapReg(m, &i->Ain.Bsfr64.dst);
         break;
      case Ain_ACAS:
         mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
         break;
      case Ain_DACAS:
         mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
         break;
      case Ain_A87PushPop:
         mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
         break;
      case Ain_A87LdCW:
         mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
         break;
      case Ain_A87StSW:
         mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
         break;
      case Ain_LdMXCSR:
         mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
         break;
      case Ain_SseUComIS:
         mapReg(m, &i->Ain.SseUComIS.srcL);
         mapReg(m, &i->Ain.SseUComIS.srcR);
         mapReg(m, &i->Ain.SseUComIS.dst);
         break;
      case Ain_SseSI2SF:
         mapReg(m, &i->Ain.SseSI2SF.src);
         mapReg(m, &i->Ain.SseSI2SF.dst);
         break;
      case Ain_SseSF2SI:
         mapReg(m, &i->Ain.SseSF2SI.src);
         mapReg(m, &i->Ain.SseSF2SI.dst);
         break;
      case Ain_SseSDSS:
         mapReg(m, &i->Ain.SseSDSS.src);
         mapReg(m, &i->Ain.SseSDSS.dst);
         break;
      case Ain_SseLdSt:
         mapReg(m, &i->Ain.SseLdSt.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
         break;
      case Ain_SseLdzLO:
         mapReg(m, &i->Ain.SseLdzLO.reg);
         mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
         break;
      case Ain_Sse32Fx4:
         mapReg(m, &i->Ain.Sse32Fx4.src);
         mapReg(m, &i->Ain.Sse32Fx4.dst);
         break;
      case Ain_Sse32FLo:
         mapReg(m, &i->Ain.Sse32FLo.src);
         mapReg(m, &i->Ain.Sse32FLo.dst);
         break;
      case Ain_Sse64Fx2:
         mapReg(m, &i->Ain.Sse64Fx2.src);
         mapReg(m, &i->Ain.Sse64Fx2.dst);
         break;
      case Ain_Sse64FLo:
         mapReg(m, &i->Ain.Sse64FLo.src);
         mapReg(m, &i->Ain.Sse64FLo.dst);
         break;
      case Ain_SseReRg:
         mapReg(m, &i->Ain.SseReRg.src);
         mapReg(m, &i->Ain.SseReRg.dst);
         break;
      case Ain_SseCMov:
         mapReg(m, &i->Ain.SseCMov.src);
         mapReg(m, &i->Ain.SseCMov.dst);
         break;
      case Ain_SseShuf:
         mapReg(m, &i->Ain.SseShuf.src);
         mapReg(m, &i->Ain.SseShuf.dst);
         break;
      //uu case Ain_AvxLdSt:
      //uu    mapReg(m, &i->Ain.AvxLdSt.reg);
      //uu    mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
      //uu    break;
      //uu case Ain_AvxReRg:
      //uu    mapReg(m, &i->Ain.AvxReRg.src);
      //uu    mapReg(m, &i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         /* Both amodes are expected to mention only %rbp, which is
            not allocatable, so this is in effect a no-op; it is done
            anyway. */
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
         mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
         break;
      default:
         ppAMD64Instr(i, mode64);
         vpanic("mapRegs_AMD64Instr");
   }
}
1812
1813 /* Figure out if i represents a reg-reg move, and if so assign the
1814 source and destination to *src and *dst. If in doubt say No. Used
1815 by the register allocator to do move coalescing.
1816 */
isMove_AMD64Instr(AMD64Instr * i,HReg * src,HReg * dst)1817 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
1818 {
1819 switch (i->tag) {
1820 case Ain_Alu64R:
1821 /* Moves between integer regs */
1822 if (i->Ain.Alu64R.op != Aalu_MOV)
1823 return False;
1824 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1825 return False;
1826 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1827 *dst = i->Ain.Alu64R.dst;
1828 return True;
1829 case Ain_SseReRg:
1830 /* Moves between SSE regs */
1831 if (i->Ain.SseReRg.op != Asse_MOV)
1832 return False;
1833 *src = i->Ain.SseReRg.src;
1834 *dst = i->Ain.SseReRg.dst;
1835 return True;
1836 //uu case Ain_AvxReRg:
1837 //uu /* Moves between AVX regs */
1838 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1839 //uu return False;
1840 //uu *src = i->Ain.AvxReRg.src;
1841 //uu *dst = i->Ain.AvxReRg.dst;
1842 //uu return True;
1843 default:
1844 return False;
1845 }
1846 /*NOTREACHED*/
1847 }
1848
1849
1850 /* Generate amd64 spill/reload instructions under the direction of the
1851 register allocator. Note it's critical these don't write the
1852 condition codes. */
1853
/* Produce, in *i1, an instruction that stores real register rreg to
   the spill slot at offsetB(%rbp).  *i2 is always set to NULL.
   Requires offsetB >= 0, a non-virtual rreg, and mode64 == True.
   Only the HRcInt64 and HRcVec128 register classes are handled; any
   other class is printed and causes a panic. */
void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* slot;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i2 = NULL;
   *i1 = NULL;
   slot = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   if (hregClass(rreg) == HRcInt64) {
      /* 64-bit integer register: plain 64-bit MOV to memory. */
      *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), slot );
   }
   else if (hregClass(rreg) == HRcVec128) {
      /* 128-bit vector register: 16-byte SSE store. */
      *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, slot );
   }
   else {
      ppHRegClass(hregClass(rreg));
      vpanic("genSpill_AMD64: unimplemented regclass");
   }
}
1875
/* Produce, in *i1, an instruction that reloads real register rreg
   from the spill slot at offsetB(%rbp).  *i2 is always set to NULL.
   Requires offsetB >= 0, a non-virtual rreg, and mode64 == True.
   Only the HRcInt64 and HRcVec128 register classes are handled; any
   other class is printed and causes a panic. */
void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* slot;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i2 = NULL;
   *i1 = NULL;
   slot = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   if (hregClass(rreg) == HRcInt64) {
      /* 64-bit integer register: plain 64-bit MOV from memory. */
      *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(slot), rreg );
   }
   else if (hregClass(rreg) == HRcVec128) {
      /* 128-bit vector register: 16-byte SSE load. */
      *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, slot );
   }
   else {
      ppHRegClass(hregClass(rreg));
      vpanic("genReload_AMD64: unimplemented regclass");
   }
}
1897
1898
1899 /* --------- The amd64 assembler (bleh.) --------- */
1900
1901 /* Produce the low three bits of an integer register number. */
iregBits210(HReg r)1902 static UChar iregBits210 ( HReg r )
1903 {
1904 UInt n;
1905 vassert(hregClass(r) == HRcInt64);
1906 vassert(!hregIsVirtual(r));
1907 n = hregNumber(r);
1908 vassert(n <= 15);
1909 return toUChar(n & 7);
1910 }
1911
1912 /* Produce bit 3 of an integer register number. */
iregBit3(HReg r)1913 static UChar iregBit3 ( HReg r )
1914 {
1915 UInt n;
1916 vassert(hregClass(r) == HRcInt64);
1917 vassert(!hregIsVirtual(r));
1918 n = hregNumber(r);
1919 vassert(n <= 15);
1920 return toUChar((n >> 3) & 1);
1921 }
1922
1923 /* Produce a complete 4-bit integer register number. */
iregBits3210(HReg r)1924 static UChar iregBits3210 ( HReg r )
1925 {
1926 UInt n;
1927 vassert(hregClass(r) == HRcInt64);
1928 vassert(!hregIsVirtual(r));
1929 n = hregNumber(r);
1930 vassert(n <= 15);
1931 return toUChar(n);
1932 }
1933
1934 /* Given an xmm (128bit V-class) register number, produce the
1935 equivalent numbered register in 64-bit I-class. This is a bit of
1936 fakery which facilitates using functions that work on integer
1937 register numbers to be used when assembling SSE instructions
1938 too. */
vreg2ireg(HReg r)1939 static UInt vreg2ireg ( HReg r )
1940 {
1941 UInt n;
1942 vassert(hregClass(r) == HRcVec128);
1943 vassert(!hregIsVirtual(r));
1944 n = hregNumber(r);
1945 vassert(n <= 15);
1946 return mkHReg(n, HRcInt64, False);
1947 }
1948
1949 //uu /* Ditto for ymm regs. */
1950 //uu static UInt dvreg2ireg ( HReg r )
1951 //uu {
1952 //uu UInt n;
1953 //uu vassert(hregClass(r) == HRcVec256);
1954 //uu vassert(!hregIsVirtual(r));
1955 //uu n = hregNumber(r);
1956 //uu vassert(n <= 15);
1957 //uu return mkHReg(n, HRcInt64, False);
1958 //uu }
1959
/* Pack a ModRM byte: mod in bits 7:6, reg in bits 5:3, regmem in
   bits 2:0.  Bits of the arguments outside those widths are masked
   off. */
static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   const Int modField = (mod    & 3) << 6;
   const Int regField = (reg    & 7) << 3;
   const Int rmField  = (regmem & 7);
   return toUChar(modField | regField | rmField);
}
1966
mkSIB(Int shift,Int regindex,Int regbase)1967 static UChar mkSIB ( Int shift, Int regindex, Int regbase )
1968 {
1969 return toUChar( ((shift & 3) << 6)
1970 | ((regindex & 7) << 3)
1971 | (regbase & 7) );
1972 }
1973
/* Write w32 at p as 4 bytes, least significant byte first, and return
   the address just past the last byte written. */
static UChar* emit32 ( UChar* p, UInt w32 )
{
   Int k;
   for (k = 0; k < 4; k++) {
      *p++ = toUChar(w32 & 0x000000FF);
      w32 >>= 8;   /* expose the next more-significant byte */
   }
   return p;
}
1982
/* Write w64 at p as 8 bytes, least significant byte first (low 32-bit
   half, then high half), and return the address just past the last
   byte written. */
static UChar* emit64 ( UChar* p, ULong w64 )
{
   const UInt lo32 = toUInt(w64 & 0xFFFFFFFF);
   const UInt hi32 = toUInt((w64 >> 32) & 0xFFFFFFFF);
   return emit32(emit32(p, lo32), hi32);
}
1989
1990 /* Does a sign-extend of the lowest 8 bits give
1991 the original number? */
fits8bits(UInt w32)1992 static Bool fits8bits ( UInt w32 )
1993 {
1994 Int i32 = (Int)w32;
1995 return toBool(i32 == ((i32 << 24) >> 24));
1996 }
1997 /* Can the lower 32 bits be signedly widened to produce the whole
1998 64-bit value? In other words, are the top 33 bits either all 0 or
1999 all 1 ? */
fitsIn32Bits(ULong x)2000 static Bool fitsIn32Bits ( ULong x )
2001 {
2002 Long y0 = (Long)x;
2003 Long y1 = y0;
2004 y1 <<= 32;
2005 y1 >>=/*s*/ 32;
2006 return toBool(x == y1);
2007 }
2008
2009
2010 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2011
2012 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2013 = 00 greg ereg
2014
2015 greg, d8(ereg) | ereg is neither of: RSP R12
2016 = 01 greg ereg, d8
2017
2018 greg, d32(ereg) | ereg is neither of: RSP R12
2019 = 10 greg ereg, d32
2020
2021 greg, d8(ereg) | ereg is either: RSP R12
2022 = 01 greg 100, 0x24, d8
2023 (lowest bit of rex distinguishes R12/RSP)
2024
2025 greg, d32(ereg) | ereg is either: RSP R12
2026 = 10 greg 100, 0x24, d32
2027 (lowest bit of rex distinguishes R12/RSP)
2028
2029 -----------------------------------------------
2030
2031 greg, d8(base,index,scale)
2032 | index != RSP
2033 = 01 greg 100, scale index base, d8
2034
2035 greg, d32(base,index,scale)
2036 | index != RSP
2037 = 10 greg 100, scale index base, d32
2038 */
/* Emit the ModRM byte, plus any SIB byte and displacement, for the
   operand pair (greg, am), following the encoding table above.  The
   REX prefix and opcode are NOT emitted here; only the low 3 bits of
   each register number are used (see iregBits210).

   p     where to write; the return value is the address just past
         the last byte written.
   greg  register (or fake sub-opcode register) for the ModRM reg
         field.
   am    memory operand; must be tagged Aam_IR or Aam_IRRS.

   Panics on amodes it cannot encode: Aam_IR with base RSP and a
   displacement that does not fit in 8 bits (deliberately left
   unimplemented pending a test case), Aam_IRRS with RSP as index,
   and unknown amode tags. */
static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
{
   if (am->tag == Aam_IR) {
      HReg base  = am->Aam.IR.reg;
      UInt disp  = am->Aam.IR.imm;
      Bool disp8 = fits8bits(disp);
      /* RSP and R12 have low bits 100 in the r/m field, which means
         "SIB byte follows", so they always need the SIB form. */
      Bool needsSIB = toBool(base == hregAMD64_RSP()
                             || base == hregAMD64_R12());

      if (!needsSIB) {
         /* mod=00 with no displacement.  Not usable for RBP/R13,
            whose r/m encoding (101) has a different meaning under
            mod=00. */
         if (disp == 0
             && base != hregAMD64_RBP()
             && base != hregAMD64_R13()) {
            *p++ = mkModRegRM(0, iregBits210(greg), iregBits210(base));
            return p;
         }
         if (disp8) {
            /* mod=01: 8-bit sign-extended displacement */
            *p++ = mkModRegRM(1, iregBits210(greg), iregBits210(base));
            *p++ = toUChar(disp & 0xFF);
            return p;
         }
         /* mod=10: 32-bit displacement */
         *p++ = mkModRegRM(2, iregBits210(greg), iregBits210(base));
         p = emit32(p, disp);
         return p;
      }

      /* Base is RSP or R12: r/m=100 plus SIB byte 0x24 (no index,
         base=100).  The REX prefix tells RSP and R12 apart. */
      if (disp8) {
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(disp & 0xFF);
         return p;
      }
      /* 32-bit displacement: only R12 is accepted.  The RSP case is
         intentionally not enabled -- wait for a test case. */
      if (base == hregAMD64_R12()) {
         *p++ = mkModRegRM(2, iregBits210(greg), 4);
         *p++ = 0x24;
         p = emit32(p, disp);
         return p;
      }
      ppAMD64AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }

   if (am->tag == Aam_IRRS) {
      UChar sib;
      /* RSP cannot be used as an index register. */
      if (am->Aam.IRRS.index == hregAMD64_RSP()) {
         ppAMD64AMode(am);
         vpanic("doAMode_M: can't emit amode IRRS");
         /*NOTREACHED*/
      }
      /* Build the SIB byte from the 3-bit register encodings rather
         than from raw HReg values, so the result does not depend on
         how HReg packs class/virtual bits around the number, and so
         bogus (virtual or non-integer) registers are caught by the
         assertions in iregBits210. */
      sib = mkSIB(am->Aam.IRRS.shift,
                  iregBits210(am->Aam.IRRS.index),
                  iregBits210(am->Aam.IRRS.base));
      if (fits8bits(am->Aam.IRRS.imm)) {
         /* mod=01, r/m=100 (SIB follows), 8-bit displacement */
         *p++ = mkModRegRM(1, iregBits210(greg), 4);
         *p++ = sib;
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
         return p;
      }
      /* mod=10, r/m=100 (SIB follows), 32-bit displacement */
      *p++ = mkModRegRM(2, iregBits210(greg), 4);
      *p++ = sib;
      p = emit32(p, am->Aam.IRRS.imm);
      return p;
   }

   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
2112
2113
2114 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
doAMode_R(UChar * p,HReg greg,HReg ereg)2115 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2116 {
2117 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
2118 return p;
2119 }
2120
2121
2122 /* Clear the W bit on a REX byte, thereby changing the operand size
2123 back to whatever that instruction's default operand size is. */
clearWBit(UChar rex)2124 static inline UChar clearWBit ( UChar rex )
2125 {
2126 return toUChar(rex & ~(1<<3));
2127 }
2128
2129
2130 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
rexAMode_M(HReg greg,AMD64AMode * am)2131 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2132 {
2133 if (am->tag == Aam_IR) {
2134 UChar W = 1; /* we want 64-bit mode */
2135 UChar R = iregBit3(greg);
2136 UChar X = 0; /* not relevant */
2137 UChar B = iregBit3(am->Aam.IR.reg);
2138 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2139 }
2140 if (am->tag == Aam_IRRS) {
2141 UChar W = 1; /* we want 64-bit mode */
2142 UChar R = iregBit3(greg);
2143 UChar X = iregBit3(am->Aam.IRRS.index);
2144 UChar B = iregBit3(am->Aam.IRRS.base);
2145 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2146 }
2147 vassert(0);
2148 return 0; /*NOTREACHED*/
2149 }
2150
2151 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
rexAMode_R(HReg greg,HReg ereg)2152 static UChar rexAMode_R ( HReg greg, HReg ereg )
2153 {
2154 UChar W = 1; /* we want 64-bit mode */
2155 UChar R = iregBit3(greg);
2156 UChar X = 0; /* not relevant */
2157 UChar B = iregBit3(ereg);
2158 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2159 }
2160
2161
//uu /* May 2012: this VEX prefix stuff is currently unused, but has
//uu    been verified correct (I reckon).  Certainly it has been known to
//uu    produce correct VEX prefixes during testing. */
2165 //uu
//uu /* Assemble a 2 or 3 byte VEX prefix from parts.  rexR, rexX, rexB and
//uu    notVvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
//uu    in verbatim.  There's no range checking on the bits. */
2169 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2170 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2171 //uu UInt L, UInt pp )
2172 //uu {
2173 //uu UChar byte0 = 0;
2174 //uu UChar byte1 = 0;
2175 //uu UChar byte2 = 0;
2176 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2177 //uu /* 2 byte encoding is possible. */
2178 //uu byte0 = 0xC5;
2179 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2180 //uu | (L << 2) | pp;
2181 //uu } else {
2182 //uu /* 3 byte encoding is needed. */
2183 //uu byte0 = 0xC4;
2184 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2185 //uu | ((rexB ^ 1) << 5) | mmmmm;
2186 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2187 //uu }
2188 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2189 //uu }
2190 //uu
2191 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2192 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2193 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2194 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2195 //uu vvvv=1111 (unused 3rd reg). */
2196 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2197 //uu {
2198 //uu UChar L = 1; /* size = 256 */
2199 //uu UChar pp = 0; /* no SIMD prefix */
2200 //uu UChar mmmmm = 1; /* 0F */
2201 //uu UChar notVvvv = 0; /* unused */
2202 //uu UChar rexW = 0;
2203 //uu UChar rexR = 0;
2204 //uu UChar rexX = 0;
2205 //uu UChar rexB = 0;
2206 //uu /* Same logic as in rexAMode_M. */
2207 //uu if (am->tag == Aam_IR) {
2208 //uu rexR = iregBit3(greg);
2209 //uu rexX = 0; /* not relevant */
2210 //uu rexB = iregBit3(am->Aam.IR.reg);
2211 //uu }
2212 //uu else if (am->tag == Aam_IRRS) {
2213 //uu rexR = iregBit3(greg);
2214 //uu rexX = iregBit3(am->Aam.IRRS.index);
2215 //uu rexB = iregBit3(am->Aam.IRRS.base);
2216 //uu } else {
2217 //uu vassert(0);
2218 //uu }
2219 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2220 //uu }
2221 //uu
2222 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2223 //uu {
2224 //uu switch (vex & 0xFF) {
2225 //uu case 0xC5:
2226 //uu *p++ = 0xC5;
2227 //uu *p++ = (vex >> 8) & 0xFF;
2228 //uu vassert(0 == (vex >> 16));
2229 //uu break;
2230 //uu case 0xC4:
2231 //uu *p++ = 0xC4;
2232 //uu *p++ = (vex >> 8) & 0xFF;
2233 //uu *p++ = (vex >> 16) & 0xFF;
2234 //uu vassert(0 == (vex >> 24));
2235 //uu break;
2236 //uu default:
2237 //uu vassert(0);
2238 //uu }
2239 //uu return p;
2240 //uu }
2241
2242
2243 /* Emit ffree %st(N) */
do_ffree_st(UChar * p,Int n)2244 static UChar* do_ffree_st ( UChar* p, Int n )
2245 {
2246 vassert(n >= 0 && n <= 7);
2247 *p++ = 0xDD;
2248 *p++ = toUChar(0xC0 + n);
2249 return p;
2250 }
2251
2252 /* Emit an instruction into buf and return the number of bytes used.
2253 Note that buf is not the insn's final place, and therefore it is
2254 imperative to emit position-independent code. If the emitted
2255 instruction was a profiler inc, set *is_profInc to True, else
2256 leave it unchanged. */
2257
emit_AMD64Instr(Bool * is_profInc,UChar * buf,Int nbuf,AMD64Instr * i,Bool mode64,void * disp_cp_chain_me_to_slowEP,void * disp_cp_chain_me_to_fastEP,void * disp_cp_xindir,void * disp_cp_xassisted)2258 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2259 UChar* buf, Int nbuf, AMD64Instr* i,
2260 Bool mode64,
2261 void* disp_cp_chain_me_to_slowEP,
2262 void* disp_cp_chain_me_to_fastEP,
2263 void* disp_cp_xindir,
2264 void* disp_cp_xassisted )
2265 {
2266 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2267 UInt xtra;
2268 UInt reg;
2269 UChar rex;
2270 UChar* p = &buf[0];
2271 UChar* ptmp;
2272 Int j;
2273 vassert(nbuf >= 32);
2274 vassert(mode64 == True);
2275
2276 /* Wrap an integer as a int register, for use assembling
2277 GrpN insns, in which the greg field is used as a sub-opcode
2278 and does not really contain a register. */
2279 # define fake(_n) mkHReg((_n), HRcInt64, False)
2280
2281 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2282
2283 switch (i->tag) {
2284
2285 case Ain_Imm64:
2286 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2287 /* Use the short form (load into 32 bit reg, + default
2288 widening rule) for constants under 1 million. We could
2289 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2290 limit it to a smaller range for verifiability purposes. */
2291 if (1 & iregBit3(i->Ain.Imm64.dst))
2292 *p++ = 0x41;
2293 *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
2294 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2295 } else {
2296 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
2297 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
2298 p = emit64(p, i->Ain.Imm64.imm64);
2299 }
2300 goto done;
2301
2302 case Ain_Alu64R:
2303 /* Deal specially with MOV */
2304 if (i->Ain.Alu64R.op == Aalu_MOV) {
2305 switch (i->Ain.Alu64R.src->tag) {
2306 case Armi_Imm:
2307 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2308 /* Actually we could use this form for constants in
2309 the range 0 through 0x7FFFFFFF inclusive, but
2310 limit it to a small range for verifiability
2311 purposes. */
2312 /* Generate "movl $imm32, 32-bit-register" and let
2313 the default zero-extend rule cause the upper half
2314 of the dst to be zeroed out too. This saves 1
2315 and sometimes 2 bytes compared to the more
2316 obvious encoding in the 'else' branch. */
2317 if (1 & iregBit3(i->Ain.Alu64R.dst))
2318 *p++ = 0x41;
2319 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
2320 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2321 } else {
2322 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
2323 *p++ = 0xC7;
2324 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
2325 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2326 }
2327 goto done;
2328 case Armi_Reg:
2329 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2330 i->Ain.Alu64R.dst );
2331 *p++ = 0x89;
2332 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2333 i->Ain.Alu64R.dst);
2334 goto done;
2335 case Armi_Mem:
2336 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2337 i->Ain.Alu64R.src->Armi.Mem.am);
2338 *p++ = 0x8B;
2339 p = doAMode_M(p, i->Ain.Alu64R.dst,
2340 i->Ain.Alu64R.src->Armi.Mem.am);
2341 goto done;
2342 default:
2343 goto bad;
2344 }
2345 }
2346 /* MUL */
2347 if (i->Ain.Alu64R.op == Aalu_MUL) {
2348 switch (i->Ain.Alu64R.src->tag) {
2349 case Armi_Reg:
2350 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2351 i->Ain.Alu64R.src->Armi.Reg.reg);
2352 *p++ = 0x0F;
2353 *p++ = 0xAF;
2354 p = doAMode_R(p, i->Ain.Alu64R.dst,
2355 i->Ain.Alu64R.src->Armi.Reg.reg);
2356 goto done;
2357 case Armi_Mem:
2358 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2359 i->Ain.Alu64R.src->Armi.Mem.am);
2360 *p++ = 0x0F;
2361 *p++ = 0xAF;
2362 p = doAMode_M(p, i->Ain.Alu64R.dst,
2363 i->Ain.Alu64R.src->Armi.Mem.am);
2364 goto done;
2365 case Armi_Imm:
2366 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2367 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2368 *p++ = 0x6B;
2369 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2370 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2371 } else {
2372 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2373 *p++ = 0x69;
2374 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2375 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2376 }
2377 goto done;
2378 default:
2379 goto bad;
2380 }
2381 }
2382 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2383 opc = opc_rr = subopc_imm = opc_imma = 0;
2384 switch (i->Ain.Alu64R.op) {
2385 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2386 subopc_imm = 2; opc_imma = 0x15; break;
2387 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2388 subopc_imm = 0; opc_imma = 0x05; break;
2389 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2390 subopc_imm = 5; opc_imma = 0x2D; break;
2391 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2392 subopc_imm = 3; opc_imma = 0x1D; break;
2393 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2394 subopc_imm = 4; opc_imma = 0x25; break;
2395 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2396 subopc_imm = 6; opc_imma = 0x35; break;
2397 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2398 subopc_imm = 1; opc_imma = 0x0D; break;
2399 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2400 subopc_imm = 7; opc_imma = 0x3D; break;
2401 default: goto bad;
2402 }
2403 switch (i->Ain.Alu64R.src->tag) {
2404 case Armi_Imm:
2405 if (i->Ain.Alu64R.dst == hregAMD64_RAX()
2406 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2407 goto bad; /* FIXME: awaiting test case */
2408 *p++ = toUChar(opc_imma);
2409 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2410 } else
2411 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2412 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
2413 *p++ = 0x83;
2414 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
2415 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2416 } else {
2417 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
2418 *p++ = 0x81;
2419 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
2420 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2421 }
2422 goto done;
2423 case Armi_Reg:
2424 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2425 i->Ain.Alu64R.dst);
2426 *p++ = toUChar(opc_rr);
2427 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2428 i->Ain.Alu64R.dst);
2429 goto done;
2430 case Armi_Mem:
2431 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2432 i->Ain.Alu64R.src->Armi.Mem.am);
2433 *p++ = toUChar(opc);
2434 p = doAMode_M(p, i->Ain.Alu64R.dst,
2435 i->Ain.Alu64R.src->Armi.Mem.am);
2436 goto done;
2437 default:
2438 goto bad;
2439 }
2440 break;
2441
2442 case Ain_Alu64M:
2443 /* Deal specially with MOV */
2444 if (i->Ain.Alu64M.op == Aalu_MOV) {
2445 switch (i->Ain.Alu64M.src->tag) {
2446 case Ari_Reg:
2447 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2448 i->Ain.Alu64M.dst);
2449 *p++ = 0x89;
2450 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2451 i->Ain.Alu64M.dst);
2452 goto done;
2453 case Ari_Imm:
2454 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
2455 *p++ = 0xC7;
2456 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
2457 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2458 goto done;
2459 default:
2460 goto bad;
2461 }
2462 }
2463 break;
2464
2465 case Ain_Sh64:
2466 opc_cl = opc_imm = subopc = 0;
2467 switch (i->Ain.Sh64.op) {
2468 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2469 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2470 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2471 default: goto bad;
2472 }
2473 if (i->Ain.Sh64.src == 0) {
2474 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
2475 *p++ = toUChar(opc_cl);
2476 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2477 goto done;
2478 } else {
2479 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
2480 *p++ = toUChar(opc_imm);
2481 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2482 *p++ = (UChar)(i->Ain.Sh64.src);
2483 goto done;
2484 }
2485 break;
2486
2487 case Ain_Test64:
2488 /* testq sign-extend($imm32), %reg */
2489 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
2490 *p++ = 0xF7;
2491 p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
2492 p = emit32(p, i->Ain.Test64.imm32);
2493 goto done;
2494
2495 case Ain_Unary64:
2496 if (i->Ain.Unary64.op == Aun_NOT) {
2497 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2498 *p++ = 0xF7;
2499 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
2500 goto done;
2501 }
2502 if (i->Ain.Unary64.op == Aun_NEG) {
2503 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2504 *p++ = 0xF7;
2505 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
2506 goto done;
2507 }
2508 break;
2509
2510 case Ain_Lea64:
2511 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2512 *p++ = 0x8D;
2513 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2514 goto done;
2515
2516 case Ain_Alu32R:
2517 /* ADD/SUB/AND/OR/XOR/CMP */
2518 opc = opc_rr = subopc_imm = opc_imma = 0;
2519 switch (i->Ain.Alu32R.op) {
2520 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2521 subopc_imm = 0; opc_imma = 0x05; break;
2522 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2523 subopc_imm = 5; opc_imma = 0x2D; break;
2524 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2525 subopc_imm = 4; opc_imma = 0x25; break;
2526 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2527 subopc_imm = 6; opc_imma = 0x35; break;
2528 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2529 subopc_imm = 1; opc_imma = 0x0D; break;
2530 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2531 subopc_imm = 7; opc_imma = 0x3D; break;
2532 default: goto bad;
2533 }
2534 switch (i->Ain.Alu32R.src->tag) {
2535 case Armi_Imm:
2536 if (i->Ain.Alu32R.dst == hregAMD64_RAX()
2537 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2538 goto bad; /* FIXME: awaiting test case */
2539 *p++ = toUChar(opc_imma);
2540 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2541 } else
2542 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2543 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
2544 if (rex != 0x40) *p++ = rex;
2545 *p++ = 0x83;
2546 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2547 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2548 } else {
2549 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
2550 if (rex != 0x40) *p++ = rex;
2551 *p++ = 0x81;
2552 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2553 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2554 }
2555 goto done;
2556 case Armi_Reg:
2557 rex = clearWBit(
2558 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2559 i->Ain.Alu32R.dst) );
2560 if (rex != 0x40) *p++ = rex;
2561 *p++ = toUChar(opc_rr);
2562 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2563 i->Ain.Alu32R.dst);
2564 goto done;
2565 case Armi_Mem:
2566 rex = clearWBit(
2567 rexAMode_M( i->Ain.Alu32R.dst,
2568 i->Ain.Alu32R.src->Armi.Mem.am) );
2569 if (rex != 0x40) *p++ = rex;
2570 *p++ = toUChar(opc);
2571 p = doAMode_M(p, i->Ain.Alu32R.dst,
2572 i->Ain.Alu32R.src->Armi.Mem.am);
2573 goto done;
2574 default:
2575 goto bad;
2576 }
2577 break;
2578
2579 case Ain_MulL:
2580 subopc = i->Ain.MulL.syned ? 5 : 4;
2581 switch (i->Ain.MulL.src->tag) {
2582 case Arm_Mem:
2583 *p++ = rexAMode_M( fake(0),
2584 i->Ain.MulL.src->Arm.Mem.am);
2585 *p++ = 0xF7;
2586 p = doAMode_M(p, fake(subopc),
2587 i->Ain.MulL.src->Arm.Mem.am);
2588 goto done;
2589 case Arm_Reg:
2590 *p++ = rexAMode_R(fake(0),
2591 i->Ain.MulL.src->Arm.Reg.reg);
2592 *p++ = 0xF7;
2593 p = doAMode_R(p, fake(subopc),
2594 i->Ain.MulL.src->Arm.Reg.reg);
2595 goto done;
2596 default:
2597 goto bad;
2598 }
2599 break;
2600
2601 case Ain_Div:
2602 subopc = i->Ain.Div.syned ? 7 : 6;
2603 if (i->Ain.Div.sz == 4) {
2604 switch (i->Ain.Div.src->tag) {
2605 case Arm_Mem:
2606 goto bad;
2607 /*FIXME*/
2608 *p++ = 0xF7;
2609 p = doAMode_M(p, fake(subopc),
2610 i->Ain.Div.src->Arm.Mem.am);
2611 goto done;
2612 case Arm_Reg:
2613 *p++ = clearWBit(
2614 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
2615 *p++ = 0xF7;
2616 p = doAMode_R(p, fake(subopc),
2617 i->Ain.Div.src->Arm.Reg.reg);
2618 goto done;
2619 default:
2620 goto bad;
2621 }
2622 }
2623 if (i->Ain.Div.sz == 8) {
2624 switch (i->Ain.Div.src->tag) {
2625 case Arm_Mem:
2626 *p++ = rexAMode_M( fake(0),
2627 i->Ain.Div.src->Arm.Mem.am);
2628 *p++ = 0xF7;
2629 p = doAMode_M(p, fake(subopc),
2630 i->Ain.Div.src->Arm.Mem.am);
2631 goto done;
2632 case Arm_Reg:
2633 *p++ = rexAMode_R( fake(0),
2634 i->Ain.Div.src->Arm.Reg.reg);
2635 *p++ = 0xF7;
2636 p = doAMode_R(p, fake(subopc),
2637 i->Ain.Div.src->Arm.Reg.reg);
2638 goto done;
2639 default:
2640 goto bad;
2641 }
2642 }
2643 break;
2644
2645 case Ain_Push:
2646 switch (i->Ain.Push.src->tag) {
2647 case Armi_Mem:
2648 *p++ = clearWBit(
2649 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
2650 *p++ = 0xFF;
2651 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
2652 goto done;
2653 case Armi_Imm:
2654 *p++ = 0x68;
2655 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2656 goto done;
2657 case Armi_Reg:
2658 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
2659 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
2660 goto done;
2661 default:
2662 goto bad;
2663 }
2664
2665 case Ain_Call: {
2666 /* As per detailed comment for Ain_Call in
2667 getRegUsage_AMD64Instr above, %r11 is used as an address
2668 temporary. */
2669 /* jump over the following two insns if the condition does not
2670 hold */
2671 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2672 if (i->Ain.Call.cond != Acc_ALWAYS) {
2673 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2674 *p++ = shortImm ? 10 : 13;
2675 /* 10 or 13 bytes in the next two insns */
2676 }
2677 if (shortImm) {
2678 /* 7 bytes: movl sign-extend(imm32), %r11 */
2679 *p++ = 0x49;
2680 *p++ = 0xC7;
2681 *p++ = 0xC3;
2682 p = emit32(p, (UInt)i->Ain.Call.target);
2683 } else {
2684 /* 10 bytes: movabsq $target, %r11 */
2685 *p++ = 0x49;
2686 *p++ = 0xBB;
2687 p = emit64(p, i->Ain.Call.target);
2688 }
2689 /* 3 bytes: call *%r11 */
2690 *p++ = 0x41;
2691 *p++ = 0xFF;
2692 *p++ = 0xD3;
2693 goto done;
2694 }
2695
2696 case Ain_XDirect: {
2697 /* NB: what goes on here has to be very closely coordinated with the
2698 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
2699 /* We're generating chain-me requests here, so we need to be
2700 sure this is actually allowed -- no-redir translations can't
2701 use chain-me's. Hence: */
2702 vassert(disp_cp_chain_me_to_slowEP != NULL);
2703 vassert(disp_cp_chain_me_to_fastEP != NULL);
2704
2705 HReg r11 = hregAMD64_R11();
2706
2707 /* Use ptmp for backpatching conditional jumps. */
2708 ptmp = NULL;
2709
2710 /* First off, if this is conditional, create a conditional
2711 jump over the rest of it. */
2712 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2713 /* jmp fwds if !condition */
2714 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
2715 ptmp = p; /* fill in this bit later */
2716 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2717 }
2718
2719 /* Update the guest RIP. */
2720 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
2721 /* use a shorter encoding */
2722 /* movl sign-extend(dstGA), %r11 */
2723 *p++ = 0x49;
2724 *p++ = 0xC7;
2725 *p++ = 0xC3;
2726 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
2727 } else {
2728 /* movabsq $dstGA, %r11 */
2729 *p++ = 0x49;
2730 *p++ = 0xBB;
2731 p = emit64(p, i->Ain.XDirect.dstGA);
2732 }
2733
2734 /* movq %r11, amRIP */
2735 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
2736 *p++ = 0x89;
2737 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
2738
2739 /* --- FIRST PATCHABLE BYTE follows --- */
2740 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2741 to) backs up the return address, so as to find the address of
2742 the first patchable byte. So: don't change the length of the
2743 two instructions below. */
2744 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
2745 *p++ = 0x49;
2746 *p++ = 0xBB;
2747 void* disp_cp_chain_me
2748 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2749 : disp_cp_chain_me_to_slowEP;
2750 p = emit64(p, Ptr_to_ULong(disp_cp_chain_me));
2751 /* call *%r11 */
2752 *p++ = 0x41;
2753 *p++ = 0xFF;
2754 *p++ = 0xD3;
2755 /* --- END of PATCHABLE BYTES --- */
2756
2757 /* Fix up the conditional jump, if there was one. */
2758 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2759 Int delta = p - ptmp;
2760 vassert(delta > 0 && delta < 40);
2761 *ptmp = toUChar(delta-1);
2762 }
2763 goto done;
2764 }
2765
2766 case Ain_XIndir: {
2767 /* We're generating transfers that could lead indirectly to a
2768 chain-me, so we need to be sure this is actually allowed --
2769 no-redir translations are not allowed to reach normal
2770 translations without going through the scheduler. That means
2771 no XDirects or XIndirs out from no-redir translations.
2772 Hence: */
2773 vassert(disp_cp_xindir != NULL);
2774
2775 /* Use ptmp for backpatching conditional jumps. */
2776 ptmp = NULL;
2777
2778 /* First off, if this is conditional, create a conditional
2779 jump over the rest of it. */
2780 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2781 /* jmp fwds if !condition */
2782 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
2783 ptmp = p; /* fill in this bit later */
2784 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2785 }
2786
2787 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2788 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2789 *p++ = 0x89;
2790 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2791
2792 /* get $disp_cp_xindir into %r11 */
2793 if (fitsIn32Bits(Ptr_to_ULong(disp_cp_xindir))) {
2794 /* use a shorter encoding */
2795 /* movl sign-extend(disp_cp_xindir), %r11 */
2796 *p++ = 0x49;
2797 *p++ = 0xC7;
2798 *p++ = 0xC3;
2799 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
2800 } else {
2801 /* movabsq $disp_cp_xindir, %r11 */
2802 *p++ = 0x49;
2803 *p++ = 0xBB;
2804 p = emit64(p, Ptr_to_ULong(disp_cp_xindir));
2805 }
2806
2807 /* jmp *%r11 */
2808 *p++ = 0x41;
2809 *p++ = 0xFF;
2810 *p++ = 0xE3;
2811
2812 /* Fix up the conditional jump, if there was one. */
2813 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2814 Int delta = p - ptmp;
2815 vassert(delta > 0 && delta < 40);
2816 *ptmp = toUChar(delta-1);
2817 }
2818 goto done;
2819 }
2820
2821 case Ain_XAssisted: {
2822 /* Use ptmp for backpatching conditional jumps. */
2823 ptmp = NULL;
2824
2825 /* First off, if this is conditional, create a conditional
2826 jump over the rest of it. */
2827 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2828 /* jmp fwds if !condition */
2829 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
2830 ptmp = p; /* fill in this bit later */
2831 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2832 }
2833
2834 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2835 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2836 *p++ = 0x89;
2837 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2838 /* movl $magic_number, %ebp. Since these numbers are all small positive
2839 integers, we can get away with "movl $N, %ebp" rather than
2840 the longer "movq $N, %rbp". */
2841 UInt trcval = 0;
2842 switch (i->Ain.XAssisted.jk) {
2843 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2844 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2845 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
2846 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2847 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2848 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2849 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
2850 case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
2851 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2852 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2853 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2854 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2855 /* We don't expect to see the following being assisted. */
2856 case Ijk_Ret:
2857 case Ijk_Call:
2858 /* fallthrough */
2859 default:
2860 ppIRJumpKind(i->Ain.XAssisted.jk);
2861 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
2862 }
2863 vassert(trcval != 0);
2864 *p++ = 0xBD;
2865 p = emit32(p, trcval);
2866 /* movabsq $disp_assisted, %r11 */
2867 *p++ = 0x49;
2868 *p++ = 0xBB;
2869 p = emit64(p, Ptr_to_ULong(disp_cp_xassisted));
2870 /* jmp *%r11 */
2871 *p++ = 0x41;
2872 *p++ = 0xFF;
2873 *p++ = 0xE3;
2874
2875 /* Fix up the conditional jump, if there was one. */
2876 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2877 Int delta = p - ptmp;
2878 vassert(delta > 0 && delta < 40);
2879 *ptmp = toUChar(delta-1);
2880 }
2881 goto done;
2882 }
2883
2884 case Ain_CMov64:
2885 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
2886 if (i->Ain.CMov64.src->tag == Arm_Reg) {
2887 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2888 *p++ = 0x0F;
2889 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
2890 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2891 goto done;
2892 }
2893 if (i->Ain.CMov64.src->tag == Arm_Mem) {
2894 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2895 *p++ = 0x0F;
2896 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
2897 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2898 goto done;
2899 }
2900 break;
2901
2902 case Ain_MovxLQ:
2903 /* No, _don't_ ask me why the sense of the args has to be
2904 different in the S vs Z case. I don't know. */
2905 if (i->Ain.MovxLQ.syned) {
2906 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
2907 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2908 *p++ = 0x63;
2909 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2910 } else {
2911 /* Produce a 32-bit reg-reg move, since the implicit
2912 zero-extend does what we want. */
2913 *p++ = clearWBit (
2914 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
2915 *p++ = 0x89;
2916 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
2917 }
2918 goto done;
2919
2920 case Ain_LoadEX:
2921 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
2922 /* movzbq */
2923 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2924 *p++ = 0x0F;
2925 *p++ = 0xB6;
2926 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2927 goto done;
2928 }
2929 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
2930 /* movzwq */
2931 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2932 *p++ = 0x0F;
2933 *p++ = 0xB7;
2934 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2935 goto done;
2936 }
2937 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
2938 /* movzlq */
2939 /* This isn't really an existing AMD64 instruction per se.
2940 Rather, we have to do a 32-bit load. Because a 32-bit
2941 write implicitly clears the upper 32 bits of the target
2942 register, we get what we want. */
2943 *p++ = clearWBit(
2944 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
2945 *p++ = 0x8B;
2946 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2947 goto done;
2948 }
2949 break;
2950
2951 case Ain_Set64:
2952 /* Make the destination register be 1 or 0, depending on whether
2953 the relevant condition holds. Complication: the top 56 bits
2954 of the destination should be forced to zero, but doing 'xorq
2955 %r,%r' kills the flag(s) we are about to read. Sigh. So
2956 start off my moving $0 into the dest. */
2957 reg = iregBits3210(i->Ain.Set64.dst);
2958 vassert(reg < 16);
2959
2960 /* movq $0, %dst */
2961 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
2962 *p++ = 0xC7;
2963 *p++ = toUChar(0xC0 + (reg & 7));
2964 p = emit32(p, 0);
2965
2966 /* setb lo8(%dst) */
2967 /* note, 8-bit register rex trickyness. Be careful here. */
2968 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
2969 *p++ = 0x0F;
2970 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
2971 *p++ = toUChar(0xC0 + (reg & 7));
2972 goto done;
2973
2974 case Ain_Bsfr64:
2975 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2976 *p++ = 0x0F;
2977 if (i->Ain.Bsfr64.isFwds) {
2978 *p++ = 0xBC;
2979 } else {
2980 *p++ = 0xBD;
2981 }
2982 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2983 goto done;
2984
2985 case Ain_MFence:
2986 /* mfence */
2987 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2988 goto done;
2989
2990 case Ain_ACAS:
2991 /* lock */
2992 *p++ = 0xF0;
2993 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
2994 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
2995 in %rbx. The new-value register is hardwired to be %rbx
2996 since dealing with byte integer registers is too much hassle,
2997 so we force the register operand to %rbx (could equally be
2998 %rcx or %rdx). */
2999 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3000 if (i->Ain.ACAS.sz != 8)
3001 rex = clearWBit(rex);
3002
3003 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3004 *p++ = 0x0F;
3005 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3006 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3007 goto done;
3008
3009 case Ain_DACAS:
3010 /* lock */
3011 *p++ = 0xF0;
3012 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3013 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3014 aren't encoded in the insn. */
3015 rex = rexAMode_M( fake(1), i->Ain.ACAS.addr );
3016 if (i->Ain.ACAS.sz != 8)
3017 rex = clearWBit(rex);
3018 *p++ = rex;
3019 *p++ = 0x0F;
3020 *p++ = 0xC7;
3021 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
3022 goto done;
3023
3024 case Ain_A87Free:
3025 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3026 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3027 p = do_ffree_st(p, 7-j);
3028 }
3029 goto done;
3030
3031 case Ain_A87PushPop:
3032 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3033 if (i->Ain.A87PushPop.isPush) {
3034 /* Load from memory into %st(0): flds/fldl amode */
3035 *p++ = clearWBit(
3036 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
3037 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3038 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
3039 } else {
3040 /* Dump %st(0) to memory: fstps/fstpl amode */
3041 *p++ = clearWBit(
3042 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
3043 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3044 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
3045 goto done;
3046 }
3047 goto done;
3048
3049 case Ain_A87FpOp:
3050 switch (i->Ain.A87FpOp.op) {
3051 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3052 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3053 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3054 case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break;
3055 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3056 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3057 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3058 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3059 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3060 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3061 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3062 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3063 default: goto bad;
3064 }
3065 goto done;
3066
3067 case Ain_A87LdCW:
3068 *p++ = clearWBit(
3069 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
3070 *p++ = 0xD9;
3071 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
3072 goto done;
3073
3074 case Ain_A87StSW:
3075 *p++ = clearWBit(
3076 rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
3077 *p++ = 0xDD;
3078 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
3079 goto done;
3080
3081 case Ain_Store:
3082 if (i->Ain.Store.sz == 2) {
3083 /* This just goes to show the crazyness of the instruction
3084 set encoding. We have to insert two prefix bytes, but be
3085 careful to avoid a conflict in what the size should be, by
3086 ensuring that REX.W = 0. */
3087 *p++ = 0x66; /* override to 16-bits */
3088 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3089 *p++ = 0x89;
3090 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3091 goto done;
3092 }
3093 if (i->Ain.Store.sz == 4) {
3094 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3095 *p++ = 0x89;
3096 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3097 goto done;
3098 }
3099 if (i->Ain.Store.sz == 1) {
3100 /* This is one place where it would be wrong to skip emitting
3101 a rex byte of 0x40, since the mere presence of rex changes
3102 the meaning of the byte register access. Be careful. */
3103 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3104 *p++ = 0x88;
3105 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3106 goto done;
3107 }
3108 break;
3109
3110 case Ain_LdMXCSR:
3111 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
3112 *p++ = 0x0F;
3113 *p++ = 0xAE;
3114 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
3115 goto done;
3116
3117 case Ain_SseUComIS:
3118 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3119 /* ucomi[sd] %srcL, %srcR */
3120 if (i->Ain.SseUComIS.sz == 8) {
3121 *p++ = 0x66;
3122 } else {
3123 goto bad;
3124 vassert(i->Ain.SseUComIS.sz == 4);
3125 }
3126 *p++ = clearWBit (
3127 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
3128 vreg2ireg(i->Ain.SseUComIS.srcR) ));
3129 *p++ = 0x0F;
3130 *p++ = 0x2E;
3131 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
3132 vreg2ireg(i->Ain.SseUComIS.srcR) );
3133 /* pushfq */
3134 *p++ = 0x9C;
3135 /* popq %dst */
3136 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
3137 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
3138 goto done;
3139
3140 case Ain_SseSI2SF:
3141 /* cvssi2s[sd] %src, %dst */
3142 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
3143 i->Ain.SseSI2SF.src );
3144 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3145 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3146 *p++ = 0x0F;
3147 *p++ = 0x2A;
3148 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
3149 i->Ain.SseSI2SF.src );
3150 goto done;
3151
3152 case Ain_SseSF2SI:
3153 /* cvss[sd]2si %src, %dst */
3154 rex = rexAMode_R( i->Ain.SseSF2SI.dst,
3155 vreg2ireg(i->Ain.SseSF2SI.src) );
3156 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3157 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3158 *p++ = 0x0F;
3159 *p++ = 0x2D;
3160 p = doAMode_R( p, i->Ain.SseSF2SI.dst,
3161 vreg2ireg(i->Ain.SseSF2SI.src) );
3162 goto done;
3163
3164 case Ain_SseSDSS:
3165 /* cvtsd2ss/cvtss2sd %src, %dst */
3166 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3167 *p++ = clearWBit(
3168 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
3169 vreg2ireg(i->Ain.SseSDSS.src) ));
3170 *p++ = 0x0F;
3171 *p++ = 0x5A;
3172 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
3173 vreg2ireg(i->Ain.SseSDSS.src) );
3174 goto done;
3175
3176 case Ain_SseLdSt:
3177 if (i->Ain.SseLdSt.sz == 8) {
3178 *p++ = 0xF2;
3179 } else
3180 if (i->Ain.SseLdSt.sz == 4) {
3181 *p++ = 0xF3;
3182 } else
3183 if (i->Ain.SseLdSt.sz != 16) {
3184 vassert(0);
3185 }
3186 *p++ = clearWBit(
3187 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
3188 *p++ = 0x0F;
3189 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3190 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
3191 goto done;
3192
3193 case Ain_SseLdzLO:
3194 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3195 /* movs[sd] amode, %xmm-dst */
3196 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3197 *p++ = clearWBit(
3198 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
3199 i->Ain.SseLdzLO.addr));
3200 *p++ = 0x0F;
3201 *p++ = 0x10;
3202 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
3203 i->Ain.SseLdzLO.addr);
3204 goto done;
3205
3206 case Ain_Sse32Fx4:
3207 xtra = 0;
3208 *p++ = clearWBit(
3209 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
3210 vreg2ireg(i->Ain.Sse32Fx4.src) ));
3211 *p++ = 0x0F;
3212 switch (i->Ain.Sse32Fx4.op) {
3213 case Asse_ADDF: *p++ = 0x58; break;
3214 case Asse_DIVF: *p++ = 0x5E; break;
3215 case Asse_MAXF: *p++ = 0x5F; break;
3216 case Asse_MINF: *p++ = 0x5D; break;
3217 case Asse_MULF: *p++ = 0x59; break;
3218 case Asse_RCPF: *p++ = 0x53; break;
3219 case Asse_RSQRTF: *p++ = 0x52; break;
3220 case Asse_SQRTF: *p++ = 0x51; break;
3221 case Asse_SUBF: *p++ = 0x5C; break;
3222 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3223 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3224 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3225 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3226 default: goto bad;
3227 }
3228 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
3229 vreg2ireg(i->Ain.Sse32Fx4.src) );
3230 if (xtra & 0x100)
3231 *p++ = toUChar(xtra & 0xFF);
3232 goto done;
3233
3234 case Ain_Sse64Fx2:
3235 xtra = 0;
3236 *p++ = 0x66;
3237 *p++ = clearWBit(
3238 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
3239 vreg2ireg(i->Ain.Sse64Fx2.src) ));
3240 *p++ = 0x0F;
3241 switch (i->Ain.Sse64Fx2.op) {
3242 case Asse_ADDF: *p++ = 0x58; break;
3243 case Asse_DIVF: *p++ = 0x5E; break;
3244 case Asse_MAXF: *p++ = 0x5F; break;
3245 case Asse_MINF: *p++ = 0x5D; break;
3246 case Asse_MULF: *p++ = 0x59; break;
3247 case Asse_SQRTF: *p++ = 0x51; break;
3248 case Asse_SUBF: *p++ = 0x5C; break;
3249 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3250 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3251 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3252 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3253 default: goto bad;
3254 }
3255 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
3256 vreg2ireg(i->Ain.Sse64Fx2.src) );
3257 if (xtra & 0x100)
3258 *p++ = toUChar(xtra & 0xFF);
3259 goto done;
3260
3261 case Ain_Sse32FLo:
3262 xtra = 0;
3263 *p++ = 0xF3;
3264 *p++ = clearWBit(
3265 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
3266 vreg2ireg(i->Ain.Sse32FLo.src) ));
3267 *p++ = 0x0F;
3268 switch (i->Ain.Sse32FLo.op) {
3269 case Asse_ADDF: *p++ = 0x58; break;
3270 case Asse_DIVF: *p++ = 0x5E; break;
3271 case Asse_MAXF: *p++ = 0x5F; break;
3272 case Asse_MINF: *p++ = 0x5D; break;
3273 case Asse_MULF: *p++ = 0x59; break;
3274 case Asse_RCPF: *p++ = 0x53; break;
3275 case Asse_RSQRTF: *p++ = 0x52; break;
3276 case Asse_SQRTF: *p++ = 0x51; break;
3277 case Asse_SUBF: *p++ = 0x5C; break;
3278 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3279 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3280 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3281 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3282 default: goto bad;
3283 }
3284 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
3285 vreg2ireg(i->Ain.Sse32FLo.src) );
3286 if (xtra & 0x100)
3287 *p++ = toUChar(xtra & 0xFF);
3288 goto done;
3289
3290 case Ain_Sse64FLo:
3291 xtra = 0;
3292 *p++ = 0xF2;
3293 *p++ = clearWBit(
3294 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
3295 vreg2ireg(i->Ain.Sse64FLo.src) ));
3296 *p++ = 0x0F;
3297 switch (i->Ain.Sse64FLo.op) {
3298 case Asse_ADDF: *p++ = 0x58; break;
3299 case Asse_DIVF: *p++ = 0x5E; break;
3300 case Asse_MAXF: *p++ = 0x5F; break;
3301 case Asse_MINF: *p++ = 0x5D; break;
3302 case Asse_MULF: *p++ = 0x59; break;
3303 case Asse_SQRTF: *p++ = 0x51; break;
3304 case Asse_SUBF: *p++ = 0x5C; break;
3305 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3306 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3307 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3308 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3309 default: goto bad;
3310 }
3311 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
3312 vreg2ireg(i->Ain.Sse64FLo.src) );
3313 if (xtra & 0x100)
3314 *p++ = toUChar(xtra & 0xFF);
3315 goto done;
3316
3317 case Ain_SseReRg:
3318 # define XX(_n) *p++ = (_n)
3319
3320 rex = clearWBit(
3321 rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
3322 vreg2ireg(i->Ain.SseReRg.src) ));
3323
3324 switch (i->Ain.SseReRg.op) {
3325 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3326 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3327 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3328 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3329 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3330 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3331 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3332 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3333 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3334 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3335 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3336 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3337 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3338 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3339 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3340 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3341 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3342 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3343 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3344 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3345 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3346 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3347 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3348 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3349 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3350 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3351 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3352 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3353 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3354 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3355 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3356 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3357 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3358 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3359 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3360 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3361 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3362 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3363 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3364 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3365 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3366 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3367 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3368 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3369 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3370 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3371 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3372 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3373 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3374 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3375 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3376 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3377 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3378 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3379 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3380 default: goto bad;
3381 }
3382 p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
3383 vreg2ireg(i->Ain.SseReRg.src) );
3384 # undef XX
3385 goto done;
3386
3387 case Ain_SseCMov:
3388 /* jmp fwds if !condition */
3389 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3390 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3391 ptmp = p;
3392
3393 /* movaps %src, %dst */
3394 *p++ = clearWBit(
3395 rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
3396 vreg2ireg(i->Ain.SseCMov.src) ));
3397 *p++ = 0x0F;
3398 *p++ = 0x28;
3399 p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
3400 vreg2ireg(i->Ain.SseCMov.src) );
3401
3402 /* Fill in the jump offset. */
3403 *(ptmp-1) = toUChar(p - ptmp);
3404 goto done;
3405
3406 case Ain_SseShuf:
3407 *p++ = 0x66;
3408 *p++ = clearWBit(
3409 rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
3410 vreg2ireg(i->Ain.SseShuf.src) ));
3411 *p++ = 0x0F;
3412 *p++ = 0x70;
3413 p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
3414 vreg2ireg(i->Ain.SseShuf.src) );
3415 *p++ = (UChar)(i->Ain.SseShuf.order);
3416 goto done;
3417
3418 //uu case Ain_AvxLdSt: {
3419 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
3420 //uu i->Ain.AvxLdSt.addr );
3421 //uu p = emitVexPrefix(p, vex);
3422 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
3423 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
3424 //uu goto done;
3425 //uu }
3426
3427 case Ain_EvCheck: {
3428 /* We generate:
3429 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
3430 (2 bytes) jns nofail expected taken
3431 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
3432 nofail:
3433 */
3434 /* This is heavily asserted re instruction lengths. It needs to
3435 be. If we get given unexpected forms of .amCounter or
3436 .amFailAddr -- basically, anything that's not of the form
3437 uimm7(%rbp) -- they are likely to fail. */
3438 /* Note also that after the decl we must be very careful not to
3439 read the carry flag, else we get a partial flags stall.
3440 js/jns avoids that, though. */
3441 UChar* p0 = p;
3442 /* --- decl 8(%rbp) --- */
3443 /* Need to compute the REX byte for the decl in order to prove
3444 that we don't need it, since this is a 32-bit inc and all
3445 registers involved in the amode are < r8. "fake(1)" because
3446 there's no register in this encoding; instead the register
3447 field is used as a sub opcode. The encoding for "decl r/m32"
3448 is FF /1, hence the fake(1). */
3449 rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
3450 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
3451 *p++ = 0xFF;
3452 p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
3453 vassert(p - p0 == 3);
3454 /* --- jns nofail --- */
3455 *p++ = 0x79;
3456 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3457 vassert(p - p0 == 5);
3458 /* --- jmp* 0(%rbp) --- */
3459 /* Once again, verify we don't need REX. The encoding is FF /4.
3460 We don't need REX.W since by default FF /4 in 64-bit mode
3461 implies a 64 bit load. */
3462 rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
3463 if (rex != 0x40) goto bad;
3464 *p++ = 0xFF;
3465 p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
3466 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3467 /* And crosscheck .. */
3468 vassert(evCheckSzB_AMD64() == 8);
3469 goto done;
3470 }
3471
3472 case Ain_ProfInc: {
3473 /* We generate movabsq $0, %r11
3474 incq (%r11)
3475 in the expectation that a later call to LibVEX_patchProfCtr
3476 will be used to fill in the immediate field once the right
3477 value is known.
3478 49 BB 00 00 00 00 00 00 00 00
3479 49 FF 03
3480 */
3481 *p++ = 0x49; *p++ = 0xBB;
3482 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3483 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3484 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
3485 /* Tell the caller .. */
3486 vassert(!(*is_profInc));
3487 *is_profInc = True;
3488 goto done;
3489 }
3490
3491 default:
3492 goto bad;
3493 }
3494
3495 bad:
3496 ppAMD64Instr(i, mode64);
3497 vpanic("emit_AMD64Instr");
3498 /*NOTREACHED*/
3499
3500 done:
3501 vassert(p - &buf[0] <= 32);
3502 return p - &buf[0];
3503
3504 # undef fake
3505 }
3506
3507
3508 /* How big is an event check? See case for Ain_EvCheck in
3509 emit_AMD64Instr just above. That crosschecks what this returns, so
3510 we can tell if we're inconsistent. */
evCheckSzB_AMD64(void)3511 Int evCheckSzB_AMD64 ( void )
3512 {
3513 return 8;
3514 }
3515
3516
3517 /* NB: what goes on here has to be very closely coordinated with the
3518 emitInstr case for XDirect, above. */
chainXDirect_AMD64(void * place_to_chain,void * disp_cp_chain_me_EXPECTED,void * place_to_jump_to)3519 VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
3520 void* disp_cp_chain_me_EXPECTED,
3521 void* place_to_jump_to )
3522 {
3523 /* What we're expecting to see is:
3524 movabsq $disp_cp_chain_me_EXPECTED, %r11
3525 call *%r11
3526 viz
3527 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
3528 41 FF D3
3529 */
3530 UChar* p = (UChar*)place_to_chain;
3531 vassert(p[0] == 0x49);
3532 vassert(p[1] == 0xBB);
3533 vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
3534 vassert(p[10] == 0x41);
3535 vassert(p[11] == 0xFF);
3536 vassert(p[12] == 0xD3);
3537 /* And what we want to change it to is either:
3538 (general case):
3539 movabsq $place_to_jump_to, %r11
3540 jmpq *%r11
3541 viz
3542 49 BB <8 bytes value == place_to_jump_to>
3543 41 FF E3
3544 So it's the same length (convenient, huh) and we don't
3545 need to change all the bits.
3546 ---OR---
3547 in the case where the displacement falls within 32 bits
3548 jmpq disp32 where disp32 is relative to the next insn
3549 ud2; ud2; ud2; ud2
3550 viz
3551 E9 <4 bytes == disp32>
3552 0F 0B 0F 0B 0F 0B 0F 0B
3553
3554 In both cases the replacement has the same length as the original.
3555 To remain sane & verifiable,
3556 (1) limit the displacement for the short form to
3557 (say) +/- one billion, so as to avoid wraparound
3558 off-by-ones
3559 (2) even if the short form is applicable, once every (say)
3560 1024 times use the long form anyway, so as to maintain
3561 verifiability
3562 */
3563 /* This is the delta we need to put into a JMP d32 insn. It's
3564 relative to the start of the next insn, hence the -5. */
3565 Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
3566 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
3567
3568 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
3569 if (shortOK) {
3570 shortCTR++; // thread safety bleh
3571 if (0 == (shortCTR & 0x3FF)) {
3572 shortOK = False;
3573 if (0)
3574 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
3575 "using long jmp\n", shortCTR);
3576 }
3577 }
3578
3579 /* And make the modifications. */
3580 if (shortOK) {
3581 p[0] = 0xE9;
3582 p[1] = (delta >> 0) & 0xFF;
3583 p[2] = (delta >> 8) & 0xFF;
3584 p[3] = (delta >> 16) & 0xFF;
3585 p[4] = (delta >> 24) & 0xFF;
3586 p[5] = 0x0F; p[6] = 0x0B;
3587 p[7] = 0x0F; p[8] = 0x0B;
3588 p[9] = 0x0F; p[10] = 0x0B;
3589 p[11] = 0x0F; p[12] = 0x0B;
3590 /* sanity check on the delta -- top 32 are all 0 or all 1 */
3591 delta >>= 32;
3592 vassert(delta == 0LL || delta == -1LL);
3593 } else {
3594 /* Minimal modifications from the starting sequence. */
3595 *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
3596 p[12] = 0xE3;
3597 }
3598 VexInvalRange vir = {0, 0};
3599 return vir;
3600 }
3601
3602
3603 /* NB: what goes on here has to be very closely coordinated with the
3604 emitInstr case for XDirect, above. */
unchainXDirect_AMD64(void * place_to_unchain,void * place_to_jump_to_EXPECTED,void * disp_cp_chain_me)3605 VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
3606 void* place_to_jump_to_EXPECTED,
3607 void* disp_cp_chain_me )
3608 {
3609 /* What we're expecting to see is either:
3610 (general case)
3611 movabsq $place_to_jump_to_EXPECTED, %r11
3612 jmpq *%r11
3613 viz
3614 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
3615 41 FF E3
3616 ---OR---
3617 in the case where the displacement falls within 32 bits
3618 jmpq d32
3619 ud2; ud2; ud2; ud2
3620 viz
3621 E9 <4 bytes == disp32>
3622 0F 0B 0F 0B 0F 0B 0F 0B
3623 */
3624 UChar* p = (UChar*)place_to_unchain;
3625 Bool valid = False;
3626 if (p[0] == 0x49 && p[1] == 0xBB
3627 && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
3628 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
3629 /* it's the long form */
3630 valid = True;
3631 }
3632 else
3633 if (p[0] == 0xE9
3634 && p[5] == 0x0F && p[6] == 0x0B
3635 && p[7] == 0x0F && p[8] == 0x0B
3636 && p[9] == 0x0F && p[10] == 0x0B
3637 && p[11] == 0x0F && p[12] == 0x0B) {
3638 /* It's the short form. Check the offset is right. */
3639 Int s32 = *(Int*)(&p[1]);
3640 Long s64 = (Long)s32;
3641 if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) {
3642 valid = True;
3643 if (0)
3644 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
3645 }
3646 }
3647 vassert(valid);
3648 /* And what we want to change it to is:
3649 movabsq $disp_cp_chain_me, %r11
3650 call *%r11
3651 viz
3652 49 BB <8 bytes value == disp_cp_chain_me>
3653 41 FF D3
3654 So it's the same length (convenient, huh).
3655 */
3656 p[0] = 0x49;
3657 p[1] = 0xBB;
3658 *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
3659 p[10] = 0x41;
3660 p[11] = 0xFF;
3661 p[12] = 0xD3;
3662 VexInvalRange vir = {0, 0};
3663 return vir;
3664 }
3665
3666
3667 /* Patch the counter address into a profile inc point, as previously
3668 created by the Ain_ProfInc case for emit_AMD64Instr. */
patchProfInc_AMD64(void * place_to_patch,ULong * location_of_counter)3669 VexInvalRange patchProfInc_AMD64 ( void* place_to_patch,
3670 ULong* location_of_counter )
3671 {
3672 vassert(sizeof(ULong*) == 8);
3673 UChar* p = (UChar*)place_to_patch;
3674 vassert(p[0] == 0x49);
3675 vassert(p[1] == 0xBB);
3676 vassert(p[2] == 0x00);
3677 vassert(p[3] == 0x00);
3678 vassert(p[4] == 0x00);
3679 vassert(p[5] == 0x00);
3680 vassert(p[6] == 0x00);
3681 vassert(p[7] == 0x00);
3682 vassert(p[8] == 0x00);
3683 vassert(p[9] == 0x00);
3684 vassert(p[10] == 0x49);
3685 vassert(p[11] == 0xFF);
3686 vassert(p[12] == 0x03);
3687 ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
3688 p[2] = imm64 & 0xFF; imm64 >>= 8;
3689 p[3] = imm64 & 0xFF; imm64 >>= 8;
3690 p[4] = imm64 & 0xFF; imm64 >>= 8;
3691 p[5] = imm64 & 0xFF; imm64 >>= 8;
3692 p[6] = imm64 & 0xFF; imm64 >>= 8;
3693 p[7] = imm64 & 0xFF; imm64 >>= 8;
3694 p[8] = imm64 & 0xFF; imm64 >>= 8;
3695 p[9] = imm64 & 0xFF; imm64 >>= 8;
3696 VexInvalRange vir = {0, 0};
3697 return vir;
3698 }
3699
3700
3701 /*---------------------------------------------------------------*/
3702 /*--- end host_amd64_defs.c ---*/
3703 /*---------------------------------------------------------------*/
3704