/*---------------------------------------------------------------*/
/*--- begin host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"


/* --------- Registers. --------- */

void ppHRegAMD64 ( HReg reg )
{
   Int r;
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg64_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegAMD64");
   }
}

static void ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      vex_printf("d");
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("%s", ireg32_names[r]);
         return;
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}

HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
HReg hregAMD64_R8  ( void ) { return mkHReg( 8, HRcInt64, False); }
HReg hregAMD64_R9  ( void ) { return mkHReg( 9, HRcInt64, False); }
HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }

HReg hregAMD64_XMM0  ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1  ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM3  ( void ) { return mkHReg( 3, HRcVec128, False); }
HReg hregAMD64_XMM4  ( void ) { return mkHReg( 4, HRcVec128, False); }
HReg hregAMD64_XMM5  ( void ) { return mkHReg( 5, HRcVec128, False); }
HReg hregAMD64_XMM6  ( void ) { return mkHReg( 6, HRcVec128, False); }
HReg hregAMD64_XMM7  ( void ) { return mkHReg( 7, HRcVec128, False); }
HReg hregAMD64_XMM8  ( void ) { return mkHReg( 8, HRcVec128, False); }
HReg hregAMD64_XMM9  ( void ) { return mkHReg( 9, HRcVec128, False); }
HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }


void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
   *nregs = 6;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_RBX();

   (*arr)[ 3] = hregAMD64_XMM7();
   (*arr)[ 4] = hregAMD64_XMM8();
   (*arr)[ 5] = hregAMD64_XMM9();
#endif
#if 1
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[ 0] = hregAMD64_RSI();
   (*arr)[ 1] = hregAMD64_RDI();
   (*arr)[ 2] = hregAMD64_R8();
   (*arr)[ 3] = hregAMD64_R9();
   (*arr)[ 4] = hregAMD64_R12();
   (*arr)[ 5] = hregAMD64_R13();
   (*arr)[ 6] = hregAMD64_R14();
   (*arr)[ 7] = hregAMD64_R15();
   (*arr)[ 8] = hregAMD64_RBX();

   (*arr)[ 9] = hregAMD64_XMM3();
   (*arr)[10] = hregAMD64_XMM4();
   (*arr)[11] = hregAMD64_XMM5();
   (*arr)[12] = hregAMD64_XMM6();
   (*arr)[13] = hregAMD64_XMM7();
   (*arr)[14] = hregAMD64_XMM8();
   (*arr)[15] = hregAMD64_XMM9();
   (*arr)[16] = hregAMD64_XMM10();
   (*arr)[17] = hregAMD64_XMM11();
   (*arr)[18] = hregAMD64_XMM12();
   (*arr)[19] = hregAMD64_R10();
#endif
}
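/* Editorial note (an observation drawn from the rest of this file, not a
   comment in the original): the real registers left out of the allocatable
   set above all have fixed roles below -- %rsp is the stack pointer, %rbp
   holds the baseblock pointer used by the XAssisted/EvCheck cases, %r11 is
   the scratch register for Ain_Call/Ain_XDirect/Ain_ProfInc, %rax/%rdx are
   implicit operands of Ain_MulL, Ain_Div and the CAS instructions, and
   %rcx supplies the %cl shift count for Ain_Sh64. */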


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("ppAMD64CondCode");
   }
}


/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
   am->tag = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

void ppAMD64AMode ( AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         if (am->Aam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Aam.IR.imm);
         ppHRegAMD64(am->Aam.IR.reg);
         vex_printf(")");
         return;
      case Aam_IRRS:
         vex_printf("0x%x(", am->Aam.IRRS.imm);
         ppHRegAMD64(am->Aam.IRRS.base);
         vex_printf(",");
         ppHRegAMD64(am->Aam.IRRS.index);
         vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
         return;
      default:
         vpanic("ppAMD64AMode");
   }
}
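/* A worked example (added for clarity, not in the original source): the
   amode built by AMD64AMode_IRRS(0x1C, hregAMD64_RAX(), hregAMD64_RDI(), 2)
   denotes the address %rax + %rdi*4 + 0x1C and is rendered by ppAMD64AMode
   as "0x1C(%rax,%rdi,4)" -- the stored shift is the log2 of the printed
   scale. */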

static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         addHRegUse(u, HRmRead, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         addHRegUse(u, HRmRead, am->Aam.IRRS.base);
         addHRegUse(u, HRmRead, am->Aam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_AMD64AMode");
   }
}

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         am->Aam.IRRS.base  = lookupHRegRemap(m, am->Aam.IRRS.base);
         am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_AMD64AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
   switch (op->tag) {
      case Armi_Imm:
         vex_printf("$0x%x", op->Armi.Imm.imm32);
         return;
      case Armi_Reg:
         if (lo32)
            ppHRegAMD64_lo32(op->Armi.Reg.reg);
         else
            ppHRegAMD64(op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         ppAMD64AMode(op->Armi.Mem.am);
         return;
      default:
         vpanic("ppAMD64RMI");
   }
}
void ppAMD64RMI ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, False/*!lo32*/);
}
void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, True/*lo32*/);
}

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         addHRegUse(u, HRmRead, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_AMD64RMI");
   }
}

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         mapRegs_AMD64AMode(m, op->Armi.Mem.am);
         return;
      default:
         vpanic("mapRegs_AMD64RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         vex_printf("$0x%x", op->Ari.Imm.imm32);
         return;
      case Ari_Reg:
         ppHRegAMD64(op->Ari.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RI");
   }
}

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         addHRegUse(u, HRmRead, op->Ari.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RI");
   }
}

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
   switch (op->tag) {
      case Arm_Mem:
         ppAMD64AMode(op->Arm.Mem.am);
         return;
      case Arm_Reg:
         ppHRegAMD64(op->Arm.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RM");
   }
}

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Arm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Arm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RM");
   }
}

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   switch (op->tag) {
      case Arm_Mem:
         mapRegs_AMD64AMode(m, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RM");
   }
}


/* --------- Instructions. --------- */

static const HChar* showAMD64ScalarSz ( Int sz ) {
   switch (sz) {
      case 2: return "w";
      case 4: return "l";
      case 8: return "q";
      default: vpanic("showAMD64ScalarSz");
   }
}

const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
   switch (op) {
      case Aun_NOT: return "not";
      case Aun_NEG: return "neg";
      default: vpanic("showAMD64UnaryOp");
   }
}

const HChar* showAMD64AluOp ( AMD64AluOp op ) {
   switch (op) {
      case Aalu_MOV: return "mov";
      case Aalu_CMP: return "cmp";
      case Aalu_ADD: return "add";
      case Aalu_SUB: return "sub";
      case Aalu_ADC: return "adc";
      case Aalu_SBB: return "sbb";
      case Aalu_AND: return "and";
      case Aalu_OR:  return "or";
      case Aalu_XOR: return "xor";
      case Aalu_MUL: return "imul";
      default: vpanic("showAMD64AluOp");
   }
}

const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
   switch (op) {
      case Ash_SHL: return "shl";
      case Ash_SHR: return "shr";
      case Ash_SAR: return "sar";
      default: vpanic("showAMD64ShiftOp");
   }
}

const HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}

const HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showAMD64SseOp");
   }
}

AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64R;
   i->Ain.Alu64R.op  = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu64M;
   i->Ain.Alu64M.op  = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Sh64;
   i->Ain.Sh64.op  = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
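/* Editorial note (added for clarity, not in the original): a src value of 0
   in Ain_Sh64 means "shift by %cl" rather than by an immediate -- see how
   ppAMD64Instr and getRegUsage_AMD64Instr special-case
   i->Ain.Sh64.src == 0 below. */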
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_Unary64;
   i->Ain.Unary64.op  = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Lea64;
   i->Ain.Lea64.am  = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Alu32R;
   i->Ain.Alu32R.op  = op;
   i->Ain.Alu32R.src = src;
   i->Ain.Alu32R.dst = dst;
   switch (op) {
      case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
      case Aalu_AND: case Aalu_OR:  case Aalu_XOR: break;
      default: vassert(0);
   }
   return i;
}
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src   = src;
   return i;
}
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz    = sz;
   i->Ain.Div.src   = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Push ( AMD64RMI* src ) {
   AMD64Instr* i   = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag          = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
                              RetLoc rloc ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Call;
   i->Ain.Call.cond     = cond;
   i->Ain.Call.target   = target;
   i->Ain.Call.regparms = regparms;
   i->Ain.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 6);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
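/* Editorial note (summarising the register-usage logic later in this file,
   not a comment from the original): an Ain_Call loads its literal target
   into %r11 and therefore trashes it, and depending on 'regparms' it reads
   up to six argument registers in the order %rdi, %rsi, %rdx, %rcx, %r8,
   %r9 -- see the Ain_Call case of getRegUsage_AMD64Instr. */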

AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
                                 AMD64CondCode cond, Bool toFastEP ) {
   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                  = Ain_XDirect;
   i->Ain.XDirect.dstGA    = dstGA;
   i->Ain.XDirect.amRIP    = amRIP;
   i->Ain.XDirect.cond     = cond;
   i->Ain.XDirect.toFastEP = toFastEP;
   return i;
}
AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
                                AMD64CondCode cond ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_XIndir;
   i->Ain.XIndir.dstGA = dstGA;
   i->Ain.XIndir.amRIP = amRIP;
   i->Ain.XIndir.cond  = cond;
   return i;
}
AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                   AMD64CondCode cond, IRJumpKind jk ) {
   AMD64Instr* i          = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                 = Ain_XAssisted;
   i->Ain.XAssisted.dstGA = dstGA;
   i->Ain.XAssisted.amRIP = amRIP;
   i->Ain.XAssisted.cond  = cond;
   i->Ain.XAssisted.jk    = jk;
   return i;
}

AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src  = src;
   i->Ain.CMov64.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_MovxLQ;
   i->Ain.MovxLQ.syned = syned;
   i->Ain.MovxLQ.src   = src;
   i->Ain.MovxLQ.dst   = dst;
   return i;
}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned   = syned;
   i->Ain.LoadEX.src     = src;
   i->Ain.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_Store;
   i->Ain.Store.sz  = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst  = dst;
   return i;
}
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src    = src;
   i->Ain.Bsfr64.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_MFence;
   return i;
}
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag           = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz   = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz   = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}
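/* Editorial note (summarising the printing and register-usage code below,
   not a comment from the original): Ain_ACAS is a "lock cmpxchg" with the
   expected value in %rax and the new value in %rbx, while Ain_DACAS is the
   double-width "lock cmpxchg8b/16b" using %rdx:%rax as the expected pair
   and %rcx:%rbx as the new pair -- hence the fixed register reads and
   writes declared for these cases in getRegUsage_AMD64Instr. */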

AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
   AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                   = Ain_A87PushPop;
   i->Ain.A87PushPop.addr   = addr;
   i->Ain.A87PushPop.isPush = isPush;
   i->Ain.A87PushPop.szB    = szB;
   vassert(szB == 8 || szB == 4);
   return i;
}
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag            = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseUComIS;
   i->Ain.SseUComIS.sz   = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst  = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src    = src;
   i->Ain.SseSDSS.dst    = dst;
   return i;
}
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
   AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                = Ain_SseLdSt;
   i->Ain.SseLdSt.isLoad = isLoad;
   i->Ain.SseLdSt.sz     = toUChar(sz);
   i->Ain.SseLdSt.reg    = reg;
   i->Ain.SseLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 16);
   return i;
}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseLdzLO;
   i->Ain.SseLdzLO.sz   = sz;
   i->Ain.SseLdzLO.reg  = reg;
   i->Ain.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32Fx4;
   i->Ain.Sse32Fx4.op  = op;
   i->Ain.Sse32Fx4.src = src;
   i->Ain.Sse32Fx4.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse32FLo;
   i->Ain.Sse32FLo.op  = op;
   i->Ain.Sse32FLo.src = src;
   i->Ain.Sse32FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64Fx2;
   i->Ain.Sse64Fx2.op  = op;
   i->Ain.Sse64Fx2.src = src;
   i->Ain.Sse64Fx2.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_Sse64FLo;
   i->Ain.Sse64FLo.op  = op;
   i->Ain.Sse64FLo.src = src;
   i->Ain.Sse64FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag             = Ain_SseReRg;
   i->Ain.SseReRg.op  = op;
   i->Ain.SseReRg.src = re;
   i->Ain.SseReRg.dst = rg;
   return i;
}
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag              = Ain_SseCMov;
   i->Ain.SseCMov.cond = cond;
   i->Ain.SseCMov.src  = src;
   i->Ain.SseCMov.dst  = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   AMD64Instr* i        = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag               = Ain_SseShuf;
   i->Ain.SseShuf.order = order;
   i->Ain.SseShuf.src   = src;
   i->Ain.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
//uu                                  HReg reg, AMD64AMode* addr ) {
//uu    AMD64Instr* i         = LibVEX_Alloc(sizeof(AMD64Instr));
//uu    i->tag                = Ain_AvxLdSt;
//uu    i->Ain.AvxLdSt.isLoad = isLoad;
//uu    i->Ain.AvxLdSt.reg    = reg;
//uu    i->Ain.AvxLdSt.addr   = addr;
//uu    return i;
//uu }
//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
//uu    AMD64Instr* i      = LibVEX_Alloc(sizeof(AMD64Instr));
//uu    i->tag             = Ain_AvxReRg;
//uu    i->Ain.AvxReRg.op  = op;
//uu    i->Ain.AvxReRg.src = re;
//uu    i->Ain.AvxReRg.dst = rg;
//uu    return i;
//uu }
AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
                                 AMD64AMode* amFailAddr ) {
   AMD64Instr* i             = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag                    = Ain_EvCheck;
   i->Ain.EvCheck.amCounter  = amCounter;
   i->Ain.EvCheck.amFailAddr = amFailAddr;
   return i;
}
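/* Editorial note (paraphrasing the rendering in ppAMD64Instr below, not a
   comment from the original): an Ain_EvCheck decrements the event counter
   held at amCounter and, if the result goes negative, jumps to the address
   stored at amFailAddr; otherwise execution falls through. */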
AMD64Instr* AMD64Instr_ProfInc ( void ) {
   AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
   i->tag        = Ain_ProfInc;
   return i;
}

void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case Ain_Imm64:
         vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
         ppHRegAMD64(i->Ain.Imm64.dst);
         return;
      case Ain_Alu64R:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
         ppAMD64RMI(i->Ain.Alu64R.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Alu64R.dst);
         return;
      case Ain_Alu64M:
         vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
         ppAMD64RI(i->Ain.Alu64M.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Alu64M.dst);
         return;
      case Ain_Sh64:
         vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
         if (i->Ain.Sh64.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Ain.Sh64.src);
         ppHRegAMD64(i->Ain.Sh64.dst);
         return;
      case Ain_Test64:
         vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
         ppHRegAMD64(i->Ain.Test64.dst);
         return;
      case Ain_Unary64:
         vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
         ppHRegAMD64(i->Ain.Unary64.dst);
         return;
      case Ain_Lea64:
         vex_printf("leaq ");
         ppAMD64AMode(i->Ain.Lea64.am);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Lea64.dst);
         return;
      case Ain_Alu32R:
         vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
         ppAMD64RMI_lo32(i->Ain.Alu32R.src);
         vex_printf(",");
         ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
         return;
      case Ain_MulL:
         vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
         ppAMD64RM(i->Ain.MulL.src);
         return;
      case Ain_Div:
         vex_printf("%cdiv%s ",
                    i->Ain.Div.syned ? 's' : 'u',
                    showAMD64ScalarSz(i->Ain.Div.sz));
         ppAMD64RM(i->Ain.Div.src);
         return;
      case Ain_Push:
         vex_printf("pushq ");
         ppAMD64RMI(i->Ain.Push.src);
         return;
      case Ain_Call:
         vex_printf("call%s[%d,",
                    i->Ain.Call.cond==Acc_ALWAYS
                       ? "" : showAMD64CondCode(i->Ain.Call.cond),
                    i->Ain.Call.regparms );
         ppRetLoc(i->Ain.Call.rloc);
         vex_printf("] 0x%llx", i->Ain.Call.target);
         break;

      case Ain_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XDirect.cond));
         vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
         vex_printf("movq %%r11,");
         ppAMD64AMode(i->Ain.XDirect.amRIP);
         vex_printf("; ");
         vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
                    i->Ain.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Ain_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XIndir.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XIndir.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XIndir.amRIP);
         vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
         return;
      case Ain_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%rflags.%s) { ",
                    showAMD64CondCode(i->Ain.XAssisted.cond));
         vex_printf("movq ");
         ppHRegAMD64(i->Ain.XAssisted.dstGA);
         vex_printf(",");
         ppAMD64AMode(i->Ain.XAssisted.amRIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
                    (Int)i->Ain.XAssisted.jk);
         vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
         return;

      case Ain_CMov64:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
         ppAMD64RM(i->Ain.CMov64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.CMov64.dst);
         return;
      case Ain_MovxLQ:
         vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
         ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.MovxLQ.dst);
         return;
      case Ain_LoadEX:
         if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
            vex_printf("movl ");
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
         } else {
            vex_printf("mov%c%cq ",
                       i->Ain.LoadEX.syned ? 's' : 'z',
                       i->Ain.LoadEX.szSmall==1
                          ? 'b'
                          : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
            ppAMD64AMode(i->Ain.LoadEX.src);
            vex_printf(",");
            ppHRegAMD64(i->Ain.LoadEX.dst);
         }
         return;
      case Ain_Store:
         vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
                              : (i->Ain.Store.sz==2 ? 'w' : 'l'));
         ppHRegAMD64(i->Ain.Store.src);
         vex_printf(",");
         ppAMD64AMode(i->Ain.Store.dst);
         return;
      case Ain_Set64:
         vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
         ppHRegAMD64(i->Ain.Set64.dst);
         return;
      case Ain_Bsfr64:
         vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
         ppHRegAMD64(i->Ain.Bsfr64.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Bsfr64.dst);
         return;
      case Ain_MFence:
         vex_printf("mfence" );
         return;
      case Ain_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
                    : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
         vex_printf("{%%rax->%%rbx},");
         ppAMD64AMode(i->Ain.ACAS.addr);
         return;
      case Ain_DACAS:
         vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
                    (Int)(2 * i->Ain.DACAS.sz));
         ppAMD64AMode(i->Ain.DACAS.addr);
         return;
      case Ain_A87Free:
         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
         break;
      case Ain_A87PushPop:
         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
         ppAMD64AMode(i->Ain.A87PushPop.addr);
         break;
      case Ain_A87FpOp:
         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
         break;
      case Ain_A87LdCW:
         vex_printf("fldcw ");
         ppAMD64AMode(i->Ain.A87LdCW.addr);
         break;
      case Ain_A87StSW:
         vex_printf("fstsw ");
         ppAMD64AMode(i->Ain.A87StSW.addr);
         break;
      case Ain_LdMXCSR:
         vex_printf("ldmxcsr ");
         ppAMD64AMode(i->Ain.LdMXCSR.addr);
         break;
      case Ain_SseUComIS:
         vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseUComIS.srcL);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseUComIS.srcR);
         vex_printf(" ; pushfq ; popq ");
         ppHRegAMD64(i->Ain.SseUComIS.dst);
         break;
      case Ain_SseSI2SF:
         vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
         (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSI2SF.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSI2SF.dst);
         break;
      case Ain_SseSF2SI:
         vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
         ppHRegAMD64(i->Ain.SseSF2SI.src);
         vex_printf(",");
         (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
            (i->Ain.SseSF2SI.dst);
         break;
      case Ain_SseSDSS:
         vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
         ppHRegAMD64(i->Ain.SseSDSS.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseSDSS.dst);
         break;
      case Ain_SseLdSt:
         switch (i->Ain.SseLdSt.sz) {
            case 4:  vex_printf("movss "); break;
            case 8:  vex_printf("movsd "); break;
            case 16: vex_printf("movups "); break;
            default: vassert(0);
         }
         if (i->Ain.SseLdSt.isLoad) {
            ppAMD64AMode(i->Ain.SseLdSt.addr);
            vex_printf(",");
            ppHRegAMD64(i->Ain.SseLdSt.reg);
         } else {
            ppHRegAMD64(i->Ain.SseLdSt.reg);
            vex_printf(",");
            ppAMD64AMode(i->Ain.SseLdSt.addr);
         }
         return;
      case Ain_SseLdzLO:
         vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
         ppAMD64AMode(i->Ain.SseLdzLO.addr);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseLdzLO.reg);
         return;
      case Ain_Sse32Fx4:
         vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
         ppHRegAMD64(i->Ain.Sse32Fx4.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32Fx4.dst);
         return;
      case Ain_Sse32FLo:
         vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
         ppHRegAMD64(i->Ain.Sse32FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse32FLo.dst);
         return;
      case Ain_Sse64Fx2:
         vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
         ppHRegAMD64(i->Ain.Sse64Fx2.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64Fx2.dst);
         return;
      case Ain_Sse64FLo:
         vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
         ppHRegAMD64(i->Ain.Sse64FLo.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.Sse64FLo.dst);
         return;
      case Ain_SseReRg:
         vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
         ppHRegAMD64(i->Ain.SseReRg.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseReRg.dst);
         return;
      case Ain_SseCMov:
         vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
         ppHRegAMD64(i->Ain.SseCMov.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseCMov.dst);
         return;
      case Ain_SseShuf:
         vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
         ppHRegAMD64(i->Ain.SseShuf.src);
         vex_printf(",");
         ppHRegAMD64(i->Ain.SseShuf.dst);
         return;
      //uu case Ain_AvxLdSt:
      //uu    vex_printf("vmovups ");
      //uu    if (i->Ain.AvxLdSt.isLoad) {
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu       vex_printf(",");
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu    } else {
      //uu       ppHRegAMD64(i->Ain.AvxLdSt.reg);
      //uu       vex_printf(",");
      //uu       ppAMD64AMode(i->Ain.AvxLdSt.addr);
      //uu    }
      //uu    return;
      //uu case Ain_AvxReRg:
      //uu    vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
      //uu    ppHRegAMD64(i->Ain.AvxReRg.src);
      //uu    vex_printf(",");
      //uu    ppHRegAMD64(i->Ain.AvxReRg.dst);
      //uu    return;
      case Ain_EvCheck:
         vex_printf("(evCheck) decl ");
         ppAMD64AMode(i->Ain.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Ain_ProfInc:
         vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
         return;
      default:
         vpanic("ppAMD64Instr");
   }
}
1327
1328 /* --------- Helpers for register allocation. --------- */
1329
getRegUsage_AMD64Instr(HRegUsage * u,AMD64Instr * i,Bool mode64)1330 void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
1331 {
1332 Bool unary;
1333 vassert(mode64 == True);
1334 initHRegUsage(u);
1335 switch (i->tag) {
1336 case Ain_Imm64:
1337 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1338 return;
1339 case Ain_Alu64R:
1340 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1341 if (i->Ain.Alu64R.op == Aalu_MOV) {
1342 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1343 return;
1344 }
1345 if (i->Ain.Alu64R.op == Aalu_CMP) {
1346 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1347 return;
1348 }
1349 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1350 return;
1351 case Ain_Alu64M:
1352 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1353 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1354 return;
1355 case Ain_Sh64:
1356 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1357 if (i->Ain.Sh64.src == 0)
1358 addHRegUse(u, HRmRead, hregAMD64_RCX());
1359 return;
1360 case Ain_Test64:
1361 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1362 return;
1363 case Ain_Unary64:
1364 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1365 return;
1366 case Ain_Lea64:
1367 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1368 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1369 return;
1370 case Ain_Alu32R:
1371 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1372 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1373 if (i->Ain.Alu32R.op == Aalu_CMP) {
1374 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1375 return;
1376 }
1377 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1378 return;
1379 case Ain_MulL:
1380 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1381 addHRegUse(u, HRmModify, hregAMD64_RAX());
1382 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1383 return;
1384 case Ain_Div:
1385 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1386 addHRegUse(u, HRmModify, hregAMD64_RAX());
1387 addHRegUse(u, HRmModify, hregAMD64_RDX());
1388 return;
1389 case Ain_Push:
1390 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1391 addHRegUse(u, HRmModify, hregAMD64_RSP());
1392 return;
1393 case Ain_Call:
1394 /* This is a bit subtle. */
1395 /* First off, claim it trashes all the caller-saved regs
1396 which fall within the register allocator's jurisdiction.
1397 These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
1398 and all the xmm registers.
1399 */
1400 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1401 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1402 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1403 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1404 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1405 addHRegUse(u, HRmWrite, hregAMD64_R8());
1406 addHRegUse(u, HRmWrite, hregAMD64_R9());
1407 addHRegUse(u, HRmWrite, hregAMD64_R10());
1408 addHRegUse(u, HRmWrite, hregAMD64_R11());
1409 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1410 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1411 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1412 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1413 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1414 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1415 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1416 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1417 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1418 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1419 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1420 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1421
1422 /* Now we have to state any parameter-carrying registers
1423 which might be read. This depends on the regparmness. */
1424 switch (i->Ain.Call.regparms) {
1425 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1426 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1427 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1428 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1429 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1430 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1431 case 0: break;
1432 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1433 }
1434 /* Finally, there is the issue that the insn trashes a
1435 register because the literal target address has to be
1436 loaded into a register. Fortunately, r11 is stated in the
1437 ABI as a scratch register, and so seems a suitable victim. */
1438 addHRegUse(u, HRmWrite, hregAMD64_R11());
1439 /* Upshot of this is that the assembler really must use r11,
1440 and no other, as a destination temporary. */
1441 return;
1442 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1443 conditionally exit the block. Hence we only need to list (1)
1444 the registers that they read, and (2) the registers that they
1445 write in the case where the block is not exited. (2) is
1446 empty, hence only (1) is relevant here. */
1447 case Ain_XDirect:
1448 /* Don't bother to mention the write to %r11, since it is not
1449 available to the allocator. */
1450 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1451 return;
1452 case Ain_XIndir:
1453 /* Ditto re %r11 */
1454 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1455 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1456 return;
1457 case Ain_XAssisted:
1458 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1459 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1460 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1461 return;
1462 case Ain_CMov64:
1463 addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
1464 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1465 return;
1466 case Ain_MovxLQ:
1467 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1468 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1469 return;
1470 case Ain_LoadEX:
1471 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1472 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1473 return;
1474 case Ain_Store:
1475 addHRegUse(u, HRmRead, i->Ain.Store.src);
1476 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1477 return;
1478 case Ain_Set64:
1479 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1480 return;
1481 case Ain_Bsfr64:
1482 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1483 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1484 return;
1485 case Ain_MFence:
1486 return;
1487 case Ain_ACAS:
1488 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1489 addHRegUse(u, HRmRead, hregAMD64_RBX());
1490 addHRegUse(u, HRmModify, hregAMD64_RAX());
1491 return;
1492 case Ain_DACAS:
1493 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1494 addHRegUse(u, HRmRead, hregAMD64_RCX());
1495 addHRegUse(u, HRmRead, hregAMD64_RBX());
1496 addHRegUse(u, HRmModify, hregAMD64_RDX());
1497 addHRegUse(u, HRmModify, hregAMD64_RAX());
1498 return;
1499 case Ain_A87Free:
1500 return;
1501 case Ain_A87PushPop:
1502 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1503 return;
1504 case Ain_A87FpOp:
1505 return;
1506 case Ain_A87LdCW:
1507 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1508 return;
1509 case Ain_A87StSW:
1510 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1511 return;
1512 case Ain_LdMXCSR:
1513 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1514 return;
1515 case Ain_SseUComIS:
1516 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1517 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1518 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1519 return;
1520 case Ain_SseSI2SF:
1521 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1522 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1523 return;
1524 case Ain_SseSF2SI:
1525 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1526 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1527 return;
1528 case Ain_SseSDSS:
1529 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1530 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1531 return;
1532 case Ain_SseLdSt:
1533 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1534 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1535 i->Ain.SseLdSt.reg);
1536 return;
1537 case Ain_SseLdzLO:
1538 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1539 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1540 return;
1541 case Ain_Sse32Fx4:
1542 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1543 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1544 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1545 || i->Ain.Sse32Fx4.op == Asse_SQRTF );
1546 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1547 addHRegUse(u, unary ? HRmWrite : HRmModify,
1548 i->Ain.Sse32Fx4.dst);
1549 return;
1550 case Ain_Sse32FLo:
1551 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1552 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1553 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1554 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1555 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1556 addHRegUse(u, unary ? HRmWrite : HRmModify,
1557 i->Ain.Sse32FLo.dst);
1558 return;
1559 case Ain_Sse64Fx2:
1560 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1561 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1562 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1563 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1564 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1565 addHRegUse(u, unary ? HRmWrite : HRmModify,
1566 i->Ain.Sse64Fx2.dst);
1567 return;
1568 case Ain_Sse64FLo:
1569 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1570 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1571 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1572 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1573 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1574 addHRegUse(u, unary ? HRmWrite : HRmModify,
1575 i->Ain.Sse64FLo.dst);
1576 return;
1577 case Ain_SseReRg:
1578 if ( (i->Ain.SseReRg.op == Asse_XOR
1579 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1580 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1581 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1582 r,r' as a write of a value to r, and independent of any
1583 previous value in r */
1584 /* (as opposed to a rite of passage :-) */
1585 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1586 } else {
1587 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1588 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1589 ? HRmWrite : HRmModify,
1590 i->Ain.SseReRg.dst);
1591 }
1592 return;
1593 case Ain_SseCMov:
1594 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1595 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1596 return;
1597 case Ain_SseShuf:
1598 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1599 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1600 return;
1601 //uu case Ain_AvxLdSt:
1602 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1603 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1604 //uu i->Ain.AvxLdSt.reg);
1605 //uu return;
1606 //uu case Ain_AvxReRg:
1607 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1608 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1609 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1610 //uu /* See comments on the case for Ain_SseReRg. */
1611 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1612 //uu } else {
1613 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1614 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1615 //uu ? HRmWrite : HRmModify,
1616 //uu i->Ain.AvxReRg.dst);
1617 //uu }
1618 //uu return;
1619 case Ain_EvCheck:
1620 /* We expect both amodes only to mention %rbp, so this is in
1621 fact pointless, since %rbp isn't allocatable, but anyway.. */
1622 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1623 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1624 return;
1625 case Ain_ProfInc:
1626 addHRegUse(u, HRmWrite, hregAMD64_R11());
1627 return;
1628 default:
1629 ppAMD64Instr(i, mode64);
1630 vpanic("getRegUsage_AMD64Instr");
1631 }
1632 }
1633
1634 /* local helper */
mapReg(HRegRemap * m,HReg * r)1635 static inline void mapReg(HRegRemap* m, HReg* r)
1636 {
1637 *r = lookupHRegRemap(m, *r);
1638 }
1639
1640 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1641 {
1642 vassert(mode64 == True);
1643 switch (i->tag) {
1644 case Ain_Imm64:
1645 mapReg(m, &i->Ain.Imm64.dst);
1646 return;
1647 case Ain_Alu64R:
1648 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1649 mapReg(m, &i->Ain.Alu64R.dst);
1650 return;
1651 case Ain_Alu64M:
1652 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1653 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1654 return;
1655 case Ain_Sh64:
1656 mapReg(m, &i->Ain.Sh64.dst);
1657 return;
1658 case Ain_Test64:
1659 mapReg(m, &i->Ain.Test64.dst);
1660 return;
1661 case Ain_Unary64:
1662 mapReg(m, &i->Ain.Unary64.dst);
1663 return;
1664 case Ain_Lea64:
1665 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1666 mapReg(m, &i->Ain.Lea64.dst);
1667 return;
1668 case Ain_Alu32R:
1669 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1670 mapReg(m, &i->Ain.Alu32R.dst);
1671 return;
1672 case Ain_MulL:
1673 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1674 return;
1675 case Ain_Div:
1676 mapRegs_AMD64RM(m, i->Ain.Div.src);
1677 return;
1678 case Ain_Push:
1679 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1680 return;
1681 case Ain_Call:
1682 return;
1683 case Ain_XDirect:
1684 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1685 return;
1686 case Ain_XIndir:
1687 mapReg(m, &i->Ain.XIndir.dstGA);
1688 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1689 return;
1690 case Ain_XAssisted:
1691 mapReg(m, &i->Ain.XAssisted.dstGA);
1692 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1693 return;
1694 case Ain_CMov64:
1695 mapRegs_AMD64RM(m, i->Ain.CMov64.src);
1696 mapReg(m, &i->Ain.CMov64.dst);
1697 return;
1698 case Ain_MovxLQ:
1699 mapReg(m, &i->Ain.MovxLQ.src);
1700 mapReg(m, &i->Ain.MovxLQ.dst);
1701 return;
1702 case Ain_LoadEX:
1703 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1704 mapReg(m, &i->Ain.LoadEX.dst);
1705 return;
1706 case Ain_Store:
1707 mapReg(m, &i->Ain.Store.src);
1708 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1709 return;
1710 case Ain_Set64:
1711 mapReg(m, &i->Ain.Set64.dst);
1712 return;
1713 case Ain_Bsfr64:
1714 mapReg(m, &i->Ain.Bsfr64.src);
1715 mapReg(m, &i->Ain.Bsfr64.dst);
1716 return;
1717 case Ain_MFence:
1718 return;
1719 case Ain_ACAS:
1720 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1721 return;
1722 case Ain_DACAS:
1723 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1724 return;
1725 case Ain_A87Free:
1726 return;
1727 case Ain_A87PushPop:
1728 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1729 return;
1730 case Ain_A87FpOp:
1731 return;
1732 case Ain_A87LdCW:
1733 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1734 return;
1735 case Ain_A87StSW:
1736 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1737 return;
1738 case Ain_LdMXCSR:
1739 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1740 return;
1741 case Ain_SseUComIS:
1742 mapReg(m, &i->Ain.SseUComIS.srcL);
1743 mapReg(m, &i->Ain.SseUComIS.srcR);
1744 mapReg(m, &i->Ain.SseUComIS.dst);
1745 return;
1746 case Ain_SseSI2SF:
1747 mapReg(m, &i->Ain.SseSI2SF.src);
1748 mapReg(m, &i->Ain.SseSI2SF.dst);
1749 return;
1750 case Ain_SseSF2SI:
1751 mapReg(m, &i->Ain.SseSF2SI.src);
1752 mapReg(m, &i->Ain.SseSF2SI.dst);
1753 return;
1754 case Ain_SseSDSS:
1755 mapReg(m, &i->Ain.SseSDSS.src);
1756 mapReg(m, &i->Ain.SseSDSS.dst);
1757 return;
1758 case Ain_SseLdSt:
1759 mapReg(m, &i->Ain.SseLdSt.reg);
1760 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1761 break;
1762 case Ain_SseLdzLO:
1763 mapReg(m, &i->Ain.SseLdzLO.reg);
1764 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1765 break;
1766 case Ain_Sse32Fx4:
1767 mapReg(m, &i->Ain.Sse32Fx4.src);
1768 mapReg(m, &i->Ain.Sse32Fx4.dst);
1769 return;
1770 case Ain_Sse32FLo:
1771 mapReg(m, &i->Ain.Sse32FLo.src);
1772 mapReg(m, &i->Ain.Sse32FLo.dst);
1773 return;
1774 case Ain_Sse64Fx2:
1775 mapReg(m, &i->Ain.Sse64Fx2.src);
1776 mapReg(m, &i->Ain.Sse64Fx2.dst);
1777 return;
1778 case Ain_Sse64FLo:
1779 mapReg(m, &i->Ain.Sse64FLo.src);
1780 mapReg(m, &i->Ain.Sse64FLo.dst);
1781 return;
1782 case Ain_SseReRg:
1783 mapReg(m, &i->Ain.SseReRg.src);
1784 mapReg(m, &i->Ain.SseReRg.dst);
1785 return;
1786 case Ain_SseCMov:
1787 mapReg(m, &i->Ain.SseCMov.src);
1788 mapReg(m, &i->Ain.SseCMov.dst);
1789 return;
1790 case Ain_SseShuf:
1791 mapReg(m, &i->Ain.SseShuf.src);
1792 mapReg(m, &i->Ain.SseShuf.dst);
1793 return;
1794 //uu case Ain_AvxLdSt:
1795 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1796 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1797 //uu break;
1798 //uu case Ain_AvxReRg:
1799 //uu mapReg(m, &i->Ain.AvxReRg.src);
1800 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1801 //uu return;
1802 case Ain_EvCheck:
1803 /* We expect both amodes only to mention %rbp, so this is in
1804 fact pointless, since %rbp isn't allocatable, but anyway.. */
1805 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1806 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1807 return;
1808 case Ain_ProfInc:
1809 /* hardwires r11 -- nothing to modify. */
1810 return;
1811 default:
1812 ppAMD64Instr(i, mode64);
1813 vpanic("mapRegs_AMD64Instr");
1814 }
1815 }
1816
1817 /* Figure out if i represents a reg-reg move, and if so assign the
1818 source and destination to *src and *dst. If in doubt say No. Used
1819 by the register allocator to do move coalescing.
1820 */
1821 Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
1822 {
1823 switch (i->tag) {
1824 case Ain_Alu64R:
1825 /* Moves between integer regs */
1826 if (i->Ain.Alu64R.op != Aalu_MOV)
1827 return False;
1828 if (i->Ain.Alu64R.src->tag != Armi_Reg)
1829 return False;
1830 *src = i->Ain.Alu64R.src->Armi.Reg.reg;
1831 *dst = i->Ain.Alu64R.dst;
1832 return True;
1833 case Ain_SseReRg:
1834 /* Moves between SSE regs */
1835 if (i->Ain.SseReRg.op != Asse_MOV)
1836 return False;
1837 *src = i->Ain.SseReRg.src;
1838 *dst = i->Ain.SseReRg.dst;
1839 return True;
1840 //uu case Ain_AvxReRg:
1841 //uu /* Moves between AVX regs */
1842 //uu if (i->Ain.AvxReRg.op != Asse_MOV)
1843 //uu return False;
1844 //uu *src = i->Ain.AvxReRg.src;
1845 //uu *dst = i->Ain.AvxReRg.dst;
1846 //uu return True;
1847 default:
1848 return False;
1849 }
1850 /*NOTREACHED*/
1851 }
1852
1853
1854 /* Generate amd64 spill/reload instructions under the direction of the
1855 register allocator. Note it's critical these don't write the
1856 condition codes. */
1857
1858 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1859 HReg rreg, Int offsetB, Bool mode64 )
1860 {
1861 AMD64AMode* am;
1862 vassert(offsetB >= 0);
1863 vassert(!hregIsVirtual(rreg));
1864 vassert(mode64 == True);
1865 *i1 = *i2 = NULL;
1866 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1867 switch (hregClass(rreg)) {
1868 case HRcInt64:
1869 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
1870 return;
1871 case HRcVec128:
1872 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
1873 return;
1874 default:
1875 ppHRegClass(hregClass(rreg));
1876 vpanic("genSpill_AMD64: unimplemented regclass");
1877 }
1878 }
1879
1880 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1881 HReg rreg, Int offsetB, Bool mode64 )
1882 {
1883 AMD64AMode* am;
1884 vassert(offsetB >= 0);
1885 vassert(!hregIsVirtual(rreg));
1886 vassert(mode64 == True);
1887 *i1 = *i2 = NULL;
1888 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
1889 switch (hregClass(rreg)) {
1890 case HRcInt64:
1891 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
1892 return;
1893 case HRcVec128:
1894 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
1895 return;
1896 default:
1897 ppHRegClass(hregClass(rreg));
1898 vpanic("genReload_AMD64: unimplemented regclass");
1899 }
1900 }
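
/* A minimal usage sketch of the two hooks above, compiled out and kept
   only as illustration; the register and offset are arbitrary.  Each
   request is satisfied by a single flags-preserving move, which is the
   property the comment above insists on. */
#if 0
static void example_spill_reload ( void )
{
   HInstr* i1 = NULL;
   HInstr* i2 = NULL;
   genSpill_AMD64(&i1, &i2, hregAMD64_RBX(), 48/*offsetB*/, True/*mode64*/);
   /* i1 is now "movq %rbx, 48(%rbp)"; i2 stays NULL, one insn suffices. */
   genReload_AMD64(&i1, &i2, hregAMD64_RBX(), 48/*offsetB*/, True/*mode64*/);
   /* i1 is now "movq 48(%rbp), %rbx"; neither sequence touches %rflags. */
}
#endif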
1901
1902
1903 /* --------- The amd64 assembler (bleh.) --------- */
1904
1905 /* Produce the low three bits of an integer register number. */
1906 static UChar iregBits210 ( HReg r )
1907 {
1908 UInt n;
1909 vassert(hregClass(r) == HRcInt64);
1910 vassert(!hregIsVirtual(r));
1911 n = hregNumber(r);
1912 vassert(n <= 15);
1913 return toUChar(n & 7);
1914 }
1915
1916 /* Produce bit 3 of an integer register number. */
1917 static UChar iregBit3 ( HReg r )
1918 {
1919 UInt n;
1920 vassert(hregClass(r) == HRcInt64);
1921 vassert(!hregIsVirtual(r));
1922 n = hregNumber(r);
1923 vassert(n <= 15);
1924 return toUChar((n >> 3) & 1);
1925 }
1926
1927 /* Produce a complete 4-bit integer register number. */
1928 static UChar iregBits3210 ( HReg r )
1929 {
1930 UInt n;
1931 vassert(hregClass(r) == HRcInt64);
1932 vassert(!hregIsVirtual(r));
1933 n = hregNumber(r);
1934 vassert(n <= 15);
1935 return toUChar(n);
1936 }
1937
1938 /* Given an xmm (128bit V-class) register number, produce the
1939 equivalent numbered register in 64-bit I-class. This is a bit of
1940    fakery which allows functions that work on integer
1941 register numbers to be used when assembling SSE instructions
1942 too. */
1943 static HReg vreg2ireg ( HReg r )
1944 {
1945 UInt n;
1946 vassert(hregClass(r) == HRcVec128);
1947 vassert(!hregIsVirtual(r));
1948 n = hregNumber(r);
1949 vassert(n <= 15);
1950 return mkHReg(n, HRcInt64, False);
1951 }
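
/* A small sketch, compiled out, of why the fakery above is convenient:
   once an xmm register has been passed through vreg2ireg, the ordinary
   iregBit3/iregBits210 helpers yield exactly the REX.R/B bit and ModRM
   field that the SSE encodings below need.  The numbers are just an
   illustration. */
#if 0
static void example_vreg2ireg ( void )
{
   HReg xmm12  = mkHReg(12, HRcVec128, False);
   HReg as_int = vreg2ireg(xmm12);
   vassert(iregBit3(as_int)    == 1);   /* becomes REX.R or REX.B */
   vassert(iregBits210(as_int) == 4);   /* becomes a ModRM field  */
}
#endif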
1952
1953 //uu /* Ditto for ymm regs. */
1954 //uu static HReg dvreg2ireg ( HReg r )
1955 //uu {
1956 //uu UInt n;
1957 //uu vassert(hregClass(r) == HRcVec256);
1958 //uu vassert(!hregIsVirtual(r));
1959 //uu n = hregNumber(r);
1960 //uu vassert(n <= 15);
1961 //uu return mkHReg(n, HRcInt64, False);
1962 //uu }
1963
1964 static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1965 {
1966 vassert(mod < 4);
1967 vassert((reg|regmem) < 8);
1968 return toUChar( ((mod & 3) << 6)
1969 | ((reg & 7) << 3)
1970 | (regmem & 7) );
1971 }
1972
1973 static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1974 {
1975 vassert(shift < 4);
1976 vassert((regindex|regbase) < 8);
1977 return toUChar( ((shift & 3) << 6)
1978 | ((regindex & 7) << 3)
1979 | (regbase & 7) );
1980 }
1981
1982 static UChar* emit32 ( UChar* p, UInt w32 )
1983 {
1984 *p++ = toUChar((w32) & 0x000000FF);
1985 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1986 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1987 *p++ = toUChar((w32 >> 24) & 0x000000FF);
1988 return p;
1989 }
1990
1991 static UChar* emit64 ( UChar* p, ULong w64 )
1992 {
1993 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
1994 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
1995 return p;
1996 }
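
/* A tiny sketch, compiled out, of the byte order these helpers produce:
   x86 immediates and displacements are little-endian, so the least
   significant byte comes out first. */
#if 0
static void example_emit32 ( void )
{
   UChar buf[4];
   UChar* p = emit32(buf, 0x11223344);
   vassert(p - buf == 4);
   vassert(buf[0] == 0x44 && buf[1] == 0x33
           && buf[2] == 0x22 && buf[3] == 0x11);
}
#endif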
1997
1998 /* Does a sign-extend of the lowest 8 bits give
1999 the original number? */
2000 static Bool fits8bits ( UInt w32 )
2001 {
2002 Int i32 = (Int)w32;
2003 return toBool(i32 == ((i32 << 24) >> 24));
2004 }
2005 /* Can the lower 32 bits be signedly widened to produce the whole
2006 64-bit value? In other words, are the top 33 bits either all 0 or
2007 all 1 ? */
2008 static Bool fitsIn32Bits ( ULong x )
2009 {
2010 Long y0 = (Long)x;
2011 Long y1 = y0;
2012 y1 <<= 32;
2013 y1 >>=/*s*/ 32;
2014 return toBool(x == y1);
2015 }
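
/* Worked examples, compiled out, for the two predicates above:
   fits8bits accepts exactly those 32-bit values which, read as signed,
   lie in -128 .. 127; fitsIn32Bits accepts 64-bit values whose top 33
   bits are all zero or all one. */
#if 0
static void example_immediate_ranges ( void )
{
   vassert( fits8bits(0x0000007F));               /*  127          */
   vassert( fits8bits(0xFFFFFF80));               /* -128          */
   vassert(!fits8bits(0x00000080));               /*  128: too big */
   vassert( fitsIn32Bits(0x000000007FFFFFFFULL));
   vassert( fitsIn32Bits(0xFFFFFFFF80000000ULL));
   vassert(!fitsIn32Bits(0x0000000080000000ULL));
}
#endif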
2016
2017
2018 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2019
2020 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2021 = 00 greg ereg
2022
2023 greg, d8(ereg) | ereg is neither of: RSP R12
2024 = 01 greg ereg, d8
2025
2026 greg, d32(ereg) | ereg is neither of: RSP R12
2027 = 10 greg ereg, d32
2028
2029 greg, d8(ereg) | ereg is either: RSP R12
2030 = 01 greg 100, 0x24, d8
2031 (lowest bit of rex distinguishes R12/RSP)
2032
2033 greg, d32(ereg) | ereg is either: RSP R12
2034 = 10 greg 100, 0x24, d32
2035 (lowest bit of rex distinguishes R12/RSP)
2036
2037 -----------------------------------------------
2038
2039 greg, d8(base,index,scale)
2040 | index != RSP
2041 = 01 greg 100, scale index base, d8
2042
2043 greg, d32(base,index,scale)
2044 | index != RSP
2045 = 10 greg 100, scale index base, d32
2046 */
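
/* A worked example, compiled out, of the encodings tabulated above,
   driven through mkModRegRM/mkSIB directly.  The full instruction bytes
   quoted in the comments are what doAMode_M below produces for the same
   operands once the REX prefix and opcode are added. */
#if 0
static void example_modrm_sib ( void )
{
   /* movq %rax, 8(%rbp): mod=01, reg=rax(0), rm=rbp(5), then disp8.
      Complete instruction: 48 89 45 08. */
   vassert(mkModRegRM(1, 0, 5) == 0x45);

   /* movq %rcx, 4(%rdx,%rdi,2): rm=100 selects a SIB byte; the SIB is
      scale=1 (x2), index=rdi(7), base=rdx(2), then disp8.
      Complete instruction: 48 89 4C 7A 04. */
   vassert(mkModRegRM(1, 1, 4) == 0x4C);
   vassert(mkSIB(1, 7, 2)      == 0x7A);
}
#endif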
2047 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2048 {
2049 if (am->tag == Aam_IR) {
2050 if (am->Aam.IR.imm == 0
2051 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2052 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2053 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2054 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2055 ) {
2056 *p++ = mkModRegRM(0, iregBits210(greg),
2057 iregBits210(am->Aam.IR.reg));
2058 return p;
2059 }
2060 if (fits8bits(am->Aam.IR.imm)
2061 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2062 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2063 ) {
2064 *p++ = mkModRegRM(1, iregBits210(greg),
2065 iregBits210(am->Aam.IR.reg));
2066 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2067 return p;
2068 }
2069 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2070 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2071 ) {
2072 *p++ = mkModRegRM(2, iregBits210(greg),
2073 iregBits210(am->Aam.IR.reg));
2074 p = emit32(p, am->Aam.IR.imm);
2075 return p;
2076 }
2077 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2078 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2079 && fits8bits(am->Aam.IR.imm)) {
2080 *p++ = mkModRegRM(1, iregBits210(greg), 4);
2081 *p++ = 0x24;
2082 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2083 return p;
2084 }
2085 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2086 || wait for test case for RSP case */
2087 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2088 *p++ = mkModRegRM(2, iregBits210(greg), 4);
2089 *p++ = 0x24;
2090 p = emit32(p, am->Aam.IR.imm);
2091 return p;
2092 }
2093 ppAMD64AMode(am);
2094 vpanic("doAMode_M: can't emit amode IR");
2095 /*NOTREACHED*/
2096 }
2097 if (am->tag == Aam_IRRS) {
2098 if (fits8bits(am->Aam.IRRS.imm)
2099 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2100 *p++ = mkModRegRM(1, iregBits210(greg), 4);
2101 *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
2102 iregBits210(am->Aam.IRRS.base));
2103 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2104 return p;
2105 }
2106 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2107 *p++ = mkModRegRM(2, iregBits210(greg), 4);
2108 *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
2109 iregBits210(am->Aam.IRRS.base));
2110 p = emit32(p, am->Aam.IRRS.imm);
2111 return p;
2112 }
2113 ppAMD64AMode(am);
2114 vpanic("doAMode_M: can't emit amode IRRS");
2115 /*NOTREACHED*/
2116 }
2117 vpanic("doAMode_M: unknown amode");
2118 /*NOTREACHED*/
2119 }
2120
2121
2122 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2123 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2124 {
2125 *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
2126 return p;
2127 }
2128
2129
2130 /* Clear the W bit on a REX byte, thereby changing the operand size
2131 back to whatever that instruction's default operand size is. */
2132 static inline UChar clearWBit ( UChar rex )
2133 {
2134 return toUChar(rex & ~(1<<3));
2135 }
2136
2137
2138 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2139 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2140 {
2141 if (am->tag == Aam_IR) {
2142 UChar W = 1; /* we want 64-bit mode */
2143 UChar R = iregBit3(greg);
2144 UChar X = 0; /* not relevant */
2145 UChar B = iregBit3(am->Aam.IR.reg);
2146 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2147 }
2148 if (am->tag == Aam_IRRS) {
2149 UChar W = 1; /* we want 64-bit mode */
2150 UChar R = iregBit3(greg);
2151 UChar X = iregBit3(am->Aam.IRRS.index);
2152 UChar B = iregBit3(am->Aam.IRRS.base);
2153 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2154 }
2155 vassert(0);
2156 return 0; /*NOTREACHED*/
2157 }
2158
2159 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2160 static UChar rexAMode_R ( HReg greg, HReg ereg )
2161 {
2162 UChar W = 1; /* we want 64-bit mode */
2163 UChar R = iregBit3(greg);
2164 UChar X = 0; /* not relevant */
2165 UChar B = iregBit3(ereg);
2166 return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
2167 }
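
/* A small worked example, compiled out, of the REX bytes built above.
   A REX prefix has the form 0100WRXB: W selects 64-bit operand size,
   and R, X, B supply bit 3 of the ModRM reg field, the SIB index and
   the ModRM rm (or SIB base) field respectively. */
#if 0
static void example_rex ( void )
{
   /* addq %r12, %rcx is 4C 01 E1: W=1, R=1 (for %r12), X=0, B=0. */
   vassert(rexAMode_R(hregAMD64_R12(), hregAMD64_RCX()) == 0x4C);
   /* Clearing W gives the 32-bit form, addl %r12d, %ecx (44 01 E1). */
   vassert(clearWBit(rexAMode_R(hregAMD64_R12(), hregAMD64_RCX())) == 0x44);
}
#endif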
2168
2169
2170 //uu /* May 2012: this VEX prefix stuff is currently unused, but has been
2171 //uu verified correct (I reckon). Certainly it has been known to
2172 //uu produce correct VEX prefixes during testing. */
2173 //uu
2174 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2175 //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2176 //uu in verbatim. There's no range checking on the bits. */
2177 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2178 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2179 //uu UInt L, UInt pp )
2180 //uu {
2181 //uu UChar byte0 = 0;
2182 //uu UChar byte1 = 0;
2183 //uu UChar byte2 = 0;
2184 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2185 //uu /* 2 byte encoding is possible. */
2186 //uu byte0 = 0xC5;
2187 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2188 //uu | (L << 2) | pp;
2189 //uu } else {
2190 //uu /* 3 byte encoding is needed. */
2191 //uu byte0 = 0xC4;
2192 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2193 //uu | ((rexB ^ 1) << 5) | mmmmm;
2194 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2195 //uu }
2196 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2197 //uu }
2198 //uu
2199 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2200 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2201 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2202 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2203 //uu vvvv=1111 (unused 3rd reg). */
2204 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2205 //uu {
2206 //uu UChar L = 1; /* size = 256 */
2207 //uu UChar pp = 0; /* no SIMD prefix */
2208 //uu UChar mmmmm = 1; /* 0F */
2209 //uu UChar notVvvv = 0; /* unused */
2210 //uu UChar rexW = 0;
2211 //uu UChar rexR = 0;
2212 //uu UChar rexX = 0;
2213 //uu UChar rexB = 0;
2214 //uu /* Same logic as in rexAMode_M. */
2215 //uu if (am->tag == Aam_IR) {
2216 //uu rexR = iregBit3(greg);
2217 //uu rexX = 0; /* not relevant */
2218 //uu rexB = iregBit3(am->Aam.IR.reg);
2219 //uu }
2220 //uu else if (am->tag == Aam_IRRS) {
2221 //uu rexR = iregBit3(greg);
2222 //uu rexX = iregBit3(am->Aam.IRRS.index);
2223 //uu rexB = iregBit3(am->Aam.IRRS.base);
2224 //uu } else {
2225 //uu vassert(0);
2226 //uu }
2227 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2228 //uu }
2229 //uu
2230 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2231 //uu {
2232 //uu switch (vex & 0xFF) {
2233 //uu case 0xC5:
2234 //uu *p++ = 0xC5;
2235 //uu *p++ = (vex >> 8) & 0xFF;
2236 //uu vassert(0 == (vex >> 16));
2237 //uu break;
2238 //uu case 0xC4:
2239 //uu *p++ = 0xC4;
2240 //uu *p++ = (vex >> 8) & 0xFF;
2241 //uu *p++ = (vex >> 16) & 0xFF;
2242 //uu vassert(0 == (vex >> 24));
2243 //uu break;
2244 //uu default:
2245 //uu vassert(0);
2246 //uu }
2247 //uu return p;
2248 //uu }
2249
2250
2251 /* Emit ffree %st(N) */
2252 static UChar* do_ffree_st ( UChar* p, Int n )
2253 {
2254 vassert(n >= 0 && n <= 7);
2255 *p++ = 0xDD;
2256 *p++ = toUChar(0xC0 + n);
2257 return p;
2258 }
2259
2260 /* Emit an instruction into buf and return the number of bytes used.
2261 Note that buf is not the insn's final place, and therefore it is
2262 imperative to emit position-independent code. If the emitted
2263 instruction was a profiler inc, set *is_profInc to True, else
2264 leave it unchanged. */
2265
2266 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2267 UChar* buf, Int nbuf, AMD64Instr* i,
2268 Bool mode64,
2269 void* disp_cp_chain_me_to_slowEP,
2270 void* disp_cp_chain_me_to_fastEP,
2271 void* disp_cp_xindir,
2272 void* disp_cp_xassisted )
2273 {
2274 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2275 UInt xtra;
2276 UInt reg;
2277 UChar rex;
2278 UChar* p = &buf[0];
2279 UChar* ptmp;
2280 Int j;
2281 vassert(nbuf >= 32);
2282 vassert(mode64 == True);
2283
2284    /* Wrap a small integer as an integer register, for use when assembling
2285 GrpN insns, in which the greg field is used as a sub-opcode
2286 and does not really contain a register. */
2287 # define fake(_n) mkHReg((_n), HRcInt64, False)
2288
2289 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2290
2291 switch (i->tag) {
2292
2293 case Ain_Imm64:
2294 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2295            /* Use the short form (load into a 32-bit reg, with implicit
2296               zero-extension of the upper half) for constants up to
2297               0xFFFFF.  We could use this form for the range 0 to
2298               0x7FFFFFFF inclusive, but limit it to a smaller range for verifiability purposes. */
2299 if (1 & iregBit3(i->Ain.Imm64.dst))
2300 *p++ = 0x41;
2301 *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
2302 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2303 } else {
2304 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
2305 *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
2306 p = emit64(p, i->Ain.Imm64.imm64);
2307 }
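            /* For concreteness (illustration only): with %r11 as the
               destination, the short form assembles "movl $0x1234, %r11d"
               as 41 BB 34 12 00 00, while the long form assembles
               "movabsq $imm64, %r11" as 49 BB followed by the eight
               immediate bytes.  The short form is correct because a
               32-bit write architecturally zeroes the upper half of the
               register. */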
2308 goto done;
2309
2310 case Ain_Alu64R:
2311 /* Deal specially with MOV */
2312 if (i->Ain.Alu64R.op == Aalu_MOV) {
2313 switch (i->Ain.Alu64R.src->tag) {
2314 case Armi_Imm:
2315 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2316 /* Actually we could use this form for constants in
2317 the range 0 through 0x7FFFFFFF inclusive, but
2318 limit it to a small range for verifiability
2319 purposes. */
2320 /* Generate "movl $imm32, 32-bit-register" and let
2321 the default zero-extend rule cause the upper half
2322 of the dst to be zeroed out too. This saves 1
2323 and sometimes 2 bytes compared to the more
2324 obvious encoding in the 'else' branch. */
2325 if (1 & iregBit3(i->Ain.Alu64R.dst))
2326 *p++ = 0x41;
2327 *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
2328 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2329 } else {
2330 *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
2331 *p++ = 0xC7;
2332 *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
2333 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2334 }
2335 goto done;
2336 case Armi_Reg:
2337 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2338 i->Ain.Alu64R.dst );
2339 *p++ = 0x89;
2340 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2341 i->Ain.Alu64R.dst);
2342 goto done;
2343 case Armi_Mem:
2344 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2345 i->Ain.Alu64R.src->Armi.Mem.am);
2346 *p++ = 0x8B;
2347 p = doAMode_M(p, i->Ain.Alu64R.dst,
2348 i->Ain.Alu64R.src->Armi.Mem.am);
2349 goto done;
2350 default:
2351 goto bad;
2352 }
2353 }
2354 /* MUL */
2355 if (i->Ain.Alu64R.op == Aalu_MUL) {
2356 switch (i->Ain.Alu64R.src->tag) {
2357 case Armi_Reg:
2358 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2359 i->Ain.Alu64R.src->Armi.Reg.reg);
2360 *p++ = 0x0F;
2361 *p++ = 0xAF;
2362 p = doAMode_R(p, i->Ain.Alu64R.dst,
2363 i->Ain.Alu64R.src->Armi.Reg.reg);
2364 goto done;
2365 case Armi_Mem:
2366 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2367 i->Ain.Alu64R.src->Armi.Mem.am);
2368 *p++ = 0x0F;
2369 *p++ = 0xAF;
2370 p = doAMode_M(p, i->Ain.Alu64R.dst,
2371 i->Ain.Alu64R.src->Armi.Mem.am);
2372 goto done;
2373 case Armi_Imm:
2374 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2375 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2376 *p++ = 0x6B;
2377 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2378 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2379 } else {
2380 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2381 *p++ = 0x69;
2382 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2383 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2384 }
2385 goto done;
2386 default:
2387 goto bad;
2388 }
2389 }
2390 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2391 opc = opc_rr = subopc_imm = opc_imma = 0;
2392 switch (i->Ain.Alu64R.op) {
2393 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2394 subopc_imm = 2; opc_imma = 0x15; break;
2395 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2396 subopc_imm = 0; opc_imma = 0x05; break;
2397 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2398 subopc_imm = 5; opc_imma = 0x2D; break;
2399 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2400 subopc_imm = 3; opc_imma = 0x1D; break;
2401 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2402 subopc_imm = 4; opc_imma = 0x25; break;
2403 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2404 subopc_imm = 6; opc_imma = 0x35; break;
2405 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2406 subopc_imm = 1; opc_imma = 0x0D; break;
2407 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2408 subopc_imm = 7; opc_imma = 0x3D; break;
2409 default: goto bad;
2410 }
2411 switch (i->Ain.Alu64R.src->tag) {
2412 case Armi_Imm:
2413 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2414 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2415 goto bad; /* FIXME: awaiting test case */
2416 *p++ = toUChar(opc_imma);
2417 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2418 } else
2419 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2420 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
2421 *p++ = 0x83;
2422 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
2423 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2424 } else {
2425 *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
2426 *p++ = 0x81;
2427 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
2428 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2429 }
2430 goto done;
2431 case Armi_Reg:
2432 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2433 i->Ain.Alu64R.dst);
2434 *p++ = toUChar(opc_rr);
2435 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2436 i->Ain.Alu64R.dst);
2437 goto done;
2438 case Armi_Mem:
2439 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2440 i->Ain.Alu64R.src->Armi.Mem.am);
2441 *p++ = toUChar(opc);
2442 p = doAMode_M(p, i->Ain.Alu64R.dst,
2443 i->Ain.Alu64R.src->Armi.Mem.am);
2444 goto done;
2445 default:
2446 goto bad;
2447 }
2448 break;
2449
2450 case Ain_Alu64M:
2451 /* Deal specially with MOV */
2452 if (i->Ain.Alu64M.op == Aalu_MOV) {
2453 switch (i->Ain.Alu64M.src->tag) {
2454 case Ari_Reg:
2455 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2456 i->Ain.Alu64M.dst);
2457 *p++ = 0x89;
2458 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2459 i->Ain.Alu64M.dst);
2460 goto done;
2461 case Ari_Imm:
2462 *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
2463 *p++ = 0xC7;
2464 p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
2465 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2466 goto done;
2467 default:
2468 goto bad;
2469 }
2470 }
2471 break;
2472
2473 case Ain_Sh64:
2474 opc_cl = opc_imm = subopc = 0;
2475 switch (i->Ain.Sh64.op) {
2476 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2477 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2478 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2479 default: goto bad;
2480 }
2481 if (i->Ain.Sh64.src == 0) {
2482 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
2483 *p++ = toUChar(opc_cl);
2484 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2485 goto done;
2486 } else {
2487 *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
2488 *p++ = toUChar(opc_imm);
2489 p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
2490 *p++ = (UChar)(i->Ain.Sh64.src);
2491 goto done;
2492 }
2493 break;
2494
2495 case Ain_Test64:
2496 /* testq sign-extend($imm32), %reg */
2497 *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
2498 *p++ = 0xF7;
2499 p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
2500 p = emit32(p, i->Ain.Test64.imm32);
2501 goto done;
2502
2503 case Ain_Unary64:
2504 if (i->Ain.Unary64.op == Aun_NOT) {
2505 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2506 *p++ = 0xF7;
2507 p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
2508 goto done;
2509 }
2510 if (i->Ain.Unary64.op == Aun_NEG) {
2511 *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
2512 *p++ = 0xF7;
2513 p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
2514 goto done;
2515 }
2516 break;
2517
2518 case Ain_Lea64:
2519 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2520 *p++ = 0x8D;
2521 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2522 goto done;
2523
2524 case Ain_Alu32R:
2525 /* ADD/SUB/AND/OR/XOR/CMP */
2526 opc = opc_rr = subopc_imm = opc_imma = 0;
2527 switch (i->Ain.Alu32R.op) {
2528 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2529 subopc_imm = 0; opc_imma = 0x05; break;
2530 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2531 subopc_imm = 5; opc_imma = 0x2D; break;
2532 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2533 subopc_imm = 4; opc_imma = 0x25; break;
2534 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2535 subopc_imm = 6; opc_imma = 0x35; break;
2536 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2537 subopc_imm = 1; opc_imma = 0x0D; break;
2538 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2539 subopc_imm = 7; opc_imma = 0x3D; break;
2540 default: goto bad;
2541 }
2542 switch (i->Ain.Alu32R.src->tag) {
2543 case Armi_Imm:
2544 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2545 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2546 goto bad; /* FIXME: awaiting test case */
2547 *p++ = toUChar(opc_imma);
2548 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2549 } else
2550 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2551 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
2552 if (rex != 0x40) *p++ = rex;
2553 *p++ = 0x83;
2554 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2555 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2556 } else {
2557 rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
2558 if (rex != 0x40) *p++ = rex;
2559 *p++ = 0x81;
2560 p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
2561 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2562 }
2563 goto done;
2564 case Armi_Reg:
2565 rex = clearWBit(
2566 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2567 i->Ain.Alu32R.dst) );
2568 if (rex != 0x40) *p++ = rex;
2569 *p++ = toUChar(opc_rr);
2570 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2571 i->Ain.Alu32R.dst);
2572 goto done;
2573 case Armi_Mem:
2574 rex = clearWBit(
2575 rexAMode_M( i->Ain.Alu32R.dst,
2576 i->Ain.Alu32R.src->Armi.Mem.am) );
2577 if (rex != 0x40) *p++ = rex;
2578 *p++ = toUChar(opc);
2579 p = doAMode_M(p, i->Ain.Alu32R.dst,
2580 i->Ain.Alu32R.src->Armi.Mem.am);
2581 goto done;
2582 default:
2583 goto bad;
2584 }
2585 break;
2586
2587 case Ain_MulL:
2588 subopc = i->Ain.MulL.syned ? 5 : 4;
2589 switch (i->Ain.MulL.src->tag) {
2590 case Arm_Mem:
2591 *p++ = rexAMode_M( fake(0),
2592 i->Ain.MulL.src->Arm.Mem.am);
2593 *p++ = 0xF7;
2594 p = doAMode_M(p, fake(subopc),
2595 i->Ain.MulL.src->Arm.Mem.am);
2596 goto done;
2597 case Arm_Reg:
2598 *p++ = rexAMode_R(fake(0),
2599 i->Ain.MulL.src->Arm.Reg.reg);
2600 *p++ = 0xF7;
2601 p = doAMode_R(p, fake(subopc),
2602 i->Ain.MulL.src->Arm.Reg.reg);
2603 goto done;
2604 default:
2605 goto bad;
2606 }
2607 break;
2608
2609 case Ain_Div:
2610 subopc = i->Ain.Div.syned ? 7 : 6;
2611 if (i->Ain.Div.sz == 4) {
2612 switch (i->Ain.Div.src->tag) {
2613 case Arm_Mem:
2614 goto bad;
2615 /*FIXME*/
2616 *p++ = 0xF7;
2617 p = doAMode_M(p, fake(subopc),
2618 i->Ain.Div.src->Arm.Mem.am);
2619 goto done;
2620 case Arm_Reg:
2621 *p++ = clearWBit(
2622 rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
2623 *p++ = 0xF7;
2624 p = doAMode_R(p, fake(subopc),
2625 i->Ain.Div.src->Arm.Reg.reg);
2626 goto done;
2627 default:
2628 goto bad;
2629 }
2630 }
2631 if (i->Ain.Div.sz == 8) {
2632 switch (i->Ain.Div.src->tag) {
2633 case Arm_Mem:
2634 *p++ = rexAMode_M( fake(0),
2635 i->Ain.Div.src->Arm.Mem.am);
2636 *p++ = 0xF7;
2637 p = doAMode_M(p, fake(subopc),
2638 i->Ain.Div.src->Arm.Mem.am);
2639 goto done;
2640 case Arm_Reg:
2641 *p++ = rexAMode_R( fake(0),
2642 i->Ain.Div.src->Arm.Reg.reg);
2643 *p++ = 0xF7;
2644 p = doAMode_R(p, fake(subopc),
2645 i->Ain.Div.src->Arm.Reg.reg);
2646 goto done;
2647 default:
2648 goto bad;
2649 }
2650 }
2651 break;
2652
2653 case Ain_Push:
2654 switch (i->Ain.Push.src->tag) {
2655 case Armi_Mem:
2656 *p++ = clearWBit(
2657 rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
2658 *p++ = 0xFF;
2659 p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
2660 goto done;
2661 case Armi_Imm:
2662 *p++ = 0x68;
2663 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2664 goto done;
2665 case Armi_Reg:
2666 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
2667 *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
2668 goto done;
2669 default:
2670 goto bad;
2671 }
2672
2673 case Ain_Call: {
2674 if (i->Ain.Call.cond != Acc_ALWAYS
2675 && i->Ain.Call.rloc.pri != RLPri_None) {
2676 /* The call might not happen (it isn't unconditional) and it
2677 returns a result. In this case we will need to generate a
2678 control flow diamond to put 0x555..555 in the return
2679 register(s) in the case where the call doesn't happen. If
2680 this ever becomes necessary, maybe copy code from the ARM
2681 equivalent. Until that day, just give up. */
2682 goto bad;
2683 }
2684 /* As per detailed comment for Ain_Call in
2685 getRegUsage_AMD64Instr above, %r11 is used as an address
2686 temporary. */
2687 /* jump over the following two insns if the condition does not
2688 hold */
2689 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2690 if (i->Ain.Call.cond != Acc_ALWAYS) {
2691 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2692 *p++ = shortImm ? 10 : 13;
2693 /* 10 or 13 bytes in the next two insns */
2694 }
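         /* Worked example (illustration only): for a conditional call
            with a 32-bit-representable target the emitted bytes are
               7x 0A             jcc(!cond) .+10
               49 C7 C3 <imm32>  movq $simm32, %r11   (7 bytes)
               41 FF D3          call *%r11           (3 bytes)
            so the rel8 of 10 (or 13 in the movabsq case) skips exactly
            the two instructions that follow it. */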
2695 if (shortImm) {
2696 /* 7 bytes: movl sign-extend(imm32), %r11 */
2697 *p++ = 0x49;
2698 *p++ = 0xC7;
2699 *p++ = 0xC3;
2700 p = emit32(p, (UInt)i->Ain.Call.target);
2701 } else {
2702 /* 10 bytes: movabsq $target, %r11 */
2703 *p++ = 0x49;
2704 *p++ = 0xBB;
2705 p = emit64(p, i->Ain.Call.target);
2706 }
2707 /* 3 bytes: call *%r11 */
2708 *p++ = 0x41;
2709 *p++ = 0xFF;
2710 *p++ = 0xD3;
2711 goto done;
2712 }
2713
2714 case Ain_XDirect: {
2715 /* NB: what goes on here has to be very closely coordinated with the
2716 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
2717 /* We're generating chain-me requests here, so we need to be
2718 sure this is actually allowed -- no-redir translations can't
2719 use chain-me's. Hence: */
2720 vassert(disp_cp_chain_me_to_slowEP != NULL);
2721 vassert(disp_cp_chain_me_to_fastEP != NULL);
2722
2723 HReg r11 = hregAMD64_R11();
2724
2725 /* Use ptmp for backpatching conditional jumps. */
2726 ptmp = NULL;
2727
2728 /* First off, if this is conditional, create a conditional
2729 jump over the rest of it. */
2730 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2731 /* jmp fwds if !condition */
2732 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
2733 ptmp = p; /* fill in this bit later */
2734 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2735 }
2736
2737 /* Update the guest RIP. */
2738 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
2739 /* use a shorter encoding */
2740 /* movl sign-extend(dstGA), %r11 */
2741 *p++ = 0x49;
2742 *p++ = 0xC7;
2743 *p++ = 0xC3;
2744 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
2745 } else {
2746 /* movabsq $dstGA, %r11 */
2747 *p++ = 0x49;
2748 *p++ = 0xBB;
2749 p = emit64(p, i->Ain.XDirect.dstGA);
2750 }
2751
2752 /* movq %r11, amRIP */
2753 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
2754 *p++ = 0x89;
2755 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
2756
2757 /* --- FIRST PATCHABLE BYTE follows --- */
2758 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2759 to) backs up the return address, so as to find the address of
2760 the first patchable byte. So: don't change the length of the
2761 two instructions below. */
2762 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
2763 *p++ = 0x49;
2764 *p++ = 0xBB;
2765 void* disp_cp_chain_me
2766 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2767 : disp_cp_chain_me_to_slowEP;
2768 p = emit64(p, Ptr_to_ULong(disp_cp_chain_me));
2769 /* call *%r11 */
2770 *p++ = 0x41;
2771 *p++ = 0xFF;
2772 *p++ = 0xD3;
2773 /* --- END of PATCHABLE BYTES --- */
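         /* For reference (illustration only), the patchable region is
            always the same 13 bytes:
               49 BB <8-byte address>   movabsq $disp_cp_chain_me, %r11
               41 FF D3                 call *%r11
            Keeping that length fixed is what lets the chaining and
            unchaining code find and rewrite it just by backing up from
            the return address, as noted above. */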
2774
2775 /* Fix up the conditional jump, if there was one. */
2776 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
2777 Int delta = p - ptmp;
2778 vassert(delta > 0 && delta < 40);
2779 *ptmp = toUChar(delta-1);
2780 }
2781 goto done;
2782 }
2783
2784 case Ain_XIndir: {
2785 /* We're generating transfers that could lead indirectly to a
2786 chain-me, so we need to be sure this is actually allowed --
2787 no-redir translations are not allowed to reach normal
2788 translations without going through the scheduler. That means
2789 no XDirects or XIndirs out from no-redir translations.
2790 Hence: */
2791 vassert(disp_cp_xindir != NULL);
2792
2793 /* Use ptmp for backpatching conditional jumps. */
2794 ptmp = NULL;
2795
2796 /* First off, if this is conditional, create a conditional
2797 jump over the rest of it. */
2798 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2799 /* jmp fwds if !condition */
2800 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
2801 ptmp = p; /* fill in this bit later */
2802 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2803 }
2804
2805 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2806 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2807 *p++ = 0x89;
2808 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
2809
2810 /* get $disp_cp_xindir into %r11 */
2811 if (fitsIn32Bits(Ptr_to_ULong(disp_cp_xindir))) {
2812 /* use a shorter encoding */
2813 /* movl sign-extend(disp_cp_xindir), %r11 */
2814 *p++ = 0x49;
2815 *p++ = 0xC7;
2816 *p++ = 0xC3;
2817 p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
2818 } else {
2819 /* movabsq $disp_cp_xindir, %r11 */
2820 *p++ = 0x49;
2821 *p++ = 0xBB;
2822 p = emit64(p, Ptr_to_ULong(disp_cp_xindir));
2823 }
2824
2825 /* jmp *%r11 */
2826 *p++ = 0x41;
2827 *p++ = 0xFF;
2828 *p++ = 0xE3;
2829
2830 /* Fix up the conditional jump, if there was one. */
2831 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
2832 Int delta = p - ptmp;
2833 vassert(delta > 0 && delta < 40);
2834 *ptmp = toUChar(delta-1);
2835 }
2836 goto done;
2837 }
2838
2839 case Ain_XAssisted: {
2840 /* Use ptmp for backpatching conditional jumps. */
2841 ptmp = NULL;
2842
2843 /* First off, if this is conditional, create a conditional
2844 jump over the rest of it. */
2845 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2846 /* jmp fwds if !condition */
2847 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
2848 ptmp = p; /* fill in this bit later */
2849 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2850 }
2851
2852 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
2853 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2854 *p++ = 0x89;
2855 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
2856 /* movl $magic_number, %ebp. Since these numbers are all small positive
2857 integers, we can get away with "movl $N, %ebp" rather than
2858 the longer "movq $N, %rbp". */
2859 UInt trcval = 0;
2860 switch (i->Ain.XAssisted.jk) {
2861 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2862 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2863 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
2864 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2865 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2866 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2867 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
2868 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
2869 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2870 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2871 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2872 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2873 /* We don't expect to see the following being assisted. */
2874 case Ijk_Ret:
2875 case Ijk_Call:
2876 /* fallthrough */
2877 default:
2878 ppIRJumpKind(i->Ain.XAssisted.jk);
2879 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
2880 }
2881 vassert(trcval != 0);
2882 *p++ = 0xBD;
2883 p = emit32(p, trcval);
2884 /* movabsq $disp_assisted, %r11 */
2885 *p++ = 0x49;
2886 *p++ = 0xBB;
2887 p = emit64(p, Ptr_to_ULong(disp_cp_xassisted));
2888 /* jmp *%r11 */
2889 *p++ = 0x41;
2890 *p++ = 0xFF;
2891 *p++ = 0xE3;
2892
2893 /* Fix up the conditional jump, if there was one. */
2894 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
2895 Int delta = p - ptmp;
2896 vassert(delta > 0 && delta < 40);
2897 *ptmp = toUChar(delta-1);
2898 }
2899 goto done;
2900 }
2901
2902 case Ain_CMov64:
2903 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
2904 if (i->Ain.CMov64.src->tag == Arm_Reg) {
2905 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2906 *p++ = 0x0F;
2907 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
2908 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
2909 goto done;
2910 }
2911 if (i->Ain.CMov64.src->tag == Arm_Mem) {
2912 *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2913 *p++ = 0x0F;
2914 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
2915 p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
2916 goto done;
2917 }
2918 break;
2919
2920 case Ain_MovxLQ:
2921 /* No, _don't_ ask me why the sense of the args has to be
2922 different in the S vs Z case. I don't know. */
2923 if (i->Ain.MovxLQ.syned) {
2924 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
2925 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2926 *p++ = 0x63;
2927 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
2928 } else {
2929 /* Produce a 32-bit reg-reg move, since the implicit
2930 zero-extend does what we want. */
2931 *p++ = clearWBit (
2932 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
2933 *p++ = 0x89;
2934 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
2935 }
2936 goto done;
2937
2938 case Ain_LoadEX:
2939 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
2940 /* movzbq */
2941 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2942 *p++ = 0x0F;
2943 *p++ = 0xB6;
2944 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2945 goto done;
2946 }
2947 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
2948 /* movzwq */
2949 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2950 *p++ = 0x0F;
2951 *p++ = 0xB7;
2952 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2953 goto done;
2954 }
2955 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
2956 /* movzlq */
2957 /* This isn't really an existing AMD64 instruction per se.
2958 Rather, we have to do a 32-bit load. Because a 32-bit
2959 write implicitly clears the upper 32 bits of the target
2960 register, we get what we want. */
2961 *p++ = clearWBit(
2962 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
2963 *p++ = 0x8B;
2964 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
2965 goto done;
2966 }
2967 break;
2968
2969 case Ain_Set64:
2970 /* Make the destination register be 1 or 0, depending on whether
2971 the relevant condition holds. Complication: the top 56 bits
2972 of the destination should be forced to zero, but doing 'xorq
2973 %r,%r' kills the flag(s) we are about to read. Sigh. So
2974          start off by moving $0 into the dest. */
2975 reg = iregBits3210(i->Ain.Set64.dst);
2976 vassert(reg < 16);
2977
2978 /* movq $0, %dst */
2979 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
2980 *p++ = 0xC7;
2981 *p++ = toUChar(0xC0 + (reg & 7));
2982 p = emit32(p, 0);
2983
2984 /* setb lo8(%dst) */
2985       /* note, 8-bit register rex trickiness. Be careful here. */
2986 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
2987 *p++ = 0x0F;
2988 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
2989 *p++ = toUChar(0xC0 + (reg & 7));
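      /* Worked example (illustration only): with %rdx as destination
         this emits
            48 C7 C2 00 00 00 00   movq $0, %rdx
            40 0F 9x C2            set<cond> %dl
         The mov really must come first: "xorq %rdx,%rdx" would be
         shorter, but it clobbers the very flags the setcc is about to
         read. */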
2990 goto done;
2991
2992 case Ain_Bsfr64:
2993 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
2994 *p++ = 0x0F;
2995 if (i->Ain.Bsfr64.isFwds) {
2996 *p++ = 0xBC;
2997 } else {
2998 *p++ = 0xBD;
2999 }
3000 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3001 goto done;
3002
3003 case Ain_MFence:
3004 /* mfence */
3005 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3006 goto done;
3007
3008 case Ain_ACAS:
3009 /* lock */
3010 *p++ = 0xF0;
3011 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3012 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3013 in %rbx. The new-value register is hardwired to be %rbx
3014 since dealing with byte integer registers is too much hassle,
3015 so we force the register operand to %rbx (could equally be
3016 %rcx or %rdx). */
3017 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3018 if (i->Ain.ACAS.sz != 8)
3019 rex = clearWBit(rex);
3020
3021 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3022 *p++ = 0x0F;
3023 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3024 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
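      /* Worked example (illustration only): a 64-bit ACAS on (%rdi)
         comes out as F0 48 0F B1 1F, i.e. "lock cmpxchgq %rbx,(%rdi)";
         the 32-bit variant merely loses the REX.W bit: F0 40 0F B1 1F. */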
3025 goto done;
3026
3027 case Ain_DACAS:
3028 /* lock */
3029 *p++ = 0xF0;
3030 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3031 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3032 aren't encoded in the insn. */
3033          rex = rexAMode_M( fake(1), i->Ain.DACAS.addr );
3034          if (i->Ain.DACAS.sz != 8)
3035 rex = clearWBit(rex);
3036 *p++ = rex;
3037 *p++ = 0x0F;
3038 *p++ = 0xC7;
3039 p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
3040 goto done;
3041
3042 case Ain_A87Free:
3043 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3044 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3045 p = do_ffree_st(p, 7-j);
3046 }
3047 goto done;
3048
3049 case Ain_A87PushPop:
3050 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3051 if (i->Ain.A87PushPop.isPush) {
3052 /* Load from memory into %st(0): flds/fldl amode */
3053 *p++ = clearWBit(
3054 rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
3055 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3056 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
3057 } else {
3058 /* Dump %st(0) to memory: fstps/fstpl amode */
3059 *p++ = clearWBit(
3060 rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
3061 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3062 p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
3063 goto done;
3064 }
3065 goto done;
3066
3067 case Ain_A87FpOp:
3068 switch (i->Ain.A87FpOp.op) {
3069 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3070 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3071 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3072 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3073 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3074 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3075 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3076 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3077 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3078 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3079 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3080 case Afp_TAN:
3081 /* fptan pushes 1.0 on the FP stack, except when the
3082 argument is out of range. Hence we have to do the
3083 instruction, then inspect C2 to see if there is an out
3084 of range condition. If there is, we skip the fincstp
3085 that is used by the in-range case to get rid of this
3086 extra 1.0 value. */
3087 *p++ = 0xD9; *p++ = 0xF2; // fptan
3088 *p++ = 0x50; // pushq %rax
3089 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3090 *p++ = 0x66; *p++ = 0xA9;
3091 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3092 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3093 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3094 *p++ = 0x58; // after_fincstp: popq %rax
3095 break;
3096 default:
3097 goto bad;
3098 }
3099 goto done;
3100
3101 case Ain_A87LdCW:
3102 *p++ = clearWBit(
3103 rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
3104 *p++ = 0xD9;
3105 p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
3106 goto done;
3107
3108 case Ain_A87StSW:
3109 *p++ = clearWBit(
3110 rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
3111 *p++ = 0xDD;
3112 p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
3113 goto done;
3114
3115 case Ain_Store:
3116 if (i->Ain.Store.sz == 2) {
3117          /* This just goes to show the craziness of the instruction
3118 set encoding. We have to insert two prefix bytes, but be
3119 careful to avoid a conflict in what the size should be, by
3120 ensuring that REX.W = 0. */
3121 *p++ = 0x66; /* override to 16-bits */
3122 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3123 *p++ = 0x89;
3124 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3125 goto done;
3126 }
3127 if (i->Ain.Store.sz == 4) {
3128 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3129 *p++ = 0x89;
3130 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3131 goto done;
3132 }
3133 if (i->Ain.Store.sz == 1) {
3134 /* This is one place where it would be wrong to skip emitting
3135 a rex byte of 0x40, since the mere presence of rex changes
3136 the meaning of the byte register access. Be careful. */
3137 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3138 *p++ = 0x88;
3139 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
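            /* Worked example (illustration only): storing the low byte
               of %rsi to (%rdi) must come out as 40 88 37, i.e.
               "movb %sil,(%rdi)".  Dropping the apparently pointless 40
               prefix would silently turn it into "movb %dh,(%rdi)". */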
3140 goto done;
3141 }
3142 break;
3143
3144 case Ain_LdMXCSR:
3145 *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
3146 *p++ = 0x0F;
3147 *p++ = 0xAE;
3148 p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
3149 goto done;
3150
3151 case Ain_SseUComIS:
3152 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3153 /* ucomi[sd] %srcL, %srcR */
3154 if (i->Ain.SseUComIS.sz == 8) {
3155 *p++ = 0x66;
3156 } else {
3157 goto bad;
3158 vassert(i->Ain.SseUComIS.sz == 4);
3159 }
3160 *p++ = clearWBit (
3161 rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
3162 vreg2ireg(i->Ain.SseUComIS.srcR) ));
3163 *p++ = 0x0F;
3164 *p++ = 0x2E;
3165 p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
3166 vreg2ireg(i->Ain.SseUComIS.srcR) );
3167 /* pushfq */
3168 *p++ = 0x9C;
3169 /* popq %dst */
3170 *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
3171 *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
3172 goto done;
3173
3174 case Ain_SseSI2SF:
3175       /* cvtsi2s[sd] %src, %dst */
3176 rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
3177 i->Ain.SseSI2SF.src );
3178 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3179 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3180 *p++ = 0x0F;
3181 *p++ = 0x2A;
3182 p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
3183 i->Ain.SseSI2SF.src );
3184 goto done;
3185
3186 case Ain_SseSF2SI:
3187       /* cvts[sd]2si %src, %dst */
3188 rex = rexAMode_R( i->Ain.SseSF2SI.dst,
3189 vreg2ireg(i->Ain.SseSF2SI.src) );
3190 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3191 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3192 *p++ = 0x0F;
3193 *p++ = 0x2D;
3194 p = doAMode_R( p, i->Ain.SseSF2SI.dst,
3195 vreg2ireg(i->Ain.SseSF2SI.src) );
3196 goto done;
3197
3198 case Ain_SseSDSS:
3199 /* cvtsd2ss/cvtss2sd %src, %dst */
3200 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3201 *p++ = clearWBit(
3202 rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
3203 vreg2ireg(i->Ain.SseSDSS.src) ));
3204 *p++ = 0x0F;
3205 *p++ = 0x5A;
3206 p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
3207 vreg2ireg(i->Ain.SseSDSS.src) );
3208 goto done;
3209
3210 case Ain_SseLdSt:
3211 if (i->Ain.SseLdSt.sz == 8) {
3212 *p++ = 0xF2;
3213 } else
3214 if (i->Ain.SseLdSt.sz == 4) {
3215 *p++ = 0xF3;
3216 } else
3217 if (i->Ain.SseLdSt.sz != 16) {
3218 vassert(0);
3219 }
3220 *p++ = clearWBit(
3221 rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
3222 *p++ = 0x0F;
3223 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3224 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
3225 goto done;
3226
3227 case Ain_SseLdzLO:
3228 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3229 /* movs[sd] amode, %xmm-dst */
3230 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3231 *p++ = clearWBit(
3232 rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
3233 i->Ain.SseLdzLO.addr));
3234 *p++ = 0x0F;
3235 *p++ = 0x10;
3236 p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
3237 i->Ain.SseLdzLO.addr);
3238 goto done;
3239
3240 case Ain_Sse32Fx4:
3241 xtra = 0;
3242 *p++ = clearWBit(
3243 rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
3244 vreg2ireg(i->Ain.Sse32Fx4.src) ));
3245 *p++ = 0x0F;
3246 switch (i->Ain.Sse32Fx4.op) {
3247 case Asse_ADDF: *p++ = 0x58; break;
3248 case Asse_DIVF: *p++ = 0x5E; break;
3249 case Asse_MAXF: *p++ = 0x5F; break;
3250 case Asse_MINF: *p++ = 0x5D; break;
3251 case Asse_MULF: *p++ = 0x59; break;
3252 case Asse_RCPF: *p++ = 0x53; break;
3253 case Asse_RSQRTF: *p++ = 0x52; break;
3254 case Asse_SQRTF: *p++ = 0x51; break;
3255 case Asse_SUBF: *p++ = 0x5C; break;
3256 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3257 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3258 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3259 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3260 default: goto bad;
3261 }
3262 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
3263 vreg2ireg(i->Ain.Sse32Fx4.src) );
3264 if (xtra & 0x100)
3265 *p++ = toUChar(xtra & 0xFF);
3266 goto done;
3267
3268 case Ain_Sse64Fx2:
3269 xtra = 0;
3270 *p++ = 0x66;
3271 *p++ = clearWBit(
3272 rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
3273 vreg2ireg(i->Ain.Sse64Fx2.src) ));
3274 *p++ = 0x0F;
3275 switch (i->Ain.Sse64Fx2.op) {
3276 case Asse_ADDF: *p++ = 0x58; break;
3277 case Asse_DIVF: *p++ = 0x5E; break;
3278 case Asse_MAXF: *p++ = 0x5F; break;
3279 case Asse_MINF: *p++ = 0x5D; break;
3280 case Asse_MULF: *p++ = 0x59; break;
3281 case Asse_SQRTF: *p++ = 0x51; break;
3282 case Asse_SUBF: *p++ = 0x5C; break;
3283 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3284 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3285 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3286 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3287 default: goto bad;
3288 }
3289 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
3290 vreg2ireg(i->Ain.Sse64Fx2.src) );
3291 if (xtra & 0x100)
3292 *p++ = toUChar(xtra & 0xFF);
3293 goto done;
3294
3295 case Ain_Sse32FLo:
3296 xtra = 0;
3297 *p++ = 0xF3;
3298 *p++ = clearWBit(
3299 rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
3300 vreg2ireg(i->Ain.Sse32FLo.src) ));
3301 *p++ = 0x0F;
3302 switch (i->Ain.Sse32FLo.op) {
3303 case Asse_ADDF: *p++ = 0x58; break;
3304 case Asse_DIVF: *p++ = 0x5E; break;
3305 case Asse_MAXF: *p++ = 0x5F; break;
3306 case Asse_MINF: *p++ = 0x5D; break;
3307 case Asse_MULF: *p++ = 0x59; break;
3308 case Asse_RCPF: *p++ = 0x53; break;
3309 case Asse_RSQRTF: *p++ = 0x52; break;
3310 case Asse_SQRTF: *p++ = 0x51; break;
3311 case Asse_SUBF: *p++ = 0x5C; break;
3312 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3313 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3314 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3315 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3316 default: goto bad;
3317 }
3318 p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
3319 vreg2ireg(i->Ain.Sse32FLo.src) );
3320 if (xtra & 0x100)
3321 *p++ = toUChar(xtra & 0xFF);
3322 goto done;
3323
3324 case Ain_Sse64FLo:
3325 xtra = 0;
3326 *p++ = 0xF2;
3327 *p++ = clearWBit(
3328 rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
3329 vreg2ireg(i->Ain.Sse64FLo.src) ));
3330 *p++ = 0x0F;
3331 switch (i->Ain.Sse64FLo.op) {
3332 case Asse_ADDF: *p++ = 0x58; break;
3333 case Asse_DIVF: *p++ = 0x5E; break;
3334 case Asse_MAXF: *p++ = 0x5F; break;
3335 case Asse_MINF: *p++ = 0x5D; break;
3336 case Asse_MULF: *p++ = 0x59; break;
3337 case Asse_SQRTF: *p++ = 0x51; break;
3338 case Asse_SUBF: *p++ = 0x5C; break;
3339 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3340 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3341 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3342 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3343 default: goto bad;
3344 }
3345 p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
3346 vreg2ireg(i->Ain.Sse64FLo.src) );
3347 if (xtra & 0x100)
3348 *p++ = toUChar(xtra & 0xFF);
3349 goto done;

   case Ain_SseReRg:
#     define XX(_n) *p++ = (_n)

      rex = clearWBit(
            rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
                        vreg2ireg(i->Ain.SseReRg.src) ));

      switch (i->Ain.SseReRg.op) {
         case Asse_MOV:     /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
         case Asse_OR:                 XX(rex); XX(0x0F); XX(0x56); break;
         case Asse_XOR:                XX(rex); XX(0x0F); XX(0x57); break;
         case Asse_AND:                XX(rex); XX(0x0F); XX(0x54); break;
         case Asse_ANDN:               XX(rex); XX(0x0F); XX(0x55); break;
         case Asse_PACKSSD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
         case Asse_PACKSSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
         case Asse_PACKUSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
         case Asse_ADD8:     XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
         case Asse_ADD16:    XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
         case Asse_ADD32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
         case Asse_ADD64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
         case Asse_QADD8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
         case Asse_QADD16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
         case Asse_QADD8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
         case Asse_QADD16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
         case Asse_AVG8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
         case Asse_AVG16U:   XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
         case Asse_CMPEQ8:   XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
         case Asse_CMPEQ16:  XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
         case Asse_CMPEQ32:  XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
         case Asse_CMPGT8S:  XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
         case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
         case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
         case Asse_MAX16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
         case Asse_MAX8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
         case Asse_MIN16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
         case Asse_MIN8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
         case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
         case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
         case Asse_MUL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
         case Asse_SHL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
         case Asse_SHL32:    XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
         case Asse_SHL64:    XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
         case Asse_SAR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
         case Asse_SAR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
         case Asse_SHR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
         case Asse_SHR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
         case Asse_SHR64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
         case Asse_SUB8:     XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
         case Asse_SUB16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
         case Asse_SUB32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
         case Asse_SUB64:    XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
         case Asse_QSUB8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
         case Asse_QSUB16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
         case Asse_QSUB8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
         case Asse_QSUB16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
         case Asse_UNPCKHB:  XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
         case Asse_UNPCKHW:  XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
         case Asse_UNPCKHD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
         case Asse_UNPCKHQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
         case Asse_UNPCKLB:  XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
         case Asse_UNPCKLW:  XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
         case Asse_UNPCKLD:  XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
         case Asse_UNPCKLQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
                       vreg2ireg(i->Ain.SseReRg.src) );
#     undef XX
      goto done;
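
   /* Note: in the table above, the 66 prefix selects the SSE2 integer
      forms of the two-byte 0F opcodes (paddb, psubw, punpck*, ...),
      while the unprefixed entries (MOV/OR/XOR/AND/ANDN) are the plain
      0F map ops movups/orps/xorps/andps/andnps.  The register-only
      ModRM byte is appended by the doAMode_R call just above, so for
      example (illustrative only) a register-to-register paddw would
      be emitted as 66 <REX> 0F FD <ModRM>. */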

   case Ain_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movaps %src, %dst */
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
                         vreg2ireg(i->Ain.SseCMov.src) ));
      *p++ = 0x0F;
      *p++ = 0x28;
      p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
                       vreg2ireg(i->Ain.SseCMov.src) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Ain_SseShuf:
      *p++ = 0x66;
      *p++ = clearWBit(
             rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
                         vreg2ireg(i->Ain.SseShuf.src) ));
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
                       vreg2ireg(i->Ain.SseShuf.src) );
      *p++ = (UChar)(i->Ain.SseShuf.order);
      goto done;
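
   /* The SseShuf sequence above is a pshufd xmm,xmm,imm8
      (66 [REX] 0F 70 /r ib); the .order field supplies the imm8
      shuffle control.  The SseCMov case synthesises a conditional
      move by branching over an unconditional movaps (0F 28): the
      rel8 distance is backpatched into *(ptmp-1) once the length
      of the movaps encoding is known. */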

   //uu case Ain_AvxLdSt: {
   //uu    UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
   //uu                           i->Ain.AvxLdSt.addr );
   //uu    p = emitVexPrefix(p, vex);
   //uu    *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
   //uu    p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
   //uu    goto done;
   //uu }

   case Ain_EvCheck: {
      /* We generate:
            (3 bytes)  decl 8(%rbp)    8 == offsetof(host_EvC_COUNTER)
            (2 bytes)  jns  nofail     expected taken
            (3 bytes)  jmp* 0(%rbp)    0 == offsetof(host_EvC_FAILADDR)
            nofail:
      */
      /* This is heavily asserted re instruction lengths.  It needs
         to be.  If we are handed unexpected forms of .amCounter or
         .amFailAddr -- basically, anything that's not of the form
         uimm7(%rbp) -- the assertions below are likely to fail. */
      /* Note also that after the decl we must be very careful not
         to read the carry flag, else we get a partial flags stall.
         js/jns avoids that, though. */
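      /* Concretely, assuming the amodes really are 8(%rbp) and
         0(%rbp), the 8 bytes emitted should be:
            FF 4D 08    decl 8(%rbp)    (FF /1, disp8 amode)
            79 03       jns  .+3
            FF 65 00    jmp* 0(%rbp)    (FF /4, disp8 amode)
         which is what the length asserts below check. */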
      UChar* p0 = p;
      /* --- decl 8(%rbp) --- */
      /* Need to compute the REX byte for the decl in order to prove
         that we don't need it, since this is a 32-bit decrement and
         all registers involved in the amode are < r8.  "fake(1)"
         because there's no register in this encoding; instead the
         register field is used as a sub opcode.  The encoding for
         "decl r/m32" is FF /1, hence the fake(1). */
      rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
      if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
      *p++ = 0xFF;
      p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
      vassert(p - p0 == 3);
      /* --- jns nofail --- */
      *p++ = 0x79;
      *p++ = 0x03; /* need to check this 0x03 after the next insn */
      vassert(p - p0 == 5);
      /* --- jmp* 0(%rbp) --- */
      /* Once again, verify we don't need REX.  The encoding is FF /4.
         We don't need REX.W since by default FF /4 in 64-bit mode
         implies a 64 bit load. */
      rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
      if (rex != 0x40) goto bad;
      *p++ = 0xFF;
      p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
      vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
      /* And crosscheck .. */
      vassert(evCheckSzB_AMD64() == 8);
      goto done;
   }

   case Ain_ProfInc: {
      /* We generate   movabsq $0, %r11
                       incq (%r11)
         in the expectation that a later call to LibVEX_patchProfCtr
         will be used to fill in the immediate field once the right
         value is known.
            49 BB 00 00 00 00 00 00 00 00
            49 FF 03
      */
      *p++ = 0x49; *p++ = 0xBB;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
      /* Tell the caller .. */
      vassert(!(*is_profInc));
      *is_profInc = True;
      goto done;
   }

   default:
      goto bad;
   }

  bad:
   ppAMD64Instr(i, mode64);
   vpanic("emit_AMD64Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];

#  undef fake
}


/* How big is an event check?  See case for Ain_EvCheck in
   emit_AMD64Instr just above.  That crosschecks what this returns,
   so we can tell if we're inconsistent. */
Int evCheckSzB_AMD64 ( void )
{
   return 8;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
                                   void* disp_cp_chain_me_EXPECTED,
                                   void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movabsq $disp_cp_chain_me_EXPECTED, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
        41 FF D3
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
   vassert(p[10] == 0x41);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0xD3);
   /* And what we want to change it to is either:
        (general case):
          movabsq $place_to_jump_to, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to>
          41 FF E3
        So it's the same length (convenient, huh) and we don't
        need to change all the bits.
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq disp32   where disp32 is relative to the next insn
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B

      In both cases the replacement has the same length as the
      original.  To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- one billion, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */
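   /* As a purely hypothetical worked example: if place_to_chain were
      at 0x1000 and place_to_jump_to at 0x2345, the short form would
      be usable, with disp32 = 0x2345 - (0x1000 + 5) = 0x1340, giving
      the bytes E9 40 13 00 00 followed by the four ud2s. */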
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5. */
   Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
   Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;

   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
                       "using long jmp\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      p[0]  = 0xE9;
      p[1]  = (delta >> 0) & 0xFF;
      p[2]  = (delta >> 8) & 0xFF;
      p[3]  = (delta >> 16) & 0xFF;
      p[4]  = (delta >> 24) & 0xFF;
      p[5]  = 0x0F; p[6]  = 0x0B;
      p[7]  = 0x0F; p[8]  = 0x0B;
      p[9]  = 0x0F; p[10] = 0x0B;
      p[11] = 0x0F; p[12] = 0x0B;
      /* sanity check on the delta -- top 32 are all 0 or all 1 */
      delta >>= 32;
      vassert(delta == 0LL || delta == -1LL);
   } else {
      /* Minimal modifications from the starting sequence. */
      *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
      p[12] = 0xE3;
   }
   VexInvalRange vir = { (HWord)place_to_chain, 13 };
   return vir;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
                                     void* place_to_jump_to_EXPECTED,
                                     void* disp_cp_chain_me )
{
   /* What we're expecting to see is either:
        (general case)
          movabsq $place_to_jump_to_EXPECTED, %r11
          jmpq *%r11
        viz
          49 BB <8 bytes value == place_to_jump_to_EXPECTED>
          41 FF E3
      ---OR---
        in the case where the displacement falls within 32 bits
          jmpq d32
          ud2; ud2; ud2; ud2
        viz
          E9 <4 bytes == disp32>
          0F 0B 0F 0B 0F 0B 0F 0B
   */
   UChar* p     = (UChar*)place_to_unchain;
   Bool   valid = False;
   if (p[0] == 0x49 && p[1] == 0xBB
       && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
       && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
      /* it's the long form */
      valid = True;
   }
   else
   if (p[0] == 0xE9
       && p[5]  == 0x0F && p[6]  == 0x0B
       && p[7]  == 0x0F && p[8]  == 0x0B
       && p[9]  == 0x0F && p[10] == 0x0B
       && p[11] == 0x0F && p[12] == 0x0B) {
      /* It's the short form.  Check the offset is right. */
      Int  s32 = *(Int*)(&p[1]);
      Long s64 = (Long)s32;
      if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
      }
   }
   vassert(valid);
   /* And what we want to change it to is:
        movabsq $disp_cp_chain_me, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me>
        41 FF D3
      So it's the same length (convenient, huh).
   */
   p[0]  = 0x49;
   p[1]  = 0xBB;
   *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
   p[10] = 0x41;
   p[11] = 0xFF;
   p[12] = 0xD3;
   VexInvalRange vir = { (HWord)place_to_unchain, 13 };
   return vir;
}


/* Patch the counter address into a profile inc point, as previously
   created by the Ain_ProfInc case for emit_AMD64Instr. */
VexInvalRange patchProfInc_AMD64 ( void*  place_to_patch,
                                   ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 8);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(p[2] == 0x00);
   vassert(p[3] == 0x00);
   vassert(p[4] == 0x00);
   vassert(p[5] == 0x00);
   vassert(p[6] == 0x00);
   vassert(p[7] == 0x00);
   vassert(p[8] == 0x00);
   vassert(p[9] == 0x00);
   vassert(p[10] == 0x49);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0x03);
   ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
   p[2] = imm64 & 0xFF; imm64 >>= 8;
   p[3] = imm64 & 0xFF; imm64 >>= 8;
   p[4] = imm64 & 0xFF; imm64 >>= 8;
   p[5] = imm64 & 0xFF; imm64 >>= 8;
   p[6] = imm64 & 0xFF; imm64 >>= 8;
   p[7] = imm64 & 0xFF; imm64 >>= 8;
   p[8] = imm64 & 0xFF; imm64 >>= 8;
   p[9] = imm64 & 0xFF; imm64 >>= 8;
   VexInvalRange vir = { (HWord)place_to_patch, 13 };
   return vir;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/
