/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[0] = hregX86_EAX();
   (*arr)[1] = hregX86_EBX();
   (*arr)[2] = hregX86_ECX();
   (*arr)[3] = hregX86_EDX();
   (*arr)[4] = hregX86_ESI();
   (*arr)[5] = hregX86_EDI();
   (*arr)[6] = hregX86_FAKE0();
   (*arr)[7] = hregX86_FAKE1();
   (*arr)[8] = hregX86_FAKE2();
   (*arr)[9] = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}
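
/* Illustrative sketch, not part of the original file: how a client
   might enumerate the allocable set.  The variable names and the
   loop are hypothetical; getAllocableRegs_X86 and ppHRegX86 are the
   real functions defined above.

      Int   nregs;
      HReg* arr;
      getAllocableRegs_X86(&nregs, &arr);
      for (Int k = 0; k < nregs; k++) {
         ppHRegX86(arr[k]);   -- %eax..%edi, %fake0..%fake5, %xmm0..%xmm7
         vex_printf("\n");
      }

   Note that %esp and %ebp are absent from the set, presumably
   because they are reserved (stack pointer and guest-state
   addressing respectively). */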


/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("showX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}
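
/* Example (sketch): the two forms map onto the usual x86 address
   syntax.  Assuming the real-register constructors above:

      X86AMode_IR  ( 4, hregX86_EBP() )            -- 4(%ebp)
      X86AMode_IRRS( 7, hregX86_EAX(),
                        hregX86_EBX(), 2 )         -- 7(%eax,%ebx,4)

   (shift 2 gives a scale of 1<<2 == 4, as ppX86AMode below shows). */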

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}
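
/* Example (sketch): the three X86RMI forms, e.g. as the source
   operand of an Alu32R instruction (see X86Instr_Alu32R below):

      X86RMI_Imm(0x42)                            -- $0x42
      X86RMI_Reg(hregX86_ECX())                   -- %ecx
      X86RMI_Mem(X86AMode_IR(0, hregX86_ESI()))   -- (%esi)
*/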

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}
/* Because an X86RM can be used as either a source or a destination
   operand, we have to supply a mode -- pertaining to the operand as
   a whole -- indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}
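
/* Example (sketch): the mode applies only to the Xrm_Reg form.  For
   a memory operand the amode's registers are read regardless of how
   the operand as a whole is used, since they merely compute the
   address:

      addRegUsage_X86RM(u, X86RM_Mem(X86AMode_IR(4, hregX86_EBX())),
                           HRmWrite);    -- records %ebx as Read
      addRegUsage_X86RM(u, X86RM_Reg(hregX86_EBX()),
                           HRmWrite);    -- records %ebx as Written
*/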

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV:  return "mov";
      case Xalu_CMP:  return "cmp";
      case Xalu_ADD:  return "add";
      case Xalu_SUB:  return "sub";
      case Xalu_ADC:  return "adc";
      case Xalu_SBB:  return "sbb";
      case Xalu_AND:  return "and";
      case Xalu_OR:   return "or";
      case Xalu_XOR:  return "xor";
      case Xalu_MUL:  return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
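
/* Example (sketch): "addl $1,%eax" and "cmpl %ecx,%eax" would be
   built as

      X86Instr_Alu32R( Xalu_ADD, X86RMI_Imm(1), hregX86_EAX() );
      X86Instr_Alu32R( Xalu_CMP, X86RMI_Reg(hregX86_ECX()),
                       hregX86_EAX() );

   (in practice dst would usually be a virtual register rather than
   a real one). */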
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_Lea32;
   i->Xin.Lea32.am    = am;
   i->Xin.Lea32.dst   = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_MulL;
   i->Xin.MulL.syned  = syned;
   i->Xin.MulL.src    = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push ( X86RMI* src ) {
   X86Instr* i     = LibVEX_Alloc(sizeof(X86Instr));
   i->tag          = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
                          RetLoc rloc ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   i->Xin.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 3);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i             = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                  = Xin_XDirect;
   i->Xin.XDirect.dstGA    = dstGA;
   i->Xin.XDirect.amEIP    = amEIP;
   i->Xin.XDirect.cond     = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond  = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond  = cond;
   i->Xin.XAssisted.jk    = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
                            |VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i      = LibVEX_Alloc(sizeof(X86Instr));
   i->tag           = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
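
/* A note on ACAS/DACAS, inferred from getRegUsage_X86Instr and
   ppX86Instr below rather than stated anywhere explicitly: ACAS is
   rendered as "lock cmpxchg", with the expected old value in %eax
   and the new value in %ebx (%eax modified, %ebx read); DACAS is
   "lock cmpxchg8b", with %edx:%eax holding the expected value and
   %ecx:%ebx the new one. */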

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_FpLdCW;
   i->Xin.FpLdCW.addr   = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc(sizeof(X86Instr));
   i->tag            = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i            = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                 = Xin_SseConst;
   i->Xin.SseConst.con    = con;
   i->Xin.SseConst.dst    = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i           = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz    = toUChar(sz);
   i->Xin.SseLdzLO.reg   = reg;
   i->Xin.SseLdzLO.addr  = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i        = LibVEX_Alloc(sizeof(X86Instr));
   i->tag             = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i         = LibVEX_Alloc(sizeof(X86Instr));
   i->tag              = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i          = LibVEX_Alloc(sizeof(X86Instr));
   i->tag               = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i               = LibVEX_Alloc(sizeof(X86Instr));
   i->tag                    = Xin_EvCheck;
   i->Xin.EvCheck.amCounter  = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag      = Xin_ProfInc;
   return i;
}

void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                     i->Xin.ACAS.sz==1 ? 'b'
                                       : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " ,  i->Xin.FpLdSt.sz==10 ? 'T'
                                   : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                  : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                       i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead,  i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead,   i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                       i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                       i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
1453           addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
1454           addHRegUse(u, unary ? HRmWrite : HRmModify,
1455                         i->Xin.Sse64FLo.dst);
1456           return;
1457        case Xin_SseReRg:
1458           if (i->Xin.SseReRg.op == Xsse_XOR
1459               && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
1460              /* reg-alloc needs to understand 'xor r,r' as a write of r */
1461              /* (as opposed to a rite of passage :-) */
1462              addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
1463           } else {
1464              addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
1465              addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
1466                               ? HRmWrite : HRmModify,
1467                            i->Xin.SseReRg.dst);
1468           }
1469           return;
1470        case Xin_SseCMov:
1471           addHRegUse(u, HRmRead,   i->Xin.SseCMov.src);
1472           addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
1473           return;
1474        case Xin_SseShuf:
1475           addHRegUse(u, HRmRead,  i->Xin.SseShuf.src);
1476           addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1477           return;
1478        case Xin_EvCheck:
1479           /* We expect both amodes only to mention %ebp, so this is in
1480              fact pointless, since %ebp isn't allocatable, but anyway.. */
1481           addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
1482           addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
1483           return;
1484        case Xin_ProfInc:
1485           /* does not use any registers. */
1486           return;
1487        default:
1488           ppX86Instr(i, False);
1489           vpanic("getRegUsage_X86Instr");
1490     }
1491  }
1492  
1493  /* local helper */
1494  static void mapReg( HRegRemap* m, HReg* r )
1495  {
1496     *r = lookupHRegRemap(m, *r);
1497  }
1498  
1499  void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
1500  {
1501     vassert(mode64 == False);
1502     switch (i->tag) {
1503        case Xin_Alu32R:
1504           mapRegs_X86RMI(m, i->Xin.Alu32R.src);
1505           mapReg(m, &i->Xin.Alu32R.dst);
1506           return;
1507        case Xin_Alu32M:
1508           mapRegs_X86RI(m, i->Xin.Alu32M.src);
1509           mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
1510           return;
1511        case Xin_Sh32:
1512           mapReg(m, &i->Xin.Sh32.dst);
1513           return;
1514        case Xin_Test32:
1515           mapRegs_X86RM(m, i->Xin.Test32.dst);
1516           return;
1517        case Xin_Unary32:
1518           mapReg(m, &i->Xin.Unary32.dst);
1519           return;
1520        case Xin_Lea32:
1521           mapRegs_X86AMode(m, i->Xin.Lea32.am);
1522           mapReg(m, &i->Xin.Lea32.dst);
1523           return;
1524        case Xin_MulL:
1525           mapRegs_X86RM(m, i->Xin.MulL.src);
1526           return;
1527        case Xin_Div:
1528           mapRegs_X86RM(m, i->Xin.Div.src);
1529           return;
1530        case Xin_Sh3232:
1531           mapReg(m, &i->Xin.Sh3232.src);
1532           mapReg(m, &i->Xin.Sh3232.dst);
1533           return;
1534        case Xin_Push:
1535           mapRegs_X86RMI(m, i->Xin.Push.src);
1536           return;
1537        case Xin_Call:
1538           return;
1539        case Xin_XDirect:
1540           mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
1541           return;
1542        case Xin_XIndir:
1543           mapReg(m, &i->Xin.XIndir.dstGA);
1544           mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
1545           return;
1546        case Xin_XAssisted:
1547           mapReg(m, &i->Xin.XAssisted.dstGA);
1548           mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
1549           return;
1550        case Xin_CMov32:
1551           mapRegs_X86RM(m, i->Xin.CMov32.src);
1552           mapReg(m, &i->Xin.CMov32.dst);
1553           return;
1554        case Xin_LoadEX:
1555           mapRegs_X86AMode(m, i->Xin.LoadEX.src);
1556           mapReg(m, &i->Xin.LoadEX.dst);
1557           return;
1558        case Xin_Store:
1559           mapReg(m, &i->Xin.Store.src);
1560           mapRegs_X86AMode(m, i->Xin.Store.dst);
1561           return;
1562        case Xin_Set32:
1563           mapReg(m, &i->Xin.Set32.dst);
1564           return;
1565        case Xin_Bsfr32:
1566           mapReg(m, &i->Xin.Bsfr32.src);
1567           mapReg(m, &i->Xin.Bsfr32.dst);
1568           return;
1569        case Xin_MFence:
1570           return;
1571        case Xin_ACAS:
1572           mapRegs_X86AMode(m, i->Xin.ACAS.addr);
1573           return;
1574        case Xin_DACAS:
1575           mapRegs_X86AMode(m, i->Xin.DACAS.addr);
1576           return;
1577        case Xin_FpUnary:
1578           mapReg(m, &i->Xin.FpUnary.src);
1579           mapReg(m, &i->Xin.FpUnary.dst);
1580           return;
1581        case Xin_FpBinary:
1582           mapReg(m, &i->Xin.FpBinary.srcL);
1583           mapReg(m, &i->Xin.FpBinary.srcR);
1584           mapReg(m, &i->Xin.FpBinary.dst);
1585           return;
1586        case Xin_FpLdSt:
1587           mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
1588           mapReg(m, &i->Xin.FpLdSt.reg);
1589           return;
1590        case Xin_FpLdStI:
1591           mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
1592           mapReg(m, &i->Xin.FpLdStI.reg);
1593           return;
1594        case Xin_Fp64to32:
1595           mapReg(m, &i->Xin.Fp64to32.src);
1596           mapReg(m, &i->Xin.Fp64to32.dst);
1597           return;
1598        case Xin_FpCMov:
1599           mapReg(m, &i->Xin.FpCMov.src);
1600           mapReg(m, &i->Xin.FpCMov.dst);
1601           return;
1602        case Xin_FpLdCW:
1603           mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
1604           return;
1605        case Xin_FpStSW_AX:
1606           return;
1607        case Xin_FpCmp:
1608           mapReg(m, &i->Xin.FpCmp.srcL);
1609           mapReg(m, &i->Xin.FpCmp.srcR);
1610           mapReg(m, &i->Xin.FpCmp.dst);
1611           return;
1612        case Xin_SseConst:
1613           mapReg(m, &i->Xin.SseConst.dst);
1614           return;
1615        case Xin_SseLdSt:
1616           mapReg(m, &i->Xin.SseLdSt.reg);
1617           mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
1618           return;
1619        case Xin_SseLdzLO:
1620           mapReg(m, &i->Xin.SseLdzLO.reg);
1621           mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
1622           return;
1623        case Xin_Sse32Fx4:
1624           mapReg(m, &i->Xin.Sse32Fx4.src);
1625           mapReg(m, &i->Xin.Sse32Fx4.dst);
1626           return;
1627        case Xin_Sse32FLo:
1628           mapReg(m, &i->Xin.Sse32FLo.src);
1629           mapReg(m, &i->Xin.Sse32FLo.dst);
1630           return;
1631        case Xin_Sse64Fx2:
1632           mapReg(m, &i->Xin.Sse64Fx2.src);
1633           mapReg(m, &i->Xin.Sse64Fx2.dst);
1634           return;
1635        case Xin_Sse64FLo:
1636           mapReg(m, &i->Xin.Sse64FLo.src);
1637           mapReg(m, &i->Xin.Sse64FLo.dst);
1638           return;
1639        case Xin_SseReRg:
1640           mapReg(m, &i->Xin.SseReRg.src);
1641           mapReg(m, &i->Xin.SseReRg.dst);
1642           return;
1643        case Xin_SseCMov:
1644           mapReg(m, &i->Xin.SseCMov.src);
1645           mapReg(m, &i->Xin.SseCMov.dst);
1646           return;
1647        case Xin_SseShuf:
1648           mapReg(m, &i->Xin.SseShuf.src);
1649           mapReg(m, &i->Xin.SseShuf.dst);
1650           return;
1651        case Xin_EvCheck:
1652           /* We expect both amodes only to mention %ebp, so this is in
1653              fact pointless, since %ebp isn't allocatable, but anyway.. */
1654           mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
1655           mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
1656           return;
1657        case Xin_ProfInc:
1658           /* does not use any registers. */
1659           return;
1660  
1661        default:
1662           ppX86Instr(i, mode64);
1663           vpanic("mapRegs_X86Instr");
1664     }
1665  }
1666  
1667  /* Figure out if i represents a reg-reg move, and if so assign the
1668     source and destination to *src and *dst.  If in doubt say No.  Used
1669     by the register allocator to do move coalescing.
1670  */
1671  Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
1672  {
1673     /* Moves between integer regs */
1674     if (i->tag == Xin_Alu32R) {
1675        if (i->Xin.Alu32R.op != Xalu_MOV)
1676           return False;
1677        if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
1678           return False;
1679        *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
1680        *dst = i->Xin.Alu32R.dst;
1681        return True;
1682     }
1683     /* Moves between FP regs */
1684     if (i->tag == Xin_FpUnary) {
1685        if (i->Xin.FpUnary.op != Xfp_MOV)
1686           return False;
1687        *src = i->Xin.FpUnary.src;
1688        *dst = i->Xin.FpUnary.dst;
1689        return True;
1690     }
1691     if (i->tag == Xin_SseReRg) {
1692        if (i->Xin.SseReRg.op != Xsse_MOV)
1693           return False;
1694        *src = i->Xin.SseReRg.src;
1695        *dst = i->Xin.SseReRg.dst;
1696        return True;
1697     }
1698     return False;
1699  }
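      /* Example (editorial): "movl %vr5, %vr7" arrives here as
         Xin_Alu32R(Xalu_MOV, Xrmi_Reg(vr5), vr7), so *src and *dst get
         set and the allocator may assign vr5 and vr7 to the same real
         register, deleting the move altogether. */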
1700  
1701  
1702  /* Generate x86 spill/reload instructions under the direction of the
1703     register allocator.  Note it's critical these don't write the
1704     condition codes. */
1705  
1706  void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1707                      HReg rreg, Int offsetB, Bool mode64 )
1708  {
1709     X86AMode* am;
1710     vassert(offsetB >= 0);
1711     vassert(!hregIsVirtual(rreg));
1712     vassert(mode64 == False);
1713     *i1 = *i2 = NULL;
1714     am = X86AMode_IR(offsetB, hregX86_EBP());
1715     switch (hregClass(rreg)) {
1716        case HRcInt32:
1717           *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
1718           return;
1719        case HRcFlt64:
1720           *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
1721           return;
1722        case HRcVec128:
1723           *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
1724           return;
1725        default:
1726           ppHRegClass(hregClass(rreg));
1727           vpanic("genSpill_X86: unimplemented regclass");
1728     }
1729  }
1730  
1731  void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1732                       HReg rreg, Int offsetB, Bool mode64 )
1733  {
1734     X86AMode* am;
1735     vassert(offsetB >= 0);
1736     vassert(!hregIsVirtual(rreg));
1737     vassert(mode64 == False);
1738     *i1 = *i2 = NULL;
1739     am = X86AMode_IR(offsetB, hregX86_EBP());
1740     switch (hregClass(rreg)) {
1741        case HRcInt32:
1742           *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
1743           return;
1744        case HRcFlt64:
1745           *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
1746           return;
1747        case HRcVec128:
1748           *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
1749           return;
1750        default:
1751           ppHRegClass(hregClass(rreg));
1752           vpanic("genReload_X86: unimplemented regclass");
1753     }
1754  }
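      /* Worked example (editorial): for an HRcInt32 rreg and offsetB
         0x40, genSpill_X86 yields "movl %reg, 0x40(%ebp)" and
         genReload_X86 the inverse "movl 0x40(%ebp), %reg"; the FP and
         vector classes use an 80-bit FpLdSt or a 128-bit SseLdSt
         against the same %ebp-relative slot. */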
1755  
1756  /* The given instruction reads the specified vreg exactly once, and
1757     that vreg is currently located at the given spill offset.  If
1758     possible, return a variant of the instruction which instead
1759     references the spill slot directly. */
1760  
1761  X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
1762  {
1763     vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
1764  
1765     /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1766        Convert to: src=RMI_Mem, dst=Reg
1767     */
1768     if (i->tag == Xin_Alu32R
1769         && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
1770             || i->Xin.Alu32R.op == Xalu_XOR)
1771         && i->Xin.Alu32R.src->tag == Xrmi_Reg
1772         && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
1773        vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
1774        return X86Instr_Alu32R(
1775                  i->Xin.Alu32R.op,
1776                  X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
1777                  i->Xin.Alu32R.dst
1778               );
1779     }
1780  
1781     /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1782        Convert to: src=RI_Imm, dst=Mem
1783     */
1784     if (i->tag == Xin_Alu32R
1785         && (i->Xin.Alu32R.op == Xalu_CMP)
1786         && i->Xin.Alu32R.src->tag == Xrmi_Imm
1787         && sameHReg(i->Xin.Alu32R.dst, vreg)) {
1788        return X86Instr_Alu32M(
1789                  i->Xin.Alu32R.op,
1790                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
1791                  X86AMode_IR( spill_off, hregX86_EBP())
1792               );
1793     }
1794  
1795     /* Deal with form: Push(RMI_Reg)
1796        Convert to: Push(RMI_Mem)
1797     */
1798     if (i->tag == Xin_Push
1799         && i->Xin.Push.src->tag == Xrmi_Reg
1800         && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
1801        return X86Instr_Push(
1802                  X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
1803               );
1804     }
1805  
1806     /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1807        Convert to CMov32(RM_Mem, dst) */
1808     if (i->tag == Xin_CMov32
1809         && i->Xin.CMov32.src->tag == Xrm_Reg
1810         && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
1811        vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
1812        return X86Instr_CMov32(
1813                  i->Xin.CMov32.cond,
1814                  X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
1815                  i->Xin.CMov32.dst
1816               );
1817     }
1818  
1819     /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1820     if (i->tag == Xin_Test32
1821         && i->Xin.Test32.dst->tag == Xrm_Reg
1822         && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
1823        return X86Instr_Test32(
1824                  i->Xin.Test32.imm32,
1825                  X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
1826               );
1827     }
1828  
1829     return NULL;
1830  }
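      /* Example (editorial): if vr3 lives in the slot at offset 100 and
         is read only by "orl %vr3, %vr9" (Xin_Alu32R, Xalu_OR,
         Xrmi_Reg), the first rewrite above turns it into
         "orl 100(%ebp), %vr9", saving a separate reload. */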
1831  
1832  
1833  /* --------- The x86 assembler (bleh.) --------- */
1834  
1835  static UChar iregNo ( HReg r )
1836  {
1837     UInt n;
1838     vassert(hregClass(r) == HRcInt32);
1839     vassert(!hregIsVirtual(r));
1840     n = hregNumber(r);
1841     vassert(n <= 7);
1842     return toUChar(n);
1843  }
1844  
1845  static UInt fregNo ( HReg r )
1846  {
1847     UInt n;
1848     vassert(hregClass(r) == HRcFlt64);
1849     vassert(!hregIsVirtual(r));
1850     n = hregNumber(r);
1851     vassert(n <= 5);
1852     return n;
1853  }
1854  
1855  static UInt vregNo ( HReg r )
1856  {
1857     UInt n;
1858     vassert(hregClass(r) == HRcVec128);
1859     vassert(!hregIsVirtual(r));
1860     n = hregNumber(r);
1861     vassert(n <= 7);
1862     return n;
1863  }
1864  
1865  static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1866  {
1867     vassert(mod < 4);
1868     vassert((reg|regmem) < 8);
1869     return toUChar( ((mod & 3) << 6)
1870                     | ((reg & 7) << 3)
1871                     | (regmem & 7) );
1872  }
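      /* E.g. mkModRegRM(3, 0, 1) == 0xC1: mod=11 (register-direct),
         reg=000 (%eax), rm=001 (%ecx). */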
1873  
1874  static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1875  {
1876     vassert(shift < 4);
1877     vassert((regindex|regbase) < 8);
1878     return toUChar( ((shift & 3) << 6)
1879                     | ((regindex & 7) << 3)
1880                     | (regbase & 7) );
1881  }
1882  
1883  static UChar* emit32 ( UChar* p, UInt w32 )
1884  {
1885     *p++ = toUChar( w32        & 0x000000FF);
1886     *p++ = toUChar((w32 >>  8) & 0x000000FF);
1887     *p++ = toUChar((w32 >> 16) & 0x000000FF);
1888     *p++ = toUChar((w32 >> 24) & 0x000000FF);
1889     return p;
1890  }
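      /* Little-endian, so emit32(p, 0x12345678) stores the bytes
         78 56 34 12. */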
1891  
1892  /* Does a sign-extend of the lowest 8 bits give
1893     the original number? */
1894  static Bool fits8bits ( UInt w32 )
1895  {
1896     Int i32 = (Int)w32;
1897     return toBool(i32 == ((i32 << 24) >> 24));
1898  }
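      /* E.g. fits8bits(0xFFFFFF80) is True, since sign-extending the low
         byte 0x80 recreates -128, whereas fits8bits(0x00000080) is
         False: 0x80 sign-extends to 0xFFFFFF80, not 0x00000080. */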
1899  
1900  
1901  /* Forming mod-reg-rm bytes and scale-index-base bytes.
1902  
1903       greg,  0(ereg)    |  ereg != ESP && ereg != EBP
1904                         =  00 greg ereg
1905  
1906       greg,  d8(ereg)   |  ereg != ESP
1907                         =  01 greg ereg, d8
1908  
1909       greg,  d32(ereg)  |  ereg != ESP
1910                         =  10 greg ereg, d32
1911  
1912       greg,  d8(%esp)   =  01 greg 100, 0x24, d8
1913  
1914       -----------------------------------------------
1915  
1916       greg,  d8(base,index,scale)
1917                 |  index != ESP
1918                 =  01 greg 100, scale index base, d8
1919  
1920       greg,  d32(base,index,scale)
1921                 |  index != ESP
1922                 =  10 greg 100, scale index base, d32
1923  */
1924  static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
1925  {
1926     if (am->tag == Xam_IR) {
1927        if (am->Xam.IR.imm == 0
1928            && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
1929            && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
1930           *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
1931           return p;
1932        }
1933        if (fits8bits(am->Xam.IR.imm)
1934            && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1935           *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
1936           *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1937           return p;
1938        }
1939        if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1940           *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
1941           p = emit32(p, am->Xam.IR.imm);
1942           return p;
1943        }
1944        if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
1945            && fits8bits(am->Xam.IR.imm)) {
1946           *p++ = mkModRegRM(1, iregNo(greg), 4);
1947           *p++ = 0x24;
1948           *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1949           return p;
1950        }
1951        ppX86AMode(am);
1952        vpanic("doAMode_M: can't emit amode IR");
1953        /*NOTREACHED*/
1954     }
1955     if (am->tag == Xam_IRRS) {
1956        if (fits8bits(am->Xam.IRRS.imm)
1957            && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1958           *p++ = mkModRegRM(1, iregNo(greg), 4);
1959           *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
1960                                            iregNo(am->Xam.IRRS.base));
1961           *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
1962           return p;
1963        }
1964        if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1965           *p++ = mkModRegRM(2, iregNo(greg), 4);
1966           *p++ = mkSIB(am->Xam.IRRS.shift, iregNo(am->Xam.IRRS.index),
1967                                            iregNo(am->Xam.IRRS.base));
1968           p = emit32(p, am->Xam.IRRS.imm);
1969           return p;
1970        }
1971        ppX86AMode(am);
1972        vpanic("doAMode_M: can't emit amode IRRS");
1973        /*NOTREACHED*/
1974     }
1975     vpanic("doAMode_M: unknown amode");
1976     /*NOTREACHED*/
1977  }
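      /* Worked example (editorial): greg=%eax with am=4(%esi) takes the
         d8(ereg) route above: mkModRegRM(1,0,6)=0x46 followed by 0x04,
         so "movl 4(%esi), %eax" (opcode 0x8B) assembles to 8B 46 04. */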
1978  
1979  
1980  /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
1981  static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
1982  {
1983     *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
1984     return p;
1985  }
1986  
1987  
1988  /* Emit ffree %st(7) */
1989  static UChar* do_ffree_st7 ( UChar* p )
1990  {
1991     *p++ = 0xDD;
1992     *p++ = 0xC7;
1993     return p;
1994  }
1995  
1996  /* Emit fstp %st(i), 1 <= i <= 7 */
1997  static UChar* do_fstp_st ( UChar* p, Int i )
1998  {
1999     vassert(1 <= i && i <= 7);
2000     *p++ = 0xDD;
2001     *p++ = toUChar(0xD8+i);
2002     return p;
2003  }
2004  
2005  /* Emit fld %st(i), 0 <= i <= 6 */
2006  static UChar* do_fld_st ( UChar* p, Int i )
2007  {
2008     vassert(0 <= i && i <= 6);
2009     *p++ = 0xD9;
2010     *p++ = toUChar(0xC0+i);
2011     return p;
2012  }
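      /* E.g. do_fld_st(p, 3) emits D9 C3 (fld %st(3)) and
         do_fstp_st(p, 2) emits DD DA (fstp %st(2)). */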
2013  
2014  /* Emit f<op> %st(0) */
2015  static UChar* do_fop1_st ( UChar* p, X86FpOp op )
2016  {
2017     switch (op) {
2018        case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
2019        case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
2020        case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
2021        case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
2022        case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
2023        case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
2024        case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
2025        case Xfp_MOV:    break;
2026        case Xfp_TAN:
2027           /* fptan pushes 1.0 on the FP stack, except when the argument
2028              is out of range.  Hence we have to do the instruction,
2029              then inspect C2 to see if there is an out of range
2030              condition.  If there is, we skip the fincstp that is used
2031              by the in-range case to get rid of this extra 1.0
2032              value. */
2033           p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
2034           *p++ = 0xD9; *p++ = 0xF2; // fptan
2035           *p++ = 0x50;              // pushl %eax
2036           *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
2037           *p++ = 0x66; *p++ = 0xA9;
2038           *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
2039           *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
2040           *p++ = 0xD9; *p++ = 0xF7; // fincstp
2041           *p++ = 0x58;              // after_fincstp: popl %eax
2042           break;
2043        default:
2044           vpanic("do_fop1_st: unknown op");
2045     }
2046     return p;
2047  }
2048  
2049  /* Emit f<op> %st(i), 1 <= i <= 5 */
2050  static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
2051  {
2052  #  define fake(_n) mkHReg((_n), HRcInt32, False)
2053     Int subopc;
2054     switch (op) {
2055        case Xfp_ADD: subopc = 0; break;
2056        case Xfp_SUB: subopc = 4; break;
2057        case Xfp_MUL: subopc = 1; break;
2058        case Xfp_DIV: subopc = 6; break;
2059        default: vpanic("do_fop2_st: unknown op");
2060     }
2061     *p++ = 0xD8;
2062     p    = doAMode_R(p, fake(subopc), fake(i));
2063     return p;
2064  #  undef fake
2065  }
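      /* E.g. do_fop2_st(p, Xfp_ADD, 1) emits D8 C1, "fadd %st(1),%st",
         computing %st(0) += %st(1).  The fake() wrappers merely feed
         the sub-opcode and stack index into the ModRM byte. */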
2066  
2067  /* Push a 32-bit word on the stack.  The word depends on tags[3:0]:
2068     byte n of the word is 0xFF if bit n of tags is set, and 0x00
2069     otherwise. */
2070  static UChar* push_word_from_tags ( UChar* p, UShort tags )
2071  {
2072     UInt w;
2073     vassert(0 == (tags & ~0xF));
2074     if (tags == 0) {
2075        /* pushl $0x00000000 */
2076        *p++ = 0x6A;
2077        *p++ = 0x00;
2078     }
2079     else
2080     /* pushl $0xFFFFFFFF */
2081     if (tags == 0xF) {
2082        *p++ = 0x6A;
2083        *p++ = 0xFF;
2084     } else {
2085        vassert(0); /* awaiting test case */
2086        w = 0;
2087        if (tags & 1) w |= 0x000000FF;
2088        if (tags & 2) w |= 0x0000FF00;
2089        if (tags & 4) w |= 0x00FF0000;
2090        if (tags & 8) w |= 0xFF000000;
2091        *p++ = 0x68;
2092        p = emit32(p, w);
2093     }
2094     return p;
2095  }
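      /* E.g. tags == 0xF emits 6A FF ("pushl $-1"); the 8-bit immediate
         is sign-extended, so 0xFFFFFFFF lands on the stack. */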
2096  
2097  /* Emit an instruction into buf and return the number of bytes used.
2098     Note that buf is not the insn's final place, and therefore it is
2099     imperative to emit position-independent code.  If the emitted
2100     instruction was a profiler inc, set *is_profInc to True, else
2101     leave it unchanged. */
2102  
2103  Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
2104                      UChar* buf, Int nbuf, X86Instr* i,
2105                      Bool mode64,
2106                      void* disp_cp_chain_me_to_slowEP,
2107                      void* disp_cp_chain_me_to_fastEP,
2108                      void* disp_cp_xindir,
2109                      void* disp_cp_xassisted )
2110  {
2111     UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2112  
2113     UInt   xtra;
2114     UChar* p = &buf[0];
2115     UChar* ptmp;
2116     vassert(nbuf >= 32);
2117     vassert(mode64 == False);
2118  
2119     /* Wrap an integer as an int register, for use assembling
2120        GrpN insns, in which the greg field is used as a sub-opcode
2121        and does not really contain a register. */
2122  #  define fake(_n) mkHReg((_n), HRcInt32, False)
2123  
2124     /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */
2125  
2126     switch (i->tag) {
2127  
2128     case Xin_Alu32R:
2129        /* Deal specially with MOV */
2130        if (i->Xin.Alu32R.op == Xalu_MOV) {
2131           switch (i->Xin.Alu32R.src->tag) {
2132              case Xrmi_Imm:
2133                 *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
2134                 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2135                 goto done;
2136              case Xrmi_Reg:
2137                 *p++ = 0x89;
2138                 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2139                                  i->Xin.Alu32R.dst);
2140                 goto done;
2141              case Xrmi_Mem:
2142                 *p++ = 0x8B;
2143                 p = doAMode_M(p, i->Xin.Alu32R.dst,
2144                                  i->Xin.Alu32R.src->Xrmi.Mem.am);
2145                 goto done;
2146              default:
2147                 goto bad;
2148           }
2149        }
2150        /* MUL */
2151        if (i->Xin.Alu32R.op == Xalu_MUL) {
2152           switch (i->Xin.Alu32R.src->tag) {
2153              case Xrmi_Reg:
2154                 *p++ = 0x0F;
2155                 *p++ = 0xAF;
2156                 p = doAMode_R(p, i->Xin.Alu32R.dst,
2157                                  i->Xin.Alu32R.src->Xrmi.Reg.reg);
2158                 goto done;
2159              case Xrmi_Mem:
2160                 *p++ = 0x0F;
2161                 *p++ = 0xAF;
2162                 p = doAMode_M(p, i->Xin.Alu32R.dst,
2163                                  i->Xin.Alu32R.src->Xrmi.Mem.am);
2164                 goto done;
2165              case Xrmi_Imm:
2166                 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2167                    *p++ = 0x6B;
2168                    p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2169                    *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2170                 } else {
2171                    *p++ = 0x69;
2172                    p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2173                    p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2174                 }
2175                 goto done;
2176              default:
2177                 goto bad;
2178           }
2179        }
2180        /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2181        opc = opc_rr = subopc_imm = opc_imma = 0;
2182        switch (i->Xin.Alu32R.op) {
2183           case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
2184                          subopc_imm = 2; opc_imma = 0x15; break;
2185           case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
2186                          subopc_imm = 0; opc_imma = 0x05; break;
2187           case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
2188                          subopc_imm = 5; opc_imma = 0x2D; break;
2189           case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
2190                          subopc_imm = 3; opc_imma = 0x1D; break;
2191           case Xalu_AND: opc = 0x23; opc_rr = 0x21;
2192                          subopc_imm = 4; opc_imma = 0x25; break;
2193           case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
2194                          subopc_imm = 6; opc_imma = 0x35; break;
2195           case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
2196                          subopc_imm = 1; opc_imma = 0x0D; break;
2197           case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
2198                          subopc_imm = 7; opc_imma = 0x3D; break;
2199           default: goto bad;
2200        }
2201        switch (i->Xin.Alu32R.src->tag) {
2202           case Xrmi_Imm:
2203              if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
2204                  && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2205                 *p++ = toUChar(opc_imma);
2206                 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2207              } else
2208              if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2209                 *p++ = 0x83;
2210                 p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2211                 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2212              } else {
2213                 *p++ = 0x81;
2214                 p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
2215                 p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2216              }
2217              goto done;
2218           case Xrmi_Reg:
2219              *p++ = toUChar(opc_rr);
2220              p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2221                               i->Xin.Alu32R.dst);
2222              goto done;
2223           case Xrmi_Mem:
2224              *p++ = toUChar(opc);
2225              p = doAMode_M(p, i->Xin.Alu32R.dst,
2226                               i->Xin.Alu32R.src->Xrmi.Mem.am);
2227              goto done;
2228           default:
2229              goto bad;
2230        }
2231        break;
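            /* E.g. "addl $4, %edx" takes the fits8bits path: 83 /0 with
               ModRM C2, i.e. 83 C2 04; "cmpl %ecx, %eax" uses opc_rr
               0x39 with ModRM C8, i.e. 39 C8. */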
2232  
2233     case Xin_Alu32M:
2234        /* Deal specially with MOV */
2235        if (i->Xin.Alu32M.op == Xalu_MOV) {
2236           switch (i->Xin.Alu32M.src->tag) {
2237              case Xri_Reg:
2238                 *p++ = 0x89;
2239                 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2240                                  i->Xin.Alu32M.dst);
2241                 goto done;
2242              case Xri_Imm:
2243                 *p++ = 0xC7;
2244                 p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
2245                 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2246                 goto done;
2247              default:
2248                 goto bad;
2249           }
2250        }
2251        /* ADD/SUB/CMP only; the other Alu ops, including MUL,
2252           are not allowed here. */
2253        opc = subopc_imm = opc_imma = 0;
2254        switch (i->Xin.Alu32M.op) {
2255           case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2256           case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2257           case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
2258           default: goto bad;
2259        }
2260        switch (i->Xin.Alu32M.src->tag) {
2261           case Xri_Reg:
2262              *p++ = toUChar(opc);
2263              p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2264                               i->Xin.Alu32M.dst);
2265              goto done;
2266           case Xri_Imm:
2267              if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2268                 *p++ = 0x83;
2269                 p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2270                 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
2271                 goto done;
2272              } else {
2273                 *p++ = 0x81;
2274                 p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
2275                 p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2276                 goto done;
2277              }
2278           default:
2279              goto bad;
2280        }
2281        break;
2282  
2283     case Xin_Sh32:
2284        opc_cl = opc_imm = subopc = 0;
2285        switch (i->Xin.Sh32.op) {
2286           case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2287           case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2288           case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2289           default: goto bad;
2290        }
2291        if (i->Xin.Sh32.src == 0) {
2292           *p++ = toUChar(opc_cl);
2293           p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2294        } else {
2295           *p++ = toUChar(opc_imm);
2296           p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
2297           *p++ = toUChar(i->Xin.Sh32.src);
2298        }
2299        goto done;
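            /* E.g. "shll $2, %eax" emits C1 E0 02 (subopc 4 -> ModRM E0)
               and "shrl %cl, %ebx" emits D3 EB (subopc 5 -> ModRM EB). */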
2300  
2301     case Xin_Test32:
2302        if (i->Xin.Test32.dst->tag == Xrm_Reg) {
2303           /* testl $imm32, %reg */
2304           *p++ = 0xF7;
2305           p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
2306           p = emit32(p, i->Xin.Test32.imm32);
2307           goto done;
2308        } else {
2309           /* testl $imm32, amode */
2310           *p++ = 0xF7;
2311           p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
2312           p = emit32(p, i->Xin.Test32.imm32);
2313           goto done;
2314        }
2315  
2316     case Xin_Unary32:
2317        if (i->Xin.Unary32.op == Xun_NOT) {
2318           *p++ = 0xF7;
2319           p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
2320           goto done;
2321        }
2322        if (i->Xin.Unary32.op == Xun_NEG) {
2323           *p++ = 0xF7;
2324           p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
2325           goto done;
2326        }
2327        break;
2328  
2329     case Xin_Lea32:
2330        *p++ = 0x8D;
2331        p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
2332        goto done;
2333  
2334     case Xin_MulL:
2335        subopc = i->Xin.MulL.syned ? 5 : 4;
2336        *p++ = 0xF7;
2337        switch (i->Xin.MulL.src->tag)  {
2338           case Xrm_Mem:
2339              p = doAMode_M(p, fake(subopc),
2340                               i->Xin.MulL.src->Xrm.Mem.am);
2341              goto done;
2342           case Xrm_Reg:
2343              p = doAMode_R(p, fake(subopc),
2344                               i->Xin.MulL.src->Xrm.Reg.reg);
2345              goto done;
2346           default:
2347              goto bad;
2348        }
2349        break;
2350  
2351     case Xin_Div:
2352        subopc = i->Xin.Div.syned ? 7 : 6;
2353        *p++ = 0xF7;
2354        switch (i->Xin.Div.src->tag)  {
2355           case Xrm_Mem:
2356              p = doAMode_M(p, fake(subopc),
2357                               i->Xin.Div.src->Xrm.Mem.am);
2358              goto done;
2359           case Xrm_Reg:
2360              p = doAMode_R(p, fake(subopc),
2361                               i->Xin.Div.src->Xrm.Reg.reg);
2362              goto done;
2363           default:
2364              goto bad;
2365        }
2366        break;
2367  
2368     case Xin_Sh3232:
2369        vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2370        if (i->Xin.Sh3232.amt == 0) {
2371           /* shldl/shrdl by %cl */
2372           *p++ = 0x0F;
2373           if (i->Xin.Sh3232.op == Xsh_SHL) {
2374              *p++ = 0xA5;
2375           } else {
2376              *p++ = 0xAD;
2377           }
2378           p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2379           goto done;
2380        }
2381        break;
2382  
2383     case Xin_Push:
2384        switch (i->Xin.Push.src->tag) {
2385           case Xrmi_Mem:
2386              *p++ = 0xFF;
2387              p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
2388              goto done;
2389           case Xrmi_Imm:
2390              *p++ = 0x68;
2391              p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
2392              goto done;
2393           case Xrmi_Reg:
2394              *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
2395              goto done;
2396           default:
2397              goto bad;
2398        }
2399  
2400     case Xin_Call:
2401        if (i->Xin.Call.cond != Xcc_ALWAYS
2402            && i->Xin.Call.rloc.pri != RLPri_None) {
2403           /* The call might not happen (it isn't unconditional) and it
2404              returns a result.  In this case we will need to generate a
2405              control flow diamond to put 0x555..555 in the return
2406              register(s) in the case where the call doesn't happen.  If
2407              this ever becomes necessary, maybe copy code from the ARM
2408              equivalent.  Until that day, just give up. */
2409           goto bad;
2410        }
2411        /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
2412           for explanation of this. */
2413        switch (i->Xin.Call.regparms) {
2414           case 0: irno = iregNo(hregX86_EAX()); break;
2415           case 1: irno = iregNo(hregX86_EDX()); break;
2416           case 2: irno = iregNo(hregX86_ECX()); break;
2417           case 3: irno = iregNo(hregX86_EDI()); break;
2418           default: vpanic("emit_X86Instr:call:regparms");
2419        }
2420        /* jump over the following two insns if the condition does not
2421           hold */
2422        if (i->Xin.Call.cond != Xcc_ALWAYS) {
2423           *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
2424           *p++ = 0x07; /* 7 bytes in the next two insns */
2425        }
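            /* (The 7 skipped bytes: B8+r imm32 is 5, FF D0+r is 2.) */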
2426        /* movl $target, %tmp */
2427        *p++ = toUChar(0xB8 + irno);
2428        p = emit32(p, i->Xin.Call.target);
2429        /* call *%tmp */
2430        *p++ = 0xFF;
2431        *p++ = toUChar(0xD0 + irno);
2432        goto done;
2433  
2434     case Xin_XDirect: {
2435        /* NB: what goes on here has to be very closely coordinated with the
2436           chainXDirect_X86 and unchainXDirect_X86 below. */
2437        /* We're generating chain-me requests here, so we need to be
2438           sure this is actually allowed -- no-redir translations can't
2439           use chain-me's.  Hence: */
2440        vassert(disp_cp_chain_me_to_slowEP != NULL);
2441        vassert(disp_cp_chain_me_to_fastEP != NULL);
2442  
2443        /* Use ptmp for backpatching conditional jumps. */
2444        ptmp = NULL;
2445  
2446        /* First off, if this is conditional, create a conditional
2447           jump over the rest of it. */
2448        if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2449           /* jmp fwds if !condition */
2450           *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
2451           ptmp = p; /* fill in this bit later */
2452           *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2453        }
2454  
2455        /* Update the guest EIP. */
2456        /* movl $dstGA, amEIP */
2457        *p++ = 0xC7;
2458        p    = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
2459        p    = emit32(p, i->Xin.XDirect.dstGA);
2460  
2461        /* --- FIRST PATCHABLE BYTE follows --- */
2462        /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2463           to) backs up the return address, so as to find the address of
2464           the first patchable byte.  So: don't change the length of the
2465           two instructions below. */
2466        /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
2467        *p++ = 0xBA;
2468        void* disp_cp_chain_me
2469                 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2470                                           : disp_cp_chain_me_to_slowEP;
2471        p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
2472        /* call *%edx */
2473        *p++ = 0xFF;
2474        *p++ = 0xD2;
2475        /* --- END of PATCHABLE BYTES --- */
2476  
2477        /* Fix up the conditional jump, if there was one. */
2478        if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2479           Int delta = p - ptmp;
2480           vassert(delta > 0 && delta < 40);
2481           *ptmp = toUChar(delta-1);
2482        }
2483        goto done;
2484     }
2485  
2486     case Xin_XIndir: {
2487        /* We're generating transfers that could lead indirectly to a
2488           chain-me, so we need to be sure this is actually allowed --
2489           no-redir translations are not allowed to reach normal
2490           translations without going through the scheduler.  That means
2491           no XDirects or XIndirs out from no-redir translations.
2492           Hence: */
2493        vassert(disp_cp_xindir != NULL);
2494  
2495        /* Use ptmp for backpatching conditional jumps. */
2496        ptmp = NULL;
2497  
2498        /* First off, if this is conditional, create a conditional
2499           jump over the rest of it. */
2500        if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2501           /* jmp fwds if !condition */
2502           *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
2503           ptmp = p; /* fill in this bit later */
2504           *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2505        }
2506  
2507        /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2508        *p++ = 0x89;
2509        p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2510  
2511        /* movl $disp_indir, %edx */
2512        *p++ = 0xBA;
2513        p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
2514        /* jmp *%edx */
2515        *p++ = 0xFF;
2516        *p++ = 0xE2;
2517  
2518        /* Fix up the conditional jump, if there was one. */
2519        if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2520           Int delta = p - ptmp;
2521           vassert(delta > 0 && delta < 40);
2522           *ptmp = toUChar(delta-1);
2523        }
2524        goto done;
2525     }
2526  
2527     case Xin_XAssisted: {
2528        /* Use ptmp for backpatching conditional jumps. */
2529        ptmp = NULL;
2530  
2531        /* First off, if this is conditional, create a conditional
2532           jump over the rest of it. */
2533        if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2534           /* jmp fwds if !condition */
2535           *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
2536           ptmp = p; /* fill in this bit later */
2537           *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2538        }
2539  
2540        /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2541        *p++ = 0x89;
2542        p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
2543        /* movl $magic_number, %ebp. */
2544        UInt trcval = 0;
2545        switch (i->Xin.XAssisted.jk) {
2546           case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
2547           case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
2548           case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
2549           case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
2550           case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
2551           case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
2552           case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
2553           case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
2554           case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
2555           case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
2556           case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
2557           case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
2558           case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
2559           case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
2560           case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
2561           /* We don't expect to see the following being assisted. */
2562           case Ijk_Ret:
2563           case Ijk_Call:
2564           /* fallthrough */
2565           default:
2566              ppIRJumpKind(i->Xin.XAssisted.jk);
2567              vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
2568        }
2569        vassert(trcval != 0);
2570        *p++ = 0xBD;
2571        p = emit32(p, trcval);
2572  
2573        /* movl $disp_indir, %edx */
2574        *p++ = 0xBA;
2575        p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
2576        /* jmp *%edx */
2577        *p++ = 0xFF;
2578        *p++ = 0xE2;
2579  
2580        /* Fix up the conditional jump, if there was one. */
2581        if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2582           Int delta = p - ptmp;
2583           vassert(delta > 0 && delta < 40);
2584           *ptmp = toUChar(delta-1);
2585        }
2586        goto done;
2587     }
2588  
2589     case Xin_CMov32:
2590        vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
2591  
2592        /* This generates cmov, which is illegal on P54/P55. */
2593        /*
2594        *p++ = 0x0F;
2595        *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
2596        if (i->Xin.CMov32.src->tag == Xrm_Reg) {
2597           p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
2598           goto done;
2599        }
2600        if (i->Xin.CMov32.src->tag == Xrm_Mem) {
2601           p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
2602           goto done;
2603        }
2604        */
2605  
2606        /* Alternative version which works on any x86 variant. */
2607        /* jmp fwds if !condition */
2608        *p++ = toUChar(0x70 + (0xF & (i->Xin.CMov32.cond ^ 1)));
2609        *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2610        ptmp = p;
2611  
2612        switch (i->Xin.CMov32.src->tag) {
2613           case Xrm_Reg:
2614              /* Big sigh.  This is movl E -> G ... */
2615              *p++ = 0x89;
2616              p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
2617                               i->Xin.CMov32.dst);
2618  
2619              break;
2620           case Xrm_Mem:
2621              /* ... whereas this is movl G -> E.  That's why the args
2622                 to doAMode_R appear to be the wrong way round in the
2623                 Xrm_Reg case. */
2624              *p++ = 0x8B;
2625              p = doAMode_M(p, i->Xin.CMov32.dst,
2626                               i->Xin.CMov32.src->Xrm.Mem.am);
2627              break;
2628           default:
2629              goto bad;
2630        }
2631        /* Fill in the jump offset. */
2632        *(ptmp-1) = toUChar(p - ptmp);
2633        goto done;
2634  
2635        break;
2636  
2637     case Xin_LoadEX:
2638        if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2639           /* movzbl */
2640           *p++ = 0x0F;
2641           *p++ = 0xB6;
2642           p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2643           goto done;
2644        }
2645        if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2646           /* movzwl */
2647           *p++ = 0x0F;
2648           *p++ = 0xB7;
2649           p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2650           goto done;
2651        }
2652        if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2653           /* movsbl */
2654           *p++ = 0x0F;
2655           *p++ = 0xBE;
2656           p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2657           goto done;
2658        }
2659        break;
2660  
2661     case Xin_Set32:
2662        /* Make the destination register be 1 or 0, depending on whether
2663           the relevant condition holds.  We have to dodge and weave
2664           when the destination is %esi or %edi as we cannot directly
2665           emit the native 'setb %reg' for those.  Further complication:
2666           the top 24 bits of the destination should be forced to zero,
2667           but doing 'xor %r,%r' kills the flag(s) we are about to read.
2668           Sigh.  So start off by moving $0 into the dest. */
2669  
2670        /* Do we need to swap in %eax? */
2671        if (iregNo(i->Xin.Set32.dst) >= 4) {
2672           /* xchg %eax, %dst */
2673           *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2674           /* movl $0, %eax */
2675           *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
2676           p = emit32(p, 0);
2677           /* setb lo8(%eax) */
2678           *p++ = 0x0F;
2679           *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2680           p = doAMode_R(p, fake(0), hregX86_EAX());
2681           /* xchg %eax, %dst */
2682           *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
2683        } else {
2684           /* movl $0, %dst */
2685           *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
2686           p = emit32(p, 0);
2687           /* setb lo8(%dst) */
2688           *p++ = 0x0F;
2689           *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2690           p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
2691        }
2692        goto done;
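            /* E.g. dst=%edi gives: 97 (xchg %eax,%edi), B8 00 00 00 00
               (movl $0,%eax), 0F 9x C0 (setcc %al, x = cond), 97. */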
2693  
2694     case Xin_Bsfr32:
2695        *p++ = 0x0F;
2696        if (i->Xin.Bsfr32.isFwds) {
2697           *p++ = 0xBC;
2698        } else {
2699           *p++ = 0xBD;
2700        }
2701        p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2702        goto done;
2703  
2704     case Xin_MFence:
2705        /* see comment in hdefs.h re this insn */
2706        if (0) vex_printf("EMIT FENCE\n");
2707        if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2708                                    |VEX_HWCAPS_X86_SSE2)) {
2709           /* mfence */
2710           *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2711           goto done;
2712        }
2713        if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
2714           /* sfence */
2715           *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2716           /* lock addl $0,0(%esp) */
2717           *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2718           *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2719           goto done;
2720        }
2721        if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2722           /* lock addl $0,0(%esp) */
2723           *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2724           *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2725           goto done;
2726        }
2727        vpanic("emit_X86Instr:mfence:hwcaps");
2728        /*NOTREACHED*/
2729        break;
2730  
2731     case Xin_ACAS:
2732        /* lock */
2733        *p++ = 0xF0;
2734        /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
2735           in %ebx.  The new-value register is hardwired to be %ebx
2736           since letting it be any integer register gives the problem
2737           that %sil and %dil are unaddressable on x86 and hence we
2738           would have to resort to the same kind of trickery as with
2739           byte-sized Xin.Store, just below.  Given that this isn't
2740           performance critical, it is simpler just to force the
2741           register operand to %ebx (could equally be %ecx or %edx).
2742           (Although %ebx is more consistent with cmpxchg8b.) */
2743        if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2744        *p++ = 0x0F;
2745        if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2746        p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2747        goto done;
2748  
2749     case Xin_DACAS:
2750        /* lock */
2751        *p++ = 0xF0;
2752        /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
2753           in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
2754           aren't encoded in the insn. */
2755        *p++ = 0x0F;
2756        *p++ = 0xC7;
2757        p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
2758        goto done;
2759  
2760     case Xin_Store:
2761        if (i->Xin.Store.sz == 2) {
2762           /* This case, at least, is simple, given that we can
2763              reference the low 16 bits of any integer register. */
2764           *p++ = 0x66;
2765           *p++ = 0x89;
2766           p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2767           goto done;
2768        }
2769  
2770        if (i->Xin.Store.sz == 1) {
2771           /* We have to do complex dodging and weaving if src is not
2772              the low 8 bits of %eax/%ebx/%ecx/%edx. */
2773           if (iregNo(i->Xin.Store.src) < 4) {
2774              /* we're OK, can do it directly */
2775              *p++ = 0x88;
2776              p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2777              goto done;
2778           } else {
2779              /* Bleh.  This means the source is %edi or %esi.  Since
2780                 the address mode can only mention three registers, at
2781                 least one of %eax/%ebx/%ecx/%edx must be available to
2782                 temporarily swap the source into, so the store can
2783                 happen.  So we have to look at the regs mentioned
2784                 in the amode. */
2785              HReg swap = INVALID_HREG;
2786              HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
2787                    ecx = hregX86_ECX(), edx = hregX86_EDX();
2788              Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
2789              HRegUsage u;
2790              Int j;
2791              initHRegUsage(&u);
2792              addRegUsage_X86AMode(&u,  i->Xin.Store.dst);
2793              for (j = 0; j < u.n_used; j++) {
2794                 HReg r = u.hreg[j];
2795                 if (sameHReg(r, eax)) a_ok = False;
2796                 if (sameHReg(r, ebx)) b_ok = False;
2797                 if (sameHReg(r, ecx)) c_ok = False;
2798                 if (sameHReg(r, edx)) d_ok = False;
2799              }
2800              if (a_ok) swap = eax;
2801              if (b_ok) swap = ebx;
2802              if (c_ok) swap = ecx;
2803              if (d_ok) swap = edx;
2804              vassert(! hregIsInvalid(swap));
2805              /* xchgl %source, %swap. Could do better if swap is %eax. */
2806              *p++ = 0x87;
2807              p = doAMode_R(p, i->Xin.Store.src, swap);
2808              /* movb lo8{%swap}, (dst) */
2809              *p++ = 0x88;
2810              p = doAMode_M(p, swap, i->Xin.Store.dst);
2811              /* xchgl %source, %swap, undoing the earlier swap. */
2812              *p++ = 0x87;
2813              p = doAMode_R(p, i->Xin.Store.src, swap);
2814              goto done;
2815           }
2816        } /* if (i->Xin.Store.sz == 1) */
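      /* Worked example of the swap dance (choice of registers is
         illustrative): storing the low byte of %esi to 0(%eax).  The
         amode knocks out %eax, and the cascade leaves the last
         still-free candidate, %edx, in |swap|, so we emit
            xchgl %esi,%edx ; movb %dl,0(%eax) ; xchgl %esi,%edx
      */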
2817        break;
2818  
2819     case Xin_FpUnary:
2820        /* gop %src, %dst
2821           --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2822        */
2823        p = do_ffree_st7(p);
2824        p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
2825        p = do_fop1_st(p, i->Xin.FpUnary.op);
2826        p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
2827        goto done;
2828  
2829     case Xin_FpBinary:
2830        if (i->Xin.FpBinary.op == Xfp_YL2X
2831            || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2832           /* Have to do this specially. */
2833           /* ffree %st7 ; fld %st(srcL) ;
2834              ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2835           p = do_ffree_st7(p);
2836           p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2837           p = do_ffree_st7(p);
2838           p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2839           *p++ = 0xD9;
2840           *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2841           p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2842           goto done;
2843        }
2844        if (i->Xin.FpBinary.op == Xfp_ATAN) {
2845           /* Have to do this specially. */
2846           /* ffree %st7 ; fld %st(srcL) ;
2847              ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2848           p = do_ffree_st7(p);
2849           p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2850           p = do_ffree_st7(p);
2851           p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
2852           *p++ = 0xD9; *p++ = 0xF3;
2853           p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2854           goto done;
2855        }
2856        if (i->Xin.FpBinary.op == Xfp_PREM
2857            || i->Xin.FpBinary.op == Xfp_PREM1
2858            || i->Xin.FpBinary.op == Xfp_SCALE) {
2859           /* Have to do this specially. */
2860           /* ffree %st7 ; fld %st(srcR) ;
2861              ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2862              fincstp ; ffree %st7 */
2863           p = do_ffree_st7(p);
2864           p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
2865           p = do_ffree_st7(p);
2866           p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
2867           *p++ = 0xD9;
2868           switch (i->Xin.FpBinary.op) {
2869              case Xfp_PREM: *p++ = 0xF8; break;
2870              case Xfp_PREM1: *p++ = 0xF5; break;
2871              case Xfp_SCALE: *p++ = 0xFD; break;
2872              default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2873           }
2874           p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
2875           *p++ = 0xD9; *p++ = 0xF7;
2876           p = do_ffree_st7(p);
2877           goto done;
2878        }
2879        /* General case */
2880        /* gop %srcL, %srcR, %dst
2881           --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2882        */
2883        p = do_ffree_st7(p);
2884        p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
2885        p = do_fop2_st(p, i->Xin.FpBinary.op,
2886                          1+hregNumber(i->Xin.FpBinary.srcR));
2887        p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
2888        goto done;
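      /* In all the FpBinary sequences the fstp offsets track the x87
         stack depth: each fld pushes, shifting every existing slot
         down by one, and ops like fyl2x/fpatan pop an operand back
         off.  Hence fstp(1+dst) when the net depth is one, and
         fstp(2+dst) for PREM/PREM1/SCALE, which push twice and pop
         nothing. */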
2889  
2890     case Xin_FpLdSt:
2891        if (i->Xin.FpLdSt.isLoad) {
2892           /* Load from memory into %fakeN.
2893              --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2894           */
2895           p = do_ffree_st7(p);
2896           switch (i->Xin.FpLdSt.sz) {
2897              case 4:
2898                 *p++ = 0xD9;
2899                 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2900                 break;
2901              case 8:
2902                 *p++ = 0xDD;
2903                 p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
2904                 break;
2905              case 10:
2906                 *p++ = 0xDB;
2907                 p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
2908                 break;
2909              default:
2910                 vpanic("emitX86Instr(FpLdSt,load)");
2911           }
2912           p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
2913           goto done;
2914        } else {
2915           /* Store from %fakeN into memory.
2916              --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2917           */
2918           p = do_ffree_st7(p);
2919           p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
2920           switch (i->Xin.FpLdSt.sz) {
2921              case 4:
2922                 *p++ = 0xD9;
2923                 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2924                 break;
2925              case 8:
2926                 *p++ = 0xDD;
2927                 p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
2928                 break;
2929              case 10:
2930                 *p++ = 0xDB;
2931                 p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
2932                 break;
2933              default:
2934                 vpanic("emitX86Instr(FpLdSt,store)");
2935           }
2936           goto done;
2937        }
2938        break;
2939  
2940     case Xin_FpLdStI:
2941        if (i->Xin.FpLdStI.isLoad) {
2942           /* Load from memory into %fakeN, converting from an int.
2943              --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2944           */
2945           switch (i->Xin.FpLdStI.sz) {
2946              case 8:  opc = 0xDF; subopc_imm = 5; break;
2947              case 4:  opc = 0xDB; subopc_imm = 0; break;
2948              case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
2949              default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2950           }
2951           p = do_ffree_st7(p);
2952           *p++ = toUChar(opc);
2953           p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2954           p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
2955           goto done;
2956        } else {
2957           /* Store from %fakeN into memory, converting to an int.
2958              --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2959           */
2960           switch (i->Xin.FpLdStI.sz) {
2961              case 8:  opc = 0xDF; subopc_imm = 7; break;
2962              case 4:  opc = 0xDB; subopc_imm = 3; break;
2963              case 2:  opc = 0xDF; subopc_imm = 3; break;
2964              default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2965           }
2966           p = do_ffree_st7(p);
2967           p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
2968           *p++ = toUChar(opc);
2969           p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
2970           goto done;
2971        }
2972        break;
2973  
2974     case Xin_Fp64to32:
2975        /* ffree %st7 ; fld %st(src) */
2976        p = do_ffree_st7(p);
2977        p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
2978        /* subl $4, %esp */
2979        *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2980        /* fstps (%esp) */
2981        *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2982        /* flds (%esp) */
2983        *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2984        /* addl $4, %esp */
2985        *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2986        /* fstp %st(1+dst) */
2987        p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
2988        goto done;
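      /* The store/load round trip through (%esp) is what does the
         narrowing: fstps rounds to single precision on the way out to
         memory, and flds brings the now-32-bit value back onto the
         x87 stack. */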
2989  
2990     case Xin_FpCMov:
2991        /* jmp fwds if !condition */
2992        *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2993        *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2994        ptmp = p;
2995  
2996        /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
2997        p = do_ffree_st7(p);
2998        p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
2999        p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
3000  
3001        /* Fill in the jump offset. */
3002        *(ptmp-1) = toUChar(p - ptmp);
3003        goto done;
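      /* The jcc above was emitted with an inverted condition and a
         zero displacement byte, with ptmp left pointing just past it.
         Now that the length of the conditional block is known,
         p - ptmp is exactly the rel8 needed to skip it.  The same
         trick recurs in Xin_SseCMov below. */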
3004  
3005     case Xin_FpLdCW:
3006        *p++ = 0xD9;
3007        p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
3008        goto done;
3009  
3010     case Xin_FpStSW_AX:
3011        /* note, this emits fnstsw %ax, not fstsw %ax */
3012        *p++ = 0xDF;
3013        *p++ = 0xE0;
3014        goto done;
3015  
3016     case Xin_FpCmp:
3017        /* gcmp %fL, %fR, %dst
3018           -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3019              fnstsw %ax ; movl %eax, %dst
3020        */
3021        /* ffree %st7 */
3022        p = do_ffree_st7(p);
3023        /* fpush %fL */
3024        p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
3025        /* fucomp %(fR+1) */
3026        *p++ = 0xDD;
3027        *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
3028        /* fnstsw %ax */
3029        *p++ = 0xDF;
3030        *p++ = 0xE0;
3031        /* movl %eax, %dst */
3032        *p++ = 0x89;
3033        p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
3034        goto done;
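      /* After the fucomp, the outcome sits in the x87 status word's
         C3/C2/C0 flags (equal: C3 set; less-than: C0 set; unordered:
         all three set); fnstsw %ax exposes that word in %ax so the
         movl can hand it to the destination register. */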
3035  
3036     case Xin_SseConst: {
3037        UShort con = i->Xin.SseConst.con;
3038        p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
3039        p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
3040        p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
3041        p = push_word_from_tags(p, toUShort(con & 0xF));
3042        /* movups (%esp), %xmm-dst */
3043        *p++ = 0x0F;
3044        *p++ = 0x10;
3045        *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
3046        *p++ = 0x24;
3047        /* addl $16, %esp */
3048        *p++ = 0x83;
3049        *p++ = 0xC4;
3050        *p++ = 0x10;
3051        goto done;
3052     }
3053  
3054     case Xin_SseLdSt:
3055        *p++ = 0x0F;
3056        *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
3057        p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
3058        goto done;
3059  
3060     case Xin_SseLdzLO:
3061        vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
3062        /* movs[sd] amode, %xmm-dst */
3063        *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3064        *p++ = 0x0F;
3065        *p++ = 0x10;
3066        p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
3067                         i->Xin.SseLdzLO.addr);
3068        goto done;
3069  
3070     case Xin_Sse32Fx4:
3071        xtra = 0;
3072        *p++ = 0x0F;
3073        switch (i->Xin.Sse32Fx4.op) {
3074           case Xsse_ADDF:   *p++ = 0x58; break;
3075           case Xsse_DIVF:   *p++ = 0x5E; break;
3076           case Xsse_MAXF:   *p++ = 0x5F; break;
3077           case Xsse_MINF:   *p++ = 0x5D; break;
3078           case Xsse_MULF:   *p++ = 0x59; break;
3079           case Xsse_RCPF:   *p++ = 0x53; break;
3080           case Xsse_RSQRTF: *p++ = 0x52; break;
3081           case Xsse_SQRTF:  *p++ = 0x51; break;
3082           case Xsse_SUBF:   *p++ = 0x5C; break;
3083           case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3084           case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3085           case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3086           case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3087           default: goto bad;
3088        }
3089        p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
3090                         fake(vregNo(i->Xin.Sse32Fx4.src)) );
3091        if (xtra & 0x100)
3092           *p++ = toUChar(xtra & 0xFF);
3093        goto done;
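      /* A note on |xtra|, used here and in the three cases below: for
         the CMP*F ops the opcode byte is 0xC2 (the CMPPS family) and
         xtra is set to 0x100 | imm8, where imm8 selects the predicate
         (0 = EQ, 1 = LT, 2 = LE, 3 = UNORD).  Bit 8 merely records
         that a trailing immediate byte is needed, which the
         "xtra & 0x100" test emits after the ModRM byte. */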
3094  
3095     case Xin_Sse64Fx2:
3096        xtra = 0;
3097        *p++ = 0x66;
3098        *p++ = 0x0F;
3099        switch (i->Xin.Sse64Fx2.op) {
3100           case Xsse_ADDF:   *p++ = 0x58; break;
3101           case Xsse_DIVF:   *p++ = 0x5E; break;
3102           case Xsse_MAXF:   *p++ = 0x5F; break;
3103           case Xsse_MINF:   *p++ = 0x5D; break;
3104           case Xsse_MULF:   *p++ = 0x59; break;
3105           case Xsse_RCPF:   *p++ = 0x53; break;
3106           case Xsse_RSQRTF: *p++ = 0x52; break;
3107           case Xsse_SQRTF:  *p++ = 0x51; break;
3108           case Xsse_SUBF:   *p++ = 0x5C; break;
3109           case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3110           case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3111           case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3112           case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3113           default: goto bad;
3114        }
3115        p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
3116                         fake(vregNo(i->Xin.Sse64Fx2.src)) );
3117        if (xtra & 0x100)
3118           *p++ = toUChar(xtra & 0xFF);
3119        goto done;
3120  
3121     case Xin_Sse32FLo:
3122        xtra = 0;
3123        *p++ = 0xF3;
3124        *p++ = 0x0F;
3125        switch (i->Xin.Sse32FLo.op) {
3126           case Xsse_ADDF:   *p++ = 0x58; break;
3127           case Xsse_DIVF:   *p++ = 0x5E; break;
3128           case Xsse_MAXF:   *p++ = 0x5F; break;
3129           case Xsse_MINF:   *p++ = 0x5D; break;
3130           case Xsse_MULF:   *p++ = 0x59; break;
3131           case Xsse_RCPF:   *p++ = 0x53; break;
3132           case Xsse_RSQRTF: *p++ = 0x52; break;
3133           case Xsse_SQRTF:  *p++ = 0x51; break;
3134           case Xsse_SUBF:   *p++ = 0x5C; break;
3135           case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3136           case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3137           case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3138           case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3139           default: goto bad;
3140        }
3141        p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
3142                         fake(vregNo(i->Xin.Sse32FLo.src)) );
3143        if (xtra & 0x100)
3144           *p++ = toUChar(xtra & 0xFF);
3145        goto done;
3146  
3147     case Xin_Sse64FLo:
3148        xtra = 0;
3149        *p++ = 0xF2;
3150        *p++ = 0x0F;
3151        switch (i->Xin.Sse64FLo.op) {
3152           case Xsse_ADDF:   *p++ = 0x58; break;
3153           case Xsse_DIVF:   *p++ = 0x5E; break;
3154           case Xsse_MAXF:   *p++ = 0x5F; break;
3155           case Xsse_MINF:   *p++ = 0x5D; break;
3156           case Xsse_MULF:   *p++ = 0x59; break;
3157           case Xsse_RCPF:   *p++ = 0x53; break;
3158           case Xsse_RSQRTF: *p++ = 0x52; break;
3159           case Xsse_SQRTF:  *p++ = 0x51; break;
3160           case Xsse_SUBF:   *p++ = 0x5C; break;
3161           case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3162           case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3163           case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3164           case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3165           default: goto bad;
3166        }
3167        p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
3168                         fake(vregNo(i->Xin.Sse64FLo.src)) );
3169        if (xtra & 0x100)
3170           *p++ = toUChar(xtra & 0xFF);
3171        goto done;
3172  
3173     case Xin_SseReRg:
3174  #     define XX(_n) *p++ = (_n)
3175        switch (i->Xin.SseReRg.op) {
3176           case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
3177           case Xsse_OR:                 XX(0x0F); XX(0x56); break;
3178           case Xsse_XOR:                XX(0x0F); XX(0x57); break;
3179           case Xsse_AND:                XX(0x0F); XX(0x54); break;
3180           case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
3181           case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
3182           case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
3183           case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
3184           case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
3185           case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
3186           case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
3187           case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
3188           case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
3189           case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
3190           case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
3191           case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
3192           case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
3193           case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
3194           case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
3195           case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
3196           case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
3197           case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3198           case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3199           case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
3200           case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
3201           case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
3202           case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
3203           case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3204           case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3205           case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
3206           case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
3207           case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
3208           case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
3209           case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
3210           case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
3211           case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
3212           case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
3213           case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
3214           case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
3215           case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
3216           case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
3217           case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
3218           case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
3219           case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
3220           case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
3221           case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
3222           case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
3223           case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
3224           case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
3225           case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
3226           case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
3227           case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
3228           case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
3229           case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
3230           default: goto bad;
3231        }
3232        p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
3233                         fake(vregNo(i->Xin.SseReRg.src)) );
3234  #     undef XX
3235        goto done;
3236  
3237     case Xin_SseCMov:
3238        /* jmp fwds if !condition */
3239        *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3240        *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3241        ptmp = p;
3242  
3243        /* movaps %src, %dst */
3244        *p++ = 0x0F;
3245        *p++ = 0x28;
3246        p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
3247                         fake(vregNo(i->Xin.SseCMov.src)) );
3248  
3249        /* Fill in the jump offset. */
3250        *(ptmp-1) = toUChar(p - ptmp);
3251        goto done;
3252  
3253     case Xin_SseShuf:
3254        *p++ = 0x66;
3255        *p++ = 0x0F;
3256        *p++ = 0x70;
3257        p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
3258                         fake(vregNo(i->Xin.SseShuf.src)) );
3259        *p++ = (UChar)(i->Xin.SseShuf.order);
3260        goto done;
3261  
3262     case Xin_EvCheck: {
3263        /* We generate:
3264              (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
3265              (2 bytes)  jns  nofail     expected taken
3266              (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
3267              nofail:
3268        */
3269        /* This is heavily asserted re instruction lengths.  It needs to
3270           be.  If we get given unexpected forms of .amCounter or
3271           .amFailAddr -- basically, anything that's not of the form
3272           uimm7(%ebp) -- they are likely to fail. */
3273        /* Note also that after the decl we must be very careful not to
3274           read the carry flag, else we get a partial flags stall.
3275           js/jns avoids that, though. */
3276        UChar* p0 = p;
3277        /* --- decl 4(%ebp) --- */
3278        /* "fake(1)" because there's no register in this encoding;
3279           instead the register field is used as a sub opcode.  The
3280           encoding for "decl r/m32" is FF /1, hence the fake(1). */
3281        *p++ = 0xFF;
3282        p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
3283        vassert(p - p0 == 3);
3284        /* --- jns nofail --- */
3285        *p++ = 0x79;
3286        *p++ = 0x03; /* need to check this 0x03 after the next insn */
3287        vassert(p - p0 == 5);
3288        /* --- jmp* 0(%ebp) --- */
3289        /* The encoding is FF /4. */
3290        *p++ = 0xFF;
3291        p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
3292        vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3293        /* And crosscheck .. */
3294        vassert(evCheckSzB_X86() == 8);
3295        goto done;
3296     }
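   /* Concretely, assuming the uimm7(%ebp) forms noted above with the
      counter at offset 4 and the fail address at offset 0, the eight
      bytes generated are:
         FF 4D 04    decl 4(%ebp)
         79 03       jns  nofail (skip the next 3 bytes)
         FF 65 00    jmp* 0(%ebp)
   */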
3297  
3298     case Xin_ProfInc: {
3299        /* We generate   addl $1,NotKnownYet
3300                         adcl $0,NotKnownYet+4
3301           in the expectation that a later call to LibVEX_PatchProfInc
3302           will be used to fill in the immediate fields once the right
3303           value is known.
3304             83 05  00 00 00 00  01
3305             83 15  00 00 00 00  00
3306        */
3307        *p++ = 0x83; *p++ = 0x05;
3308        *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3309        *p++ = 0x01;
3310        *p++ = 0x83; *p++ = 0x15;
3311        *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3312        *p++ = 0x00;
3313        /* Tell the caller .. */
3314        vassert(!(*is_profInc));
3315        *is_profInc = True;
3316        goto done;
3317     }
3318  
3319     default:
3320        goto bad;
3321     }
3322  
3323    bad:
3324     ppX86Instr(i, mode64);
3325     vpanic("emit_X86Instr");
3326     /*NOTREACHED*/
3327  
3328    done:
3329     vassert(p - &buf[0] <= 32);
3330     return p - &buf[0];
3331  
3332  #  undef fake
3333  }
3334  
3335  
3336  /* How big is an event check?  See case for Xin_EvCheck in
3337     emit_X86Instr just above.  That crosschecks what this returns, so
3338     we can tell if we're inconsistent. */
3339  Int evCheckSzB_X86 ( void )
3340  {
3341     return 8;
3342  }
3343  
3344  
3345  /* NB: what goes on here has to be very closely coordinated with the
3346     emitInstr case for XDirect, above. */
3347  VexInvalRange chainXDirect_X86 ( void* place_to_chain,
3348                                   void* disp_cp_chain_me_EXPECTED,
3349                                   void* place_to_jump_to )
3350  {
3351     /* What we're expecting to see is:
3352          movl $disp_cp_chain_me_EXPECTED, %edx
3353          call *%edx
3354        viz
3355          BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3356          FF D2
3357     */
3358     UChar* p = (UChar*)place_to_chain;
3359     vassert(p[0] == 0xBA);
3360     vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
3361     vassert(p[5] == 0xFF);
3362     vassert(p[6] == 0xD2);
3363     /* And what we want to change it to is:
3364            jmp disp32   where disp32 is relative to the next insn
3365            ud2;
3366          viz
3367            E9 <4 bytes == disp32>
3368            0F 0B
3369        The replacement has the same length as the original.
3370     */
3371     /* This is the delta we need to put into a JMP d32 insn.  It's
3372        relative to the start of the next insn, hence the -5.  */
3373     Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
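   /* For example (addresses hypothetical): chaining at 0x1000 to a
      target at 0x2000 gives delta = 0x2000 - 0x1000 - 5 = 0xFFB, so
      the seven bytes written below are E9 FB 0F 00 00 0F 0B. */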
3374  
3375     /* And make the modifications. */
3376     p[0] = 0xE9;
3377     p[1] = (delta >> 0) & 0xFF;
3378     p[2] = (delta >> 8) & 0xFF;
3379     p[3] = (delta >> 16) & 0xFF;
3380     p[4] = (delta >> 24) & 0xFF;
3381     p[5] = 0x0F; p[6]  = 0x0B;
3382     /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
3383     delta >>= 32;
3384     vassert(delta == 0LL || delta == -1LL);
3385     VexInvalRange vir = { (HWord)place_to_chain, 7 };
3386     return vir;
3387  }
3388  
3389  
3390  /* NB: what goes on here has to be very closely coordinated with the
3391     emitInstr case for XDirect, above. */
3392  VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
3393                                     void* place_to_jump_to_EXPECTED,
3394                                     void* disp_cp_chain_me )
3395  {
3396     /* What we're expecting to see is:
3397            jmp d32
3398            ud2;
3399         viz
3400            E9 <4 bytes == disp32>
3401            0F 0B
3402     */
3403     UChar* p     = (UChar*)place_to_unchain;
3404     Bool   valid = False;
3405     if (p[0] == 0xE9
3406         && p[5]  == 0x0F && p[6]  == 0x0B) {
3407        /* Check the offset is right. */
3408        Int s32 = *(Int*)(&p[1]);
3409        if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
3410           valid = True;
3411           if (0)
3412              vex_printf("QQQ unchainXDirect_X86: found valid\n");
3413        }
3414     }
3415     vassert(valid);
3416     /* And what we want to change it to is:
3417           movl $disp_cp_chain_me, %edx
3418           call *%edx
3419        viz
3420           BA <4 bytes value == disp_cp_chain_me>
3421           FF D2
3422        So it's the same length (convenient, huh).
3423     */
3424     p[0] = 0xBA;
3425     *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
3426     p[5] = 0xFF;
3427     p[6] = 0xD2;
3428     VexInvalRange vir = { (HWord)place_to_unchain, 7 };
3429     return vir;
3430  }
3431  
3432  
3433  /* Patch the counter address into a profile inc point, as previously
3434     created by the Xin_ProfInc case for emit_X86Instr. */
3435  VexInvalRange patchProfInc_X86 ( void*  place_to_patch,
3436                                   ULong* location_of_counter )
3437  {
3438     vassert(sizeof(ULong*) == 4);
3439     UChar* p = (UChar*)place_to_patch;
3440     vassert(p[0] == 0x83);
3441     vassert(p[1] == 0x05);
3442     vassert(p[2] == 0x00);
3443     vassert(p[3] == 0x00);
3444     vassert(p[4] == 0x00);
3445     vassert(p[5] == 0x00);
3446     vassert(p[6] == 0x01);
3447     vassert(p[7] == 0x83);
3448     vassert(p[8] == 0x15);
3449     vassert(p[9] == 0x00);
3450     vassert(p[10] == 0x00);
3451     vassert(p[11] == 0x00);
3452     vassert(p[12] == 0x00);
3453     vassert(p[13] == 0x00);
3454     UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
3455     p[2] = imm32 & 0xFF; imm32 >>= 8;
3456     p[3] = imm32 & 0xFF; imm32 >>= 8;
3457     p[4] = imm32 & 0xFF; imm32 >>= 8;
3458     p[5] = imm32 & 0xFF; imm32 >>= 8;
3459     imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
3460     p[9]  = imm32 & 0xFF; imm32 >>= 8;
3461     p[10] = imm32 & 0xFF; imm32 >>= 8;
3462     p[11] = imm32 & 0xFF; imm32 >>= 8;
3463     p[12] = imm32 & 0xFF; imm32 >>= 8;
3464     VexInvalRange vir = { (HWord)place_to_patch, 14 };
3465     return vir;
3466  }
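
/* Worked example for patchProfInc_X86 (the address is hypothetical):
   with location_of_counter == (ULong*)0x0804A100, the two insns
   become
      83 05 00 A1 04 08 01     addl $1,0x0804A100
      83 15 04 A1 04 08 00     adcl $0,0x0804A104
   i.e. both immediates are stored little-endian, and the second
   address is the first plus 4, so the adcl carries the addl's
   overflow into the top half of the 64-bit counter. */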
3467  
3468  
3469  /*---------------------------------------------------------------*/
3470  /*--- end                                     host_x86_defs.c ---*/
3471  /*---------------------------------------------------------------*/
3472