1 
2 /*---------------------------------------------------------------*/
3 /*--- begin                                 host_amd64_isel.c ---*/
4 /*---------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2004-2010 OpenWorks LLP
11       info@open-works.net
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26    02110-1301, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 
30    Neither the names of the U.S. Department of Energy nor the
31    University of California nor the names of its contributors may be
32    used to endorse or promote products derived from this software
33    without prior written permission.
34 */
35 
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_amd64_defs.h"
47 
48 
49 /*---------------------------------------------------------*/
50 /*--- x87/SSE control word stuff                        ---*/
51 /*---------------------------------------------------------*/
52 
53 /* Vex-generated code expects to run with the FPU set as follows: all
54    exceptions masked, round-to-nearest, precision = 53 bits.  This
55    corresponds to an FPU control word value of 0x027F.
56 
57    Similarly the SSE control word (%mxcsr) should be 0x1F80.
58 
59    %fpucw and %mxcsr should have these values on entry to
60    Vex-generated code, and those values should be
61    unchanged at exit.
62 */
63 
64 #define DEFAULT_FPUCW 0x027F
65 
66 #define DEFAULT_MXCSR 0x1F80
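/* For reference, a field-by-field reading of the two constants above
   (standard x87/SSE control-register layout; not derived from anything
   else in this file):

      0x027F (FPUCW): low byte 0x7F = all six x87 exception masks
                      (IM/DM/ZM/OM/UM/PM, bits 0-5) plus reserved bit 6;
                      bits 9:8 = 10b  -> precision control, 53-bit mantissa;
                      bits 11:10 = 00b -> round to nearest.
      0x1F80 (MXCSR): bits 12-7 set   -> all six SSE exception masks;
                      bits 14:13 = 00b -> round to nearest;
                      FZ (bit 15) and DAZ (bit 6) clear.
*/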
67 
68 /* debugging only, do not use */
69 /* define DEFAULT_FPUCW 0x037F */
70 
71 
72 /*---------------------------------------------------------*/
73 /*--- misc helpers                                      ---*/
74 /*---------------------------------------------------------*/
75 
76 /* These are duplicated in guest-amd64/toIR.c */
77 static IRExpr* unop ( IROp op, IRExpr* a )
78 {
79    return IRExpr_Unop(op, a);
80 }
81 
82 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
83 {
84    return IRExpr_Binop(op, a1, a2);
85 }
86 
87 static IRExpr* bind ( Int binder )
88 {
89    return IRExpr_Binder(binder);
90 }
91 
92 
93 /*---------------------------------------------------------*/
94 /*--- ISelEnv                                           ---*/
95 /*---------------------------------------------------------*/
96 
97 /* This carries around:
98 
99    - A mapping from IRTemp to IRType, giving the type of any IRTemp we
100      might encounter.  This is computed before insn selection starts,
101      and does not change.
102 
103    - A mapping from IRTemp to HReg.  This tells the insn selector
104      which virtual register is associated with each IRTemp
105      temporary.  This is computed before insn selection starts, and
106      does not change.  We expect this mapping to map precisely the
107      same set of IRTemps as the type mapping does.
108 
109         - vregmap   holds the primary register for the IRTemp.
110         - vregmapHI is only used for 128-bit integer-typed
111              IRTemps.  It holds the identity of a second
112              64-bit virtual HReg, which holds the high half
113              of the value.
114 
115    - The code array, that is, the insns selected so far.
116 
117    - A counter, for generating new virtual registers.
118 
119    - The host subarchitecture we are selecting insns for.
120      This is set at the start and does not change.
121 
122    Note, this is all host-independent.  (JRS 20050201: well, kinda
123    ... not completely.  Compare with ISelEnv for X86.)
124 */
125 
126 typedef
127    struct {
128       IRTypeEnv*   type_env;
129 
130       HReg*        vregmap;
131       HReg*        vregmapHI;
132       Int          n_vregmap;
133 
134       HInstrArray* code;
135 
136       Int          vreg_ctr;
137 
138       UInt         hwcaps;
139    }
140    ISelEnv;
141 
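/* Illustrative only: the vregmap/vregmapHI arrays are expected to have been
   primed by the top-level block selector (not part of this extract), roughly
   along the lines of

      env->vregmap[t]   = newVRegI(env);   // or a vector vreg, chosen by
                                           // the IRTemp's type
      env->vregmapHI[t] = INVALID_HREG;    // a second I64 vreg only for
                                           // 128-bit IRTemps

   so that the lookup helpers below can simply index the arrays. */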
142 
143 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
144 {
145    vassert(tmp >= 0);
146    vassert(tmp < env->n_vregmap);
147    return env->vregmap[tmp];
148 }
149 
150 static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
151                               ISelEnv* env, IRTemp tmp )
152 {
153    vassert(tmp >= 0);
154    vassert(tmp < env->n_vregmap);
155    vassert(env->vregmapHI[tmp] != INVALID_HREG);
156    *vrLO = env->vregmap[tmp];
157    *vrHI = env->vregmapHI[tmp];
158 }
159 
160 static void addInstr ( ISelEnv* env, AMD64Instr* instr )
161 {
162    addHInstr(env->code, instr);
163    if (vex_traceflags & VEX_TRACE_VCODE) {
164       ppAMD64Instr(instr, True);
165       vex_printf("\n");
166    }
167 }
168 
169 static HReg newVRegI ( ISelEnv* env )
170 {
171    HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
172    env->vreg_ctr++;
173    return reg;
174 }
175 
176 //.. static HReg newVRegF ( ISelEnv* env )
177 //.. {
178 //..    HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
179 //..    env->vreg_ctr++;
180 //..    return reg;
181 //.. }
182 
183 static HReg newVRegV ( ISelEnv* env )
184 {
185    HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186    env->vreg_ctr++;
187    return reg;
188 }
189 
190 
191 /*---------------------------------------------------------*/
192 /*--- ISEL: Forward declarations                        ---*/
193 /*---------------------------------------------------------*/
194 
195 /* These are organised as iselXXX and iselXXX_wrk pairs.  The
196    iselXXX_wrk do the real work, but are not to be called directly.
197    For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
198    checks that all returned registers are virtual.  You should not
199    call the _wrk version directly.
200 */
201 static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
202 static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );
203 
204 static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
205 static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );
206 
207 static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
208 static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );
209 
210 static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
211 static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );
212 
213 static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
214 static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );
215 
216 static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
217                                           ISelEnv* env, IRExpr* e );
218 static void          iselInt128Expr     ( HReg* rHi, HReg* rLo,
219                                           ISelEnv* env, IRExpr* e );
220 
221 static AMD64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
222 static AMD64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );
223 
224 static HReg          iselDblExpr_wrk     ( ISelEnv* env, IRExpr* e );
225 static HReg          iselDblExpr         ( ISelEnv* env, IRExpr* e );
226 
227 static HReg          iselFltExpr_wrk     ( ISelEnv* env, IRExpr* e );
228 static HReg          iselFltExpr         ( ISelEnv* env, IRExpr* e );
229 
230 static HReg          iselVecExpr_wrk     ( ISelEnv* env, IRExpr* e );
231 static HReg          iselVecExpr         ( ISelEnv* env, IRExpr* e );
232 
233 
234 /*---------------------------------------------------------*/
235 /*--- ISEL: Misc helpers                                ---*/
236 /*---------------------------------------------------------*/
237 
238 static Bool sane_AMode ( AMD64AMode* am )
239 {
240    switch (am->tag) {
241       case Aam_IR:
242          return
243             toBool( hregClass(am->Aam.IR.reg) == HRcInt64
244                     && (hregIsVirtual(am->Aam.IR.reg)
245                         || am->Aam.IR.reg == hregAMD64_RBP()) );
246       case Aam_IRRS:
247          return
248             toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
249                     && hregIsVirtual(am->Aam.IRRS.base)
250                     && hregClass(am->Aam.IRRS.index) == HRcInt64
251                     && hregIsVirtual(am->Aam.IRRS.index) );
252       default:
253         vpanic("sane_AMode: unknown amd64 amode tag");
254    }
255 }
256 
257 
258 /* Can the lower 32 bits be signedly widened to produce the whole
259    64-bit value?  In other words, are the top 33 bits either all 0 or
260    all 1 ? */
261 static Bool fitsIn32Bits ( ULong x )
262 {
263    Long y0 = (Long)x;
264    Long y1 = y0;
265    y1 <<= 32;
266    y1 >>=/*s*/ 32;
267    return toBool(x == y1);
268 }
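/* Worked examples for the helper above (values chosen purely for
   illustration):

      fitsIn32Bits(0x000000007FFFFFFFULL) == True    top 33 bits all zero
      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True    top 33 bits all one
      fitsIn32Bits(0x0000000080000000ULL) == False   bit 31 set but the
                                                     upper 32 bits are zero
*/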
269 
270 /* Is this a 64-bit zero expression? */
271 
272 static Bool isZeroU64 ( IRExpr* e )
273 {
274    return e->tag == Iex_Const
275           && e->Iex.Const.con->tag == Ico_U64
276           && e->Iex.Const.con->Ico.U64 == 0ULL;
277 }
278 
279 static Bool isZeroU32 ( IRExpr* e )
280 {
281    return e->tag == Iex_Const
282           && e->Iex.Const.con->tag == Ico_U32
283           && e->Iex.Const.con->Ico.U32 == 0;
284 }
285 
286 /* Make an int reg-reg move. */
287 
288 static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
289 {
290    vassert(hregClass(src) == HRcInt64);
291    vassert(hregClass(dst) == HRcInt64);
292    return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
293 }
294 
295 /* Make a vector reg-reg move. */
296 
297 static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
298 {
299    vassert(hregClass(src) == HRcVec128);
300    vassert(hregClass(dst) == HRcVec128);
301    return AMD64Instr_SseReRg(Asse_MOV, src, dst);
302 }
303 
304 /* Advance/retreat %rsp by n. */
305 
306 static void add_to_rsp ( ISelEnv* env, Int n )
307 {
308    vassert(n > 0 && n < 256 && (n%8) == 0);
309    addInstr(env,
310             AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
311                                         hregAMD64_RSP()));
312 }
313 
314 static void sub_from_rsp ( ISelEnv* env, Int n )
315 {
316    vassert(n > 0 && n < 256 && (n%8) == 0);
317    addInstr(env,
318             AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
319                                         hregAMD64_RSP()));
320 }
321 
322 /* Push 64-bit constants on the stack. */
323 static void push_uimm64( ISelEnv* env, ULong uimm64 )
324 {
325    /* If uimm64 can be expressed as the sign extension of its
326       lower 32 bits, we can do it the easy way. */
327    Long simm64 = (Long)uimm64;
328    if ( simm64 == ((simm64 << 32) >> 32) ) {
329       addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
330    } else {
331       HReg tmp = newVRegI(env);
332       addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
333       addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
334    }
335 }
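/* For example (illustrative values): push_uimm64(env, 0xFFFFFFFFFFFFFFF0ULL)
   takes the single-instruction path, since pushq sign-extends the 32-bit
   immediate 0xFFFFFFF0 back to the full value; whereas
   push_uimm64(env, 0x1234567800000000ULL) is not representable that way and
   needs the Imm64-into-a-vreg-then-Push pair. */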
336 
337 //.. /* Given an amode, return one which references 4 bytes further
338 //..    along. */
339 //..
340 //.. static X86AMode* advance4 ( X86AMode* am )
341 //.. {
342 //..    X86AMode* am4 = dopyX86AMode(am);
343 //..    switch (am4->tag) {
344 //..       case Xam_IRRS:
345 //..          am4->Xam.IRRS.imm += 4; break;
346 //..       case Xam_IR:
347 //..          am4->Xam.IR.imm += 4; break;
348 //..       default:
349 //..          vpanic("advance4(x86,host)");
350 //..    }
351 //..    return am4;
352 //.. }
353 //..
354 //..
355 //.. /* Push an arg onto the host stack, in preparation for a call to a
356 //..    helper function of some kind.  Returns the number of 32-bit words
357 //..    pushed. */
358 //..
359 //.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
360 //.. {
361 //..    IRType arg_ty = typeOfIRExpr(env->type_env, arg);
362 //..    if (arg_ty == Ity_I32) {
363 //..       addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
364 //..       return 1;
365 //..    } else
366 //..    if (arg_ty == Ity_I64) {
367 //..       HReg rHi, rLo;
368 //..       iselInt64Expr(&rHi, &rLo, env, arg);
369 //..       addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
370 //..       addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
371 //..       return 2;
372 //..    }
373 //..    ppIRExpr(arg);
374 //..    vpanic("pushArg(x86): can't handle arg of this type");
375 //.. }
376 
377 
378 /* Used only in doHelperCall.  If possible, produce a single
379    instruction which computes 'e' into 'dst'.  If not possible, return
380    NULL. */
381 
382 static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
383                                                     HReg     dst,
384                                                     IRExpr*  e )
385 {
386    vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);
387 
388    if (e->tag == Iex_Const) {
389       vassert(e->Iex.Const.con->tag == Ico_U64);
390       if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
391          return AMD64Instr_Alu64R(
392                    Aalu_MOV,
393                    AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
394                    dst
395                 );
396       } else {
397          return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
398       }
399    }
400 
401    if (e->tag == Iex_RdTmp) {
402       HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
403       return mk_iMOVsd_RR(src, dst);
404    }
405 
406    if (e->tag == Iex_Get) {
407       vassert(e->Iex.Get.ty == Ity_I64);
408       return AMD64Instr_Alu64R(
409                 Aalu_MOV,
410                 AMD64RMI_Mem(
411                    AMD64AMode_IR(e->Iex.Get.offset,
412                                  hregAMD64_RBP())),
413                 dst);
414    }
415 
416    if (e->tag == Iex_Unop
417        && e->Iex.Unop.op == Iop_32Uto64
418        && e->Iex.Unop.arg->tag == Iex_RdTmp) {
419       HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
420       return AMD64Instr_MovxLQ(False, src, dst);
421    }
422 
423    if (0) { ppIRExpr(e); vex_printf("\n"); }
424 
425    return NULL;
426 }
427 
428 
429 /* Do a complete function call.  guard is a Ity_Bit expression
430    indicating whether or not the call happens.  If guard==NULL, the
431    call is unconditional. */
432 
433 static
434 void doHelperCall ( ISelEnv* env,
435                     Bool passBBP,
436                     IRExpr* guard, IRCallee* cee, IRExpr** args )
437 {
438    AMD64CondCode cc;
439    HReg          argregs[6];
440    HReg          tmpregs[6];
441    AMD64Instr*   fastinstrs[6];
442    Int           n_args, i, argreg;
443 
444    /* Marshal args for a call and do the call.
445 
446       If passBBP is True, %rbp (the baseblock pointer) is to be passed
447       as the first arg.
448 
449       This function only deals with a tiny set of possibilities, which
450       cover all helpers in practice.  The restrictions are that only
451       arguments in registers are supported, hence only 6x64 integer
452       bits in total can be passed.  In fact the only supported arg
453       type is I64.
454 
455       Generating code which is both efficient and correct when
456       parameters are to be passed in registers is difficult, for the
457       reasons elaborated in detail in comments attached to
458       doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
459       of the method described in those comments.
460 
461       The problem is split into two cases: the fast scheme and the
462       slow scheme.  In the fast scheme, arguments are computed
463       directly into the target (real) registers.  This is only safe
464       when we can be sure that computation of each argument will not
465       trash any real registers set by computation of any other
466       argument.
467 
468       In the slow scheme, all args are first computed into vregs, and
469       once they are all done, they are moved to the relevant real
470       regs.  This always gives correct code, but it also gives a bunch
471       of vreg-to-rreg moves which are usually redundant but are hard
472       for the register allocator to get rid of.
473 
474       To decide which scheme to use, all argument expressions are
475       first examined.  If they are all so simple that it is clear they
476       will be evaluated without use of any fixed registers, use the
477       fast scheme, else use the slow scheme.  Note also that only
478       unconditional calls may use the fast scheme, since having to
479       compute a condition expression could itself trash real
480       registers.
481 
482       Note this requires being able to examine an expression and
483       determine whether or not evaluation of it might use a fixed
484       register.  That requires knowledge of how the rest of this insn
485       selector works.  Currently just the following 3 are regarded as
486       safe -- hopefully they cover the majority of arguments in
487       practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
488    */
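   /* A concrete illustration (hypothetical helper and args): a call whose
      arguments are, say, a RdTmp, a 64-bit Get and a 64-bit constant can
      use the fast scheme -- each one becomes a single instruction writing
      %rdi/%rsi/%rdx directly.  A call with an argument like Add64(t1,t2)
      drops to the slow scheme, because iselIntExpr_single_instruction
      cannot reduce it to one instruction and its evaluation might, in
      general, touch fixed registers. */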
489 
490    /* Note that the cee->regparms field is meaningless on AMD64 host
491       (since there is only one calling convention) and so we always
492       ignore it. */
493 
494    n_args = 0;
495    for (i = 0; args[i]; i++)
496       n_args++;
497 
498    if (6 < n_args + (passBBP ? 1 : 0))
499       vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
500 
501    argregs[0] = hregAMD64_RDI();
502    argregs[1] = hregAMD64_RSI();
503    argregs[2] = hregAMD64_RDX();
504    argregs[3] = hregAMD64_RCX();
505    argregs[4] = hregAMD64_R8();
506    argregs[5] = hregAMD64_R9();
507 
508    tmpregs[0] = tmpregs[1] = tmpregs[2] =
509    tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;
510 
511    fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
512    fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;
513 
514    /* First decide which scheme (slow or fast) is to be used.  First
515       assume the fast scheme, and select slow if any contraindications
516       (wow) appear. */
517 
518    if (guard) {
519       if (guard->tag == Iex_Const
520           && guard->Iex.Const.con->tag == Ico_U1
521           && guard->Iex.Const.con->Ico.U1 == True) {
522          /* unconditional */
523       } else {
524          /* Not manifestly unconditional -- be conservative. */
525          goto slowscheme;
526       }
527    }
528 
529    /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
530       use the slow scheme.  Because this is tentative, we can't call
531       addInstr (that is, commit to) any instructions until we've
532       handled all the arguments.  So park the resulting instructions
533       in a buffer and emit that if we're successful. */
534 
535    /* FAST SCHEME */
536    argreg = 0;
537    if (passBBP) {
538       fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
539       argreg++;
540    }
541 
542    for (i = 0; i < n_args; i++) {
543       vassert(argreg < 6);
544       vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
545       fastinstrs[argreg]
546          = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
547       if (fastinstrs[argreg] == NULL)
548          goto slowscheme;
549       argreg++;
550    }
551 
552    /* Looks like we're in luck.  Emit the accumulated instructions and
553       move on to doing the call itself. */
554    vassert(argreg <= 6);
555    for (i = 0; i < argreg; i++)
556       addInstr(env, fastinstrs[i]);
557 
558    /* Fast scheme only applies for unconditional calls.  Hence: */
559    cc = Acc_ALWAYS;
560 
561    goto handle_call;
562 
563 
564    /* SLOW SCHEME; move via temporaries */
565   slowscheme:
566 #if 0
567 if (n_args > 0) {for (i = 0; args[i]; i++) {
568 ppIRExpr(args[i]); vex_printf(" "); }
569 vex_printf("\n");}
570 #endif
571    argreg = 0;
572 
573    if (passBBP) {
574       /* This is pretty stupid; better to move directly to rdi
575          after the rest of the args are done. */
576       tmpregs[argreg] = newVRegI(env);
577       addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
578       argreg++;
579    }
580 
581    for (i = 0; i < n_args; i++) {
582       vassert(argreg < 6);
583       vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
584       tmpregs[argreg] = iselIntExpr_R(env, args[i]);
585       argreg++;
586    }
587 
588    /* Now we can compute the condition.  We can't do it earlier
589       because the argument computations could trash the condition
590       codes.  Be a bit clever to handle the common case where the
591       guard is 1:Bit. */
592    cc = Acc_ALWAYS;
593    if (guard) {
594       if (guard->tag == Iex_Const
595           && guard->Iex.Const.con->tag == Ico_U1
596           && guard->Iex.Const.con->Ico.U1 == True) {
597          /* unconditional -- do nothing */
598       } else {
599          cc = iselCondCode( env, guard );
600       }
601    }
602 
603    /* Move the args to their final destinations. */
604    for (i = 0; i < argreg; i++) {
605       /* None of these insns, including any spill code that might
606          be generated, may alter the condition codes. */
607       addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
608    }
609 
610 
611    /* Finally, the call itself. */
612   handle_call:
613    addInstr(env, AMD64Instr_Call(
614                     cc,
615                     Ptr_to_ULong(cee->addr),
616                     n_args + (passBBP ? 1 : 0)
617                  )
618    );
619 }
620 
621 
622 /* Given a guest-state array descriptor, an index expression and a
623    bias, generate an AMD64AMode holding the relevant guest state
624    offset. */
625 
626 static
627 AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
628                                   IRExpr* off, Int bias )
629 {
630    HReg tmp, roff;
631    Int  elemSz = sizeofIRType(descr->elemTy);
632    Int  nElems = descr->nElems;
633 
634    /* Throw out any cases not generated by an amd64 front end.  In
635       theory there might be a day where we need to handle them -- if
636       we ever run non-amd64-guest on amd64 host. */
637 
638    if (nElems != 8 || (elemSz != 1 && elemSz != 8))
639       vpanic("genGuestArrayOffset(amd64 host)");
640 
641    /* Compute off into a reg, %off.  Then return:
642 
643          movq %off, %tmp
644          addq $bias, %tmp  (if bias != 0)
645          andq $7, %tmp
646          ... base(%rbp, %tmp, shift) ...
647    */
648    tmp  = newVRegI(env);
649    roff = iselIntExpr_R(env, off);
650    addInstr(env, mk_iMOVsd_RR(roff, tmp));
651    if (bias != 0) {
652       /* Make sure the bias is sane, in the sense that there are
653          no significant bits above bit 30 in it. */
654       vassert(-10000 < bias && bias < 10000);
655       addInstr(env,
656                AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
657    }
658    addInstr(env,
659             AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
660    vassert(elemSz == 1 || elemSz == 8);
661    return
662       AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
663                                     elemSz==8 ? 3 : 0);
664 }
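/* Example of the result (illustrative): for an 8-element array of I64 guest
   registers at guest-state offset descr->base, index expression `off` and
   bias b, the code above produces the amode

      descr->base(%rbp, %tmp, 8)      where %tmp = (off + b) & 7

   and for 1-byte elements the scale is 1 instead of 8. */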
665 
666 
667 /* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
668 static
669 void set_SSE_rounding_default ( ISelEnv* env )
670 {
671    /* pushq $DEFAULT_MXCSR
672       ldmxcsr 0(%rsp)
673       addq $8, %rsp
674    */
675    AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
676    addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
677    addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
678    add_to_rsp(env, 8);
679 }
680 
681 /* Mess with the FPU's rounding mode: set to the default rounding mode
682    (DEFAULT_FPUCW). */
683 static
684 void set_FPU_rounding_default ( ISelEnv* env )
685 {
686    /* movq $DEFAULT_FPUCW, -8(%rsp)
687       fldcw -8(%rsp)
688    */
689    AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
690    addInstr(env, AMD64Instr_Alu64M(
691                     Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
692    addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
693 }
694 
695 
696 /* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
697    expression denoting a value in the range 0 .. 3, indicating a round
698    mode encoded as per type IRRoundingMode.  Set the SSE machinery to
699    have the same rounding.
700 */
701 static
702 void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
703 {
704    /* Note: this sequence only makes sense because DEFAULT_MXCSR has
705       both rounding bits == 0.  If that wasn't the case, we couldn't
706       create a new rounding field simply by ORing the new value into
707       place. */
708 
709    /* movq $3, %reg
710       andq [[mode]], %reg  -- shouldn't be needed; paranoia
711       shlq $13, %reg
712       orq $DEFAULT_MXCSR, %reg
713       pushq %reg
714       ldmxcsr 0(%rsp)
715       addq $8, %rsp
716    */
717    HReg        reg      = newVRegI(env);
718    AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
719    addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
720    addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
721                                    iselIntExpr_RMI(env, mode), reg));
722    addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
723    addInstr(env, AMD64Instr_Alu64R(
724                     Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
725    addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
726    addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
727    add_to_rsp(env, 8);
728 }
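/* Why the OR trick above is enough: the IRRoundingMode encoding (0 = to
   nearest, 1 = toward -infinity, 2 = toward +infinity, 3 = toward zero)
   matches the SSE RC encoding, so shifting the mode left by 13 drops it
   straight into MXCSR bits 14:13 of a word whose RC field is zero. */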
729 
730 
731 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
732    expression denoting a value in the range 0 .. 3, indicating a round
733    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
734    the same rounding.
735 */
736 static
737 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
738 {
739    HReg rrm  = iselIntExpr_R(env, mode);
740    HReg rrm2 = newVRegI(env);
741    AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
742 
743    /* movq  %rrm, %rrm2
744       andq  $3, %rrm2   -- shouldn't be needed; paranoia
745       shlq  $10, %rrm2
746       orq   $DEFAULT_FPUCW, %rrm2
747       movq  %rrm2, -8(%rsp)
748       fldcw -8(%rsp)
749    */
750    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
751    addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
752    addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
753    addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
754                                    AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
755    addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
756                                    AMD64RI_Reg(rrm2), m8_rsp));
757    addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
758 }
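/* Likewise for the x87 unit: the same IRRoundingMode encoding matches the
   x87 RC field, which lives in FPUCW bits 11:10, hence the shift by 10. */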
759 
760 
761 /* Generate all-zeroes into a new vector register.
762 */
763 static HReg generate_zeroes_V128 ( ISelEnv* env )
764 {
765    HReg dst = newVRegV(env);
766    addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
767    return dst;
768 }
769 
770 /* Generate all-ones into a new vector register.
771 */
772 static HReg generate_ones_V128 ( ISelEnv* env )
773 {
774    HReg dst = newVRegV(env);
775    addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
776    return dst;
777 }
778 
779 
780 /* Generate !src into a new vector register.  Amazing that there isn't
781    a less crappy way to do this.
782 */
783 static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
784 {
785    HReg dst = generate_ones_V128(env);
786    addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
787    return dst;
788 }
789 
790 
791 /* Expand the given byte into a 64-bit word, by cloning each bit
792    8 times. */
793 static ULong bitmask8_to_bytemask64 ( UShort w8 )
794 {
795    vassert(w8 == (w8 & 0xFF));
796    ULong w64 = 0;
797    Int i;
798    for (i = 0; i < 8; i++) {
799       if (w8 & (1<<i))
800          w64 |= (0xFFULL << (8 * i));
801    }
802    return w64;
803 }
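/* Example: bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL, since bits
   0, 2, 5 and 7 of 0xA5 are set and so bytes 0, 2, 5 and 7 become 0xFF. */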
804 
805 
806 //.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
807 //..    after most non-simple FPU operations (simple = +, -, *, / and
808 //..    sqrt).
809 //..
810 //..    This could be done a lot more efficiently if needed, by loading
811 //..    zero and adding it to the value to be rounded (fldz ; faddp?).
812 //.. */
813 //.. static void roundToF64 ( ISelEnv* env, HReg reg )
814 //.. {
815 //..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
816 //..    sub_from_esp(env, 8);
817 //..    addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
818 //..    addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
819 //..    add_to_esp(env, 8);
820 //.. }
821 
822 
823 /*---------------------------------------------------------*/
824 /*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
825 /*---------------------------------------------------------*/
826 
827 /* Select insns for an integer-typed expression, and add them to the
828    code list.  Return a reg holding the result.  This reg will be a
829    virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
830    want to modify it, ask for a new vreg, copy it in there, and modify
831    the copy.  The register allocator will do its best to map both
832    vregs to the same real register, so the copies will often disappear
833    later in the game.
834 
835    This should handle expressions of 64, 32, 16 and 8-bit type.  All
836    results are returned in a 64-bit register.  For 32-, 16- and 8-bit
837    expressions, the upper 32/48/56 bits are arbitrary, so you should
838    mask or sign extend partial values if necessary.
839 */
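/* The don't-modify-the-returned-register rule plays out as the following
   idiom, used throughout the cases below (sketch only; `someExpr` is a
   stand-in):

      HReg src = iselIntExpr_R(env, someExpr);   // must be left untouched
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(src, dst));     // work on a fresh copy
      addInstr(env, AMD64Instr_Unary64(Aun_NOT, dst));
*/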
840 
841 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
842 {
843    HReg r = iselIntExpr_R_wrk(env, e);
844    /* sanity checks ... */
845 #  if 0
846    vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
847 #  endif
848    vassert(hregClass(r) == HRcInt64);
849    vassert(hregIsVirtual(r));
850    return r;
851 }
852 
853 /* DO NOT CALL THIS DIRECTLY ! */
854 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
855 {
856    /* Used for unary/binary SIMD64 ops. */
857    HWord fn = 0;
858    Bool second_is_UInt;
859 
860    MatchInfo mi;
861    DECLARE_PATTERN(p_1Uto8_64to1);
862    DECLARE_PATTERN(p_LDle8_then_8Uto64);
863    DECLARE_PATTERN(p_LDle16_then_16Uto64);
864 
865    IRType ty = typeOfIRExpr(env->type_env,e);
866    vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
867 
868    switch (e->tag) {
869 
870    /* --------- TEMP --------- */
871    case Iex_RdTmp: {
872       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
873    }
874 
875    /* --------- LOAD --------- */
876    case Iex_Load: {
877       HReg dst = newVRegI(env);
878       AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
879 
880       /* We can't handle big-endian loads, nor load-linked. */
881       if (e->Iex.Load.end != Iend_LE)
882          goto irreducible;
883 
884       if (ty == Ity_I64) {
885          addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
886                                          AMD64RMI_Mem(amode), dst) );
887          return dst;
888       }
889       if (ty == Ity_I32) {
890          addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
891          return dst;
892       }
893       if (ty == Ity_I16) {
894          addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
895          return dst;
896       }
897       if (ty == Ity_I8) {
898          addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
899          return dst;
900       }
901       break;
902    }
903 
904    /* --------- BINARY OP --------- */
905    case Iex_Binop: {
906       AMD64AluOp   aluOp;
907       AMD64ShiftOp shOp;
908 
909       /* Pattern: Sub64(0,x) */
910       /*     and: Sub32(0,x) */
911       if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
912           || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
913          HReg dst = newVRegI(env);
914          HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
915          addInstr(env, mk_iMOVsd_RR(reg,dst));
916          addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
917          return dst;
918       }
919 
920       /* Is it an addition or logical style op? */
921       switch (e->Iex.Binop.op) {
922          case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
923             aluOp = Aalu_ADD; break;
924          case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
925             aluOp = Aalu_SUB; break;
926          case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
927             aluOp = Aalu_AND; break;
928          case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
929             aluOp = Aalu_OR; break;
930          case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
931             aluOp = Aalu_XOR; break;
932          case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
933             aluOp = Aalu_MUL; break;
934          default:
935             aluOp = Aalu_INVALID; break;
936       }
937       /* For commutative ops we assume any literal
938          values are on the second operand. */
939       if (aluOp != Aalu_INVALID) {
940          HReg dst      = newVRegI(env);
941          HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
942          AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
943          addInstr(env, mk_iMOVsd_RR(reg,dst));
944          addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
945          return dst;
946       }
947 
948       /* Perhaps a shift op? */
949       switch (e->Iex.Binop.op) {
950          case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
951             shOp = Ash_SHL; break;
952          case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
953             shOp = Ash_SHR; break;
954          case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
955             shOp = Ash_SAR; break;
956          default:
957             shOp = Ash_INVALID; break;
958       }
959       if (shOp != Ash_INVALID) {
960          HReg dst = newVRegI(env);
961 
962          /* regL = the value to be shifted */
963          HReg regL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
964          addInstr(env, mk_iMOVsd_RR(regL,dst));
965 
966          /* Do any necessary widening for 32/16/8 bit operands */
967          switch (e->Iex.Binop.op) {
968             case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
969                break;
970             case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
971                break;
972             case Iop_Shr8:
973                addInstr(env, AMD64Instr_Alu64R(
974                                 Aalu_AND, AMD64RMI_Imm(0xFF), dst));
975                break;
976             case Iop_Shr16:
977                addInstr(env, AMD64Instr_Alu64R(
978                                 Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
979                break;
980             case Iop_Shr32:
981                addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
982                break;
983             case Iop_Sar8:
984                addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
985                addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
986                break;
987             case Iop_Sar16:
988                addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
989                addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
990                break;
991             case Iop_Sar32:
992                addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
993                break;
994             default:
995                ppIROp(e->Iex.Binop.op);
996                vassert(0);
997          }
998 
999          /* Now consider the shift amount.  If it's a literal, we
1000             can do a much better job than the general case. */
1001          if (e->Iex.Binop.arg2->tag == Iex_Const) {
1002             /* assert that the IR is well-typed */
1003             Int nshift;
1004             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1005             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1006             vassert(nshift >= 0);
1007             if (nshift > 0)
1008                /* Can't allow nshift==0 since that means %cl */
1009                addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
1010          } else {
1011             /* General case; we have to force the amount into %cl. */
1012             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1013             addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
1014             addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
1015          }
1016          return dst;
1017       }
1018 
1019       /* Deal with 64-bit SIMD binary ops */
1020       second_is_UInt = False;
1021       switch (e->Iex.Binop.op) {
1022          case Iop_Add8x8:
1023             fn = (HWord)h_generic_calc_Add8x8; break;
1024          case Iop_Add16x4:
1025             fn = (HWord)h_generic_calc_Add16x4; break;
1026          case Iop_Add32x2:
1027             fn = (HWord)h_generic_calc_Add32x2; break;
1028 
1029          case Iop_Avg8Ux8:
1030             fn = (HWord)h_generic_calc_Avg8Ux8; break;
1031          case Iop_Avg16Ux4:
1032             fn = (HWord)h_generic_calc_Avg16Ux4; break;
1033 
1034          case Iop_CmpEQ8x8:
1035             fn = (HWord)h_generic_calc_CmpEQ8x8; break;
1036          case Iop_CmpEQ16x4:
1037             fn = (HWord)h_generic_calc_CmpEQ16x4; break;
1038          case Iop_CmpEQ32x2:
1039             fn = (HWord)h_generic_calc_CmpEQ32x2; break;
1040 
1041          case Iop_CmpGT8Sx8:
1042             fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
1043          case Iop_CmpGT16Sx4:
1044             fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
1045          case Iop_CmpGT32Sx2:
1046             fn = (HWord)h_generic_calc_CmpGT32Sx2; break;
1047 
1048          case Iop_InterleaveHI8x8:
1049             fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
1050          case Iop_InterleaveLO8x8:
1051             fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
1052          case Iop_InterleaveHI16x4:
1053             fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
1054          case Iop_InterleaveLO16x4:
1055             fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
1056          case Iop_InterleaveHI32x2:
1057             fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
1058          case Iop_InterleaveLO32x2:
1059             fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
1060          case Iop_CatOddLanes16x4:
1061             fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
1062          case Iop_CatEvenLanes16x4:
1063             fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
1064          case Iop_Perm8x8:
1065             fn = (HWord)h_generic_calc_Perm8x8; break;
1066 
1067          case Iop_Max8Ux8:
1068             fn = (HWord)h_generic_calc_Max8Ux8; break;
1069          case Iop_Max16Sx4:
1070             fn = (HWord)h_generic_calc_Max16Sx4; break;
1071          case Iop_Min8Ux8:
1072             fn = (HWord)h_generic_calc_Min8Ux8; break;
1073          case Iop_Min16Sx4:
1074             fn = (HWord)h_generic_calc_Min16Sx4; break;
1075 
1076          case Iop_Mul16x4:
1077             fn = (HWord)h_generic_calc_Mul16x4; break;
1078          case Iop_Mul32x2:
1079             fn = (HWord)h_generic_calc_Mul32x2; break;
1080          case Iop_MulHi16Sx4:
1081             fn = (HWord)h_generic_calc_MulHi16Sx4; break;
1082          case Iop_MulHi16Ux4:
1083             fn = (HWord)h_generic_calc_MulHi16Ux4; break;
1084 
1085          case Iop_QAdd8Sx8:
1086             fn = (HWord)h_generic_calc_QAdd8Sx8; break;
1087          case Iop_QAdd16Sx4:
1088             fn = (HWord)h_generic_calc_QAdd16Sx4; break;
1089          case Iop_QAdd8Ux8:
1090             fn = (HWord)h_generic_calc_QAdd8Ux8; break;
1091          case Iop_QAdd16Ux4:
1092             fn = (HWord)h_generic_calc_QAdd16Ux4; break;
1093 
1094          case Iop_QNarrow32Sx2:
1095             fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
1096          case Iop_QNarrow16Sx4:
1097             fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
1098          case Iop_QNarrow16Ux4:
1099             fn = (HWord)h_generic_calc_QNarrow16Ux4; break;
1100 
1101          case Iop_QSub8Sx8:
1102             fn = (HWord)h_generic_calc_QSub8Sx8; break;
1103          case Iop_QSub16Sx4:
1104             fn = (HWord)h_generic_calc_QSub16Sx4; break;
1105          case Iop_QSub8Ux8:
1106             fn = (HWord)h_generic_calc_QSub8Ux8; break;
1107          case Iop_QSub16Ux4:
1108             fn = (HWord)h_generic_calc_QSub16Ux4; break;
1109 
1110          case Iop_Sub8x8:
1111             fn = (HWord)h_generic_calc_Sub8x8; break;
1112          case Iop_Sub16x4:
1113             fn = (HWord)h_generic_calc_Sub16x4; break;
1114          case Iop_Sub32x2:
1115             fn = (HWord)h_generic_calc_Sub32x2; break;
1116 
1117          case Iop_ShlN32x2:
1118             fn = (HWord)h_generic_calc_ShlN32x2;
1119             second_is_UInt = True;
1120             break;
1121          case Iop_ShlN16x4:
1122             fn = (HWord)h_generic_calc_ShlN16x4;
1123             second_is_UInt = True;
1124             break;
1125          case Iop_ShlN8x8:
1126             fn = (HWord)h_generic_calc_ShlN8x8;
1127             second_is_UInt = True;
1128             break;
1129          case Iop_ShrN32x2:
1130             fn = (HWord)h_generic_calc_ShrN32x2;
1131             second_is_UInt = True;
1132             break;
1133          case Iop_ShrN16x4:
1134             fn = (HWord)h_generic_calc_ShrN16x4;
1135             second_is_UInt = True;
1136             break;
1137          case Iop_SarN32x2:
1138             fn = (HWord)h_generic_calc_SarN32x2;
1139             second_is_UInt = True;
1140             break;
1141          case Iop_SarN16x4:
1142             fn = (HWord)h_generic_calc_SarN16x4;
1143             second_is_UInt = True;
1144             break;
1145          case Iop_SarN8x8:
1146             fn = (HWord)h_generic_calc_SarN8x8;
1147             second_is_UInt = True;
1148             break;
1149 
1150          default:
1151             fn = (HWord)0; break;
1152       }
1153       if (fn != (HWord)0) {
1154          /* Note: the following assumes all helpers are of signature
1155                ULong fn ( ULong, ULong ), and they are
1156             not marked as regparm functions.
1157          */
1158          HReg dst  = newVRegI(env);
1159          HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1160          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1161          if (second_is_UInt)
1162             addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
1163          addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
1164          addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
1165          addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
1166          addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1167          return dst;
1168       }
1169 
1170       /* Handle misc other ops. */
1171 
1172       if (e->Iex.Binop.op == Iop_Max32U) {
1173          /* This generates a truly rotten piece of code.  Just as well
1174             it doesn't happen very often. */
1175          HReg src1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1176          HReg src1L = newVRegI(env);
1177          HReg src2  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1178          HReg src2L = newVRegI(env);
1179          HReg dst   = newVRegI(env);
1180          addInstr(env, mk_iMOVsd_RR(src1,dst));
1181          addInstr(env, mk_iMOVsd_RR(src1,src1L));
1182          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
1183          addInstr(env, mk_iMOVsd_RR(src2,src2L));
1184          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
1185          addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
1186          addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
1187          return dst;
1188       }
1189 
1190       if (e->Iex.Binop.op == Iop_DivModS64to32
1191           || e->Iex.Binop.op == Iop_DivModU64to32) {
1192          /* 64 x 32 -> (32(rem),32(div)) division */
1193          /* Get the 64-bit operand into edx:eax, and the other into
1194             any old R/M. */
1195          HReg      rax     = hregAMD64_RAX();
1196          HReg      rdx     = hregAMD64_RDX();
1197          HReg      dst     = newVRegI(env);
1198          Bool      syned   = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
1199          AMD64RM*  rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
1200          /* Compute the left operand into a reg, and then
1201             put the top half in edx and the bottom in eax. */
1202          HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1203          addInstr(env, mk_iMOVsd_RR(left64, rdx));
1204          addInstr(env, mk_iMOVsd_RR(left64, rax));
1205          addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
1206          addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
1207 	 addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
1208 	 addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
1209          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
1210          addInstr(env, mk_iMOVsd_RR(rax, dst));
1211          addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
1212          return dst;
1213       }
1214 
1215       if (e->Iex.Binop.op == Iop_32HLto64) {
1216          HReg hi32  = newVRegI(env);
1217          HReg lo32  = newVRegI(env);
1218          HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1219          HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1220          addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
1221          addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
1222          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
1223 	 addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
1224          addInstr(env, AMD64Instr_Alu64R(
1225                           Aalu_OR, AMD64RMI_Reg(lo32), hi32));
1226          return hi32;
1227       }
1228 
1229       if (e->Iex.Binop.op == Iop_16HLto32) {
1230          HReg hi16  = newVRegI(env);
1231          HReg lo16  = newVRegI(env);
1232          HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1233          HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1234          addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1235          addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1236          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
1237          addInstr(env, AMD64Instr_Alu64R(
1238                           Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
1239          addInstr(env, AMD64Instr_Alu64R(
1240                           Aalu_OR, AMD64RMI_Reg(lo16), hi16));
1241          return hi16;
1242       }
1243 
1244       if (e->Iex.Binop.op == Iop_8HLto16) {
1245          HReg hi8  = newVRegI(env);
1246          HReg lo8  = newVRegI(env);
1247          HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1248          HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1249          addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1250          addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1251          addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
1252          addInstr(env, AMD64Instr_Alu64R(
1253                           Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
1254          addInstr(env, AMD64Instr_Alu64R(
1255                           Aalu_OR, AMD64RMI_Reg(lo8), hi8));
1256          return hi8;
1257       }
1258 
1259       if (e->Iex.Binop.op == Iop_MullS32
1260           || e->Iex.Binop.op == Iop_MullS16
1261           || e->Iex.Binop.op == Iop_MullS8
1262           || e->Iex.Binop.op == Iop_MullU32
1263           || e->Iex.Binop.op == Iop_MullU16
1264           || e->Iex.Binop.op == Iop_MullU8) {
1265          HReg a32   = newVRegI(env);
1266          HReg b32   = newVRegI(env);
1267          HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1268          HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
1269          Int          shift  = 0;
1270          AMD64ShiftOp shr_op = Ash_SHR;
1271          switch (e->Iex.Binop.op) {
1272             case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
1273             case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
1274             case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
1275             case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
1276             case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
1277             case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
1278             default: vassert(0);
1279          }
1280 
1281          addInstr(env, mk_iMOVsd_RR(a32s, a32));
1282          addInstr(env, mk_iMOVsd_RR(b32s, b32));
1283          addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
1284          addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
1285          addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
1286          addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
1287          addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
1288          return b32;
1289       }
1290 
1291       if (e->Iex.Binop.op == Iop_CmpF64) {
1292          HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1293          HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1294          HReg dst = newVRegI(env);
1295          addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
1296          /* Mask out irrelevant parts of the result so as to conform
1297             to the CmpF64 definition. */
1298          addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
1299          return dst;
1300       }
1301 
1302       if (e->Iex.Binop.op == Iop_F64toI32S
1303           || e->Iex.Binop.op == Iop_F64toI64S) {
1304          Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
1305          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
1306          HReg dst = newVRegI(env);
1307          set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
1308          addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
1309          set_SSE_rounding_default(env);
1310          return dst;
1311       }
1312 
1313 //..       if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
1314 //..          Int  sz  = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
1315 //..          HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
1316 //..          HReg dst = newVRegI(env);
1317 //..
1318 //..          /* Used several times ... */
1319 //..          X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1320 //..
1321 //..          /* rf now holds the value to be converted, and rrm holds the
1322 //.. 	    rounding mode value, encoded as per the IRRoundingMode
1323 //.. 	    enum.  The first thing to do is set the FPU's rounding
1324 //.. 	    mode accordingly. */
1325 //..
1326 //..          /* Create a space for the format conversion. */
1327 //..          /* subl $4, %esp */
1328 //..          sub_from_esp(env, 4);
1329 //..
1330 //.. 	 /* Set host rounding mode */
1331 //.. 	 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1332 //..
1333 //..          /* gistw/l %rf, 0(%esp) */
1334 //..          addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
1335 //..
1336 //..          if (sz == 2) {
1337 //..             /* movzwl 0(%esp), %dst */
1338 //..             addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1339 //..          } else {
1340 //..             /* movl 0(%esp), %dst */
1341 //..             vassert(sz == 4);
1342 //..             addInstr(env, X86Instr_Alu32R(
1343 //..                              Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1344 //..          }
1345 //..
1346 //.. 	 /* Restore default FPU rounding. */
1347 //..          set_FPU_rounding_default( env );
1348 //..
1349 //..          /* addl $4, %esp */
1350 //.. 	 add_to_esp(env, 4);
1351 //..          return dst;
1352 //..       }
1353 //..
1354 //..       /* C3210 flags following FPU partial remainder (fprem), both
1355 //..          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1356 //..       if (e->Iex.Binop.op == Iop_PRemC3210F64
1357 //..           || e->Iex.Binop.op == Iop_PRem1C3210F64) {
1358 //..          HReg junk = newVRegF(env);
1359 //..          HReg dst  = newVRegI(env);
1360 //..          HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
1361 //..          HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
1362 //..          addInstr(env, X86Instr_FpBinary(
1363 //..                            e->Iex.Binop.op==Iop_PRemC3210F64
1364 //..                               ? Xfp_PREM : Xfp_PREM1,
1365 //..                            srcL,srcR,junk
1366 //..                  ));
1367 //..          /* The previous pseudo-insn will have left the FPU's C3210
1368 //..             flags set correctly.  So bag them. */
1369 //..          addInstr(env, X86Instr_FpStSW_AX());
1370 //..          addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1371 //.. 	 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1372 //..          return dst;
1373 //..       }
1374 
1375       break;
1376    }
1377 
1378    /* --------- UNARY OP --------- */
1379    case Iex_Unop: {
1380 
1381       /* 1Uto8(64to1(expr64)) */
1382       {
1383          DEFINE_PATTERN( p_1Uto8_64to1,
1384                          unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1385          if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
1386             IRExpr* expr64 = mi.bindee[0];
1387             HReg    dst    = newVRegI(env);
1388             HReg    src    = iselIntExpr_R(env, expr64);
1389             addInstr(env, mk_iMOVsd_RR(src,dst) );
1390             addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1391                                             AMD64RMI_Imm(1), dst));
1392             return dst;
1393          }
1394       }
1395 
1396       /* 8Uto64(LDle(expr64)) */
1397       {
1398          DEFINE_PATTERN(p_LDle8_then_8Uto64,
1399                         unop(Iop_8Uto64,
1400                              IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1401          if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1402             HReg dst = newVRegI(env);
1403             AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1404             addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1405             return dst;
1406          }
1407       }
1408 
1409       /* 16Uto64(LDle(expr64)) */
1410       {
1411          DEFINE_PATTERN(p_LDle16_then_16Uto64,
1412                         unop(Iop_16Uto64,
1413                              IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1414          if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1415             HReg dst = newVRegI(env);
1416             AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1417             addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1418             return dst;
1419          }
1420       }
1421 
1422       switch (e->Iex.Unop.op) {
1423          case Iop_32Uto64:
1424          case Iop_32Sto64: {
1425             HReg dst = newVRegI(env);
1426             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1427             addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
1428                                             src, dst) );
1429             return dst;
1430          }
1431          case Iop_128HIto64: {
1432             HReg rHi, rLo;
1433             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1434             return rHi; /* and abandon rLo */
1435          }
1436          case Iop_128to64: {
1437             HReg rHi, rLo;
1438             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1439             return rLo; /* and abandon rHi */
1440          }
1441          case Iop_8Uto16:
1442          case Iop_8Uto32:
1443          case Iop_8Uto64:
1444          case Iop_16Uto64:
1445          case Iop_16Uto32: {
1446             HReg dst     = newVRegI(env);
1447             HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
1448             Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1449                                    || e->Iex.Unop.op==Iop_16Uto64 );
1450             UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
1451             addInstr(env, mk_iMOVsd_RR(src,dst) );
1452             addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1453                                             AMD64RMI_Imm(mask), dst));
1454             return dst;
1455          }
1456          case Iop_8Sto16:
1457          case Iop_8Sto64:
1458          case Iop_8Sto32:
1459          case Iop_16Sto32:
1460          case Iop_16Sto64: {
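            /* Sign-extend the low 8 or 16 bits: shift them up to the
               top of the 64-bit register, then arithmetic-shift back
               down, replicating the sign bit through the upper bits. */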
1461             HReg dst     = newVRegI(env);
1462             HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
1463             Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1464                                    || e->Iex.Unop.op==Iop_16Sto64 );
1465             UInt amt     = srcIs16 ? 48 : 56;
1466             addInstr(env, mk_iMOVsd_RR(src,dst) );
1467             addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1468             addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
1469             return dst;
1470          }
1471          case Iop_Not8:
1472          case Iop_Not16:
1473          case Iop_Not32:
1474          case Iop_Not64: {
1475             HReg dst = newVRegI(env);
1476             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1477             addInstr(env, mk_iMOVsd_RR(src,dst) );
1478             addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
1479             return dst;
1480          }
1481 //..          case Iop_64HIto32: {
1482 //..             HReg rHi, rLo;
1483 //..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1484 //..             return rHi; /* and abandon rLo .. poor wee thing :-) */
1485 //..          }
1486 //..          case Iop_64to32: {
1487 //..             HReg rHi, rLo;
1488 //..             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1489 //..             return rLo; /* similar stupid comment to the above ... */
1490 //..          }
1491          case Iop_16HIto8:
1492          case Iop_32HIto16:
1493          case Iop_64HIto32: {
1494             HReg dst  = newVRegI(env);
1495             HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1496             Int shift = 0;
1497             switch (e->Iex.Unop.op) {
1498                case Iop_16HIto8:  shift = 8;  break;
1499                case Iop_32HIto16: shift = 16; break;
1500                case Iop_64HIto32: shift = 32; break;
1501                default: vassert(0);
1502             }
1503             addInstr(env, mk_iMOVsd_RR(src,dst) );
1504             addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
1505             return dst;
1506          }
1507          case Iop_1Uto64:
1508          case Iop_1Uto32:
1509          case Iop_1Uto8: {
1510             HReg dst           = newVRegI(env);
1511             AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1512             addInstr(env, AMD64Instr_Set64(cond,dst));
1513             return dst;
1514          }
1515          case Iop_1Sto8:
1516          case Iop_1Sto16:
1517          case Iop_1Sto32:
1518          case Iop_1Sto64: {
1519             /* could do better than this, but for now ... */
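            /* Set64 leaves 0 or 1 in dst; shifting left and then
               arithmetically right by 63 smears that single bit across
               the whole register, giving 0 or all-ones. */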
1520             HReg dst           = newVRegI(env);
1521             AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1522             addInstr(env, AMD64Instr_Set64(cond,dst));
1523             addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1524             addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1525             return dst;
1526          }
1527          case Iop_Ctz64: {
1528             /* Count trailing zeroes, implemented by amd64 'bsfq' */
1529             HReg dst = newVRegI(env);
1530             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1531             addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1532             return dst;
1533          }
1534          case Iop_Clz64: {
1535             /* Count leading zeroes.  Do 'bsrq' to establish the index
1536                of the highest set bit, and subtract that value from
1537                63. */
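            /* e.g. for an argument of 1, bsrq yields index 0, so the
               result is 63 - 0 = 63. */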
1538             HReg tmp = newVRegI(env);
1539             HReg dst = newVRegI(env);
1540             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1541             addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1542             addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1543                                             AMD64RMI_Imm(63), dst));
1544             addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1545                                             AMD64RMI_Reg(tmp), dst));
1546             return dst;
1547          }
1548 
1549          case Iop_CmpwNEZ64: {
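            /* (src | -src) has its top bit set iff src is nonzero, so
               the neg/or sequence followed by an arithmetic shift right
               by 63 yields all-ones for a nonzero argument and zero
               otherwise. */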
1550             HReg dst = newVRegI(env);
1551             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1552             addInstr(env, mk_iMOVsd_RR(src,dst));
1553             addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1554             addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1555                                             AMD64RMI_Reg(src), dst));
1556             addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1557             return dst;
1558          }
1559 
1560          case Iop_CmpwNEZ32: {
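            /* Same neg/or/sar trick as for CmpwNEZ64, but zero-extend
               the 32-bit argument to 64 bits first, so that any junk in
               the upper half of the register cannot affect the result. */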
1561             HReg src = newVRegI(env);
1562             HReg dst = newVRegI(env);
1563             HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1564             addInstr(env, mk_iMOVsd_RR(pre,src));
1565             addInstr(env, AMD64Instr_MovxLQ(False, src, src));
1566             addInstr(env, mk_iMOVsd_RR(src,dst));
1567             addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1568             addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1569                                             AMD64RMI_Reg(src), dst));
1570             addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1571             return dst;
1572          }
1573 
1574          case Iop_Left8:
1575          case Iop_Left16:
1576          case Iop_Left32:
1577          case Iop_Left64: {
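            /* These compute x | -x, which sets every bit from the
               lowest set bit of x upwards (and is zero for x == 0). */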
1578             HReg dst = newVRegI(env);
1579             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1580             addInstr(env, mk_iMOVsd_RR(src, dst));
1581             addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1582             addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
1583             return dst;
1584          }
1585 
1586          case Iop_V128to32: {
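            /* Spill the whole vector to the 16 bytes just below %rsp
               (without moving %rsp) and reload the low 32 bits with
               zero-widening. */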
1587             HReg        dst     = newVRegI(env);
1588             HReg        vec     = iselVecExpr(env, e->Iex.Unop.arg);
1589             AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1590             addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1591             addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1592             return dst;
1593          }
1594 
1595          /* V128{HI}to64 */
1596          case Iop_V128HIto64:
1597          case Iop_V128to64: {
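            /* Carve out a 16-byte slot on the stack, dump the vector
               there, and reload either the low (offset 0) or the high
               (offset 8) 64-bit half. */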
1598             Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
1599             HReg dst = newVRegI(env);
1600             HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1601             AMD64AMode* rsp0 = AMD64AMode_IR(0,   hregAMD64_RSP());
1602             AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
1603             sub_from_rsp(env, 16);
1604             addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
1605             addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1606                                              AMD64RMI_Mem(rspN), dst ));
1607             add_to_rsp(env, 16);
1608             return dst;
1609          }
1610 
1611          /* ReinterpF64asI64(e) */
1612          /* Given an IEEE754 double, produce an I64 with the same bit
1613             pattern. */
1614          case Iop_ReinterpF64asI64: {
1615             AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1616             HReg        dst    = newVRegI(env);
1617             HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
1618             /* paranoia */
1619             set_SSE_rounding_default(env);
1620             addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1621             addInstr(env, AMD64Instr_Alu64R(
1622                              Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1623             return dst;
1624          }
1625 
1626          /* ReinterpF32asI32(e) */
1627          /* Given an IEEE754 single, produce an I64 with the same bit
1628             pattern in the lower half. */
1629          case Iop_ReinterpF32asI32: {
1630             AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1631             HReg        dst    = newVRegI(env);
1632             HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
1633             /* paranoia */
1634             set_SSE_rounding_default(env);
1635             addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1636             addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1637             return dst;
1638          }
1639 
1640          case Iop_16to8:
1641          case Iop_32to8:
1642          case Iop_64to8:
1643          case Iop_32to16:
1644          case Iop_64to16:
1645          case Iop_64to32:
1646             /* These are no-ops. */
1647             return iselIntExpr_R(env, e->Iex.Unop.arg);
1648 
1649          default:
1650             break;
1651       }
1652 
1653       /* Deal with unary 64-bit SIMD ops. */
1654       switch (e->Iex.Unop.op) {
1655          case Iop_CmpNEZ32x2:
1656             fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1657          case Iop_CmpNEZ16x4:
1658             fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1659          case Iop_CmpNEZ8x8:
1660             fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1661          default:
1662             fn = (HWord)0; break;
1663       }
1664       if (fn != (HWord)0) {
1665          /* Note: the following assumes that all helpers have the
1666             signature
1667                ULong fn ( ULong ),
1668             and that none of them are marked as regparm functions.
1669          */
1670          HReg dst = newVRegI(env);
1671          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1672          addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
1673          addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
1674          addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1675          return dst;
1676       }
1677 
1678       break;
1679    }
1680 
1681    /* --------- GET --------- */
1682    case Iex_Get: {
1683       if (ty == Ity_I64) {
1684          HReg dst = newVRegI(env);
1685          addInstr(env, AMD64Instr_Alu64R(
1686                           Aalu_MOV,
1687                           AMD64RMI_Mem(
1688                              AMD64AMode_IR(e->Iex.Get.offset,
1689                                            hregAMD64_RBP())),
1690                           dst));
1691          return dst;
1692       }
1693       if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1694          HReg dst = newVRegI(env);
1695          addInstr(env, AMD64Instr_LoadEX(
1696                           toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
1697                           False,
1698                           AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1699                           dst));
1700          return dst;
1701       }
1702       break;
1703    }
1704 
1705    case Iex_GetI: {
1706       AMD64AMode* am
1707          = genGuestArrayOffset(
1708               env, e->Iex.GetI.descr,
1709                    e->Iex.GetI.ix, e->Iex.GetI.bias );
1710       HReg dst = newVRegI(env);
1711       if (ty == Ity_I8) {
1712          addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1713          return dst;
1714       }
1715       if (ty == Ity_I64) {
1716          addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1717          return dst;
1718       }
1719       break;
1720    }
1721 
1722    /* --------- CCALL --------- */
1723    case Iex_CCall: {
1724       HReg    dst = newVRegI(env);
1725       vassert(ty == e->Iex.CCall.retty);
1726 
1727       /* be very restrictive for now.  Only 64-bit ints allowed
1728          for args, and 64 or 32 bits for return type. */
1729       if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
1730          goto irreducible;
1731 
1732       /* Marshal args, do the call. */
1733       doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1734 
1735       /* Move to dst, and zero out the top 32 bits if the result type is
1736          Ity_I32.  Probably overkill, but still .. */
1737       if (e->Iex.CCall.retty == Ity_I64)
1738          addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1739       else
1740          addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1741 
1742       return dst;
1743    }
1744 
1745    /* --------- LITERAL --------- */
1746    /* 64/32/16/8-bit literals */
1747    case Iex_Const:
1748       if (ty == Ity_I64) {
1749          HReg r = newVRegI(env);
1750          addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1751          return r;
1752       } else {
1753          AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1754          HReg      r   = newVRegI(env);
1755          addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1756          return r;
1757       }
1758 
1759    /* --------- MULTIPLEX --------- */
1760    case Iex_Mux0X: {
1761      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1762          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
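        /* Start with exprX in dst, then conditionally overwrite it
           with expr0 when the low byte of the condition is zero. */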
1763         HReg     r8;
1764         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1765         AMD64RM* r0  = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1766         HReg dst = newVRegI(env);
1767         addInstr(env, mk_iMOVsd_RR(rX,dst));
1768         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
1769         addInstr(env, AMD64Instr_Test64(0xFF, r8));
1770         addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
1771         return dst;
1772       }
1773       break;
1774    }
1775 
1776    /* --------- TERNARY OP --------- */
1777    case Iex_Triop: {
1778       /* C3210 flags following FPU partial remainder (fprem), both
1779          IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1780       if (e->Iex.Triop.op == Iop_PRemC3210F64
1781           || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1782          AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1783          HReg        arg1   = iselDblExpr(env, e->Iex.Triop.arg2);
1784          HReg        arg2   = iselDblExpr(env, e->Iex.Triop.arg3);
1785          HReg        dst    = newVRegI(env);
1786          addInstr(env, AMD64Instr_A87Free(2));
1787 
1788          /* one arg -> top of x87 stack */
1789          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
1790          addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
1791 
1792          /* other arg -> top of x87 stack */
1793          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
1794          addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
1795 
1796          switch (e->Iex.Triop.op) {
1797             case Iop_PRemC3210F64:
1798                addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1799                break;
1800             case Iop_PRem1C3210F64:
1801                addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1802                break;
1803             default:
1804                vassert(0);
1805          }
1806          /* Ignore the result, and instead make off with the FPU's
1807             C3210 flags (in the status word). */
1808          addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1809          addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1810          addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1811          return dst;
1812       }
1813       break;
1814    }
1815 
1816    default:
1817       break;
1818    } /* switch (e->tag) */
1819 
1820    /* We get here if no pattern matched. */
1821   irreducible:
1822    ppIRExpr(e);
1823    vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1824 }
1825 
1826 
1827 /*---------------------------------------------------------*/
1828 /*--- ISEL: Integer expression auxiliaries              ---*/
1829 /*---------------------------------------------------------*/
1830 
1831 /* --------------------- AMODEs --------------------- */
1832 
1833 /* Return an AMode which computes the value of the specified
1834    expression, possibly also adding insns to the code list as a
1835    result.  The expression may only be a 64-bit one.
1836 */
1837 
1838 static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1839 {
1840    AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1841    vassert(sane_AMode(am));
1842    return am;
1843 }
1844 
1845 /* DO NOT CALL THIS DIRECTLY ! */
1846 static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1847 {
1848    MatchInfo mi;
1849    DECLARE_PATTERN(p_complex);
1850    IRType ty = typeOfIRExpr(env->type_env,e);
1851    vassert(ty == Ity_I64);
1852 
1853    /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1854    /*              bind0        bind1  bind2   bind3   */
1855    DEFINE_PATTERN(p_complex,
1856       binop( Iop_Add64,
1857              binop( Iop_Add64,
1858                     bind(0),
1859                     binop(Iop_Shl64, bind(1), bind(2))
1860                   ),
1861              bind(3)
1862            )
1863    );
1864    if (matchIRExpr(&mi, p_complex, e)) {
1865       IRExpr* expr1  = mi.bindee[0];
1866       IRExpr* expr2  = mi.bindee[1];
1867       IRExpr* imm8   = mi.bindee[2];
1868       IRExpr* simm32 = mi.bindee[3];
1869       if (imm8->tag == Iex_Const
1870           && imm8->Iex.Const.con->tag == Ico_U8
1871           && imm8->Iex.Const.con->Ico.U8 < 4
1872           /* imm8 is OK, now check simm32 */
1873           && simm32->tag == Iex_Const
1874           && simm32->Iex.Const.con->tag == Ico_U64
1875           && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1876          UInt shift = imm8->Iex.Const.con->Ico.U8;
1877          UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
1878          HReg r1 = iselIntExpr_R(env, expr1);
1879          HReg r2 = iselIntExpr_R(env, expr2);
1880          vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1881          return AMD64AMode_IRRS(offset, r1, r2, shift);
1882       }
1883    }
1884 
1885    /* Add64(expr1, Shl64(expr2, imm)) */
1886    if (e->tag == Iex_Binop
1887        && e->Iex.Binop.op == Iop_Add64
1888        && e->Iex.Binop.arg2->tag == Iex_Binop
1889        && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1890        && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1891        && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1892       UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1893       if (shift == 1 || shift == 2 || shift == 3) {
1894          HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1895          HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1896          return AMD64AMode_IRRS(0, r1, r2, shift);
1897       }
1898    }
1899 
1900    /* Add64(expr,i) */
1901    if (e->tag == Iex_Binop
1902        && e->Iex.Binop.op == Iop_Add64
1903        && e->Iex.Binop.arg2->tag == Iex_Const
1904        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
1905        && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
1906       HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1907       return AMD64AMode_IR(
1908                 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
1909                 r1
1910              );
1911    }
1912 
1913    /* Doesn't match anything in particular.  Generate it into
1914       a register and use that. */
1915    {
1916       HReg r1 = iselIntExpr_R(env, e);
1917       return AMD64AMode_IR(0, r1);
1918    }
1919 }
1920 
1921 
1922 /* --------------------- RMIs --------------------- */
1923 
1924 /* Similarly, calculate an expression into an AMD64RMI operand.  As with
1925    iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits.  */
1926 
1927 static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1928 {
1929    AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1930    /* sanity checks ... */
1931    switch (rmi->tag) {
1932       case Armi_Imm:
1933          return rmi;
1934       case Armi_Reg:
1935          vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
1936          vassert(hregIsVirtual(rmi->Armi.Reg.reg));
1937          return rmi;
1938       case Armi_Mem:
1939          vassert(sane_AMode(rmi->Armi.Mem.am));
1940          return rmi;
1941       default:
1942          vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
1943    }
1944 }
1945 
1946 /* DO NOT CALL THIS DIRECTLY ! */
1947 static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1948 {
1949    IRType ty = typeOfIRExpr(env->type_env,e);
1950    vassert(ty == Ity_I64 || ty == Ity_I32
1951            || ty == Ity_I16 || ty == Ity_I8);
1952 
1953    /* special case: immediate 64/32/16/8 */
1954    if (e->tag == Iex_Const) {
1955       switch (e->Iex.Const.con->tag) {
1956         case Ico_U64:
1957            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
1958               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
1959            }
1960            break;
1961          case Ico_U32:
1962             return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
1963          case Ico_U16:
1964             return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
1965          case Ico_U8:
1966             return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
1967          default:
1968             vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1969       }
1970    }
1971 
1972    /* special case: 64-bit GET */
1973    if (e->tag == Iex_Get && ty == Ity_I64) {
1974       return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1975                                         hregAMD64_RBP()));
1976    }
1977 
1978    /* special case: 64-bit load from memory */
1979    if (e->tag == Iex_Load && ty == Ity_I64
1980        && e->Iex.Load.end == Iend_LE) {
1981       AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1982       return AMD64RMI_Mem(am);
1983    }
1984 
1985    /* default case: calculate into a register and return that */
1986    {
1987       HReg r = iselIntExpr_R ( env, e );
1988       return AMD64RMI_Reg(r);
1989    }
1990 }
1991 
1992 
1993 /* --------------------- RIs --------------------- */
1994 
1995 /* Calculate an expression into an AMD64RI operand.  As with
1996    iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1997    bits. */
1998 
1999 static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
2000 {
2001    AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
2002    /* sanity checks ... */
2003    switch (ri->tag) {
2004       case Ari_Imm:
2005          return ri;
2006       case Ari_Reg:
2007          vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
2008          vassert(hregIsVirtual(ri->Ari.Reg.reg));
2009          return ri;
2010       default:
2011          vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2012    }
2013 }
2014 
2015 /* DO NOT CALL THIS DIRECTLY ! */
2016 static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2017 {
2018    IRType ty = typeOfIRExpr(env->type_env,e);
2019    vassert(ty == Ity_I64 || ty == Ity_I32
2020            || ty == Ity_I16 || ty == Ity_I8);
2021 
2022    /* special case: immediate */
2023    if (e->tag == Iex_Const) {
2024       switch (e->Iex.Const.con->tag) {
2025         case Ico_U64:
2026            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
2027               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
2028            }
2029            break;
2030          case Ico_U32:
2031             return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2032          case Ico_U16:
2033             return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2034          case Ico_U8:
2035             return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2036          default:
2037             vpanic("iselIntExpr_RI.Iex_Const(amd64)");
2038       }
2039    }
2040 
2041    /* default case: calculate into a register and return that */
2042    {
2043       HReg r = iselIntExpr_R ( env, e );
2044       return AMD64RI_Reg(r);
2045    }
2046 }
2047 
2048 
2049 /* --------------------- RMs --------------------- */
2050 
2051 /* Similarly, calculate an expression into an AMD64RM operand.  As
2052    with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2053    bits.  */
2054 
2055 static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2056 {
2057    AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2058    /* sanity checks ... */
2059    switch (rm->tag) {
2060       case Arm_Reg:
2061          vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2062          vassert(hregIsVirtual(rm->Arm.Reg.reg));
2063          return rm;
2064       case Arm_Mem:
2065          vassert(sane_AMode(rm->Arm.Mem.am));
2066          return rm;
2067       default:
2068          vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2069    }
2070 }
2071 
2072 /* DO NOT CALL THIS DIRECTLY ! */
2073 static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2074 {
2075    IRType ty = typeOfIRExpr(env->type_env,e);
2076    vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2077 
2078    /* special case: 64-bit GET */
2079    if (e->tag == Iex_Get && ty == Ity_I64) {
2080       return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2081                                        hregAMD64_RBP()));
2082    }
2083 
2084    /* special case: load from memory */
2085 
2086    /* default case: calculate into a register and return that */
2087    {
2088       HReg r = iselIntExpr_R ( env, e );
2089       return AMD64RM_Reg(r);
2090    }
2091 }
2092 
2093 
2094 /* --------------------- CONDCODE --------------------- */
2095 
2096 /* Generate code to evaluate a bit-typed expression, returning the
2097    condition code which would be set if the expression had notionally
2098    evaluated to 1. */
2099 
2100 static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2101 {
2102    /* Uh, there's nothing we can sanity check here, unfortunately. */
2103    return iselCondCode_wrk(env,e);
2104 }
2105 
2106 /* DO NOT CALL THIS DIRECTLY ! */
2107 static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2108 {
2109    MatchInfo mi;
2110 
2111    vassert(e);
2112    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2113 
2114    /* var */
2115    if (e->tag == Iex_RdTmp) {
2116       HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
2117       HReg dst = newVRegI(env);
2118       addInstr(env, mk_iMOVsd_RR(r64,dst));
2119       addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2120       return Acc_NZ;
2121    }
2122 
2123    /* Constant 1:Bit */
2124    if (e->tag == Iex_Const) {
2125       HReg r;
2126       vassert(e->Iex.Const.con->tag == Ico_U1);
2127       vassert(e->Iex.Const.con->Ico.U1 == True
2128               || e->Iex.Const.con->Ico.U1 == False);
2129       r = newVRegI(env);
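      /* Generate zero into r; the XOR also sets the Z flag, so
         returning Acc_Z gives a constant-true condition and Acc_NZ a
         constant-false one. */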
2130       addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2131       addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
2132       return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2133    }
2134 
2135    /* Not1(...) */
2136    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2137       /* Generate code for the arg, and negate the test condition */
2138       return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2139    }
2140 
2141    /* --- patterns rooted at: 64to1 --- */
2142 
2143    /* 64to1 */
2144    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
2145       HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2146       addInstr(env, AMD64Instr_Test64(1,reg));
2147       return Acc_NZ;
2148    }
2149 
2150    /* --- patterns rooted at: CmpNEZ8 --- */
2151 
2152    /* CmpNEZ8(x) */
2153    if (e->tag == Iex_Unop
2154        && e->Iex.Unop.op == Iop_CmpNEZ8) {
2155       HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
2156       addInstr(env, AMD64Instr_Test64(0xFF,r));
2157       return Acc_NZ;
2158    }
2159 
2160    /* --- patterns rooted at: CmpNEZ16 --- */
2161 
2162    /* CmpNEZ16(x) */
2163    if (e->tag == Iex_Unop
2164        && e->Iex.Unop.op == Iop_CmpNEZ16) {
2165       HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
2166       addInstr(env, AMD64Instr_Test64(0xFFFF,r));
2167       return Acc_NZ;
2168    }
2169 
2170    /* --- patterns rooted at: CmpNEZ32 --- */
2171 
2172    /* CmpNEZ32(x) */
2173    if (e->tag == Iex_Unop
2174        && e->Iex.Unop.op == Iop_CmpNEZ32) {
2175       HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
2176       HReg      tmp  = newVRegI(env);
2177       AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2178       addInstr(env, AMD64Instr_MovxLQ(False, r1, tmp));
2179       addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
2180       return Acc_NZ;
2181    }
2182 
2183    /* --- patterns rooted at: CmpNEZ64 --- */
2184 
2185    /* CmpNEZ64(Or64(x,y)) */
2186    {
2187       DECLARE_PATTERN(p_CmpNEZ64_Or64);
2188       DEFINE_PATTERN(p_CmpNEZ64_Or64,
2189                      unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2190       if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2191          HReg      r0   = iselIntExpr_R(env, mi.bindee[0]);
2192          AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2193          HReg      tmp  = newVRegI(env);
2194          addInstr(env, mk_iMOVsd_RR(r0, tmp));
2195          addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2196          return Acc_NZ;
2197       }
2198    }
2199 
2200    /* CmpNEZ64(x) */
2201    if (e->tag == Iex_Unop
2202        && e->Iex.Unop.op == Iop_CmpNEZ64) {
2203       HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
2204       AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2205       addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2206       return Acc_NZ;
2207    }
2208 
2209    /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2210 
2211    /* CmpEQ8 / CmpNE8 */
2212    if (e->tag == Iex_Binop
2213        && (e->Iex.Binop.op == Iop_CmpEQ8
2214            || e->Iex.Binop.op == Iop_CmpNE8
2215            || e->Iex.Binop.op == Iop_CasCmpEQ8
2216            || e->Iex.Binop.op == Iop_CasCmpNE8)) {
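      /* XOR the operands and mask with 0xFF, so the Z flag ends up set
         iff their low 8 bits are identical. */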
2217       HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2218       AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2219       HReg      r    = newVRegI(env);
2220       addInstr(env, mk_iMOVsd_RR(r1,r));
2221       addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2222       addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2223       switch (e->Iex.Binop.op) {
2224          case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2225          case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2226          default: vpanic("iselCondCode(amd64): CmpXX8");
2227       }
2228    }
2229 
2230    /* CmpEQ16 / CmpNE16 */
2231    if (e->tag == Iex_Binop
2232        && (e->Iex.Binop.op == Iop_CmpEQ16
2233            || e->Iex.Binop.op == Iop_CmpNE16
2234            || e->Iex.Binop.op == Iop_CasCmpEQ16
2235            || e->Iex.Binop.op == Iop_CasCmpNE16)) {
2236       HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2237       AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2238       HReg      r    = newVRegI(env);
2239       addInstr(env, mk_iMOVsd_RR(r1,r));
2240       addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2241       addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2242       switch (e->Iex.Binop.op) {
2243          case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2244          case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
2245          default: vpanic("iselCondCode(amd64): CmpXX16");
2246       }
2247    }
2248 
2249    /* CmpEQ32 / CmpNE32 */
2250    if (e->tag == Iex_Binop
2251        && (e->Iex.Binop.op == Iop_CmpEQ32
2252            || e->Iex.Binop.op == Iop_CmpNE32
2253            || e->Iex.Binop.op == Iop_CasCmpEQ32
2254            || e->Iex.Binop.op == Iop_CasCmpNE32)) {
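      /* XOR the operands and shift left by 32 to discard the upper
         half, so the Z flag reflects equality of the low 32 bits only. */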
2255       HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2256       AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2257       HReg      r    = newVRegI(env);
2258       addInstr(env, mk_iMOVsd_RR(r1,r));
2259       addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2260       addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
2261       switch (e->Iex.Binop.op) {
2262          case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
2263          case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
2264          default: vpanic("iselCondCode(amd64): CmpXX32");
2265       }
2266    }
2267 
2268    /* Cmp*64*(x,y) */
2269    if (e->tag == Iex_Binop
2270        && (e->Iex.Binop.op == Iop_CmpEQ64
2271            || e->Iex.Binop.op == Iop_CmpNE64
2272            || e->Iex.Binop.op == Iop_CmpLT64S
2273            || e->Iex.Binop.op == Iop_CmpLT64U
2274            || e->Iex.Binop.op == Iop_CmpLE64S
2275            || e->Iex.Binop.op == Iop_CmpLE64U
2276            || e->Iex.Binop.op == Iop_CasCmpEQ64
2277            || e->Iex.Binop.op == Iop_CasCmpNE64)) {
2278       HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
2279       AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2280       addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2281       switch (e->Iex.Binop.op) {
2282          case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
2283          case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
2284          case Iop_CmpLT64S: return Acc_L;
2285          case Iop_CmpLT64U: return Acc_B;
2286          case Iop_CmpLE64S: return Acc_LE;
2287          case Iop_CmpLE64U: return Acc_BE;
2288          default: vpanic("iselCondCode(amd64): CmpXX64");
2289       }
2290    }
2291 
2292    ppIRExpr(e);
2293    vpanic("iselCondCode(amd64)");
2294 }
2295 
2296 
2297 /*---------------------------------------------------------*/
2298 /*--- ISEL: Integer expressions (128 bit)               ---*/
2299 /*---------------------------------------------------------*/
2300 
2301 /* Compute a 128-bit value into a register pair, which is returned as
2302    the first two parameters.  As with iselIntExpr_R, these will be
2303    virtual regs, and they must not be changed by subsequent code
2304    emitted by the caller.  */
2305 
2306 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2307                              ISelEnv* env, IRExpr* e )
2308 {
2309    iselInt128Expr_wrk(rHi, rLo, env, e);
2310 #  if 0
2311    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2312 #  endif
2313    vassert(hregClass(*rHi) == HRcInt64);
2314    vassert(hregIsVirtual(*rHi));
2315    vassert(hregClass(*rLo) == HRcInt64);
2316    vassert(hregIsVirtual(*rLo));
2317 }
2318 
2319 /* DO NOT CALL THIS DIRECTLY ! */
2320 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2321                                  ISelEnv* env, IRExpr* e )
2322 {
2323 //..    HWord fn = 0; /* helper fn for most SIMD64 stuff */
2324    vassert(e);
2325    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2326 
2327 //..    /* 64-bit literal */
2328 //..    if (e->tag == Iex_Const) {
2329 //..       ULong w64 = e->Iex.Const.con->Ico.U64;
2330 //..       UInt  wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
2331 //..       UInt  wLo = ((UInt)w64) & 0xFFFFFFFF;
2332 //..       HReg  tLo = newVRegI(env);
2333 //..       HReg  tHi = newVRegI(env);
2334 //..       vassert(e->Iex.Const.con->tag == Ico_U64);
2335 //..       addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2336 //..       addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2337 //..       *rHi = tHi;
2338 //..       *rLo = tLo;
2339 //..       return;
2340 //..    }
2341 
2342    /* read 128-bit IRTemp */
2343    if (e->tag == Iex_RdTmp) {
2344       lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
2345       return;
2346    }
2347 
2348 //..    /* 64-bit load */
2349 //..    if (e->tag == Iex_LDle) {
2350 //..       HReg     tLo, tHi;
2351 //..       X86AMode *am0, *am4;
2352 //..       vassert(e->Iex.LDle.ty == Ity_I64);
2353 //..       tLo = newVRegI(env);
2354 //..       tHi = newVRegI(env);
2355 //..       am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2356 //..       am4 = advance4(am0);
2357 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2358 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2359 //..       *rHi = tHi;
2360 //..       *rLo = tLo;
2361 //..       return;
2362 //..    }
2363 //..
2364 //..    /* 64-bit GET */
2365 //..    if (e->tag == Iex_Get) {
2366 //..       X86AMode* am  = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2367 //..       X86AMode* am4 = advance4(am);
2368 //..       HReg tLo = newVRegI(env);
2369 //..       HReg tHi = newVRegI(env);
2370 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2371 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2372 //..       *rHi = tHi;
2373 //..       *rLo = tLo;
2374 //..       return;
2375 //..    }
2376 //..
2377 //..    /* 64-bit GETI */
2378 //..    if (e->tag == Iex_GetI) {
2379 //..       X86AMode* am
2380 //..          = genGuestArrayOffset( env, e->Iex.GetI.descr,
2381 //..                                      e->Iex.GetI.ix, e->Iex.GetI.bias );
2382 //..       X86AMode* am4 = advance4(am);
2383 //..       HReg tLo = newVRegI(env);
2384 //..       HReg tHi = newVRegI(env);
2385 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2386 //..       addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2387 //..       *rHi = tHi;
2388 //..       *rLo = tLo;
2389 //..       return;
2390 //..    }
2391 //..
2392 //..    /* 64-bit Mux0X */
2393 //..    if (e->tag == Iex_Mux0X) {
2394 //..       HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2395 //..       HReg tLo = newVRegI(env);
2396 //..       HReg tHi = newVRegI(env);
2397 //..       iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2398 //..       iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2399 //..       addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2400 //..       addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2401 //..       r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2402 //..       addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2403 //..       /* This assumes the first cmov32 doesn't trash the condition
2404 //..          codes, so they are still available for the second cmov32 */
2405 //..       addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2406 //..       addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2407 //..       *rHi = tHi;
2408 //..       *rLo = tLo;
2409 //..       return;
2410 //..    }
2411 
2412    /* --------- BINARY ops --------- */
2413    if (e->tag == Iex_Binop) {
2414       switch (e->Iex.Binop.op) {
2415          /* 64 x 64 -> 128 multiply */
2416          case Iop_MullU64:
2417          case Iop_MullS64: {
2418             /* Get one operand into %rax, and the other into an R/M.
2419                Ideally we would make an educated guess about which
2420                operand is better off in which position. */
2421             HReg     tLo    = newVRegI(env);
2422             HReg     tHi    = newVRegI(env);
2423             Bool     syned  = toBool(e->Iex.Binop.op == Iop_MullS64);
2424             AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2425             HReg     rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2426             addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
2427             addInstr(env, AMD64Instr_MulL(syned, rmLeft));
2428             /* Result is now in RDX:RAX.  Tell the caller. */
2429             addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2430             addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2431             *rHi = tHi;
2432             *rLo = tLo;
2433             return;
2434          }
2435 
2436          /* 128 x 64 -> (64(rem),64(div)) division */
2437          case Iop_DivModU128to64:
2438          case Iop_DivModS128to64: {
2439             /* Get the 128-bit operand into rdx:rax, and the other into
2440                any old R/M. */
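            /* divq/idivq divides %rdx:%rax by the operand, leaving the
               quotient in %rax and the remainder in %rdx. */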
2441             HReg sHi, sLo;
2442             HReg     tLo     = newVRegI(env);
2443             HReg     tHi     = newVRegI(env);
2444             Bool     syned   = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
2445             AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2446             iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2447             addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2448             addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2449             addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
2450             addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2451             addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2452             *rHi = tHi;
2453             *rLo = tLo;
2454             return;
2455          }
2456 
2457          /* 64HLto128(e1,e2) */
2458          case Iop_64HLto128:
2459             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2460             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2461             return;
2462 
2463 //..          /* Or64/And64/Xor64 */
2464 //..          case Iop_Or64:
2465 //..          case Iop_And64:
2466 //..          case Iop_Xor64: {
2467 //..             HReg xLo, xHi, yLo, yHi;
2468 //..             HReg tLo = newVRegI(env);
2469 //..             HReg tHi = newVRegI(env);
2470 //..             X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2471 //..                           : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2472 //..                           : Xalu_XOR;
2473 //..             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2474 //..             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2475 //..             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2476 //..             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2477 //..             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2478 //..             addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2479 //..             *rHi = tHi;
2480 //..             *rLo = tLo;
2481 //..             return;
2482 //..          }
2483 //..
2484 //..          /* Add64/Sub64 */
2485 //..          case Iop_Add64:
2486 //..          case Iop_Sub64: {
2487 //..             HReg xLo, xHi, yLo, yHi;
2488 //..             HReg tLo = newVRegI(env);
2489 //..             HReg tHi = newVRegI(env);
2490 //..             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2491 //..             addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2492 //..             addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2493 //..             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2494 //..             if (e->Iex.Binop.op==Iop_Add64) {
2495 //..                addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2496 //..                addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2497 //..             } else {
2498 //..                addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2499 //..                addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2500 //..             }
2501 //..             *rHi = tHi;
2502 //..             *rLo = tLo;
2503 //..             return;
2504 //..          }
2505 //..
2506 //..          /* 32HLto64(e1,e2) */
2507 //..          case Iop_32HLto64:
2508 //..             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2509 //..             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2510 //..             return;
2511 //..
2512 //..          /* 64-bit shifts */
2513 //..          case Iop_Shl64: {
2514 //..             /* We use the same ingenious scheme as gcc.  Put the value
2515 //..                to be shifted into %hi:%lo, and the shift amount into
2516 //..                %cl.  Then (dsts on right, a la ATT syntax):
2517 //..
2518 //..                shldl %cl, %lo, %hi   -- make %hi be right for the
2519 //..                                      -- shift amt %cl % 32
2520 //..                shll  %cl, %lo        -- make %lo be right for the
2521 //..                                      -- shift amt %cl % 32
2522 //..
2523 //..                Now, if (shift amount % 64) is in the range 32 .. 63,
2524 //..                we have to do a fixup, which puts the result low half
2525 //..                into the result high half, and zeroes the low half:
2526 //..
2527 //..                testl $32, %ecx
2528 //..
2529 //..                cmovnz %lo, %hi
2530 //..                movl $0, %tmp         -- sigh; need yet another reg
2531 //..                cmovnz %tmp, %lo
2532 //..             */
2533 //..             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2534 //..             tLo = newVRegI(env);
2535 //..             tHi = newVRegI(env);
2536 //..             tTemp = newVRegI(env);
2537 //..             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2538 //..             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2539 //..             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2540 //..             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2541 //..             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2542 //..             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2543 //..                and those regs are legitimately modifiable. */
2544 //..             addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2545 //..             addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2546 //..             addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2547 //..                           X86RM_Reg(hregX86_ECX())));
2548 //..             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2549 //..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2550 //..             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2551 //..             *rHi = tHi;
2552 //..             *rLo = tLo;
2553 //..             return;
2554 //..          }
2555 //..
2556 //..          case Iop_Shr64: {
2557 //..             /* We use the same ingenious scheme as gcc.  Put the value
2558 //..                to be shifted into %hi:%lo, and the shift amount into
2559 //..                %cl.  Then:
2560 //..
2561 //..                shrdl %cl, %hi, %lo   -- make %lo be right for the
2562 //..                                      -- shift amt %cl % 32
2563 //..                shrl  %cl, %hi        -- make %hi be right for the
2564 //..                                      -- shift amt %cl % 32
2565 //..
2566 //..                Now, if (shift amount % 64) is in the range 32 .. 63,
2567 //..                we have to do a fixup, which puts the result high half
2568 //..                into the result low half, and zeroes the high half:
2569 //..
2570 //..                testl $32, %ecx
2571 //..
2572 //..                cmovnz %hi, %lo
2573 //..                movl $0, %tmp         -- sigh; need yet another reg
2574 //..                cmovnz %tmp, %hi
2575 //..             */
2576 //..             HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2577 //..             tLo = newVRegI(env);
2578 //..             tHi = newVRegI(env);
2579 //..             tTemp = newVRegI(env);
2580 //..             rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2581 //..             iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2582 //..             addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2583 //..             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2584 //..             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2585 //..             /* Ok.  Now shift amt is in %ecx, and value is in tHi/tLo
2586 //..                and those regs are legitimately modifiable. */
2587 //..             addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2588 //..             addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2589 //..             addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2590 //..                           X86RM_Reg(hregX86_ECX())));
2591 //..             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2592 //..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2593 //..             addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2594 //..             *rHi = tHi;
2595 //..             *rLo = tLo;
2596 //..             return;
2597 //..          }
2598 //..
2599 //..          /* F64 -> I64 */
2600 //..          /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2601 //..             case.  Unfortunately I see no easy way to avoid the
2602 //..             duplication. */
2603 //..          case Iop_F64toI64: {
2604 //..             HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
2605 //..             HReg tLo = newVRegI(env);
2606 //..             HReg tHi = newVRegI(env);
2607 //..
2608 //..             /* Used several times ... */
2609 //..             /* Careful ... this sharing is only safe because
2610 //.. 	       zero_esp/four_esp do not hold any registers which the
2611 //.. 	       register allocator could attempt to swizzle later. */
2612 //..             X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2613 //..             X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2614 //..
2615 //..             /* rf now holds the value to be converted, and rrm holds
2616 //..                the rounding mode value, encoded as per the
2617 //..                IRRoundingMode enum.  The first thing to do is set the
2618 //..                FPU's rounding mode accordingly. */
2619 //..
2620 //..             /* Create a space for the format conversion. */
2621 //..             /* subl $8, %esp */
2622 //..             sub_from_esp(env, 8);
2623 //..
2624 //..             /* Set host rounding mode */
2625 //..             set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2626 //..
2627 //..             /* gistll %rf, 0(%esp) */
2628 //..             addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2629 //..
2630 //..             /* movl 0(%esp), %dstLo */
2631 //..             /* movl 4(%esp), %dstHi */
2632 //..             addInstr(env, X86Instr_Alu32R(
2633 //..                              Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2634 //..             addInstr(env, X86Instr_Alu32R(
2635 //..                              Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2636 //..
2637 //..             /* Restore default FPU rounding. */
2638 //..             set_FPU_rounding_default( env );
2639 //..
2640 //..             /* addl $8, %esp */
2641 //..             add_to_esp(env, 8);
2642 //..
2643 //..             *rHi = tHi;
2644 //..             *rLo = tLo;
2645 //..             return;
2646 //..          }
2647 //..
2648          default:
2649             break;
2650       }
2651    } /* if (e->tag == Iex_Binop) */
2652 
2653 
2654 //..    /* --------- UNARY ops --------- */
2655 //..    if (e->tag == Iex_Unop) {
2656 //..       switch (e->Iex.Unop.op) {
2657 //..
2658 //..          /* 32Sto64(e) */
2659 //..          case Iop_32Sto64: {
2660 //..             HReg tLo = newVRegI(env);
2661 //..             HReg tHi = newVRegI(env);
2662 //..             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2663 //..             addInstr(env, mk_iMOVsd_RR(src,tHi));
2664 //..             addInstr(env, mk_iMOVsd_RR(src,tLo));
2665 //..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2666 //..             *rHi = tHi;
2667 //..             *rLo = tLo;
2668 //..             return;
2669 //..          }
2670 //..
2671 //..          /* 32Uto64(e) */
2672 //..          case Iop_32Uto64: {
2673 //..             HReg tLo = newVRegI(env);
2674 //..             HReg tHi = newVRegI(env);
2675 //..             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2676 //..             addInstr(env, mk_iMOVsd_RR(src,tLo));
2677 //..             addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2678 //..             *rHi = tHi;
2679 //..             *rLo = tLo;
2680 //..             return;
2681 //..          }
2682 
2683 //..          /* could do better than this, but for now ... */
2684 //..          case Iop_1Sto64: {
2685 //..             HReg tLo = newVRegI(env);
2686 //..             HReg tHi = newVRegI(env);
2687 //..             X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2688 //..             addInstr(env, X86Instr_Set32(cond,tLo));
2689 //..             addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2690 //..             addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2691 //..             addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2692 //..             *rHi = tHi;
2693 //..             *rLo = tLo;
2694 //..             return;
2695 //..          }
2696 //..
2697 //..          /* Not64(e) */
2698 //..          case Iop_Not64: {
2699 //..             HReg tLo = newVRegI(env);
2700 //..             HReg tHi = newVRegI(env);
2701 //..             HReg sHi, sLo;
2702 //..             iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2703 //..             addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2704 //..             addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2705 //..             addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2706 //..             addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2707 //..             *rHi = tHi;
2708 //..             *rLo = tLo;
2709 //..             return;
2710 //..          }
2711 //..
2712 //..          default:
2713 //..             break;
2714 //..       }
2715 //..    } /* if (e->tag == Iex_Unop) */
2716 //..
2717 //..
2718 //..    /* --------- CCALL --------- */
2719 //..    if (e->tag == Iex_CCall) {
2720 //..       HReg tLo = newVRegI(env);
2721 //..       HReg tHi = newVRegI(env);
2722 //..
2723 //..       /* Marshal args, do the call, clear stack. */
2724 //..       doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2725 //..
2726 //..       addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2727 //..       addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2728 //..       *rHi = tHi;
2729 //..       *rLo = tLo;
2730 //..       return;
2731 //..    }
2732 
2733    ppIRExpr(e);
2734    vpanic("iselInt128Expr");
2735 }
2736 
2737 
2738 /*---------------------------------------------------------*/
2739 /*--- ISEL: Floating point expressions (32 bit)         ---*/
2740 /*---------------------------------------------------------*/
2741 
2742 /* Nothing interesting here; really just wrappers for
2743    64-bit stuff. */
2744 
2745 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2746 {
2747    HReg r = iselFltExpr_wrk( env, e );
2748 #  if 0
2749    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2750 #  endif
2751    vassert(hregClass(r) == HRcVec128);
2752    vassert(hregIsVirtual(r));
2753    return r;
2754 }
2755 
2756 /* DO NOT CALL THIS DIRECTLY */
2757 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2758 {
2759    IRType ty = typeOfIRExpr(env->type_env,e);
2760    vassert(ty == Ity_F32);
2761 
2762    if (e->tag == Iex_RdTmp) {
2763       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2764    }
2765 
2766    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2767       AMD64AMode* am;
2768       HReg res = newVRegV(env);
2769       vassert(e->Iex.Load.ty == Ity_F32);
2770       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2771       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2772       return res;
2773    }
2774 
2775    if (e->tag == Iex_Binop
2776        && e->Iex.Binop.op == Iop_F64toF32) {
2777       /* Although the result is still held in a standard SSE register,
2778          we need to round it to reflect the loss of accuracy/range
2779          entailed in casting it to a 32-bit float. */
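      /* For example, 16777217.0 (2^24 + 1) is exact as an F64 but has
         no F32 representation; under round-to-nearest it becomes
         16777216.0f.  Hence the rounding mode (arg1) has to be
         installed in %mxcsr around the conversion and then restored. */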
2780       HReg dst = newVRegV(env);
2781       HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2782       set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2783       addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2784       set_SSE_rounding_default( env );
2785       return dst;
2786    }
2787 
2788    if (e->tag == Iex_Get) {
2789       AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2790                                        hregAMD64_RBP() );
2791       HReg res = newVRegV(env);
2792       addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2793       return res;
2794    }
2795 
2796    if (e->tag == Iex_Unop
2797        && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2798        /* Given an I32, produce an IEEE754 float with the same bit
2799           pattern. */
2800        HReg        dst    = newVRegV(env);
2801        HReg        src    = iselIntExpr_R(env, e->Iex.Unop.arg);
2802        AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2803        addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2804        addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2805        return dst;
2806    }
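   /* Note: that is a pure bit copy staged through the 4-byte slot at
      -4(%rsp); for instance the I32 0x3F800000 reads back as the F32
      value 1.0.  No conversion, and hence no rounding, is involved. */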
2807 
2808    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2809       AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2810       HReg        arg    = iselFltExpr(env, e->Iex.Binop.arg2);
2811       HReg        dst    = newVRegV(env);
2812 
2813       /* arg now holds the value to be rounded.  The first thing to do
2814          is set the FPU's rounding mode accordingly. */
2815 
2816       /* Set host x87 rounding mode */
2817       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2818 
2819       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
2820       addInstr(env, AMD64Instr_A87Free(1));
2821       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
2822       addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2823       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
2824       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
2825 
2826       /* Restore default x87 rounding. */
2827       set_FPU_rounding_default( env );
2828 
2829       return dst;
2830    }
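   /* Roughly speaking (exact mnemonics are up to the A87 emitters),
      the case above produces: store the F32 to -8(%rsp); free an x87
      stack slot; flds -8(%rsp); frndint, rounding under the %fpucw
      mode installed from arg1; fstps -8(%rsp); and finally a movss
      of that slot into the destination xmm register. */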
2831 
2832    ppIRExpr(e);
2833    vpanic("iselFltExpr_wrk");
2834 }
2835 
2836 
2837 /*---------------------------------------------------------*/
2838 /*--- ISEL: Floating point expressions (64 bit)         ---*/
2839 /*---------------------------------------------------------*/
2840 
2841 /* Compute a 64-bit floating point value into the lower half of an xmm
2842    register, the identity of which is returned.  As with
2843    iselIntExpr_R, the returned reg will be virtual, and it must not be
2844    changed by subsequent code emitted by the caller.
2845 */
2846 
2847 /* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2848 
2849     Type                  S (1 bit)   E (11 bits)   F (52 bits)
2850     ----                  ---------   -----------   -----------
2851     signalling NaN        u           2047 (max)    .0uuuuu---u
2852                                                     (with at least
2853                                                      one 1 bit)
2854     quiet NaN             u           2047 (max)    .1uuuuu---u
2855 
2856     negative infinity     1           2047 (max)    .000000---0
2857 
2858     positive infinity     0           2047 (max)    .000000---0
2859 
2860     negative zero         1           0             .000000---0
2861 
2862     positive zero         0           0             .000000---0
2863 */
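/* Some worked examples of that encoding:
       +1.0               0x3FF0000000000000   (S=0, E=1023, F=0)
       -0.0               0x8000000000000000
       +infinity          0x7FF0000000000000
       default quiet NaN  0x7FF8000000000000
*/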
2864 
2865 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2866 {
2867    HReg r = iselDblExpr_wrk( env, e );
2868 #  if 0
2869    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2870 #  endif
2871    vassert(hregClass(r) == HRcVec128);
2872    vassert(hregIsVirtual(r));
2873    return r;
2874 }
2875 
2876 /* DO NOT CALL THIS DIRECTLY */
2877 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2878 {
2879    IRType ty = typeOfIRExpr(env->type_env,e);
2880    vassert(e);
2881    vassert(ty == Ity_F64);
2882 
2883    if (e->tag == Iex_RdTmp) {
2884       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2885    }
2886 
2887    if (e->tag == Iex_Const) {
2888       union { ULong u64; Double f64; } u;
2889       HReg res = newVRegV(env);
2890       HReg tmp = newVRegI(env);
2891       vassert(sizeof(u) == 8);
2892       vassert(sizeof(u.u64) == 8);
2893       vassert(sizeof(u.f64) == 8);
2894 
2895       if (e->Iex.Const.con->tag == Ico_F64) {
2896          u.f64 = e->Iex.Const.con->Ico.F64;
2897       }
2898       else if (e->Iex.Const.con->tag == Ico_F64i) {
2899          u.u64 = e->Iex.Const.con->Ico.F64i;
2900       }
2901       else
2902          vpanic("iselDblExpr(amd64): const");
2903 
2904       addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2905       addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2906       addInstr(env, AMD64Instr_SseLdSt(
2907                        True/*load*/, 8, res,
2908                        AMD64AMode_IR(0, hregAMD64_RSP())
2909               ));
2910       add_to_rsp(env, 8);
2911       return res;
2912    }
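   /* Illustrative sketch: for the constant 1.0 the union yields the
      bits 0x3FF0000000000000, and the code above amounts to roughly
         movabsq $0x3FF0000000000000, %tmp
         pushq   %tmp
         movsd   (%rsp), %res
         addq    $8, %rsp
   */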
2913 
2914    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2915       AMD64AMode* am;
2916       HReg res = newVRegV(env);
2917       vassert(e->Iex.Load.ty == Ity_F64);
2918       am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2919       addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2920       return res;
2921    }
2922 
2923    if (e->tag == Iex_Get) {
2924       AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2925                                       hregAMD64_RBP() );
2926       HReg res = newVRegV(env);
2927       addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2928       return res;
2929    }
2930 
2931    if (e->tag == Iex_GetI) {
2932       AMD64AMode* am
2933          = genGuestArrayOffset(
2934               env, e->Iex.GetI.descr,
2935                    e->Iex.GetI.ix, e->Iex.GetI.bias );
2936       HReg res = newVRegV(env);
2937       addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2938       return res;
2939    }
2940 
2941    if (e->tag == Iex_Triop) {
2942       AMD64SseOp op = Asse_INVALID;
2943       switch (e->Iex.Triop.op) {
2944          case Iop_AddF64: op = Asse_ADDF; break;
2945          case Iop_SubF64: op = Asse_SUBF; break;
2946          case Iop_MulF64: op = Asse_MULF; break;
2947          case Iop_DivF64: op = Asse_DIVF; break;
2948          default: break;
2949       }
2950       if (op != Asse_INVALID) {
2951          HReg dst  = newVRegV(env);
2952          HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
2953          HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
2954          addInstr(env, mk_vMOVsd_RR(argL, dst));
2955          /* XXXROUNDINGFIXME */
2956          /* set roundingmode here */
2957          addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2958          return dst;
2959       }
2960    }
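   /* Illustrative sketch: Iop_AddF64(rm, x, y) comes out as roughly
      an xmm copy of x into dst followed by "addsd %y, %dst" (and
      similarly subsd/mulsd/divsd).  As the XXXROUNDINGFIXME above
      says, the rounding-mode operand is not yet honoured here, so
      the default round-to-nearest mode is assumed. */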
2961 
2962    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2963       AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2964       HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
2965       HReg        dst    = newVRegV(env);
2966 
2967       /* arg now holds the value to be rounded.  The first thing to do
2968          is set the FPU's rounding mode accordingly. */
2969 
2970       /* Set host x87 rounding mode */
2971       set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2972 
2973       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2974       addInstr(env, AMD64Instr_A87Free(1));
2975       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
2976       addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2977       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
2978       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2979 
2980       /* Restore default x87 rounding. */
2981       set_FPU_rounding_default( env );
2982 
2983       return dst;
2984    }
2985 
2986    if (e->tag == Iex_Triop
2987        && (e->Iex.Triop.op == Iop_ScaleF64
2988            || e->Iex.Triop.op == Iop_AtanF64
2989            || e->Iex.Triop.op == Iop_Yl2xF64
2990            || e->Iex.Triop.op == Iop_Yl2xp1F64
2991            || e->Iex.Triop.op == Iop_PRemF64
2992            || e->Iex.Triop.op == Iop_PRem1F64)
2993       ) {
2994       AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2995       HReg        arg1   = iselDblExpr(env, e->Iex.Triop.arg2);
2996       HReg        arg2   = iselDblExpr(env, e->Iex.Triop.arg3);
2997       HReg        dst    = newVRegV(env);
2998       Bool     arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
2999                                   || e->Iex.Triop.op == Iop_PRemF64
3000                                   || e->Iex.Triop.op == Iop_PRem1F64);
3001       addInstr(env, AMD64Instr_A87Free(2));
3002 
3003       /* one arg -> top of x87 stack */
3004       addInstr(env, AMD64Instr_SseLdSt(
3005                        False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
3006       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3007 
3008       /* other arg -> top of x87 stack */
3009       addInstr(env, AMD64Instr_SseLdSt(
3010                        False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
3011       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3012 
3013       /* do it */
3014       /* XXXROUNDINGFIXME */
3015       /* set roundingmode here */
3016       switch (e->Iex.Triop.op) {
3017          case Iop_ScaleF64:
3018             addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
3019             break;
3020          case Iop_AtanF64:
3021             addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
3022             break;
3023          case Iop_Yl2xF64:
3024             addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
3025             break;
3026          case Iop_Yl2xp1F64:
3027             addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
3028             break;
3029          case Iop_PRemF64:
3030             addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
3031             break;
3032          case Iop_PRem1F64:
3033             addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
3034             break;
3035          default:
3036             vassert(0);
3037       }
3038 
3039       /* save result */
3040       addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3041       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3042       return dst;
3043    }
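   /* Note on the push order above: the value pushed second ends up in
      %st(0) and the one pushed first in %st(1).  fyl2x computes
      st(1) * log2(st(0)) and fpatan computes atan(st(1)/st(0)), so
      for those arg1 is pushed first; fscale/fprem/fprem1 operate on
      st(0) using st(1) as the scale factor/divisor, which is why
      arg2first is set for that group. */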
3044 
3045    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3046       HReg dst = newVRegV(env);
3047       HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
3048       set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
3049       addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
3050       set_SSE_rounding_default( env );
3051       return dst;
3052    }
3053 
3054    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
3055       HReg dst = newVRegV(env);
3056       HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3057       set_SSE_rounding_default( env );
3058       addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
3059       return dst;
3060    }
3061 
3062    if (e->tag == Iex_Unop
3063        && (e->Iex.Unop.op == Iop_NegF64
3064            || e->Iex.Unop.op == Iop_AbsF64)) {
3065       /* Sigh ... very rough code.  Could do much better. */
3066       /* Get the 128-bit literal 00---0 10---0 into a register
3067          and xor/nand it with the value to be negated. */
3068       HReg r1  = newVRegI(env);
3069       HReg dst = newVRegV(env);
3070       HReg tmp = newVRegV(env);
3071       HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3072       AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3073       addInstr(env, mk_vMOVsd_RR(src,tmp));
3074       addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3075       addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3076       addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
3077       addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
3078 
3079       if (e->Iex.Unop.op == Iop_NegF64)
3080          addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3081       else
3082          addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3083 
3084       add_to_rsp(env, 16);
3085       return dst;
3086    }
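   /* Note: the two pushes build a 16-byte mask at (%rsp) whose low
      quadword is 0x8000000000000000 and whose high quadword is zero,
      i.e. only the sign bit of the low-lane double is set.  The SSE
      XOR with that mask flips the sign (negation); the AND-NOT op
      (dst = ~dst & src) clears it instead (absolute value). */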
3087 
3088    if (e->tag == Iex_Binop) {
3089       A87FpOp fpop = Afp_INVALID;
3090       switch (e->Iex.Binop.op) {
3091          case Iop_SqrtF64: fpop = Afp_SQRT; break;
3092          case Iop_SinF64:  fpop = Afp_SIN;  break;
3093          case Iop_CosF64:  fpop = Afp_COS;  break;
3094          case Iop_TanF64:  fpop = Afp_TAN;  break;
3095          case Iop_2xm1F64: fpop = Afp_2XM1; break;
3096          default: break;
3097       }
3098       if (fpop != Afp_INVALID) {
3099          AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3100          HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
3101          HReg        dst    = newVRegV(env);
3102          Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
3103          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
3104          addInstr(env, AMD64Instr_A87Free(nNeeded));
3105          addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3106          /* XXXROUNDINGFIXME */
3107          /* set roundingmode here */
3108          addInstr(env, AMD64Instr_A87FpOp(fpop));
3109          if (e->Iex.Binop.op==Iop_TanF64) {
3110             /* get rid of the extra 1.0 that fptan pushes */
3111             addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3112          }
3113          addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3114          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3115          return dst;
3116       }
3117    }
3118 
3119    if (e->tag == Iex_Unop) {
3120       switch (e->Iex.Unop.op) {
3121 //..          case Iop_I32toF64: {
3122 //..             HReg dst = newVRegF(env);
3123 //..             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
3124 //..             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3125 //..             set_FPU_rounding_default(env);
3126 //..             addInstr(env, X86Instr_FpLdStI(
3127 //..                              True/*load*/, 4, dst,
3128 //..                              X86AMode_IR(0, hregX86_ESP())));
3129 //..             add_to_esp(env, 4);
3130 //..             return dst;
3131 //..          }
3132          case Iop_ReinterpI64asF64: {
3133             /* Given an I64, produce an IEEE754 double with the same
3134                bit pattern. */
3135             AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3136             HReg        dst    = newVRegV(env);
3137             AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
3138             /* paranoia */
3139             set_SSE_rounding_default(env);
3140             addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3141             addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3142             return dst;
3143          }
3144          case Iop_F32toF64: {
3145             HReg f32;
3146             HReg f64 = newVRegV(env);
3147             /* this shouldn't be necessary, but be paranoid ... */
3148             set_SSE_rounding_default(env);
3149             f32 = iselFltExpr(env, e->Iex.Unop.arg);
3150             addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3151             return f64;
3152          }
3153          default:
3154             break;
3155       }
3156    }
3157 
3158    /* --------- MULTIPLEX --------- */
3159    if (e->tag == Iex_Mux0X) {
3160       HReg r8, rX, r0, dst;
3161       vassert(ty == Ity_F64);
3162       vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
3163       r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3164       rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
3165       r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
3166       dst = newVRegV(env);
3167       addInstr(env, mk_vMOVsd_RR(rX,dst));
3168       addInstr(env, AMD64Instr_Test64(0xFF, r8));
3169       addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3170       return dst;
3171    }
3172 
3173    ppIRExpr(e);
3174    vpanic("iselDblExpr_wrk");
3175 }
3176 
3177 
3178 /*---------------------------------------------------------*/
3179 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
3180 /*---------------------------------------------------------*/
3181 
3182 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3183 {
3184    HReg r = iselVecExpr_wrk( env, e );
3185 #  if 0
3186    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3187 #  endif
3188    vassert(hregClass(r) == HRcVec128);
3189    vassert(hregIsVirtual(r));
3190    return r;
3191 }
3192 
3193 
3194 /* DO NOT CALL THIS DIRECTLY */
3195 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3196 {
3197    HWord      fn = 0; /* address of helper fn, if required */
3198    Bool       arg1isEReg = False;
3199    AMD64SseOp op = Asse_INVALID;
3200    IRType     ty = typeOfIRExpr(env->type_env,e);
3201    vassert(e);
3202    vassert(ty == Ity_V128);
3203 
3204    if (e->tag == Iex_RdTmp) {
3205       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3206    }
3207 
3208    if (e->tag == Iex_Get) {
3209       HReg dst = newVRegV(env);
3210       addInstr(env, AMD64Instr_SseLdSt(
3211                        True/*load*/,
3212                        16,
3213                        dst,
3214                        AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3215                     )
3216               );
3217       return dst;
3218    }
3219 
3220    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3221       HReg        dst = newVRegV(env);
3222       AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
3223       addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3224       return dst;
3225    }
3226 
3227    if (e->tag == Iex_Const) {
3228       HReg dst = newVRegV(env);
3229       vassert(e->Iex.Const.con->tag == Ico_V128);
3230       switch (e->Iex.Const.con->Ico.V128) {
3231          case 0x0000:
3232             dst = generate_zeroes_V128(env);
3233             break;
3234          case 0xFFFF:
3235             dst = generate_ones_V128(env);
3236             break;
3237          default: {
3238             AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3239             /* do push_uimm64 twice, first time for the high-order half. */
3240             push_uimm64(env, bitmask8_to_bytemask64(
3241                                 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
3242                        ));
3243             push_uimm64(env, bitmask8_to_bytemask64(
3244                                 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
3245                        ));
3246             addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3247             add_to_rsp(env, 16);
3248             break;
3249          }
3250       }
3251       return dst;
3252    }
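   /* Note: an Ico_V128 is a 16-bit summary of the 128-bit constant,
      one bit per byte; bitmask8_to_bytemask64 expands 8 of those bits
      into a quadword of 0x00/0xFF bytes.  The high half is pushed
      first so that it lands at 8(%rsp) and the low half at (%rsp),
      matching the little-endian 16-byte load. */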
3253 
3254    if (e->tag == Iex_Unop) {
3255    switch (e->Iex.Unop.op) {
3256 
3257       case Iop_NotV128: {
3258          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3259          return do_sse_NotV128(env, arg);
3260       }
3261 
3262       case Iop_CmpNEZ64x2: {
3263          /* We can use SSE2 instructions for this. */
3264          /* Ideally, we want to do a 64Ix2 comparison against zero of
3265             the operand.  Problem is no such insn exists.  Solution
3266             therefore is to do a 32Ix4 comparison instead, and bitwise-
3267             negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
3268             let the not'd result of this initial comparison be a:b:c:d.
3269             What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
3270             pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3271             giving the required result.
3272 
3273             The required selection sequence is 2,3,0,1, which
3274             according to Intel's documentation means the pshufd
3275             literal value is 0xB1, that is,
3276             (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3277          */
3278          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3279          HReg tmp  = generate_zeroes_V128(env);
3280          HReg dst  = newVRegV(env);
3281          addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3282          tmp = do_sse_NotV128(env, tmp);
3283          addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3284          addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3285          return dst;
3286       }
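      /* Worked example: for input 64-bit lanes hi=0x0000000100000000,
         lo=0, the 32-bit lanes a:b:c:d are 1:0:0:0.  pcmpeqd against
         zero gives 0:~0:~0:~0, the NOT gives ~0:0:0:0, the 0xB1
         shuffle gives 0:~0:0:0, and the OR yields ~0:~0:0:0 -- the
         nonzero 64-bit lane becomes all ones, the zero lane stays
         zero, as required. */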
3287 
3288       case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3289       case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3290       case Iop_CmpNEZ8x16: op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
3291       do_CmpNEZ_vector:
3292       {
3293          HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
3294          HReg tmp  = newVRegV(env);
3295          HReg zero = generate_zeroes_V128(env);
3296          HReg dst;
3297          addInstr(env, mk_vMOVsd_RR(arg, tmp));
3298          addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3299          dst = do_sse_NotV128(env, tmp);
3300          return dst;
3301       }
3302 
3303       case Iop_Recip32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
3304       case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3305       case Iop_Sqrt32Fx4:  op = Asse_SQRTF;  goto do_32Fx4_unary;
3306       do_32Fx4_unary:
3307       {
3308          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3309          HReg dst = newVRegV(env);
3310          addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3311          return dst;
3312       }
3313 
3314 //..       case Iop_Recip64Fx2: op = Xsse_RCPF;   goto do_64Fx2_unary;
3315 //..       case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
3316       case Iop_Sqrt64Fx2:  op = Asse_SQRTF;  goto do_64Fx2_unary;
3317       do_64Fx2_unary:
3318       {
3319          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3320          HReg dst = newVRegV(env);
3321          addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
3322          return dst;
3323       }
3324 
3325       case Iop_Recip32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
3326       case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3327       case Iop_Sqrt32F0x4:  op = Asse_SQRTF;  goto do_32F0x4_unary;
3328       do_32F0x4_unary:
3329       {
3330          /* A bit subtle.  We have to copy the arg to the result
3331             register first, because actually doing the SSE scalar insn
3332             leaves the upper 3/4 of the destination register
3333             unchanged.  Whereas the required semantics of these
3334             primops is that the upper 3/4 is simply copied in from the
3335             argument. */
3336          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3337          HReg dst = newVRegV(env);
3338          addInstr(env, mk_vMOVsd_RR(arg, dst));
3339          addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3340          return dst;
3341       }
3342 
3343 //..       case Iop_Recip64F0x2: op = Xsse_RCPF;   goto do_64F0x2_unary;
3344 //..       case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3345       case Iop_Sqrt64F0x2:  op = Asse_SQRTF;  goto do_64F0x2_unary;
3346       do_64F0x2_unary:
3347       {
3348          /* A bit subtle.  We have to copy the arg to the result
3349             register first, because actually doing the SSE scalar insn
3350             leaves the upper half of the destination register
3351             unchanged.  Whereas the required semantics of these
3352             primops is that the upper half is simply copied in from the
3353             argument. */
3354          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3355          HReg dst = newVRegV(env);
3356          addInstr(env, mk_vMOVsd_RR(arg, dst));
3357          addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3358          return dst;
3359       }
3360 
3361       case Iop_32UtoV128: {
3362          HReg        dst     = newVRegV(env);
3363          AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3364          AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
3365          addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3366          addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3367          return dst;
3368       }
3369 
3370       case Iop_64UtoV128: {
3371          HReg        dst  = newVRegV(env);
3372          AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3373          AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3374          addInstr(env, AMD64Instr_Push(rmi));
3375          addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3376          add_to_rsp(env, 8);
3377          return dst;
3378       }
3379 
3380       default:
3381          break;
3382    } /* switch (e->Iex.Unop.op) */
3383    } /* if (e->tag == Iex_Unop) */
3384 
3385    if (e->tag == Iex_Binop) {
3386    switch (e->Iex.Binop.op) {
3387 
3388       case Iop_SetV128lo64: {
3389          HReg dst  = newVRegV(env);
3390          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3391          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3392          AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3393          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3394          addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3395          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3396          return dst;
3397       }
3398 
3399       case Iop_SetV128lo32: {
3400          HReg dst  = newVRegV(env);
3401          HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3402          HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3403          AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3404          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3405          addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3406          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3407          return dst;
3408       }
3409 
3410       case Iop_64HLtoV128: {
3411          AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
3412          HReg        dst = newVRegV(env);
3413          /* do this via the stack (easy, convenient, etc) */
3414          addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
3415          addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
3416          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
3417          add_to_rsp(env, 16);
3418          return dst;
3419       }
3420 
3421       case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3422       case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3423       case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
3424       case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
3425       case Iop_Add32Fx4:   op = Asse_ADDF;   goto do_32Fx4;
3426       case Iop_Div32Fx4:   op = Asse_DIVF;   goto do_32Fx4;
3427       case Iop_Max32Fx4:   op = Asse_MAXF;   goto do_32Fx4;
3428       case Iop_Min32Fx4:   op = Asse_MINF;   goto do_32Fx4;
3429       case Iop_Mul32Fx4:   op = Asse_MULF;   goto do_32Fx4;
3430       case Iop_Sub32Fx4:   op = Asse_SUBF;   goto do_32Fx4;
3431       do_32Fx4:
3432       {
3433          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3434          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3435          HReg dst = newVRegV(env);
3436          addInstr(env, mk_vMOVsd_RR(argL, dst));
3437          addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3438          return dst;
3439       }
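      /* Illustrative sketch: Iop_Add32Fx4 therefore becomes an xmm
         copy of argL into dst followed by roughly "addps %argR, %dst".
         The 64Fx2 and the lowest-lane-only 32F0x4/64F0x2 groups below
         follow the same copy-then-op pattern, using addpd/addss/addsd
         and friends. */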
3440 
3441       case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3442       case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3443       case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
3444       case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
3445       case Iop_Add64Fx2:   op = Asse_ADDF;   goto do_64Fx2;
3446       case Iop_Div64Fx2:   op = Asse_DIVF;   goto do_64Fx2;
3447       case Iop_Max64Fx2:   op = Asse_MAXF;   goto do_64Fx2;
3448       case Iop_Min64Fx2:   op = Asse_MINF;   goto do_64Fx2;
3449       case Iop_Mul64Fx2:   op = Asse_MULF;   goto do_64Fx2;
3450       case Iop_Sub64Fx2:   op = Asse_SUBF;   goto do_64Fx2;
3451       do_64Fx2:
3452       {
3453          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3454          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3455          HReg dst = newVRegV(env);
3456          addInstr(env, mk_vMOVsd_RR(argL, dst));
3457          addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3458          return dst;
3459       }
3460 
3461       case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
3462       case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
3463       case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
3464       case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
3465       case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
3466       case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
3467       case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
3468       case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
3469       case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
3470       case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
3471       do_32F0x4: {
3472          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3473          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3474          HReg dst = newVRegV(env);
3475          addInstr(env, mk_vMOVsd_RR(argL, dst));
3476          addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3477          return dst;
3478       }
3479 
3480       case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
3481       case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
3482       case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
3483       case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
3484       case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
3485       case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
3486       case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
3487       case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
3488       case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
3489       case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
3490       do_64F0x2: {
3491          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3492          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3493          HReg dst = newVRegV(env);
3494          addInstr(env, mk_vMOVsd_RR(argL, dst));
3495          addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3496          return dst;
3497       }
3498 
3499       case Iop_QNarrow32Sx4:
3500          op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3501       case Iop_QNarrow16Sx8:
3502          op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3503       case Iop_QNarrow16Ux8:
3504          op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3505 
3506       case Iop_InterleaveHI8x16:
3507          op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3508       case Iop_InterleaveHI16x8:
3509          op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3510       case Iop_InterleaveHI32x4:
3511          op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3512       case Iop_InterleaveHI64x2:
3513          op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3514 
3515       case Iop_InterleaveLO8x16:
3516          op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3517       case Iop_InterleaveLO16x8:
3518          op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3519       case Iop_InterleaveLO32x4:
3520          op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3521       case Iop_InterleaveLO64x2:
3522          op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3523 
3524       case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
3525       case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
3526       case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
3527       case Iop_Add8x16:    op = Asse_ADD8;     goto do_SseReRg;
3528       case Iop_Add16x8:    op = Asse_ADD16;    goto do_SseReRg;
3529       case Iop_Add32x4:    op = Asse_ADD32;    goto do_SseReRg;
3530       case Iop_Add64x2:    op = Asse_ADD64;    goto do_SseReRg;
3531       case Iop_QAdd8Sx16:  op = Asse_QADD8S;   goto do_SseReRg;
3532       case Iop_QAdd16Sx8:  op = Asse_QADD16S;  goto do_SseReRg;
3533       case Iop_QAdd8Ux16:  op = Asse_QADD8U;   goto do_SseReRg;
3534       case Iop_QAdd16Ux8:  op = Asse_QADD16U;  goto do_SseReRg;
3535       case Iop_Avg8Ux16:   op = Asse_AVG8U;    goto do_SseReRg;
3536       case Iop_Avg16Ux8:   op = Asse_AVG16U;   goto do_SseReRg;
3537       case Iop_CmpEQ8x16:  op = Asse_CMPEQ8;   goto do_SseReRg;
3538       case Iop_CmpEQ16x8:  op = Asse_CMPEQ16;  goto do_SseReRg;
3539       case Iop_CmpEQ32x4:  op = Asse_CMPEQ32;  goto do_SseReRg;
3540       case Iop_CmpGT8Sx16: op = Asse_CMPGT8S;  goto do_SseReRg;
3541       case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3542       case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
3543       case Iop_Max16Sx8:   op = Asse_MAX16S;   goto do_SseReRg;
3544       case Iop_Max8Ux16:   op = Asse_MAX8U;    goto do_SseReRg;
3545       case Iop_Min16Sx8:   op = Asse_MIN16S;   goto do_SseReRg;
3546       case Iop_Min8Ux16:   op = Asse_MIN8U;    goto do_SseReRg;
3547       case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3548       case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3549       case Iop_Mul16x8:    op = Asse_MUL16;    goto do_SseReRg;
3550       case Iop_Sub8x16:    op = Asse_SUB8;     goto do_SseReRg;
3551       case Iop_Sub16x8:    op = Asse_SUB16;    goto do_SseReRg;
3552       case Iop_Sub32x4:    op = Asse_SUB32;    goto do_SseReRg;
3553       case Iop_Sub64x2:    op = Asse_SUB64;    goto do_SseReRg;
3554       case Iop_QSub8Sx16:  op = Asse_QSUB8S;   goto do_SseReRg;
3555       case Iop_QSub16Sx8:  op = Asse_QSUB16S;  goto do_SseReRg;
3556       case Iop_QSub8Ux16:  op = Asse_QSUB8U;   goto do_SseReRg;
3557       case Iop_QSub16Ux8:  op = Asse_QSUB16U;  goto do_SseReRg;
3558       do_SseReRg: {
3559          HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3560          HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3561          HReg dst = newVRegV(env);
3562          if (arg1isEReg) {
3563             addInstr(env, mk_vMOVsd_RR(arg2, dst));
3564             addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3565          } else {
3566             addInstr(env, mk_vMOVsd_RR(arg1, dst));
3567             addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3568          }
3569          return dst;
3570       }
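      /* Note on arg1isEReg: these are two-operand instructions in
         which the destination doubles as the left operand.  For most
         ops the natural dst=arg1, E=arg2 ordering matches the IR
         semantics, so arg1 is simply copied into dst.  The
         pack/unpack/interleave ops need the operands the other way
         round, so for those arg2 goes into dst and arg1 is supplied
         as the E (second) operand. */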
3571 
3572       case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3573       case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3574       case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3575       case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3576       case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3577       case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3578       case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
3579       case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3580       do_SseShift: {
3581          HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
3582          AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3583          AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3584          HReg        ereg = newVRegV(env);
3585          HReg        dst  = newVRegV(env);
3586          addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3587          addInstr(env, AMD64Instr_Push(rmi));
3588          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3589          addInstr(env, mk_vMOVsd_RR(greg, dst));
3590          addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3591          add_to_rsp(env, 16);
3592          return dst;
3593       }
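      /* Note: the SSE shift instructions take their count from the
         low 64 bits of the E register.  Pushing an immediate 0 and
         then the count builds the 16-byte value { lo = count, hi = 0 }
         at (%rsp), which is then loaded into ereg. */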
3594 
3595       case Iop_Mul32x4:    fn = (HWord)h_generic_calc_Mul32x4;
3596                            goto do_SseAssistedBinary;
3597       case Iop_Max32Sx4:   fn = (HWord)h_generic_calc_Max32Sx4;
3598                            goto do_SseAssistedBinary;
3599       case Iop_Min32Sx4:   fn = (HWord)h_generic_calc_Min32Sx4;
3600                            goto do_SseAssistedBinary;
3601       case Iop_Max32Ux4:   fn = (HWord)h_generic_calc_Max32Ux4;
3602                            goto do_SseAssistedBinary;
3603       case Iop_Min32Ux4:   fn = (HWord)h_generic_calc_Min32Ux4;
3604                            goto do_SseAssistedBinary;
3605       case Iop_Max16Ux8:   fn = (HWord)h_generic_calc_Max16Ux8;
3606                            goto do_SseAssistedBinary;
3607       case Iop_Min16Ux8:   fn = (HWord)h_generic_calc_Min16Ux8;
3608                            goto do_SseAssistedBinary;
3609       case Iop_Max8Sx16:   fn = (HWord)h_generic_calc_Max8Sx16;
3610                            goto do_SseAssistedBinary;
3611       case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
3612                            goto do_SseAssistedBinary;
3613       case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
3614                            goto do_SseAssistedBinary;
3615       do_SseAssistedBinary: {
3616          /* RRRufff!  RRRufff code is what we're generating here.  Oh
3617             well. */
3618          vassert(fn != 0);
3619          HReg dst = newVRegV(env);
3620          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3621          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3622          HReg argp = newVRegI(env);
3623          /* subq $112, %rsp         -- make a space */
3624          sub_from_rsp(env, 112);
3625          /* leaq 48(%rsp), %r_argp  -- point into it */
3626          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3627                                         argp));
3628          /* andq $-16, %r_argp      -- 16-align the pointer */
3629          addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3630                                          AMD64RMI_Imm( ~(UInt)15 ),
3631                                          argp));
3632          /* Prepare 3 arg regs:
3633             leaq 0(%r_argp), %rdi
3634             leaq 16(%r_argp), %rsi
3635             leaq 32(%r_argp), %rdx
3636          */
3637          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3638                                         hregAMD64_RDI()));
3639          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3640                                         hregAMD64_RSI()));
3641          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
3642                                         hregAMD64_RDX()));
3643          /* Store the two args, at (%rsi) and (%rdx):
3644             movupd  %argL, 0(%rsi)
3645             movupd  %argR, 0(%rdx)
3646          */
3647          addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3648                                           AMD64AMode_IR(0, hregAMD64_RSI())));
3649          addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
3650                                           AMD64AMode_IR(0, hregAMD64_RDX())));
3651          /* call the helper */
3652          addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
3653          /* fetch the result from memory, using %r_argp, which the
3654             register allocator will keep alive across the call. */
3655          addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3656                                           AMD64AMode_IR(0, argp)));
3657          /* and finally, clear the space */
3658          add_to_rsp(env, 112);
3659          return dst;
3660       }
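      /* Rough picture of the frame built above: 112 bytes are
         reserved and argp is rounded down to a 16-aligned spot within
         them; the call then amounts to fn(&result, &argL, &argR) with
            %rdi = argp+0    (result written back here by the helper)
            %rsi = argp+16   (argL stored here beforehand)
            %rdx = argp+32   (argR stored here beforehand)
         after which the 16-byte result is reloaded from argp. */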
3661 
3662       case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
3663                          goto do_SseAssistedVectorAndScalar;
3664       case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
3665                          goto do_SseAssistedVectorAndScalar;
3666       do_SseAssistedVectorAndScalar: {
3667          /* RRRufff!  RRRufff code is what we're generating here.  Oh
3668             well. */
3669          vassert(fn != 0);
3670          HReg dst = newVRegV(env);
3671          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3672          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3673          HReg argp = newVRegI(env);
3674          /* subq $112, %rsp         -- make a space */
3675          sub_from_rsp(env, 112);
3676          /* leaq 48(%rsp), %r_argp  -- point into it */
3677          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3678                                         argp));
3679          /* andq $-16, %r_argp      -- 16-align the pointer */
3680          addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3681                                          AMD64RMI_Imm( ~(UInt)15 ),
3682                                          argp));
3683          /* Prepare 2 vector arg regs:
3684             leaq 0(%r_argp), %rdi
3685             leaq 16(%r_argp), %rsi
3686          */
3687          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3688                                         hregAMD64_RDI()));
3689          addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3690                                         hregAMD64_RSI()));
3691          /* Store the vector arg, at (%rsi):
3692             movupd  %argL, 0(%rsi)
3693          */
3694          addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3695                                           AMD64AMode_IR(0, hregAMD64_RSI())));
3696          /* And get the scalar value into rdx */
3697          addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
3698 
3699          /* call the helper */
3700          addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
3701          /* fetch the result from memory, using %r_argp, which the
3702             register allocator will keep alive across the call. */
3703          addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3704                                           AMD64AMode_IR(0, argp)));
3705          /* and finally, clear the space */
3706          add_to_rsp(env, 112);
3707          return dst;
3708       }
3709 
3710       default:
3711          break;
3712    } /* switch (e->Iex.Binop.op) */
3713    } /* if (e->tag == Iex_Binop) */
3714 
3715    if (e->tag == Iex_Mux0X) {
3716       HReg r8  = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3717       HReg rX  = iselVecExpr(env, e->Iex.Mux0X.exprX);
3718       HReg r0  = iselVecExpr(env, e->Iex.Mux0X.expr0);
3719       HReg dst = newVRegV(env);
3720       addInstr(env, mk_vMOVsd_RR(rX,dst));
3721       addInstr(env, AMD64Instr_Test64(0xFF, r8));
3722       addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3723       return dst;
3724    }
3725 
3726    //vec_fail:
3727    vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
3728               LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
3729    ppIRExpr(e);
3730    vpanic("iselVecExpr_wrk");
3731 }
3732 
3733 
3734 /*---------------------------------------------------------*/
3735 /*--- ISEL: Statements                                  ---*/
3736 /*---------------------------------------------------------*/
3737 
3738 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3739 {
3740    if (vex_traceflags & VEX_TRACE_VCODE) {
3741       vex_printf("\n-- ");
3742       ppIRStmt(stmt);
3743       vex_printf("\n");
3744    }
3745 
3746    switch (stmt->tag) {
3747 
3748    /* --------- STORE --------- */
3749    case Ist_Store: {
3750       IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3751       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3752       IREndness end   = stmt->Ist.Store.end;
3753 
3754       if (tya != Ity_I64 || end != Iend_LE)
3755          goto stmt_fail;
3756 
3757       if (tyd == Ity_I64) {
3758          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3759          AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3760          addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
3761          return;
3762       }
3763       if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
3764          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3765          HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3766          addInstr(env, AMD64Instr_Store(
3767                           toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
3768                           r,am));
3769          return;
3770       }
3771       if (tyd == Ity_F64) {
3772          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3773          HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3774          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
3775          return;
3776       }
3777       if (tyd == Ity_F32) {
3778          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3779          HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3780          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
3781          return;
3782       }
3783       if (tyd == Ity_V128) {
3784          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3785          HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3786          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
3787          return;
3788       }
3789       break;
3790    }
3791 
3792    /* --------- PUT --------- */
3793    case Ist_Put: {
3794       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3795       if (ty == Ity_I64) {
3796          /* We're going to write to memory, so compute the RHS into an
3797             AMD64RI. */
3798          AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3799          addInstr(env,
3800                   AMD64Instr_Alu64M(
3801                      Aalu_MOV,
3802                      ri,
3803                      AMD64AMode_IR(stmt->Ist.Put.offset,
3804                                    hregAMD64_RBP())
3805                  ));
3806          return;
3807       }
3808       if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
3809          HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3810          addInstr(env, AMD64Instr_Store(
3811                           toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
3812                           r,
3813                           AMD64AMode_IR(stmt->Ist.Put.offset,
3814                                         hregAMD64_RBP())));
3815          return;
3816       }
3817       if (ty == Ity_V128) {
3818          HReg        vec = iselVecExpr(env, stmt->Ist.Put.data);
3819          AMD64AMode* am  = AMD64AMode_IR(stmt->Ist.Put.offset,
3820                                          hregAMD64_RBP());
3821          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
3822          return;
3823       }
3824       if (ty == Ity_F32) {
3825          HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3826          AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
3827          set_SSE_rounding_default(env); /* paranoia */
3828          addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
3829          return;
3830       }
3831       if (ty == Ity_F64) {
3832          HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3833          AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
3834                                          hregAMD64_RBP() );
3835          addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
3836          return;
3837       }
3838       break;
3839    }
3840 
3841    /* --------- Indexed PUT --------- */
3842    case Ist_PutI: {
3843       AMD64AMode* am
3844          = genGuestArrayOffset(
3845               env, stmt->Ist.PutI.descr,
3846                    stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3847 
3848       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3849       if (ty == Ity_F64) {
3850          HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3851          addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
3852          return;
3853       }
3854       if (ty == Ity_I8) {
3855          HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3856          addInstr(env, AMD64Instr_Store( 1, r, am ));
3857          return;
3858       }
3859       if (ty == Ity_I64) {
3860          AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
3861          addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
3862          return;
3863       }
3864       break;
3865    }
3866 
3867    /* --------- TMP --------- */
3868    case Ist_WrTmp: {
3869       IRTemp tmp = stmt->Ist.WrTmp.tmp;
3870       IRType ty = typeOfIRTemp(env->type_env, tmp);
3871 
3872       /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
3873          compute it into an AMode and then use LEA.  This usually
3874          produces fewer instructions, often because (for memcheck
3875          created IR) we get t = address-expression, (t is later used
3876          twice) and so doing this naturally turns address-expression
3877          back into an AMD64 amode. */
3878       if (ty == Ity_I64
3879           && stmt->Ist.WrTmp.data->tag == Iex_Binop
3880           && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
3881          AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3882          HReg dst = lookupIRTemp(env, tmp);
3883          if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
3884             /* Hmm, iselIntExpr_AMode wimped out and just computed the
3885                value into a register.  Just emit a normal reg-reg move
3886                so reg-alloc can coalesce it away in the usual way. */
3887             HReg src = am->Aam.IR.reg;
3888             addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
3889          } else {
3890             addInstr(env, AMD64Instr_Lea64(am,dst));
3891          }
3892          return;
3893       }
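      /* Illustrative example: for IR along the lines of
         "t5 = Add64(t7, 0x20:I64)" this path emits
         "leaq 32(%r_t7), %r_t5" rather than a mov/add pair; the
         fallback reg-reg move covers the case where the amode
         degenerates to a bare register with zero offset. */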
3894 
3895       if (ty == Ity_I64 || ty == Ity_I32
3896           || ty == Ity_I16 || ty == Ity_I8) {
3897          AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3898          HReg dst = lookupIRTemp(env, tmp);
3899          addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
3900          return;
3901       }
3902       if (ty == Ity_I128) {
3903          HReg rHi, rLo, dstHi, dstLo;
3904          iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3905          lookupIRTemp128( &dstHi, &dstLo, env, tmp);
3906          addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3907          addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3908          return;
3909       }
3910       if (ty == Ity_I1) {
3911          AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3912          HReg dst = lookupIRTemp(env, tmp);
3913          addInstr(env, AMD64Instr_Set64(cond, dst));
3914          return;
3915       }
3916       if (ty == Ity_F64) {
3917          HReg dst = lookupIRTemp(env, tmp);
3918          HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3919          addInstr(env, mk_vMOVsd_RR(src, dst));
3920          return;
3921       }
3922       if (ty == Ity_F32) {
3923          HReg dst = lookupIRTemp(env, tmp);
3924          HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3925          addInstr(env, mk_vMOVsd_RR(src, dst));
3926          return;
3927       }
3928       if (ty == Ity_V128) {
3929          HReg dst = lookupIRTemp(env, tmp);
3930          HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3931          addInstr(env, mk_vMOVsd_RR(src, dst));
3932          return;
3933       }
3934       break;
3935    }
3936 
3937    /* --------- Call to DIRTY helper --------- */
3938    case Ist_Dirty: {
3939       IRType   retty;
3940       IRDirty* d = stmt->Ist.Dirty.details;
3941       Bool     passBBP = False;
3942 
3943       if (d->nFxState == 0)
3944          vassert(!d->needsBBP);
3945 
3946       passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3947 
3948       /* Marshal args, do the call, clear stack. */
3949       doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3950 
3951       /* Now figure out what to do with the returned value, if any. */
3952       if (d->tmp == IRTemp_INVALID)
3953          /* No return value.  Nothing to do. */
3954          return;
3955 
3956       retty = typeOfIRTemp(env->type_env, d->tmp);
3957       if (retty == Ity_I64 || retty == Ity_I32
3958           || retty == Ity_I16 || retty == Ity_I8) {
3959          /* The returned value is in %rax.  Park it in the register
3960             associated with tmp. */
3961          HReg dst = lookupIRTemp(env, d->tmp);
3962          addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
3963          return;
3964       }
3965       break;
3966    }
3967 
3968    /* --------- MEM FENCE --------- */
3969    case Ist_MBE:
3970       switch (stmt->Ist.MBE.event) {
3971          case Imbe_Fence:
3972             addInstr(env, AMD64Instr_MFence());
3973             return;
3974          default:
3975             break;
3976       }
3977       break;
3978 
3979    /* --------- ACAS --------- */
3980    case Ist_CAS:
3981       if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3982          /* "normal" singleton CAS */
3983          UChar  sz;
3984          IRCAS* cas = stmt->Ist.CAS.details;
3985          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
3986          /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
3987          AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
3988          HReg rData = iselIntExpr_R(env, cas->dataLo);
3989          HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3990          HReg rOld  = lookupIRTemp(env, cas->oldLo);
3991          vassert(cas->expdHi == NULL);
3992          vassert(cas->dataHi == NULL);
3993          addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
3994          addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
3995          addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
3996          switch (ty) {
3997             case Ity_I64: sz = 8; break;
3998             case Ity_I32: sz = 4; break;
3999             case Ity_I16: sz = 2; break;
4000             case Ity_I8:  sz = 1; break;
4001             default: goto unhandled_cas;
4002          }
4003          addInstr(env, AMD64Instr_ACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(
                          Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
         return;
      } else {
         /* double CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
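         /* DACAS emits "lock cmpxchg8b/16b", comparing memory against
            %rdx:%rax and writing %rcx:%rbx on success.  If the compare
            failed (ZF clear), %rdx:%rax holds the pair actually
            observed in memory; copy it into rOldHi:rOldLo. */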
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
         addInstr(env,
                  AMD64Instr_CMov64(
                     Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
         return;
      }
      unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (no denotation in the IR) and so we
      ignore them ... if any actually made it this far. */
   case Ist_AbiHint:
       return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
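      /* Conditional exit from the block: evaluate the guard into a
         condition code and emit a conditional Goto to the constant
         destination, tagged with the exit's jump kind. */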
      AMD64RI*      dst;
      AMD64CondCode cc;
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
      dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
      cc  = iselCondCode(env, stmt->Ist.Exit.guard);
      addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
      return;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   AMD64RI* ri;
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }
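   /* The destination may be any 64-bit expression; select it into a
      register or immediate and emit an unconditional Goto
      (Acc_ALWAYS) carrying the block's jump kind. */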
   ri = iselIntExpr_RI(env, next);
   addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS, ri));
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch      arch_host,
                                      VexArchInfo* archinfo_host,
                                      VexAbiInfo*  vbi/*UNUSED*/ )
{
   Int      i, j;
   HReg     hreg, hregHI;
   ISelEnv* env;
   UInt     hwcaps_host = archinfo_host->hwcaps;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT)));

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->hwcaps = hwcaps_host;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
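   /* 'j' numbers the virtual registers.  I1/I8/I16/I32/I64 temps get
      one 64-bit integer vreg; I128 temps get two (low half in
      vregmap, high half in vregmapHI); F32, F64 and V128 temps all
      live in 128-bit vector vregs. */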
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8:
         case Ity_I16:
         case Ity_I32:
         case Ity_I64:  hreg   = mkHReg(j++, HRcInt64, True); break;
         case Ity_I128: hreg   = mkHReg(j++, HRcInt64, True);
                        hregHI = mkHReg(j++, HRcInt64, True); break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
         default: ppIRType(bb->tyenv->types[i]);
                  vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind);

   /* Record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/