1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2011 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47
48 /* TODO 21 Apr 2005:
49
50 -- (Really an assembler issue) don't emit CMov32 as a cmov
51 insn, since that's expensive on P4 and conditional branch
52 is cheaper if (as we expect) the condition is highly predictable
53
54 -- preserve xmm registers across function calls (by declaring them
55 as trashed by call insns)
56
57 -- preserve x87 ST stack discipline across function calls. Sigh.
58
59 -- Check doHelperCall: if a call is conditional, we cannot safely
60 compute any regparm args directly to registers. Hence, the
61 fast-regparm marshalling should be restricted to unconditional
62 calls only.
63 */
64
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff ---*/
67 /*---------------------------------------------------------*/
68
69 /* Vex-generated code expects to run with the FPU set as follows: all
70 exceptions masked, round-to-nearest, precision = 53 bits. This
71 corresponds to a FPU control word value of 0x027F.
72
73 Similarly the SSE control word (%mxcsr) should be 0x1F80.
74
75 %fpucw and %mxcsr should have these values on entry to
76 Vex-generated code, and those values should be
77 unchanged at exit.
78 */
79
80 #define DEFAULT_FPUCW 0x027F
81
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
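
/* For reference, an informal decomposition of DEFAULT_FPUCW
   (standard x87 FCW bit layout; illustration only, not used by the
   code below):

      0x027F = 0x003F   exception mask bits IM..PM all set
             | 0x0040   bit 6, reserved (reads back as 1)
             | 0x0200   PC (bits 9:8) = 10b, 53-bit precision
             | 0x0000   RC (bits 11:10) = 00b, round to nearest

   Likewise 0x1F80 for %mxcsr is simply "all SSE exceptions masked,
   round to nearest, FZ and DAZ off". */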
84
85
86 /*---------------------------------------------------------*/
87 /*--- misc helpers ---*/
88 /*---------------------------------------------------------*/
89
90 /* These are duplicated in guest-x86/toIR.c */
91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93 return IRExpr_Unop(op, a);
94 }
95
96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98 return IRExpr_Binop(op, a1, a2);
99 }
100
101 static IRExpr* bind ( Int binder )
102 {
103 return IRExpr_Binder(binder);
104 }
105
106 static Bool isZeroU8 ( IRExpr* e )
107 {
108 return e->tag == Iex_Const
109 && e->Iex.Const.con->tag == Ico_U8
110 && e->Iex.Const.con->Ico.U8 == 0;
111 }
112
113 static Bool isZeroU32 ( IRExpr* e )
114 {
115 return e->tag == Iex_Const
116 && e->Iex.Const.con->tag == Ico_U32
117 && e->Iex.Const.con->Ico.U32 == 0;
118 }
119
120 static Bool isZeroU64 ( IRExpr* e )
121 {
122 return e->tag == Iex_Const
123 && e->Iex.Const.con->tag == Ico_U64
124 && e->Iex.Const.con->Ico.U64 == 0ULL;
125 }
126
127
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv ---*/
130 /*---------------------------------------------------------*/
131
132 /* This carries around:
133
134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135 might encounter. This is computed before insn selection starts,
136 and does not change.
137
138 - A mapping from IRTemp to HReg. This tells the insn selector
139 which virtual register(s) are associated with each IRTemp
140 temporary. This is computed before insn selection starts, and
141 does not change. We expect this mapping to map precisely the
142 same set of IRTemps as the type mapping does.
143
144 - vregmap holds the primary register for the IRTemp.
145 - vregmapHI is only used for 64-bit integer-typed
146 IRTemps. It holds the identity of a second
147 32-bit virtual HReg, which holds the high half
148 of the value.
149
150 - The code array, that is, the insns selected so far.
151
152 - A counter, for generating new virtual registers.
153
154 - The host subarchitecture we are selecting insns for.
155 This is set at the start and does not change.
156
157 Note, this is all host-independent. */
158
159 typedef
160 struct {
161 IRTypeEnv* type_env;
162
163 HReg* vregmap;
164 HReg* vregmapHI;
165 Int n_vregmap;
166
167 HInstrArray* code;
168
169 Int vreg_ctr;
170
171 UInt hwcaps;
172 }
173 ISelEnv;
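
/* So, for example, an Ity_I64 IRTemp t is described by two Int32
   vregs: vregmap[t] holds the low 32 bits and vregmapHI[t] the high
   32 bits, while 32/16/8-bit IRTemps use vregmap[t] alone and leave
   vregmapHI[t] as INVALID_HREG (see lookupIRTemp64 below). */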
174
175
176 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
177 {
178 vassert(tmp >= 0);
179 vassert(tmp < env->n_vregmap);
180 return env->vregmap[tmp];
181 }
182
183 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
184 {
185 vassert(tmp >= 0);
186 vassert(tmp < env->n_vregmap);
187 vassert(env->vregmapHI[tmp] != INVALID_HREG);
188 *vrLO = env->vregmap[tmp];
189 *vrHI = env->vregmapHI[tmp];
190 }
191
192 static void addInstr ( ISelEnv* env, X86Instr* instr )
193 {
194 addHInstr(env->code, instr);
195 if (vex_traceflags & VEX_TRACE_VCODE) {
196 ppX86Instr(instr, False);
197 vex_printf("\n");
198 }
199 }
200
201 static HReg newVRegI ( ISelEnv* env )
202 {
203 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
204 env->vreg_ctr++;
205 return reg;
206 }
207
208 static HReg newVRegF ( ISelEnv* env )
209 {
210 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
211 env->vreg_ctr++;
212 return reg;
213 }
214
215 static HReg newVRegV ( ISelEnv* env )
216 {
217 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
218 env->vreg_ctr++;
219 return reg;
220 }
221
222
223 /*---------------------------------------------------------*/
224 /*--- ISEL: Forward declarations ---*/
225 /*---------------------------------------------------------*/
226
227 /* These are organised as iselXXX and iselXXX_wrk pairs. The
228 iselXXX_wrk do the real work, but are not to be called directly.
229 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
230 checks that all returned registers are virtual. You should not
231 call the _wrk version directly.
232 */
233 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
234 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
235
236 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
237 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
238
239 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
240 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
241
242 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
243 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
244
245 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
246 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
247
248 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
249 ISelEnv* env, IRExpr* e );
250 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
251 ISelEnv* env, IRExpr* e );
252
253 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
254 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
255
256 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
257 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
258
259 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
260 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
261
262 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
263 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
264
265
266 /*---------------------------------------------------------*/
267 /*--- ISEL: Misc helpers ---*/
268 /*---------------------------------------------------------*/
269
270 /* Make an int reg-reg move. */
271
272 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
273 {
274 vassert(hregClass(src) == HRcInt32);
275 vassert(hregClass(dst) == HRcInt32);
276 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
277 }
278
279
280 /* Make a vector reg-reg move. */
281
282 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
283 {
284 vassert(hregClass(src) == HRcVec128);
285 vassert(hregClass(dst) == HRcVec128);
286 return X86Instr_SseReRg(Xsse_MOV, src, dst);
287 }
288
289 /* Advance/retreat %esp by n. */
290
291 static void add_to_esp ( ISelEnv* env, Int n )
292 {
293 vassert(n > 0 && n < 256 && (n%4) == 0);
294 addInstr(env,
295 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
296 }
297
298 static void sub_from_esp ( ISelEnv* env, Int n )
299 {
300 vassert(n > 0 && n < 256 && (n%4) == 0);
301 addInstr(env,
302 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
303 }
304
305
306 /* Given an amode, return one which references 4 bytes further
307 along. */
308
309 static X86AMode* advance4 ( X86AMode* am )
310 {
311 X86AMode* am4 = dopyX86AMode(am);
312 switch (am4->tag) {
313 case Xam_IRRS:
314 am4->Xam.IRRS.imm += 4; break;
315 case Xam_IR:
316 am4->Xam.IR.imm += 4; break;
317 default:
318 vpanic("advance4(x86,host)");
319 }
320 return am4;
321 }
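
/* Illustrative use of advance4 (a sketch only; vLo/vHi stand for the
   two halves of some 64-bit value already in vregs): a 64-bit store to
   amode 'am' is emitted as two 32-bit stores, the second one 4 bytes
   further along:

      addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(vLo), am));
      addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(vHi), advance4(am)));
*/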
322
323
324 /* Push an arg onto the host stack, in preparation for a call to a
325 helper function of some kind. Returns the number of 32-bit words
326 pushed. */
327
328 static Int pushArg ( ISelEnv* env, IRExpr* arg )
329 {
330 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
331 if (arg_ty == Ity_I32) {
332 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
333 return 1;
334 } else
335 if (arg_ty == Ity_I64) {
336 HReg rHi, rLo;
337 iselInt64Expr(&rHi, &rLo, env, arg);
338 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
339 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
340 return 2;
341 }
342 ppIRExpr(arg);
343 vpanic("pushArg(x86): can't handle arg of this type");
344 }
345
346
347 /* Complete the call to a helper function, by calling the
348 helper and clearing the args off the stack. */
349
350 static
351 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
352 IRCallee* cee, Int n_arg_ws )
353 {
354 /* Complication. Need to decide which reg to use as the fn address
355 pointer, in a way that doesn't trash regparm-passed
356 parameters. */
357 vassert(sizeof(void*) == 4);
358
359 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
360 cee->regparms));
361 if (n_arg_ws > 0)
362 add_to_esp(env, 4*n_arg_ws);
363 }
364
365
366 /* Used only in doHelperCall. See big comment in doHelperCall re
367 handling of regparm args. This function figures out whether
368 evaluation of an expression might require use of a fixed register.
369 If in doubt return True (safe but suboptimal).
370 */
371 static
372 Bool mightRequireFixedRegs ( IRExpr* e )
373 {
374 switch (e->tag) {
375 case Iex_RdTmp: case Iex_Const: case Iex_Get:
376 return False;
377 default:
378 return True;
379 }
380 }
381
382
383 /* Do a complete function call. guard is a Ity_Bit expression
384 indicating whether or not the call happens. If guard==NULL, the
385 call is unconditional. */
386
387 static
388 void doHelperCall ( ISelEnv* env,
389 Bool passBBP,
390 IRExpr* guard, IRCallee* cee, IRExpr** args )
391 {
392 X86CondCode cc;
393 HReg argregs[3];
394 HReg tmpregs[3];
395 Bool danger;
396 Int not_done_yet, n_args, n_arg_ws, stack_limit,
397 i, argreg, argregX;
398
399 /* Marshal args for a call, do the call, and clear the stack.
400 Complexities to consider:
401
402 * if passBBP is True, %ebp (the baseblock pointer) is to be
403 passed as the first arg.
404
405 * If the callee claims regparmness of 1, 2 or 3, we must pass the
406 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
407 respectively). To keep things relatively simple, only args of
408 type I32 may be passed as regparms -- just bomb out if anything
409 else turns up. Clearly this depends on the front ends not
410 trying to pass any other types as regparms.
411 */
412
413 /* 16 Nov 2004: the regparm handling is complicated by the
414 following problem.
415
416 Consider a call to a function with two regparm parameters:
417 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
418 Suppose code is first generated to compute e1 into %eax. Then,
419 code is generated to compute e2 into %edx. Unfortunately, if
420 the latter code sequence uses %eax, it will trash the value of
421 e1 computed by the former sequence. This could happen if (for
422 example) e2 itself involved a function call. In the code below,
423 args are evaluated right-to-left, not left-to-right, but the
424 principle and the problem are the same.
425
426 One solution is to compute all regparm-bound args into vregs
427 first, and once they are all done, move them to the relevant
428 real regs. This always gives correct code, but it also gives
429 a bunch of vreg-to-rreg moves which are usually redundant but
430 are hard for the register allocator to get rid of.
431
432 A compromise is to first examine all regparm'd argument
433 expressions. If they are all so simple that it is clear
434 they will be evaluated without use of any fixed registers,
435 use the old compute-directly-to-fixed-target scheme. If not,
436 be safe and use the via-vregs scheme.
437
438 Note this requires being able to examine an expression and
439 determine whether or not evaluation of it might use a fixed
440 register. That requires knowledge of how the rest of this
441 insn selector works. Currently just the following 3 are
442 regarded as safe -- hopefully they cover the majority of
443 arguments in practice: Iex_RdTmp, Iex_Const and Iex_Get.
444 */
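   /* A concrete instance of the hazard (sketch only): with
      regparms==2, suppose one argument has already been computed
      directly into %eax, and evaluating the other argument itself
      involves a helper call.  That call returns its result in %eax
      and so destroys the value already parked there; hence the
      via-vregs fallback below. */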
445 vassert(cee->regparms >= 0 && cee->regparms <= 3);
446
447 n_args = n_arg_ws = 0;
448 while (args[n_args]) n_args++;
449
450 not_done_yet = n_args;
451 if (passBBP)
452 not_done_yet++;
453
454 stack_limit = cee->regparms;
455 if (cee->regparms > 0 && passBBP) stack_limit--;
456
457 /* ------ BEGIN marshall all arguments ------ */
458
459 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
460 for (i = n_args-1; i >= stack_limit; i--) {
461 n_arg_ws += pushArg(env, args[i]);
462 not_done_yet--;
463 }
464
465 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
466 registers. */
467
468 if (cee->regparms > 0) {
469
470 /* ------ BEGIN deal with regparms ------ */
471
472 /* deal with regparms, not forgetting %ebp if needed. */
473 argregs[0] = hregX86_EAX();
474 argregs[1] = hregX86_EDX();
475 argregs[2] = hregX86_ECX();
476 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
477
478 argreg = cee->regparms;
479
480 /* In keeping with big comment above, detect potential danger
481 and use the via-vregs scheme if needed. */
482 danger = False;
483 for (i = stack_limit-1; i >= 0; i--) {
484 if (mightRequireFixedRegs(args[i])) {
485 danger = True;
486 break;
487 }
488 }
489
490 if (danger) {
491
492 /* Move via temporaries */
493 argregX = argreg;
494 for (i = stack_limit-1; i >= 0; i--) {
495
496 if (0) {
497 vex_printf("x86 host: register param is complex: ");
498 ppIRExpr(args[i]);
499 vex_printf("\n");
500 }
501
502 argreg--;
503 vassert(argreg >= 0);
504 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
505 tmpregs[argreg] = iselIntExpr_R(env, args[i]);
506 not_done_yet--;
507 }
508 for (i = stack_limit-1; i >= 0; i--) {
509 argregX--;
510 vassert(argregX >= 0);
511 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
512 }
513
514 } else {
515 /* It's safe to compute all regparm args directly into their
516 target registers. */
517 for (i = stack_limit-1; i >= 0; i--) {
518 argreg--;
519 vassert(argreg >= 0);
520 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
521 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
522 iselIntExpr_RMI(env, args[i]),
523 argregs[argreg]));
524 not_done_yet--;
525 }
526
527 }
528
529 /* Not forgetting %ebp if needed. */
530 if (passBBP) {
531 vassert(argreg == 1);
532 addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
533 not_done_yet--;
534 }
535
536 /* ------ END deal with regparms ------ */
537
538 } else {
539
540 /* No regparms. Heave %ebp on the stack if needed. */
541 if (passBBP) {
542 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
543 n_arg_ws++;
544 not_done_yet--;
545 }
546
547 }
548
549 vassert(not_done_yet == 0);
550
551 /* ------ END marshall all arguments ------ */
552
553 /* Now we can compute the condition. We can't do it earlier
554 because the argument computations could trash the condition
555 codes. Be a bit clever to handle the common case where the
556 guard is 1:Bit. */
557 cc = Xcc_ALWAYS;
558 if (guard) {
559 if (guard->tag == Iex_Const
560 && guard->Iex.Const.con->tag == Ico_U1
561 && guard->Iex.Const.con->Ico.U1 == True) {
562 /* unconditional -- do nothing */
563 } else {
564 cc = iselCondCode( env, guard );
565 }
566 }
567
568 /* call the helper, and get the args off the stack afterwards. */
569 callHelperAndClearArgs( env, cc, cee, n_arg_ws );
570 }
571
572
573 /* Given a guest-state array descriptor, an index expression and a
574 bias, generate an X86AMode holding the relevant guest state
575 offset. */
576
577 static
578 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
579 IRExpr* off, Int bias )
580 {
581 HReg tmp, roff;
582 Int elemSz = sizeofIRType(descr->elemTy);
583 Int nElems = descr->nElems;
584 Int shift = 0;
585
586 /* throw out any cases not generated by an x86 front end. In
587 theory there might be a day where we need to handle them -- if
588 we ever run non-x86-guest on x86 host. */
589
590 if (nElems != 8)
591 vpanic("genGuestArrayOffset(x86 host)(1)");
592
593 switch (elemSz) {
594 case 1: shift = 0; break;
595 case 4: shift = 2; break;
596 case 8: shift = 3; break;
597 default: vpanic("genGuestArrayOffset(x86 host)(2)");
598 }
599
600 /* Compute off into a reg, %off. Then return:
601
602 movl %off, %tmp
603 addl $bias, %tmp (if bias != 0)
604 andl $7, %tmp
605 ... base(%ebp, %tmp, shift) ...
606 */
607 tmp = newVRegI(env);
608 roff = iselIntExpr_R(env, off);
609 addInstr(env, mk_iMOVsd_RR(roff, tmp));
610 if (bias != 0) {
611 addInstr(env,
612 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
613 }
614 addInstr(env,
615 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
616 return
617 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
618 }
619
620
621 /* Mess with the FPU's rounding mode: set to the default rounding mode
622 (DEFAULT_FPUCW). */
623 static
624 void set_FPU_rounding_default ( ISelEnv* env )
625 {
626 /* pushl $DEFAULT_FPUCW
627 fldcw 0(%esp)
628 addl $4, %esp
629 */
630 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
631 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
632 addInstr(env, X86Instr_FpLdCW(zero_esp));
633 add_to_esp(env, 4);
634 }
635
636
637 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
638 expression denoting a value in the range 0 .. 3, indicating a round
639 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
640 the same rounding.
641 */
642 static
643 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
644 {
645 HReg rrm = iselIntExpr_R(env, mode);
646 HReg rrm2 = newVRegI(env);
647 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
648
649 /* movl %rrm, %rrm2
650 andl $3, %rrm2 -- shouldn't be needed; paranoia
651 shll $10, %rrm2
652 orl $DEFAULT_FPUCW, %rrm2
653 pushl %rrm2
654 fldcw 0(%esp)
655 addl $4, %esp
656 */
657 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
658 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
659 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
660 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
661 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
662 addInstr(env, X86Instr_FpLdCW(zero_esp));
663 add_to_esp(env, 4);
664 }
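
/* Why a plain "shll $10" suffices: the IRRoundingMode encoding happens
   to coincide with the x87 RC field (FCW bits 11:10), so no translation
   step is needed.  Informal mapping, for reference only:

      IRRoundingMode 0 (to nearest)   ->  RC = 00
      IRRoundingMode 1 (toward -inf)  ->  RC = 01
      IRRoundingMode 2 (toward +inf)  ->  RC = 10
      IRRoundingMode 3 (toward zero)  ->  RC = 11
*/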
665
666
667 /* Generate !src into a new vector register, and be sure that the code
668 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
669 way to do this.
670 */
671 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
672 {
673 HReg dst = newVRegV(env);
674 /* Set dst to zero. If dst contains a NaN then all hell might
675 break loose after the comparison. So, first zero it. */
676 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
677 /* And now make it all 1s ... */
678 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
679 /* Finally, xor 'src' into it. */
680 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
681 /* Doesn't that just totally suck? */
682 return dst;
683 }
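
/* The emitted sequence is, roughly (AT&T syntax, illustration only):

      xorps   %dst, %dst     ; dst := 0, so no NaNs reach the compare
      cmpeqps %dst, %dst     ; 0 == 0 in every lane -> dst := all ones
      xorps   %src, %dst     ; dst := ~src
*/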
684
685
686 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
687 after most non-simple FPU operations (simple = +, -, *, / and
688 sqrt).
689
690 This could be done a lot more efficiently if needed, by loading
691 zero and adding it to the value to be rounded (fldz ; faddp?).
692 */
693 static void roundToF64 ( ISelEnv* env, HReg reg )
694 {
695 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
696 sub_from_esp(env, 8);
697 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
698 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
699 add_to_esp(env, 8);
700 }
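
/* In effect (a sketch; the real emission shuffles the virtual FP reg
   through %st(0)):

      subl $8, %esp
      fstl 0(%esp)     ; store as a 64-bit double: mantissa rounded to 53 bits
      fldl 0(%esp)     ; reload the now-rounded value
      addl $8, %esp
*/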
701
702
703 /*---------------------------------------------------------*/
704 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
705 /*---------------------------------------------------------*/
706
707 /* Select insns for an integer-typed expression, and add them to the
708 code list. Return a reg holding the result. This reg will be a
709 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
710 want to modify it, ask for a new vreg, copy it in there, and modify
711 the copy. The register allocator will do its best to map both
712 vregs to the same real register, so the copies will often disappear
713 later in the game.
714
715 This should handle expressions of 32, 16 and 8-bit type. All
716 results are returned in a 32-bit register. For 16- and 8-bit
717 expressions, the upper 16/24 bits are arbitrary, so you should mask
718 or sign extend partial values if necessary.
719 */
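
/* For example, the standard idiom used throughout this file whenever
   the result does need to be modified (a sketch; 'someIRExpr' and the
   shift amount are placeholders):

      HReg src = iselIntExpr_R(env, someIRExpr);      // must not be written
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(src, dst));          // copy to a fresh vreg
      addInstr(env, X86Instr_Sh32(Xsh_SHL, 2, dst));  // modify only the copy
*/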
720
721 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
722 {
723 HReg r = iselIntExpr_R_wrk(env, e);
724 /* sanity checks ... */
725 # if 0
726 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
727 # endif
728 vassert(hregClass(r) == HRcInt32);
729 vassert(hregIsVirtual(r));
730 return r;
731 }
732
733 /* DO NOT CALL THIS DIRECTLY ! */
734 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
735 {
736 MatchInfo mi;
737
738 IRType ty = typeOfIRExpr(env->type_env,e);
739 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
740
741 switch (e->tag) {
742
743 /* --------- TEMP --------- */
744 case Iex_RdTmp: {
745 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
746 }
747
748 /* --------- LOAD --------- */
749 case Iex_Load: {
750 HReg dst = newVRegI(env);
751 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
752
753 /* We can't handle big-endian loads, nor load-linked. */
754 if (e->Iex.Load.end != Iend_LE)
755 goto irreducible;
756
757 if (ty == Ity_I32) {
758 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
759 X86RMI_Mem(amode), dst) );
760 return dst;
761 }
762 if (ty == Ity_I16) {
763 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
764 return dst;
765 }
766 if (ty == Ity_I8) {
767 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
768 return dst;
769 }
770 break;
771 }
772
773 /* --------- TERNARY OP --------- */
774 case Iex_Triop: {
775 /* C3210 flags following FPU partial remainder (fprem), both
776 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
777 if (e->Iex.Triop.op == Iop_PRemC3210F64
778 || e->Iex.Triop.op == Iop_PRem1C3210F64) {
779 HReg junk = newVRegF(env);
780 HReg dst = newVRegI(env);
781 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
782 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
783 /* XXXROUNDINGFIXME */
784 /* set roundingmode here */
785 addInstr(env, X86Instr_FpBinary(
786 e->Iex.Triop.op==Iop_PRemC3210F64
787 ? Xfp_PREM : Xfp_PREM1,
788 srcL,srcR,junk
789 ));
790 /* The previous pseudo-insn will have left the FPU's C3210
791 flags set correctly. So bag them. */
792 addInstr(env, X86Instr_FpStSW_AX());
793 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
794 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
795 return dst;
796 }
797
798 break;
799 }
800
801 /* --------- BINARY OP --------- */
802 case Iex_Binop: {
803 X86AluOp aluOp;
804 X86ShiftOp shOp;
805
806 /* Pattern: Sub32(0,x) */
807 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
808 HReg dst = newVRegI(env);
809 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
810 addInstr(env, mk_iMOVsd_RR(reg,dst));
811 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
812 return dst;
813 }
814
815 /* Is it an addition or logical style op? */
816 switch (e->Iex.Binop.op) {
817 case Iop_Add8: case Iop_Add16: case Iop_Add32:
818 aluOp = Xalu_ADD; break;
819 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
820 aluOp = Xalu_SUB; break;
821 case Iop_And8: case Iop_And16: case Iop_And32:
822 aluOp = Xalu_AND; break;
823 case Iop_Or8: case Iop_Or16: case Iop_Or32:
824 aluOp = Xalu_OR; break;
825 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
826 aluOp = Xalu_XOR; break;
827 case Iop_Mul16: case Iop_Mul32:
828 aluOp = Xalu_MUL; break;
829 default:
830 aluOp = Xalu_INVALID; break;
831 }
832 /* For commutative ops we assume any literal
833 values are on the second operand. */
834 if (aluOp != Xalu_INVALID) {
835 HReg dst = newVRegI(env);
836 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
837 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
838 addInstr(env, mk_iMOVsd_RR(reg,dst));
839 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
840 return dst;
841 }
842 /* Could do better here; forcing the first arg into a reg
843 isn't always clever.
844 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
845 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
846 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
847 movl 0xFFFFFFA0(%vr41),%vr107
848 movl 0xFFFFFFA4(%vr41),%vr108
849 movl %vr107,%vr106
850 xorl %vr108,%vr106
851 movl 0xFFFFFFA8(%vr41),%vr109
852 movl %vr106,%vr105
853 andl %vr109,%vr105
854 movl 0xFFFFFFA0(%vr41),%vr110
855 movl %vr105,%vr104
856 xorl %vr110,%vr104
857 movl %vr104,%vr70
858 */
859
860 /* Perhaps a shift op? */
861 switch (e->Iex.Binop.op) {
862 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
863 shOp = Xsh_SHL; break;
864 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
865 shOp = Xsh_SHR; break;
866 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
867 shOp = Xsh_SAR; break;
868 default:
869 shOp = Xsh_INVALID; break;
870 }
871 if (shOp != Xsh_INVALID) {
872 HReg dst = newVRegI(env);
873
874 /* regL = the value to be shifted */
875 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
876 addInstr(env, mk_iMOVsd_RR(regL,dst));
877
878 /* Do any necessary widening for 16/8 bit operands */
879 switch (e->Iex.Binop.op) {
880 case Iop_Shr8:
881 addInstr(env, X86Instr_Alu32R(
882 Xalu_AND, X86RMI_Imm(0xFF), dst));
883 break;
884 case Iop_Shr16:
885 addInstr(env, X86Instr_Alu32R(
886 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
887 break;
888 case Iop_Sar8:
889 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
890 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
891 break;
892 case Iop_Sar16:
893 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
894 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
895 break;
896 default: break;
897 }
898
899 /* Now consider the shift amount. If it's a literal, we
900 can do a much better job than the general case. */
901 if (e->Iex.Binop.arg2->tag == Iex_Const) {
902 /* assert that the IR is well-typed */
903 Int nshift;
904 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
905 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
906 vassert(nshift >= 0);
907 if (nshift > 0)
908 /* Can't allow nshift==0 since that means %cl */
909 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
910 } else {
911 /* General case; we have to force the amount into %cl. */
912 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
913 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
914 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
915 }
916 return dst;
917 }
918
919 /* Handle misc other ops. */
920
921 if (e->Iex.Binop.op == Iop_Max32U) {
922 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
923 HReg dst = newVRegI(env);
924 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
925 addInstr(env, mk_iMOVsd_RR(src1,dst));
926 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
927 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
928 return dst;
929 }
930
931 if (e->Iex.Binop.op == Iop_8HLto16) {
932 HReg hi8 = newVRegI(env);
933 HReg lo8 = newVRegI(env);
934 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
935 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
936 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
937 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
938 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
939 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
940 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
941 return hi8;
942 }
943
944 if (e->Iex.Binop.op == Iop_16HLto32) {
945 HReg hi16 = newVRegI(env);
946 HReg lo16 = newVRegI(env);
947 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
948 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
949 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
950 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
951 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
952 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
953 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
954 return hi16;
955 }
956
957 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
958 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
959 HReg a16 = newVRegI(env);
960 HReg b16 = newVRegI(env);
961 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
962 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
963 Int shift = (e->Iex.Binop.op == Iop_MullS8
964 || e->Iex.Binop.op == Iop_MullU8)
965 ? 24 : 16;
966 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
967 || e->Iex.Binop.op == Iop_MullS16)
968 ? Xsh_SAR : Xsh_SHR;
969
970 addInstr(env, mk_iMOVsd_RR(a16s, a16));
971 addInstr(env, mk_iMOVsd_RR(b16s, b16));
972 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
973 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
974 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
975 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
976 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
977 return b16;
978 }
979
980 if (e->Iex.Binop.op == Iop_CmpF64) {
981 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
982 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
983 HReg dst = newVRegI(env);
984 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
985 /* shift this right 8 bits so as to conform to CmpF64
986 definition. */
987 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
988 return dst;
989 }
990
991 if (e->Iex.Binop.op == Iop_F64toI32S
992 || e->Iex.Binop.op == Iop_F64toI16S) {
993 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
994 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
995 HReg dst = newVRegI(env);
996
997 /* Used several times ... */
998 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
999
1000 /* rf now holds the value to be converted, and arg1 denotes the
1001 rounding mode, encoded as per the IRRoundingMode
1002 enum. The first thing to do is set the FPU's rounding
1003 mode accordingly. */
1004
1005 /* Create a space for the format conversion. */
1006 /* subl $4, %esp */
1007 sub_from_esp(env, 4);
1008
1009 /* Set host rounding mode */
1010 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1011
1012 /* gistw/l %rf, 0(%esp) */
1013 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1014 toUChar(sz), rf, zero_esp));
1015
1016 if (sz == 2) {
1017 /* movzwl 0(%esp), %dst */
1018 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1019 } else {
1020 /* movl 0(%esp), %dst */
1021 vassert(sz == 4);
1022 addInstr(env, X86Instr_Alu32R(
1023 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1024 }
1025
1026 /* Restore default FPU rounding. */
1027 set_FPU_rounding_default( env );
1028
1029 /* addl $4, %esp */
1030 add_to_esp(env, 4);
1031 return dst;
1032 }
1033
1034 break;
1035 }
1036
1037 /* --------- UNARY OP --------- */
1038 case Iex_Unop: {
1039
1040 /* 1Uto8(32to1(expr32)) */
1041 if (e->Iex.Unop.op == Iop_1Uto8) {
1042 DECLARE_PATTERN(p_32to1_then_1Uto8);
1043 DEFINE_PATTERN(p_32to1_then_1Uto8,
1044 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1045 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1046 IRExpr* expr32 = mi.bindee[0];
1047 HReg dst = newVRegI(env);
1048 HReg src = iselIntExpr_R(env, expr32);
1049 addInstr(env, mk_iMOVsd_RR(src,dst) );
1050 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1051 X86RMI_Imm(1), dst));
1052 return dst;
1053 }
1054 }
1055
1056 /* 8Uto32(LDle(expr32)) */
1057 if (e->Iex.Unop.op == Iop_8Uto32) {
1058 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1059 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1060 unop(Iop_8Uto32,
1061 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1062 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1063 HReg dst = newVRegI(env);
1064 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1065 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1066 return dst;
1067 }
1068 }
1069
1070 /* 8Sto32(LDle(expr32)) */
1071 if (e->Iex.Unop.op == Iop_8Sto32) {
1072 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1073 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1074 unop(Iop_8Sto32,
1075 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1076 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1077 HReg dst = newVRegI(env);
1078 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1079 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1080 return dst;
1081 }
1082 }
1083
1084 /* 16Uto32(LDle(expr32)) */
1085 if (e->Iex.Unop.op == Iop_16Uto32) {
1086 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1087 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1088 unop(Iop_16Uto32,
1089 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1090 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1091 HReg dst = newVRegI(env);
1092 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1093 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1094 return dst;
1095 }
1096 }
1097
1098 /* 8Uto32(GET:I8) */
1099 if (e->Iex.Unop.op == Iop_8Uto32) {
1100 if (e->Iex.Unop.arg->tag == Iex_Get) {
1101 HReg dst;
1102 X86AMode* amode;
1103 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1104 dst = newVRegI(env);
1105 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1106 hregX86_EBP());
1107 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1108 return dst;
1109 }
1110 }
1111
1112 /* 16Uto32(GET:I16) */
1113 if (e->Iex.Unop.op == Iop_16Uto32) {
1114 if (e->Iex.Unop.arg->tag == Iex_Get) {
1115 HReg dst;
1116 X86AMode* amode;
1117 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1118 dst = newVRegI(env);
1119 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1120 hregX86_EBP());
1121 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1122 return dst;
1123 }
1124 }
1125
1126 switch (e->Iex.Unop.op) {
1127 case Iop_8Uto16:
1128 case Iop_8Uto32:
1129 case Iop_16Uto32: {
1130 HReg dst = newVRegI(env);
1131 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1132 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1133 addInstr(env, mk_iMOVsd_RR(src,dst) );
1134 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1135 X86RMI_Imm(mask), dst));
1136 return dst;
1137 }
1138 case Iop_8Sto16:
1139 case Iop_8Sto32:
1140 case Iop_16Sto32: {
1141 HReg dst = newVRegI(env);
1142 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1143 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1144 addInstr(env, mk_iMOVsd_RR(src,dst) );
1145 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1146 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1147 return dst;
1148 }
1149 case Iop_Not8:
1150 case Iop_Not16:
1151 case Iop_Not32: {
1152 HReg dst = newVRegI(env);
1153 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1154 addInstr(env, mk_iMOVsd_RR(src,dst) );
1155 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1156 return dst;
1157 }
1158 case Iop_64HIto32: {
1159 HReg rHi, rLo;
1160 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1161 return rHi; /* and abandon rLo .. poor wee thing :-) */
1162 }
1163 case Iop_64to32: {
1164 HReg rHi, rLo;
1165 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1166 return rLo; /* similar stupid comment to the above ... */
1167 }
1168 case Iop_16HIto8:
1169 case Iop_32HIto16: {
1170 HReg dst = newVRegI(env);
1171 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1172 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1173 addInstr(env, mk_iMOVsd_RR(src,dst) );
1174 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1175 return dst;
1176 }
1177 case Iop_1Uto32:
1178 case Iop_1Uto8: {
1179 HReg dst = newVRegI(env);
1180 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1181 addInstr(env, X86Instr_Set32(cond,dst));
1182 return dst;
1183 }
1184 case Iop_1Sto8:
1185 case Iop_1Sto16:
1186 case Iop_1Sto32: {
1187 /* could do better than this, but for now ... */
1188 HReg dst = newVRegI(env);
1189 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1190 addInstr(env, X86Instr_Set32(cond,dst));
1191 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1192 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1193 return dst;
1194 }
1195 case Iop_Ctz32: {
1196 /* Count trailing zeroes, implemented by x86 'bsfl' */
1197 HReg dst = newVRegI(env);
1198 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1199 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1200 return dst;
1201 }
1202 case Iop_Clz32: {
1203 /* Count leading zeroes. Do 'bsrl' to establish the index
1204 of the highest set bit, and subtract that value from
1205 31. */
1206 HReg tmp = newVRegI(env);
1207 HReg dst = newVRegI(env);
1208 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1209 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1210 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1211 X86RMI_Imm(31), dst));
1212 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1213 X86RMI_Reg(tmp), dst));
1214 return dst;
1215 }
1216
1217 case Iop_CmpwNEZ32: {
1218 HReg dst = newVRegI(env);
1219 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1220 addInstr(env, mk_iMOVsd_RR(src,dst));
1221 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1222 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1223 X86RMI_Reg(src), dst));
1224 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1225 return dst;
1226 }
1227 case Iop_Left8:
1228 case Iop_Left16:
1229 case Iop_Left32: {
1230 HReg dst = newVRegI(env);
1231 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1232 addInstr(env, mk_iMOVsd_RR(src, dst));
1233 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1234 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1235 return dst;
1236 }
1237
1238 case Iop_V128to32: {
1239 HReg dst = newVRegI(env);
1240 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1241 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1242 sub_from_esp(env, 16);
1243 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1244 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1245 add_to_esp(env, 16);
1246 return dst;
1247 }
1248
1249 /* ReinterpF32asI32(e) */
1250 /* Given an IEEE754 single, produce an I32 with the same bit
1251 pattern. Keep stack 8-aligned even though only using 4
1252 bytes. */
1253 case Iop_ReinterpF32asI32: {
1254 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1255 HReg dst = newVRegI(env);
1256 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1257 /* paranoia */
1258 set_FPU_rounding_default(env);
1259 /* subl $8, %esp */
1260 sub_from_esp(env, 8);
1261 /* gstF %rf, 0(%esp) */
1262 addInstr(env,
1263 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1264 /* movl 0(%esp), %dst */
1265 addInstr(env,
1266 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1267 /* addl $8, %esp */
1268 add_to_esp(env, 8);
1269 return dst;
1270 }
1271
1272 case Iop_16to8:
1273 case Iop_32to8:
1274 case Iop_32to16:
1275 /* These are no-ops. */
1276 return iselIntExpr_R(env, e->Iex.Unop.arg);
1277
1278 default:
1279 break;
1280 }
1281 break;
1282 }
1283
1284 /* --------- GET --------- */
1285 case Iex_Get: {
1286 if (ty == Ity_I32) {
1287 HReg dst = newVRegI(env);
1288 addInstr(env, X86Instr_Alu32R(
1289 Xalu_MOV,
1290 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1291 hregX86_EBP())),
1292 dst));
1293 return dst;
1294 }
1295 if (ty == Ity_I8 || ty == Ity_I16) {
1296 HReg dst = newVRegI(env);
1297 addInstr(env, X86Instr_LoadEX(
1298 toUChar(ty==Ity_I8 ? 1 : 2),
1299 False,
1300 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1301 dst));
1302 return dst;
1303 }
1304 break;
1305 }
1306
1307 case Iex_GetI: {
1308 X86AMode* am
1309 = genGuestArrayOffset(
1310 env, e->Iex.GetI.descr,
1311 e->Iex.GetI.ix, e->Iex.GetI.bias );
1312 HReg dst = newVRegI(env);
1313 if (ty == Ity_I8) {
1314 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1315 return dst;
1316 }
1317 if (ty == Ity_I32) {
1318 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1319 return dst;
1320 }
1321 break;
1322 }
1323
1324 /* --------- CCALL --------- */
1325 case Iex_CCall: {
1326 HReg dst = newVRegI(env);
1327 vassert(ty == e->Iex.CCall.retty);
1328
1329 /* be very restrictive for now. Only 32/64-bit ints allowed
1330 for args, and 32 bits for return type. */
1331 if (e->Iex.CCall.retty != Ity_I32)
1332 goto irreducible;
1333
1334 /* Marshal args, do the call, clear stack. */
1335 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1336
1337 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1338 return dst;
1339 }
1340
1341 /* --------- LITERAL --------- */
1342 /* 32/16/8-bit literals */
1343 case Iex_Const: {
1344 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1345 HReg r = newVRegI(env);
1346 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1347 return r;
1348 }
1349
1350 /* --------- MULTIPLEX --------- */
1351 case Iex_Mux0X: {
1352 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1353 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1354 X86RM* r8;
1355 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1356 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1357 HReg dst = newVRegI(env);
1358 addInstr(env, mk_iMOVsd_RR(rX,dst));
1359 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
1360 addInstr(env, X86Instr_Test32(0xFF, r8));
1361 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
1362 return dst;
1363 }
1364 break;
1365 }
1366
1367 default:
1368 break;
1369 } /* switch (e->tag) */
1370
1371 /* We get here if no pattern matched. */
1372 irreducible:
1373 ppIRExpr(e);
1374 vpanic("iselIntExpr_R: cannot reduce tree");
1375 }
1376
1377
1378 /*---------------------------------------------------------*/
1379 /*--- ISEL: Integer expression auxiliaries ---*/
1380 /*---------------------------------------------------------*/
1381
1382 /* --------------------- AMODEs --------------------- */
1383
1384 /* Return an AMode which computes the value of the specified
1385 expression, possibly also adding insns to the code list as a
1386 result. The expression may only be a 32-bit one.
1387 */
1388
1389 static Bool sane_AMode ( X86AMode* am )
1390 {
1391 switch (am->tag) {
1392 case Xam_IR:
1393 return
1394 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1395 && (hregIsVirtual(am->Xam.IR.reg)
1396 || am->Xam.IR.reg == hregX86_EBP()) );
1397 case Xam_IRRS:
1398 return
1399 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1400 && hregIsVirtual(am->Xam.IRRS.base)
1401 && hregClass(am->Xam.IRRS.index) == HRcInt32
1402 && hregIsVirtual(am->Xam.IRRS.index) );
1403 default:
1404 vpanic("sane_AMode: unknown x86 amode tag");
1405 }
1406 }
1407
1408 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1409 {
1410 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1411 vassert(sane_AMode(am));
1412 return am;
1413 }
1414
1415 /* DO NOT CALL THIS DIRECTLY ! */
1416 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1417 {
1418 IRType ty = typeOfIRExpr(env->type_env,e);
1419 vassert(ty == Ity_I32);
1420
1421 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1422 if (e->tag == Iex_Binop
1423 && e->Iex.Binop.op == Iop_Add32
1424 && e->Iex.Binop.arg2->tag == Iex_Const
1425 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1426 && e->Iex.Binop.arg1->tag == Iex_Binop
1427 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1428 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1429 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1430 && e->Iex.Binop.arg1
1431 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1432 && e->Iex.Binop.arg1
1433 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1434 UInt shift = e->Iex.Binop.arg1
1435 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1436 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1437 if (shift == 1 || shift == 2 || shift == 3) {
1438 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1439 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1440 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1441 return X86AMode_IRRS(imm32, r1, r2, shift);
1442 }
1443 }
1444
1445 /* Add32(expr1, Shl32(expr2, imm)) */
1446 if (e->tag == Iex_Binop
1447 && e->Iex.Binop.op == Iop_Add32
1448 && e->Iex.Binop.arg2->tag == Iex_Binop
1449 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1450 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1451 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1452 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1453 if (shift == 1 || shift == 2 || shift == 3) {
1454 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1455 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1456 return X86AMode_IRRS(0, r1, r2, shift);
1457 }
1458 }
1459
1460 /* Add32(expr,i) */
1461 if (e->tag == Iex_Binop
1462 && e->Iex.Binop.op == Iop_Add32
1463 && e->Iex.Binop.arg2->tag == Iex_Const
1464 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1465 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1466 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1467 }
1468
1469 /* Doesn't match anything in particular. Generate it into
1470 a register and use that. */
1471 {
1472 HReg r1 = iselIntExpr_R(env, e);
1473 return X86AMode_IR(0, r1);
1474 }
1475 }
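
/* Example of the first pattern above (illustration only; t1 and t2 are
   arbitrary 32-bit temps): the IR tree
   Add32(Add32(t1, Shl32(t2, 2:I8)), 0x18:I32) folds into the single
   amode 0x18(%t1,%t2,4), costing nothing beyond computing t1 and t2
   into registers. */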
1476
1477
1478 /* --------------------- RMIs --------------------- */
1479
1480 /* Similarly, calculate an expression into an X86RMI operand. As with
1481 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1482
1483 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1484 {
1485 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1486 /* sanity checks ... */
1487 switch (rmi->tag) {
1488 case Xrmi_Imm:
1489 return rmi;
1490 case Xrmi_Reg:
1491 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1492 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1493 return rmi;
1494 case Xrmi_Mem:
1495 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1496 return rmi;
1497 default:
1498 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1499 }
1500 }
1501
1502 /* DO NOT CALL THIS DIRECTLY ! */
1503 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1504 {
1505 IRType ty = typeOfIRExpr(env->type_env,e);
1506 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1507
1508 /* special case: immediate */
1509 if (e->tag == Iex_Const) {
1510 UInt u;
1511 switch (e->Iex.Const.con->tag) {
1512 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1513 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1514 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1515 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1516 }
1517 return X86RMI_Imm(u);
1518 }
1519
1520 /* special case: 32-bit GET */
1521 if (e->tag == Iex_Get && ty == Ity_I32) {
1522 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1523 hregX86_EBP()));
1524 }
1525
1526 /* special case: 32-bit load from memory */
1527 if (e->tag == Iex_Load && ty == Ity_I32
1528 && e->Iex.Load.end == Iend_LE) {
1529 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1530 return X86RMI_Mem(am);
1531 }
1532
1533 /* default case: calculate into a register and return that */
1534 {
1535 HReg r = iselIntExpr_R ( env, e );
1536 return X86RMI_Reg(r);
1537 }
1538 }
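
/* The point of the RMI form: a guest-state read or memory load used as
   the second operand of an ALU op needs no separate load instruction.
   For instance (sketch; t7 and offset 24 are arbitrary),
   Add32(t7, GET:I32(24)) comes out as

      movl %t7, %dst
      addl 24(%ebp), %dst

   via the Iex_Get special case above combined with the Alu32R case in
   iselIntExpr_R_wrk. */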
1539
1540
1541 /* --------------------- RIs --------------------- */
1542
1543 /* Calculate an expression into an X86RI operand. As with
1544 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1545
1546 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1547 {
1548 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1549 /* sanity checks ... */
1550 switch (ri->tag) {
1551 case Xri_Imm:
1552 return ri;
1553 case Xri_Reg:
1554 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1555 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1556 return ri;
1557 default:
1558 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1559 }
1560 }
1561
1562 /* DO NOT CALL THIS DIRECTLY ! */
1563 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1564 {
1565 IRType ty = typeOfIRExpr(env->type_env,e);
1566 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1567
1568 /* special case: immediate */
1569 if (e->tag == Iex_Const) {
1570 UInt u;
1571 switch (e->Iex.Const.con->tag) {
1572 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1573 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1574 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1575 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1576 }
1577 return X86RI_Imm(u);
1578 }
1579
1580 /* default case: calculate into a register and return that */
1581 {
1582 HReg r = iselIntExpr_R ( env, e );
1583 return X86RI_Reg(r);
1584 }
1585 }
1586
1587
1588 /* --------------------- RMs --------------------- */
1589
1590 /* Similarly, calculate an expression into an X86RM operand. As with
1591 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1592
1593 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1594 {
1595 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1596 /* sanity checks ... */
1597 switch (rm->tag) {
1598 case Xrm_Reg:
1599 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1600 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1601 return rm;
1602 case Xrm_Mem:
1603 vassert(sane_AMode(rm->Xrm.Mem.am));
1604 return rm;
1605 default:
1606 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1607 }
1608 }
1609
1610 /* DO NOT CALL THIS DIRECTLY ! */
1611 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1612 {
1613 IRType ty = typeOfIRExpr(env->type_env,e);
1614 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1615
1616 /* special case: 32-bit GET */
1617 if (e->tag == Iex_Get && ty == Ity_I32) {
1618 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1619 hregX86_EBP()));
1620 }
1621
1622 /* special case: load from memory */
1623
1624 /* default case: calculate into a register and return that */
1625 {
1626 HReg r = iselIntExpr_R ( env, e );
1627 return X86RM_Reg(r);
1628 }
1629 }
1630
1631
1632 /* --------------------- CONDCODE --------------------- */
1633
1634 /* Generate code to evaluate a bit-typed expression, returning the
1635 condition code which would correspond when the expression would
1636 notionally have returned 1. */
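
/* For instance, for CmpNEZ32(x) the code below emits "cmpl $0, x" and
   hands back Xcc_NZ; the caller is then free to consume that condition
   with a setcc, jcc or cmovcc as it sees fit. */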
1637
1638 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1639 {
1640 /* Uh, there's nothing we can sanity check here, unfortunately. */
1641 return iselCondCode_wrk(env,e);
1642 }
1643
1644 /* DO NOT CALL THIS DIRECTLY ! */
1645 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1646 {
1647 MatchInfo mi;
1648
1649 vassert(e);
1650 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1651
1652 /* var */
1653 if (e->tag == Iex_RdTmp) {
1654 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1655 /* Test32 doesn't modify r32; so this is OK. */
1656 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1657 return Xcc_NZ;
1658 }
1659
1660 /* Constant 1:Bit */
1661 if (e->tag == Iex_Const) {
1662 HReg r;
1663 vassert(e->Iex.Const.con->tag == Ico_U1);
1664 vassert(e->Iex.Const.con->Ico.U1 == True
1665 || e->Iex.Const.con->Ico.U1 == False);
1666 r = newVRegI(env);
1667 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1668 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1669 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1670 }
1671
1672 /* Not1(e) */
1673 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1674 /* Generate code for the arg, and negate the test condition */
1675 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1676 }
1677
1678 /* --- patterns rooted at: 32to1 --- */
1679
1680 if (e->tag == Iex_Unop
1681 && e->Iex.Unop.op == Iop_32to1) {
1682 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1683 addInstr(env, X86Instr_Test32(1,rm));
1684 return Xcc_NZ;
1685 }
1686
1687 /* --- patterns rooted at: CmpNEZ8 --- */
1688
1689 /* CmpNEZ8(x) */
1690 if (e->tag == Iex_Unop
1691 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1692 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1693 addInstr(env, X86Instr_Test32(0xFF,rm));
1694 return Xcc_NZ;
1695 }
1696
1697 /* --- patterns rooted at: CmpNEZ16 --- */
1698
1699 /* CmpNEZ16(x) */
1700 if (e->tag == Iex_Unop
1701 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1702 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1703 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1704 return Xcc_NZ;
1705 }
1706
1707 /* --- patterns rooted at: CmpNEZ32 --- */
1708
1709 /* CmpNEZ32(And32(x,y)) */
1710 {
1711 DECLARE_PATTERN(p_CmpNEZ32_And32);
1712 DEFINE_PATTERN(p_CmpNEZ32_And32,
1713 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1714 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1715 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1716 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1717 HReg tmp = newVRegI(env);
1718 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1719 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1720 return Xcc_NZ;
1721 }
1722 }
1723
1724 /* CmpNEZ32(Or32(x,y)) */
1725 {
1726 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1727 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1728 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1729 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1730 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1731 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1732 HReg tmp = newVRegI(env);
1733 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1734 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1735 return Xcc_NZ;
1736 }
1737 }
1738
1739 /* CmpNEZ32(GET(..):I32) */
1740 if (e->tag == Iex_Unop
1741 && e->Iex.Unop.op == Iop_CmpNEZ32
1742 && e->Iex.Unop.arg->tag == Iex_Get) {
1743 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1744 hregX86_EBP());
1745 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1746 return Xcc_NZ;
1747 }
1748
1749 /* CmpNEZ32(x) */
1750 if (e->tag == Iex_Unop
1751 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1752 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1753 X86RMI* rmi2 = X86RMI_Imm(0);
1754 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1755 return Xcc_NZ;
1756 }
1757
1758 /* --- patterns rooted at: CmpNEZ64 --- */
1759
1760 /* CmpNEZ64(Or64(x,y)) */
1761 {
1762 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1763 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1764 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1765 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1766 HReg hi1, lo1, hi2, lo2;
1767 HReg tmp = newVRegI(env);
1768 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1769 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1770 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1771 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1772 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1773 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1774 return Xcc_NZ;
1775 }
1776 }
1777
1778 /* CmpNEZ64(x) */
1779 if (e->tag == Iex_Unop
1780 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1781 HReg hi, lo;
1782 HReg tmp = newVRegI(env);
1783 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1784 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1785 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1786 return Xcc_NZ;
1787 }
1788
1789 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1790
1791 /* CmpEQ8 / CmpNE8 */
1792 if (e->tag == Iex_Binop
1793 && (e->Iex.Binop.op == Iop_CmpEQ8
1794 || e->Iex.Binop.op == Iop_CmpNE8
1795 || e->Iex.Binop.op == Iop_CasCmpEQ8
1796 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1797 if (isZeroU8(e->Iex.Binop.arg2)) {
1798 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
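/* Only the low 8 bits of r1 are meaningful for an I8 value, so
   test with a 0xFF mask; the upper bits of the register may hold
   junk. */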
1799 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1800 switch (e->Iex.Binop.op) {
1801 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1802 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1803 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1804 }
1805 } else {
1806 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1807 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1808 HReg r = newVRegI(env);
1809 addInstr(env, mk_iMOVsd_RR(r1,r));
1810 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1811 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1812 switch (e->Iex.Binop.op) {
1813 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1814 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1815 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1816 }
1817 }
1818 }
1819
1820 /* CmpEQ16 / CmpNE16 */
1821 if (e->tag == Iex_Binop
1822 && (e->Iex.Binop.op == Iop_CmpEQ16
1823 || e->Iex.Binop.op == Iop_CmpNE16
1824 || e->Iex.Binop.op == Iop_CasCmpEQ16
1825 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1826 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1827 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1828 HReg r = newVRegI(env);
1829 addInstr(env, mk_iMOVsd_RR(r1,r));
1830 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1831 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1832 switch (e->Iex.Binop.op) {
1833 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
1834 case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
1835 default: vpanic("iselCondCode(x86): CmpXX16");
1836 }
1837 }
1838
1839 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1840 Saves a "movl %eax, %tmp" compared to the default route. */
1841 if (e->tag == Iex_Binop
1842 && e->Iex.Binop.op == Iop_CmpNE32
1843 && e->Iex.Binop.arg1->tag == Iex_CCall
1844 && e->Iex.Binop.arg2->tag == Iex_Const) {
1845 IRExpr* cal = e->Iex.Binop.arg1;
1846 IRExpr* con = e->Iex.Binop.arg2;
1847 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1848 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1849 vassert(con->Iex.Const.con->tag == Ico_U32);
1850 /* Marshal args, do the call. */
1851 doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
1852 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
1853 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
1854 hregX86_EAX()));
1855 return Xcc_NZ;
1856 }
1857
1858 /* Cmp*32*(x,y) */
1859 if (e->tag == Iex_Binop
1860 && (e->Iex.Binop.op == Iop_CmpEQ32
1861 || e->Iex.Binop.op == Iop_CmpNE32
1862 || e->Iex.Binop.op == Iop_CmpLT32S
1863 || e->Iex.Binop.op == Iop_CmpLT32U
1864 || e->Iex.Binop.op == Iop_CmpLE32S
1865 || e->Iex.Binop.op == Iop_CmpLE32U
1866 || e->Iex.Binop.op == Iop_CasCmpEQ32
1867 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1868 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1869 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1870 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1871 switch (e->Iex.Binop.op) {
1872 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
1873 case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
1874 case Iop_CmpLT32S: return Xcc_L;
1875 case Iop_CmpLT32U: return Xcc_B;
1876 case Iop_CmpLE32S: return Xcc_LE;
1877 case Iop_CmpLE32U: return Xcc_BE;
1878 default: vpanic("iselCondCode(x86): CmpXX32");
1879 }
1880 }
1881
1882 /* CmpNE64 */
1883 if (e->tag == Iex_Binop
1884 && (e->Iex.Binop.op == Iop_CmpNE64
1885 || e->Iex.Binop.op == Iop_CmpEQ64)) {
1886 HReg hi1, hi2, lo1, lo2;
1887 HReg tHi = newVRegI(env);
1888 HReg tLo = newVRegI(env);
1889 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
1890 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
1891 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
1892 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
1893 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
1894 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
1895 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
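/* tLo now holds the OR of the pairwise-xor'ed halves, which is
   zero exactly when the two 64-bit values are equal; ZF therefore
   gives the answer. */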
1896 switch (e->Iex.Binop.op) {
1897 case Iop_CmpNE64: return Xcc_NZ;
1898 case Iop_CmpEQ64: return Xcc_Z;
1899 default: vpanic("iselCondCode(x86): CmpXX64");
1900 }
1901 }
1902
1903 ppIRExpr(e);
1904 vpanic("iselCondCode");
1905 }
1906
1907
1908 /*---------------------------------------------------------*/
1909 /*--- ISEL: Integer expressions (64 bit) ---*/
1910 /*---------------------------------------------------------*/
1911
1912 /* Compute a 64-bit value into a register pair, which is returned as
1913 the first two parameters. As with iselIntExpr_R, these may be
1914 either real or virtual regs; in any case they must not be changed
1915 by subsequent code emitted by the caller. */
1916
1917 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1918 {
1919 iselInt64Expr_wrk(rHi, rLo, env, e);
1920 # if 0
1921 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1922 # endif
1923 vassert(hregClass(*rHi) == HRcInt32);
1924 vassert(hregIsVirtual(*rHi));
1925 vassert(hregClass(*rLo) == HRcInt32);
1926 vassert(hregIsVirtual(*rLo));
1927 }
1928
1929 /* DO NOT CALL THIS DIRECTLY ! */
1930 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1931 {
1932 MatchInfo mi;
1933 HWord fn = 0; /* helper fn for most SIMD64 stuff */
1934 vassert(e);
1935 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1936
1937 /* 64-bit literal */
1938 if (e->tag == Iex_Const) {
1939 ULong w64 = e->Iex.Const.con->Ico.U64;
1940 UInt wHi = toUInt(w64 >> 32);
1941 UInt wLo = toUInt(w64);
1942 HReg tLo = newVRegI(env);
1943 HReg tHi = newVRegI(env);
1944 vassert(e->Iex.Const.con->tag == Ico_U64);
1945 if (wLo == wHi) {
1946 /* Save a precious Int register in this special case. */
1947 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1948 *rHi = tLo;
1949 *rLo = tLo;
1950 } else {
1951 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1952 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1953 *rHi = tHi;
1954 *rLo = tLo;
1955 }
1956 return;
1957 }
1958
1959 /* read 64-bit IRTemp */
1960 if (e->tag == Iex_RdTmp) {
1961 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1962 return;
1963 }
1964
1965 /* 64-bit load */
1966 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1967 HReg tLo, tHi;
1968 X86AMode *am0, *am4;
1969 vassert(e->Iex.Load.ty == Ity_I64);
1970 tLo = newVRegI(env);
1971 tHi = newVRegI(env);
1972 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
1973 am4 = advance4(am0);
1974 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1975 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1976 *rHi = tHi;
1977 *rLo = tLo;
1978 return;
1979 }
1980
1981 /* 64-bit GET */
1982 if (e->tag == Iex_Get) {
1983 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
1984 X86AMode* am4 = advance4(am);
1985 HReg tLo = newVRegI(env);
1986 HReg tHi = newVRegI(env);
1987 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
1988 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1989 *rHi = tHi;
1990 *rLo = tLo;
1991 return;
1992 }
1993
1994 /* 64-bit GETI */
1995 if (e->tag == Iex_GetI) {
1996 X86AMode* am
1997 = genGuestArrayOffset( env, e->Iex.GetI.descr,
1998 e->Iex.GetI.ix, e->Iex.GetI.bias );
1999 X86AMode* am4 = advance4(am);
2000 HReg tLo = newVRegI(env);
2001 HReg tHi = newVRegI(env);
2002 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2003 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2004 *rHi = tHi;
2005 *rLo = tLo;
2006 return;
2007 }
2008
2009 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
2010 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
2011 X86RM* r8;
2012 HReg e0Lo, e0Hi;
2013 HReg tLo = newVRegI(env);
2014 HReg tHi = newVRegI(env);
2015 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2016 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2017 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2018 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2019 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2020 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
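/* The pushed zero gives the cmovs below a memory operand holding
   0; CMov32 takes a register or memory source, not an immediate. */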
2021 addInstr(env, X86Instr_Test32(0xFF, r8));
2022 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
2023 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
2024 add_to_esp(env, 4);
2025 *rHi = tHi;
2026 *rLo = tLo;
2027 return;
2028 }
2029 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
2030 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
2031 X86RM* r8;
2032 HReg e0Lo, e0Hi;
2033 HReg tLo = newVRegI(env);
2034 HReg tHi = newVRegI(env);
2035 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2036 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
2037 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2038 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2039 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2040 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2041 addInstr(env, X86Instr_Test32(0xFF, r8));
2042 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
2043 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
2044 add_to_esp(env, 4);
2045 *rHi = tHi;
2046 *rLo = tLo;
2047 return;
2048 }
2049
2050 /* 64-bit Mux0X: Mux0X(g, expr, expr) */
2051 if (e->tag == Iex_Mux0X) {
2052 X86RM* r8;
2053 HReg e0Lo, e0Hi, eXLo, eXHi;
2054 HReg tLo = newVRegI(env);
2055 HReg tHi = newVRegI(env);
2056 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2057 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2058 addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2059 addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2060 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2061 addInstr(env, X86Instr_Test32(0xFF, r8));
2062 /* This assumes the first cmov32 doesn't trash the condition
2063 codes, so they are still available for the second cmov32 */
2064 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2065 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2066 *rHi = tHi;
2067 *rLo = tLo;
2068 return;
2069 }
2070
2071 /* --------- BINARY ops --------- */
2072 if (e->tag == Iex_Binop) {
2073 switch (e->Iex.Binop.op) {
2074 /* 32 x 32 -> 64 multiply */
2075 case Iop_MullU32:
2076 case Iop_MullS32: {
2077 /* Get one operand into %eax, and the other into an R/M
2078 operand. Need to make an educated guess about which operand
2079 is better placed where. */
2080 HReg tLo = newVRegI(env);
2081 HReg tHi = newVRegI(env);
2082 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2083 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2084 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2085 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2086 addInstr(env, X86Instr_MulL(syned, rmLeft));
2087 /* Result is now in EDX:EAX. Tell the caller. */
2088 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2089 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2090 *rHi = tHi;
2091 *rLo = tLo;
2092 return;
2093 }
2094
2095 /* 64 x 32 -> (32(rem),32(div)) division */
2096 case Iop_DivModU64to32:
2097 case Iop_DivModS64to32: {
2098 /* Get the 64-bit operand into edx:eax, and the other into
2099 any old R/M. */
2100 HReg sHi, sLo;
2101 HReg tLo = newVRegI(env);
2102 HReg tHi = newVRegI(env);
2103 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2104 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2105 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2106 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2107 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2108 addInstr(env, X86Instr_Div(syned, rmRight));
2109 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2110 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2111 *rHi = tHi;
2112 *rLo = tLo;
2113 return;
2114 }
2115
2116 /* Or64/And64/Xor64 */
2117 case Iop_Or64:
2118 case Iop_And64:
2119 case Iop_Xor64: {
2120 HReg xLo, xHi, yLo, yHi;
2121 HReg tLo = newVRegI(env);
2122 HReg tHi = newVRegI(env);
2123 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2124 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2125 : Xalu_XOR;
2126 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2127 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2128 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2129 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2130 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2131 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2132 *rHi = tHi;
2133 *rLo = tLo;
2134 return;
2135 }
2136
2137 /* Add64/Sub64 */
2138 case Iop_Add64:
2139 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2140 /* special case Add64(e, const) */
2141 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2142 UInt wHi = toUInt(w64 >> 32);
2143 UInt wLo = toUInt(w64);
2144 HReg tLo = newVRegI(env);
2145 HReg tHi = newVRegI(env);
2146 HReg xLo, xHi;
2147 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2148 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2149 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2150 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2151 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2152 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2153 *rHi = tHi;
2154 *rLo = tLo;
2155 return;
2156 }
2157 /* else fall through to the generic case */
2158 case Iop_Sub64: {
2159 HReg xLo, xHi, yLo, yHi;
2160 HReg tLo = newVRegI(env);
2161 HReg tHi = newVRegI(env);
2162 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2163 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2164 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2165 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2166 if (e->Iex.Binop.op==Iop_Add64) {
2167 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2168 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2169 } else {
2170 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2171 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2172 }
2173 *rHi = tHi;
2174 *rLo = tLo;
2175 return;
2176 }
2177
2178 /* 32HLto64(e1,e2) */
2179 case Iop_32HLto64:
2180 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2181 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2182 return;
2183
2184 /* 64-bit shifts */
2185 case Iop_Shl64: {
2186 /* We use the same ingenious scheme as gcc. Put the value
2187 to be shifted into %hi:%lo, and the shift amount into
2188 %cl. Then (dsts on right, a la ATT syntax):
2189
2190 shldl %cl, %lo, %hi -- make %hi be right for the
2191 -- shift amt %cl % 32
2192 shll %cl, %lo -- make %lo be right for the
2193 -- shift amt %cl % 32
2194
2195 Now, if (shift amount % 64) is in the range 32 .. 63,
2196 we have to do a fixup, which puts the result low half
2197 into the result high half, and zeroes the low half:
2198
2199 testl $32, %ecx
2200
2201 cmovnz %lo, %hi
2202 movl $0, %tmp -- sigh; need yet another reg
2203 cmovnz %tmp, %lo
2204 */
2205 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2206 tLo = newVRegI(env);
2207 tHi = newVRegI(env);
2208 tTemp = newVRegI(env);
2209 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2210 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2211 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2212 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2213 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2214 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2215 and those regs are legitimately modifiable. */
2216 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2217 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2218 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2219 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2220 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2221 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2222 *rHi = tHi;
2223 *rLo = tLo;
2224 return;
2225 }
2226
2227 case Iop_Shr64: {
2228 /* We use the same ingenious scheme as gcc. Put the value
2229 to be shifted into %hi:%lo, and the shift amount into
2230 %cl. Then:
2231
2232 shrdl %cl, %hi, %lo -- make %lo be right for the
2233 -- shift amt %cl % 32
2234 shrl %cl, %hi -- make %hi be right for the
2235 -- shift amt %cl % 32
2236
2237 Now, if (shift amount % 64) is in the range 32 .. 63,
2238 we have to do a fixup, which puts the result high half
2239 into the result low half, and zeroes the high half:
2240
2241 testl $32, %ecx
2242
2243 cmovnz %hi, %lo
2244 movl $0, %tmp -- sigh; need yet another reg
2245 cmovnz %tmp, %hi
2246 */
2247 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2248 tLo = newVRegI(env);
2249 tHi = newVRegI(env);
2250 tTemp = newVRegI(env);
2251 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2252 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2253 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2254 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2255 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2256 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2257 and those regs are legitimately modifiable. */
2258 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2259 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2260 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2261 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2262 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2263 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2264 *rHi = tHi;
2265 *rLo = tLo;
2266 return;
2267 }
2268
2269 /* F64 -> I64 */
2270 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2271 case. Unfortunately I see no easy way to avoid the
2272 duplication. */
2273 case Iop_F64toI64S: {
2274 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2275 HReg tLo = newVRegI(env);
2276 HReg tHi = newVRegI(env);
2277
2278 /* Used several times ... */
2279 /* Careful ... this sharing is only safe because
2280 zero_esp/four_esp do not hold any registers which the
2281 register allocator could attempt to swizzle later. */
2282 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2283 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2284
2285 /* rf now holds the value to be converted; the rounding mode
2286 is given by e->Iex.Binop.arg1, encoded as per the
2287 IRRoundingMode enum. The first thing to do is set the FPU's
2288 rounding mode accordingly. */
2289
2290 /* Create a space for the format conversion. */
2291 /* subl $8, %esp */
2292 sub_from_esp(env, 8);
2293
2294 /* Set host rounding mode */
2295 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2296
2297 /* gistll %rf, 0(%esp) */
2298 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2299
2300 /* movl 0(%esp), %dstLo */
2301 /* movl 4(%esp), %dstHi */
2302 addInstr(env, X86Instr_Alu32R(
2303 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2304 addInstr(env, X86Instr_Alu32R(
2305 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2306
2307 /* Restore default FPU rounding. */
2308 set_FPU_rounding_default( env );
2309
2310 /* addl $8, %esp */
2311 add_to_esp(env, 8);
2312
2313 *rHi = tHi;
2314 *rLo = tLo;
2315 return;
2316 }
2317
2318 case Iop_Add8x8:
2319 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2320 case Iop_Add16x4:
2321 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2322 case Iop_Add32x2:
2323 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2324
2325 case Iop_Avg8Ux8:
2326 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2327 case Iop_Avg16Ux4:
2328 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2329
2330 case Iop_CmpEQ8x8:
2331 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2332 case Iop_CmpEQ16x4:
2333 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2334 case Iop_CmpEQ32x2:
2335 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2336
2337 case Iop_CmpGT8Sx8:
2338 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2339 case Iop_CmpGT16Sx4:
2340 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2341 case Iop_CmpGT32Sx2:
2342 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2343
2344 case Iop_InterleaveHI8x8:
2345 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2346 case Iop_InterleaveLO8x8:
2347 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2348 case Iop_InterleaveHI16x4:
2349 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2350 case Iop_InterleaveLO16x4:
2351 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2352 case Iop_InterleaveHI32x2:
2353 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2354 case Iop_InterleaveLO32x2:
2355 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2356 case Iop_CatOddLanes16x4:
2357 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2358 case Iop_CatEvenLanes16x4:
2359 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2360 case Iop_Perm8x8:
2361 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2362
2363 case Iop_Max8Ux8:
2364 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2365 case Iop_Max16Sx4:
2366 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2367 case Iop_Min8Ux8:
2368 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2369 case Iop_Min16Sx4:
2370 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2371
2372 case Iop_Mul16x4:
2373 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2374 case Iop_Mul32x2:
2375 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2376 case Iop_MulHi16Sx4:
2377 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2378 case Iop_MulHi16Ux4:
2379 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2380
2381 case Iop_QAdd8Sx8:
2382 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2383 case Iop_QAdd16Sx4:
2384 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2385 case Iop_QAdd8Ux8:
2386 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2387 case Iop_QAdd16Ux4:
2388 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2389
2390 case Iop_QNarrowBin32Sto16Sx4:
2391 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2392 case Iop_QNarrowBin16Sto8Sx8:
2393 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2394 case Iop_QNarrowBin16Sto8Ux8:
2395 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2396 case Iop_NarrowBin16to8x8:
2397 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2398 case Iop_NarrowBin32to16x4:
2399 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2400
2401 case Iop_QSub8Sx8:
2402 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2403 case Iop_QSub16Sx4:
2404 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2405 case Iop_QSub8Ux8:
2406 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2407 case Iop_QSub16Ux4:
2408 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2409
2410 case Iop_Sub8x8:
2411 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2412 case Iop_Sub16x4:
2413 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2414 case Iop_Sub32x2:
2415 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2416
2417 binnish: {
2418 /* Note: the following assumes all helpers are of
2419 signature
2420 ULong fn ( ULong, ULong ), and they are
2421 not marked as regparm functions.
2422 */
2423 HReg xLo, xHi, yLo, yHi;
2424 HReg tLo = newVRegI(env);
2425 HReg tHi = newVRegI(env);
2426 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2427 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2428 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2429 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2430 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2431 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
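/* The args were pushed right-to-left, so the stack now holds, from
   low to high addresses: xLo, xHi, yLo, yHi -- the in-memory layout
   of (ULong x, ULong y) for a plain stack-args call on a 32-bit
   host. */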
2432 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2433 add_to_esp(env, 4*4);
2434 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2435 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2436 *rHi = tHi;
2437 *rLo = tLo;
2438 return;
2439 }
2440
2441 case Iop_ShlN32x2:
2442 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2443 case Iop_ShlN16x4:
2444 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2445 case Iop_ShlN8x8:
2446 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2447 case Iop_ShrN32x2:
2448 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2449 case Iop_ShrN16x4:
2450 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2451 case Iop_SarN32x2:
2452 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2453 case Iop_SarN16x4:
2454 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2455 case Iop_SarN8x8:
2456 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2457 shifty: {
2458 /* Note: the following assumes all helpers are of
2459 signature
2460 ULong fn ( ULong, UInt ), and they are
2461 not marked as regparm functions.
2462 */
2463 HReg xLo, xHi;
2464 HReg tLo = newVRegI(env);
2465 HReg tHi = newVRegI(env);
2466 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2467 addInstr(env, X86Instr_Push(y));
2468 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2469 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2470 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2471 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2472 add_to_esp(env, 3*4);
2473 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2474 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2475 *rHi = tHi;
2476 *rLo = tLo;
2477 return;
2478 }
2479
2480 default:
2481 break;
2482 }
2483 } /* if (e->tag == Iex_Binop) */
2484
2485
2486 /* --------- UNARY ops --------- */
2487 if (e->tag == Iex_Unop) {
2488 switch (e->Iex.Unop.op) {
2489
2490 /* 32Sto64(e) */
2491 case Iop_32Sto64: {
2492 HReg tLo = newVRegI(env);
2493 HReg tHi = newVRegI(env);
2494 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2495 addInstr(env, mk_iMOVsd_RR(src,tHi));
2496 addInstr(env, mk_iMOVsd_RR(src,tLo));
2497 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
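/* The arithmetic shift replicates the sign bit across tHi, giving
   the sign-extended upper half of the result. */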
2498 *rHi = tHi;
2499 *rLo = tLo;
2500 return;
2501 }
2502
2503 /* 32Uto64(e) */
2504 case Iop_32Uto64: {
2505 HReg tLo = newVRegI(env);
2506 HReg tHi = newVRegI(env);
2507 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2508 addInstr(env, mk_iMOVsd_RR(src,tLo));
2509 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2510 *rHi = tHi;
2511 *rLo = tLo;
2512 return;
2513 }
2514
2515 /* 16Uto64(e) */
2516 case Iop_16Uto64: {
2517 HReg tLo = newVRegI(env);
2518 HReg tHi = newVRegI(env);
2519 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2520 addInstr(env, mk_iMOVsd_RR(src,tLo));
2521 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2522 X86RMI_Imm(0xFFFF), tLo));
2523 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2524 *rHi = tHi;
2525 *rLo = tLo;
2526 return;
2527 }
2528
2529 /* V128{HI}to64 */
2530 case Iop_V128HIto64:
2531 case Iop_V128to64: {
2532 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2533 HReg tLo = newVRegI(env);
2534 HReg tHi = newVRegI(env);
2535 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2536 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2537 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2538 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2539 sub_from_esp(env, 16);
2540 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2541 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2542 X86RMI_Mem(espLO), tLo ));
2543 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2544 X86RMI_Mem(espHI), tHi ));
2545 add_to_esp(env, 16);
2546 *rHi = tHi;
2547 *rLo = tLo;
2548 return;
2549 }
2550
2551 /* could do better than this, but for now ... */
2552 case Iop_1Sto64: {
2553 HReg tLo = newVRegI(env);
2554 HReg tHi = newVRegI(env);
2555 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2556 addInstr(env, X86Instr_Set32(cond,tLo));
2557 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2558 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
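/* tLo is now 0 or 0xFFFFFFFF: the 0/1 from Set32 was moved to the
   sign position and arithmetically shifted back. Duplicate it into
   the high half. */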
2559 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2560 *rHi = tHi;
2561 *rLo = tLo;
2562 return;
2563 }
2564
2565 /* Not64(e) */
2566 case Iop_Not64: {
2567 HReg tLo = newVRegI(env);
2568 HReg tHi = newVRegI(env);
2569 HReg sHi, sLo;
2570 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2571 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2572 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2573 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2574 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2575 *rHi = tHi;
2576 *rLo = tLo;
2577 return;
2578 }
2579
2580 /* Left64(e) */
2581 case Iop_Left64: {
2582 HReg yLo, yHi;
2583 HReg tLo = newVRegI(env);
2584 HReg tHi = newVRegI(env);
2585 /* yHi:yLo = arg */
2586 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2587 /* tLo = 0 - yLo, and set carry */
2588 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2589 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2590 /* tHi = 0 - yHi - carry */
2591 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2592 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2593 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2594 back in, so as to give the final result
2595 tHi:tLo = arg | -arg. */
2596 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2597 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2598 *rHi = tHi;
2599 *rLo = tLo;
2600 return;
2601 }
2602
2603 /* --- patterns rooted at: CmpwNEZ64 --- */
2604
2605 /* CmpwNEZ64(e) */
2606 case Iop_CmpwNEZ64: {
2607
2608 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2609 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2610 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2611 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2612 /* CmpwNEZ64(Or64(x,y)) */
2613 HReg xHi,xLo,yHi,yLo;
2614 HReg xBoth = newVRegI(env);
2615 HReg merged = newVRegI(env);
2616 HReg tmp2 = newVRegI(env);
2617
2618 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2619 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2620 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2621 X86RMI_Reg(xLo),xBoth));
2622
2623 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2624 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2625 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2626 X86RMI_Reg(yLo),merged));
2627 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2628 X86RMI_Reg(xBoth),merged));
2629
2630 /* tmp2 = (merged | -merged) >>s 31 */
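/* For any x, (x | -x) has its sign bit set iff x != 0, so the
   arithmetic shift by 31 yields all-ones for nonzero input and
   zero otherwise. */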
2631 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2632 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2633 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2634 X86RMI_Reg(merged), tmp2));
2635 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2636 *rHi = tmp2;
2637 *rLo = tmp2;
2638 return;
2639 } else {
2640 /* CmpwNEZ64(e) */
2641 HReg srcLo, srcHi;
2642 HReg tmp1 = newVRegI(env);
2643 HReg tmp2 = newVRegI(env);
2644 /* srcHi:srcLo = arg */
2645 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2646 /* tmp1 = srcHi | srcLo */
2647 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2648 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2649 X86RMI_Reg(srcLo), tmp1));
2650 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2651 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2652 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2653 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2654 X86RMI_Reg(tmp1), tmp2));
2655 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2656 *rHi = tmp2;
2657 *rLo = tmp2;
2658 return;
2659 }
2660 }
2661
2662 /* ReinterpF64asI64(e) */
2663 /* Given an IEEE754 double, produce an I64 with the same bit
2664 pattern. */
2665 case Iop_ReinterpF64asI64: {
2666 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2667 HReg tLo = newVRegI(env);
2668 HReg tHi = newVRegI(env);
2669 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2670 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2671 /* paranoia */
2672 set_FPU_rounding_default(env);
2673 /* subl $8, %esp */
2674 sub_from_esp(env, 8);
2675 /* gstD %rf, 0(%esp) */
2676 addInstr(env,
2677 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2678 /* movl 0(%esp), %tLo */
2679 addInstr(env,
2680 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2681 /* movl 4(%esp), %tHi */
2682 addInstr(env,
2683 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2684 /* addl $8, %esp */
2685 add_to_esp(env, 8);
2686 *rHi = tHi;
2687 *rLo = tLo;
2688 return;
2689 }
2690
2691 case Iop_CmpNEZ32x2:
2692 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2693 case Iop_CmpNEZ16x4:
2694 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2695 case Iop_CmpNEZ8x8:
2696 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2697 unish: {
2698 /* Note: the following assumes all helpers are of
2699 signature
2700 ULong fn ( ULong ), and they are
2701 not marked as regparm functions.
2702 */
2703 HReg xLo, xHi;
2704 HReg tLo = newVRegI(env);
2705 HReg tHi = newVRegI(env);
2706 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2707 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2708 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2709 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2710 add_to_esp(env, 2*4);
2711 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2712 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2713 *rHi = tHi;
2714 *rLo = tLo;
2715 return;
2716 }
2717
2718 default:
2719 break;
2720 }
2721 } /* if (e->tag == Iex_Unop) */
2722
2723
2724 /* --------- CCALL --------- */
2725 if (e->tag == Iex_CCall) {
2726 HReg tLo = newVRegI(env);
2727 HReg tHi = newVRegI(env);
2728
2729 /* Marshal args, do the call, clear stack. */
2730 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2731
2732 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2733 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2734 *rHi = tHi;
2735 *rLo = tLo;
2736 return;
2737 }
2738
2739 ppIRExpr(e);
2740 vpanic("iselInt64Expr");
2741 }
2742
2743
2744 /*---------------------------------------------------------*/
2745 /*--- ISEL: Floating point expressions (32 bit) ---*/
2746 /*---------------------------------------------------------*/
2747
2748 /* Nothing interesting here; really just wrappers for
2749 64-bit stuff. */
2750
2751 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2752 {
2753 HReg r = iselFltExpr_wrk( env, e );
2754 # if 0
2755 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2756 # endif
2757 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2758 vassert(hregIsVirtual(r));
2759 return r;
2760 }
2761
2762 /* DO NOT CALL THIS DIRECTLY */
2763 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2764 {
2765 IRType ty = typeOfIRExpr(env->type_env,e);
2766 vassert(ty == Ity_F32);
2767
2768 if (e->tag == Iex_RdTmp) {
2769 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2770 }
2771
2772 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2773 X86AMode* am;
2774 HReg res = newVRegF(env);
2775 vassert(e->Iex.Load.ty == Ity_F32);
2776 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2777 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2778 return res;
2779 }
2780
2781 if (e->tag == Iex_Binop
2782 && e->Iex.Binop.op == Iop_F64toF32) {
2783 /* Although the result is still held in a standard FPU register,
2784 we need to round it to reflect the loss of accuracy/range
2785 entailed in casting it to a 32-bit float. */
2786 HReg dst = newVRegF(env);
2787 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2788 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2789 addInstr(env, X86Instr_Fp64to32(src,dst));
2790 set_FPU_rounding_default( env );
2791 return dst;
2792 }
2793
2794 if (e->tag == Iex_Get) {
2795 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2796 hregX86_EBP() );
2797 HReg res = newVRegF(env);
2798 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2799 return res;
2800 }
2801
2802 if (e->tag == Iex_Unop
2803 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2804 /* Given an I32, produce an IEEE754 float with the same bit
2805 pattern. */
2806 HReg dst = newVRegF(env);
2807 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2808 /* paranoia */
2809 addInstr(env, X86Instr_Push(rmi));
2810 addInstr(env, X86Instr_FpLdSt(
2811 True/*load*/, 4, dst,
2812 X86AMode_IR(0, hregX86_ESP())));
2813 add_to_esp(env, 4);
2814 return dst;
2815 }
2816
2817 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2818 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
2819 HReg dst = newVRegF(env);
2820
2821 /* rf now holds the value to be rounded. The first thing to do
2822 is set the FPU's rounding mode accordingly. */
2823
2824 /* Set host rounding mode */
2825 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2826
2827 /* grndint %rf, %dst */
2828 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2829
2830 /* Restore default FPU rounding. */
2831 set_FPU_rounding_default( env );
2832
2833 return dst;
2834 }
2835
2836 ppIRExpr(e);
2837 vpanic("iselFltExpr_wrk");
2838 }
2839
2840
2841 /*---------------------------------------------------------*/
2842 /*--- ISEL: Floating point expressions (64 bit) ---*/
2843 /*---------------------------------------------------------*/
2844
2845 /* Compute a 64-bit floating point value into a register, the identity
2846 of which is returned. As with iselIntExpr_R, the reg may be either
2847 real or virtual; in any case it must not be changed by subsequent
2848 code emitted by the caller. */
2849
2850 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2851
2852 Type S (1 bit) E (11 bits) F (52 bits)
2853 ---- --------- ----------- -----------
2854 signalling NaN u 2047 (max) .0uuuuu---u
2855 (with at least
2856 one 1 bit)
2857 quiet NaN u 2047 (max) .1uuuuu---u
2858
2859 negative infinity 1 2047 (max) .000000---0
2860
2861 positive infinity 0 2047 (max) .000000---0
2862
2863 negative zero 1 0 .000000---0
2864
2865 positive zero 0 0 .000000---0
2866 */
2867
2868 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2869 {
2870 HReg r = iselDblExpr_wrk( env, e );
2871 # if 0
2872 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2873 # endif
2874 vassert(hregClass(r) == HRcFlt64);
2875 vassert(hregIsVirtual(r));
2876 return r;
2877 }
2878
2879 /* DO NOT CALL THIS DIRECTLY */
2880 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2881 {
2882 IRType ty = typeOfIRExpr(env->type_env,e);
2883 vassert(e);
2884 vassert(ty == Ity_F64);
2885
2886 if (e->tag == Iex_RdTmp) {
2887 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2888 }
2889
2890 if (e->tag == Iex_Const) {
2891 union { UInt u32x2[2]; ULong u64; Double f64; } u;
2892 HReg freg = newVRegF(env);
2893 vassert(sizeof(u) == 8);
2894 vassert(sizeof(u.u64) == 8);
2895 vassert(sizeof(u.f64) == 8);
2896 vassert(sizeof(u.u32x2) == 8);
2897
2898 if (e->Iex.Const.con->tag == Ico_F64) {
2899 u.f64 = e->Iex.Const.con->Ico.F64;
2900 }
2901 else if (e->Iex.Const.con->tag == Ico_F64i) {
2902 u.u64 = e->Iex.Const.con->Ico.F64i;
2903 }
2904 else
2905 vpanic("iselDblExpr(x86): const");
2906
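/* Push the high word first so that the two pushes leave the 64-bit
   value in little-endian order on the stack, ready for the 8-byte
   FP load from 0(%esp). */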
2907 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
2908 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
2909 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
2910 X86AMode_IR(0, hregX86_ESP())));
2911 add_to_esp(env, 8);
2912 return freg;
2913 }
2914
2915 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2916 X86AMode* am;
2917 HReg res = newVRegF(env);
2918 vassert(e->Iex.Load.ty == Ity_F64);
2919 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2920 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
2921 return res;
2922 }
2923
2924 if (e->tag == Iex_Get) {
2925 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2926 hregX86_EBP() );
2927 HReg res = newVRegF(env);
2928 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2929 return res;
2930 }
2931
2932 if (e->tag == Iex_GetI) {
2933 X86AMode* am
2934 = genGuestArrayOffset(
2935 env, e->Iex.GetI.descr,
2936 e->Iex.GetI.ix, e->Iex.GetI.bias );
2937 HReg res = newVRegF(env);
2938 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2939 return res;
2940 }
2941
2942 if (e->tag == Iex_Triop) {
2943 X86FpOp fpop = Xfp_INVALID;
2944 switch (e->Iex.Triop.op) {
2945 case Iop_AddF64: fpop = Xfp_ADD; break;
2946 case Iop_SubF64: fpop = Xfp_SUB; break;
2947 case Iop_MulF64: fpop = Xfp_MUL; break;
2948 case Iop_DivF64: fpop = Xfp_DIV; break;
2949 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
2950 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
2951 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
2952 case Iop_AtanF64: fpop = Xfp_ATAN; break;
2953 case Iop_PRemF64: fpop = Xfp_PREM; break;
2954 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
2955 default: break;
2956 }
2957 if (fpop != Xfp_INVALID) {
2958 HReg res = newVRegF(env);
2959 HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
2960 HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
2961 /* XXXROUNDINGFIXME */
2962 /* set roundingmode here */
2963 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
2964 if (fpop != Xfp_ADD && fpop != Xfp_SUB
2965 && fpop != Xfp_MUL && fpop != Xfp_DIV)
2966 roundToF64(env, res);
2967 return res;
2968 }
2969 }
2970
2971 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2972 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2973 HReg dst = newVRegF(env);
2974
2975 /* rf now holds the value to be rounded. The first thing to do
2976 is set the FPU's rounding mode accordingly. */
2977
2978 /* Set host rounding mode */
2979 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2980
2981 /* grndint %rf, %dst */
2982 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2983
2984 /* Restore default FPU rounding. */
2985 set_FPU_rounding_default( env );
2986
2987 return dst;
2988 }
2989
2990 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
2991 HReg dst = newVRegF(env);
2992 HReg rHi,rLo;
2993 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
2994 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
2995 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
2996
2997 /* Set host rounding mode */
2998 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2999
3000 addInstr(env, X86Instr_FpLdStI(
3001 True/*load*/, 8, dst,
3002 X86AMode_IR(0, hregX86_ESP())));
3003
3004 /* Restore default FPU rounding. */
3005 set_FPU_rounding_default( env );
3006
3007 add_to_esp(env, 8);
3008 return dst;
3009 }
3010
3011 if (e->tag == Iex_Binop) {
3012 X86FpOp fpop = Xfp_INVALID;
3013 switch (e->Iex.Binop.op) {
3014 case Iop_SinF64: fpop = Xfp_SIN; break;
3015 case Iop_CosF64: fpop = Xfp_COS; break;
3016 case Iop_TanF64: fpop = Xfp_TAN; break;
3017 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3018 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3019 default: break;
3020 }
3021 if (fpop != Xfp_INVALID) {
3022 HReg res = newVRegF(env);
3023 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3024 /* XXXROUNDINGFIXME */
3025 /* set roundingmode here */
3026 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3027 if (fpop != Xfp_SQRT
3028 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3029 roundToF64(env, res);
3030 return res;
3031 }
3032 }
3033
3034 if (e->tag == Iex_Unop) {
3035 X86FpOp fpop = Xfp_INVALID;
3036 switch (e->Iex.Unop.op) {
3037 case Iop_NegF64: fpop = Xfp_NEG; break;
3038 case Iop_AbsF64: fpop = Xfp_ABS; break;
3039 default: break;
3040 }
3041 if (fpop != Xfp_INVALID) {
3042 HReg res = newVRegF(env);
3043 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3044 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3045 if (fpop != Xfp_NEG && fpop != Xfp_ABS)
3046 roundToF64(env, res);
3047 return res;
3048 }
3049 }
3050
3051 if (e->tag == Iex_Unop) {
3052 switch (e->Iex.Unop.op) {
3053 case Iop_I32StoF64: {
3054 HReg dst = newVRegF(env);
3055 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3056 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3057 set_FPU_rounding_default(env);
3058 addInstr(env, X86Instr_FpLdStI(
3059 True/*load*/, 4, dst,
3060 X86AMode_IR(0, hregX86_ESP())));
3061 add_to_esp(env, 4);
3062 return dst;
3063 }
3064 case Iop_ReinterpI64asF64: {
3065 /* Given an I64, produce an IEEE754 double with the same
3066 bit pattern. */
3067 HReg dst = newVRegF(env);
3068 HReg rHi, rLo;
3069 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3070 /* paranoia */
3071 set_FPU_rounding_default(env);
3072 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3073 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3074 addInstr(env, X86Instr_FpLdSt(
3075 True/*load*/, 8, dst,
3076 X86AMode_IR(0, hregX86_ESP())));
3077 add_to_esp(env, 8);
3078 return dst;
3079 }
3080 case Iop_F32toF64: {
3081 /* this is a no-op */
3082 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3083 return res;
3084 }
3085 default:
3086 break;
3087 }
3088 }
3089
3090 /* --------- MULTIPLEX --------- */
3091 if (e->tag == Iex_Mux0X) {
3092 if (ty == Ity_F64
3093 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
3094 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3095 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
3096 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
3097 HReg dst = newVRegF(env);
3098 addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
3099 addInstr(env, X86Instr_Test32(0xFF, r8));
3100 addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
3101 return dst;
3102 }
3103 }
3104
3105 ppIRExpr(e);
3106 vpanic("iselDblExpr_wrk");
3107 }
3108
3109
3110 /*---------------------------------------------------------*/
3111 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3112 /*---------------------------------------------------------*/
3113
3114 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3115 {
3116 HReg r = iselVecExpr_wrk( env, e );
3117 # if 0
3118 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3119 # endif
3120 vassert(hregClass(r) == HRcVec128);
3121 vassert(hregIsVirtual(r));
3122 return r;
3123 }
3124
3125
3126 /* DO NOT CALL THIS DIRECTLY */
3127 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3128 {
3129
3130 # define REQUIRE_SSE1 \
3131 do { if (env->hwcaps == 0/*baseline, no sse*/) \
3132 goto vec_fail; \
3133 } while (0)
3134
3135 # define REQUIRE_SSE2 \
3136 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3137 goto vec_fail; \
3138 } while (0)
3139
3140 # define SSE2_OR_ABOVE \
3141 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3142
3143 HWord fn = 0; /* address of helper fn, if required */
3144 MatchInfo mi;
3145 Bool arg1isEReg = False;
3146 X86SseOp op = Xsse_INVALID;
3147 IRType ty = typeOfIRExpr(env->type_env,e);
3148 vassert(e);
3149 vassert(ty == Ity_V128);
3150
3151 REQUIRE_SSE1;
3152
3153 if (e->tag == Iex_RdTmp) {
3154 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3155 }
3156
3157 if (e->tag == Iex_Get) {
3158 HReg dst = newVRegV(env);
3159 addInstr(env, X86Instr_SseLdSt(
3160 True/*load*/,
3161 dst,
3162 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3163 )
3164 );
3165 return dst;
3166 }
3167
3168 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3169 HReg dst = newVRegV(env);
3170 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3171 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3172 return dst;
3173 }
3174
3175 if (e->tag == Iex_Const) {
3176 HReg dst = newVRegV(env);
3177 vassert(e->Iex.Const.con->tag == Ico_V128);
3178 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3179 return dst;
3180 }
3181
3182 if (e->tag == Iex_Unop) {
3183
3184 if (SSE2_OR_ABOVE) {
3185 /* 64UtoV128(LDle:I64(addr)) */
3186 DECLARE_PATTERN(p_zwiden_load64);
3187 DEFINE_PATTERN(p_zwiden_load64,
3188 unop(Iop_64UtoV128,
3189 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3190 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3191 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3192 HReg dst = newVRegV(env);
3193 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3194 return dst;
3195 }
3196 }
3197
3198 switch (e->Iex.Unop.op) {
3199
3200 case Iop_NotV128: {
3201 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3202 return do_sse_Not128(env, arg);
3203 }
3204
3205 case Iop_CmpNEZ64x2: {
3206 /* We can use SSE2 instructions for this. */
3207 /* Ideally, we want to do a 64Ix2 comparison against zero of
3208 the operand. Problem is no such insn exists. Solution
3209 therefore is to do a 32Ix4 comparison instead, and bitwise-
3210 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3211 let the not'd result of this initial comparison be a:b:c:d.
3212 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3213 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3214 giving the required result.
3215
3216 The required selection sequence is 2,3,0,1, which
3217 according to Intel's documentation means the pshufd
3218 literal value is 0xB1, that is,
3219 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3220 */
3221 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3222 HReg tmp = newVRegV(env);
3223 HReg dst = newVRegV(env);
3224 REQUIRE_SSE2;
3225 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3226 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3227 tmp = do_sse_Not128(env, tmp);
3228 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3229 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3230 return dst;
3231 }
3232
3233 case Iop_CmpNEZ32x4: {
3234 /* Sigh, we have to generate lousy code since this has to
3235 work on SSE1 hosts */
3236 /* basically, the idea is: for each lane:
3237 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3238 sbbl %r, %r (now %r = 1Sto32(CF))
3239 movl %r, lane
3240 */
3241 Int i;
3242 X86AMode* am;
3243 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3244 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3245 HReg dst = newVRegV(env);
3246 HReg r32 = newVRegI(env);
3247 sub_from_esp(env, 16);
3248 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3249 for (i = 0; i < 4; i++) {
3250 am = X86AMode_IR(i*4, hregX86_ESP());
3251 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3252 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3253 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3254 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3255 }
3256 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3257 add_to_esp(env, 16);
3258 return dst;
3259 }
3260
3261 case Iop_CmpNEZ8x16:
3262 case Iop_CmpNEZ16x8: {
3263 /* We can use SSE2 instructions for this. */
3264 HReg arg;
3265 HReg vec0 = newVRegV(env);
3266 HReg vec1 = newVRegV(env);
3267 HReg dst = newVRegV(env);
3268 X86SseOp cmpOp
3269 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3270 : Xsse_CMPEQ8;
3271 REQUIRE_SSE2;
3272 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3273 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3274 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
3275 /* defer arg computation to here so as to give CMPEQF as long
3276 as possible to complete */
3277 arg = iselVecExpr(env, e->Iex.Unop.arg);
3278 /* vec0 is all 0s; vec1 is all 1s */
3279 addInstr(env, mk_vMOVsd_RR(arg, dst));
3280 /* 16x8 or 8x16 comparison == */
3281 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3282 /* invert result */
3283 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3284 return dst;
3285 }
3286
3287 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3288 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3289 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
3290 do_32Fx4_unary:
3291 {
3292 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3293 HReg dst = newVRegV(env);
3294 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3295 return dst;
3296 }
3297
3298 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3299 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
3300 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
3301 do_64Fx2_unary:
3302 {
3303 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3304 HReg dst = newVRegV(env);
3305 REQUIRE_SSE2;
3306 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
3307 return dst;
3308 }
3309
3310 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3311 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3312 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3313 do_32F0x4_unary:
3314 {
3315 /* A bit subtle. We have to copy the arg to the result
3316 register first, because actually doing the SSE scalar insn
3317 leaves the upper 3/4 of the destination register
3318 unchanged. Whereas the required semantics of these
3319 primops is that the upper 3/4 is simply copied in from the
3320 argument. */
3321 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3322 HReg dst = newVRegV(env);
3323 addInstr(env, mk_vMOVsd_RR(arg, dst));
3324 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3325 return dst;
3326 }
3327
3328 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3329 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3330 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3331 do_64F0x2_unary:
3332 {
3333 /* A bit subtle. We have to copy the arg to the result
3334 register first, because actually doing the SSE scalar insn
3335 leaves the upper half of the destination register
3336 unchanged. Whereas the required semantics of these
3337 primops is that the upper half is simply copied in from the
3338 argument. */
3339 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3340 HReg dst = newVRegV(env);
3341 REQUIRE_SSE2;
3342 addInstr(env, mk_vMOVsd_RR(arg, dst));
3343 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3344 return dst;
3345 }
3346
3347 case Iop_32UtoV128: {
3348 HReg dst = newVRegV(env);
3349 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3350 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3351 addInstr(env, X86Instr_Push(rmi));
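/* SseLdzLO loads the low 4 bytes from the stack and zeroes the
   rest of the destination XMM register, which is exactly the
   required zero-extension to 128 bits. */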
3352 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3353 add_to_esp(env, 4);
3354 return dst;
3355 }
3356
3357 case Iop_64UtoV128: {
3358 HReg rHi, rLo;
3359 HReg dst = newVRegV(env);
3360 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3361 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3362 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3363 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3364 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3365 add_to_esp(env, 8);
3366 return dst;
3367 }
3368
3369 default:
3370 break;
3371 } /* switch (e->Iex.Unop.op) */
3372 } /* if (e->tag == Iex_Unop) */
3373
3374 if (e->tag == Iex_Binop) {
3375 switch (e->Iex.Binop.op) {
3376
3377 case Iop_SetV128lo32: {
3378 HReg dst = newVRegV(env);
3379 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3380 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3381 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3382 sub_from_esp(env, 16);
3383 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3384 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3385 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3386 add_to_esp(env, 16);
3387 return dst;
3388 }
3389
3390 case Iop_SetV128lo64: {
3391 HReg dst = newVRegV(env);
3392 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3393 HReg srcIhi, srcIlo;
3394 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3395 X86AMode* esp4 = advance4(esp0);
3396 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3397 sub_from_esp(env, 16);
3398 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3399 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3400 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3401 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3402 add_to_esp(env, 16);
3403 return dst;
3404 }
3405
3406 case Iop_64HLtoV128: {
3407 HReg r3, r2, r1, r0;
3408 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3409 X86AMode* esp4 = advance4(esp0);
3410 X86AMode* esp8 = advance4(esp4);
3411 X86AMode* esp12 = advance4(esp8);
3412 HReg dst = newVRegV(env);
3413 /* do this via the stack (easy, convenient, etc) */
3414 sub_from_esp(env, 16);
3415 /* Do the less significant 64 bits */
3416 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3417 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3418 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3419 /* Do the more significant 64 bits */
3420 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3421 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3422 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3423 /* Fetch result back from stack. */
3424 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3425 add_to_esp(env, 16);
3426 return dst;
3427 }
3428
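           /* Two-operand SSE FP forms (and the low-lane-only ..F0x..
              variants further below): copy argL into dst first, then apply
              the op with argR as the source, since the underlying x86
              instructions overwrite their destination operand. */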
3429 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3430 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3431 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3432 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3433 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
3434 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
3435 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3436 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3437 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
3438 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
3439 do_32Fx4:
3440 {
3441 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3442 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3443 HReg dst = newVRegV(env);
3444 addInstr(env, mk_vMOVsd_RR(argL, dst));
3445 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3446 return dst;
3447 }
3448
3449 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3450 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3451 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3452 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3453 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
3454 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
3455 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3456 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3457 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
3458 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
3459 do_64Fx2:
3460 {
3461 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3462 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3463 HReg dst = newVRegV(env);
3464 REQUIRE_SSE2;
3465 addInstr(env, mk_vMOVsd_RR(argL, dst));
3466 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3467 return dst;
3468 }
3469
3470 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3471 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3472 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3473 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3474 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3475 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3476 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3477 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3478 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3479 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3480 do_32F0x4: {
3481 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3482 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3483 HReg dst = newVRegV(env);
3484 addInstr(env, mk_vMOVsd_RR(argL, dst));
3485 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3486 return dst;
3487 }
3488
3489 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3490 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3491 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3492 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3493 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3494 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3495 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3496 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3497 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3498 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3499 do_64F0x2: {
3500 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3501 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3502 HReg dst = newVRegV(env);
3503 REQUIRE_SSE2;
3504 addInstr(env, mk_vMOVsd_RR(argL, dst));
3505 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3506 return dst;
3507 }
3508
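           /* For the pack/unpack family the IR's first operand has to end
              up as the instruction's E (source) operand, so arg1isEReg is
              set and do_SseReRg below copies arg2 into dst instead. */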
3509 case Iop_QNarrowBin32Sto16Sx8:
3510 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3511 case Iop_QNarrowBin16Sto8Sx16:
3512 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3513 case Iop_QNarrowBin16Sto8Ux16:
3514 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3515
3516 case Iop_InterleaveHI8x16:
3517 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3518 case Iop_InterleaveHI16x8:
3519 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3520 case Iop_InterleaveHI32x4:
3521 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3522 case Iop_InterleaveHI64x2:
3523 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3524
3525 case Iop_InterleaveLO8x16:
3526 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3527 case Iop_InterleaveLO16x8:
3528 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3529 case Iop_InterleaveLO32x4:
3530 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3531 case Iop_InterleaveLO64x2:
3532 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3533
3534 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3535 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3536 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3537 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3538 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3539 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3540 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3541 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3542 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3543 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3544 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3545 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3546 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3547 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3548 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3549 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3550 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3551 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3552 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3553 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3554 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3555 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3556 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3557 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3558 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3559 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3560 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3561 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3562 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3563 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3564 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3565 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3566 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3567 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3568 do_SseReRg: {
3569 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3570 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3571 HReg dst = newVRegV(env);
3572 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3573 REQUIRE_SSE2;
3574 if (arg1isEReg) {
3575 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3576 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3577 } else {
3578 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3579 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3580 }
3581 return dst;
3582 }
3583
3584 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3585 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3586 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3587 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3588 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3589 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3590 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3591 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3592 do_SseShift: {
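              /* The SSE shift-by-scalar instructions take the shift count
                 from the low 64 bits of an XMM register.  Build a 128-bit
                 value on the stack with the count in the lowest word and
                 zeroes above it, load that into ereg, then shift a copy
                 of greg. */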
3593 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3594 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3595 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3596 HReg ereg = newVRegV(env);
3597 HReg dst = newVRegV(env);
3598 REQUIRE_SSE2;
3599 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3600 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3601 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3602 addInstr(env, X86Instr_Push(rmi));
3603 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3604 addInstr(env, mk_vMOVsd_RR(greg, dst));
3605 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3606 add_to_esp(env, 16);
3607 return dst;
3608 }
3609
3610 case Iop_NarrowBin32to16x8:
3611 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3612 goto do_SseAssistedBinary;
3613 case Iop_NarrowBin16to8x16:
3614 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3615 goto do_SseAssistedBinary;
3616 do_SseAssistedBinary: {
3617          /* As with the amd64 case (from which this is copied), we
3618             generate pretty bad code. */
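              /* Protocol: the helper is handed three pointers in registers
                 (%eax = result slot, %edx = argL slot, %ecx = argR slot),
                 all within a 16-aligned scratch area carved out of the
                 stack; the result is read back from the %eax slot. */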
3619 vassert(fn != 0);
3620 HReg dst = newVRegV(env);
3621 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3622 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3623 HReg argp = newVRegI(env);
3624 /* subl $112, %esp -- make a space */
3625 sub_from_esp(env, 112);
3626 /* leal 48(%esp), %r_argp -- point into it */
3627 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3628 argp));
3629 /* andl $-16, %r_argp -- 16-align the pointer */
3630 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3631 X86RMI_Imm( ~(UInt)15 ),
3632 argp));
3633 /* Prepare 3 arg regs:
3634 leal 0(%r_argp), %eax
3635 leal 16(%r_argp), %edx
3636 leal 32(%r_argp), %ecx
3637 */
3638 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3639 hregX86_EAX()));
3640 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3641 hregX86_EDX()));
3642 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3643 hregX86_ECX()));
3644 /* Store the two args, at (%edx) and (%ecx):
3645 movupd %argL, 0(%edx)
3646 movupd %argR, 0(%ecx)
3647 */
3648 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3649 X86AMode_IR(0, hregX86_EDX())));
3650 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3651 X86AMode_IR(0, hregX86_ECX())));
3652 /* call the helper */
3653 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
3654 /* fetch the result from memory, using %r_argp, which the
3655 register allocator will keep alive across the call. */
3656 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3657 X86AMode_IR(0, argp)));
3658 /* and finally, clear the space */
3659 add_to_esp(env, 112);
3660 return dst;
3661 }
3662
3663 default:
3664 break;
3665 } /* switch (e->Iex.Binop.op) */
3666 } /* if (e->tag == Iex_Binop) */
3667
3668 if (e->tag == Iex_Mux0X) {
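        /* Evaluate both arms, start with exprX in dst, then conditionally
           overwrite it with expr0 when the low 8 bits of the condition are
           zero (Test32 sets Z, and SseCMov fires on Xcc_Z). */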
3669 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3670 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3671 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3672 HReg dst = newVRegV(env);
3673 addInstr(env, mk_vMOVsd_RR(rX,dst));
3674 addInstr(env, X86Instr_Test32(0xFF, r8));
3675 addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
3676 return dst;
3677 }
3678
3679 vec_fail:
3680 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3681 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3682 ppIRExpr(e);
3683 vpanic("iselVecExpr_wrk");
3684
3685 # undef REQUIRE_SSE1
3686 # undef REQUIRE_SSE2
3687 # undef SSE2_OR_ABOVE
3688 }
3689
3690
3691 /*---------------------------------------------------------*/
3692 /*--- ISEL: Statements ---*/
3693 /*---------------------------------------------------------*/
3694
3695 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3696 {
3697 if (vex_traceflags & VEX_TRACE_VCODE) {
3698 vex_printf("\n-- ");
3699 ppIRStmt(stmt);
3700 vex_printf("\n");
3701 }
3702
3703 switch (stmt->tag) {
3704
3705 /* --------- STORE --------- */
3706 case Ist_Store: {
3707 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3708 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3709 IREndness end = stmt->Ist.Store.end;
3710
3711 if (tya != Ity_I32 || end != Iend_LE)
3712 goto stmt_fail;
3713
3714 if (tyd == Ity_I32) {
3715 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3716 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3717 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3718 return;
3719 }
3720 if (tyd == Ity_I8 || tyd == Ity_I16) {
3721 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3722 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3723 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3724 r,am ));
3725 return;
3726 }
3727 if (tyd == Ity_F64) {
3728 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3729 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3730 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3731 return;
3732 }
3733 if (tyd == Ity_F32) {
3734 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3735 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3736 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3737 return;
3738 }
3739 if (tyd == Ity_I64) {
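               /* A 64-bit store is done as two 32-bit stores: low word at
                  [rA], high word at [rA+4] (little-endian). */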
3740 HReg vHi, vLo, rA;
3741 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3742 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3743 addInstr(env, X86Instr_Alu32M(
3744 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3745 addInstr(env, X86Instr_Alu32M(
3746 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3747 return;
3748 }
3749 if (tyd == Ity_V128) {
3750 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3751 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3752 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3753 return;
3754 }
3755 break;
3756 }
3757
3758 /* --------- PUT --------- */
3759 case Ist_Put: {
3760 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3761 if (ty == Ity_I32) {
3762 /* We're going to write to memory, so compute the RHS into an
3763 X86RI. */
3764 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3765 addInstr(env,
3766 X86Instr_Alu32M(
3767 Xalu_MOV,
3768 ri,
3769 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3770 ));
3771 return;
3772 }
3773 if (ty == Ity_I8 || ty == Ity_I16) {
3774 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3775 addInstr(env, X86Instr_Store(
3776 toUChar(ty==Ity_I8 ? 1 : 2),
3777 r,
3778 X86AMode_IR(stmt->Ist.Put.offset,
3779 hregX86_EBP())));
3780 return;
3781 }
3782 if (ty == Ity_I64) {
3783 HReg vHi, vLo;
3784 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3785 X86AMode* am4 = advance4(am);
3786 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3787 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3788 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3789 return;
3790 }
3791 if (ty == Ity_V128) {
3792 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3793 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3794 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3795 return;
3796 }
3797 if (ty == Ity_F32) {
3798 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3799 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3800 set_FPU_rounding_default(env); /* paranoia */
3801 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3802 return;
3803 }
3804 if (ty == Ity_F64) {
3805 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3806 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3807 set_FPU_rounding_default(env); /* paranoia */
3808 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3809 return;
3810 }
3811 break;
3812 }
3813
3814 /* --------- Indexed PUT --------- */
3815 case Ist_PutI: {
3816 X86AMode* am
3817 = genGuestArrayOffset(
3818 env, stmt->Ist.PutI.descr,
3819 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3820
3821 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3822 if (ty == Ity_F64) {
3823 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3824 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3825 return;
3826 }
3827 if (ty == Ity_I8) {
3828 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3829 addInstr(env, X86Instr_Store( 1, r, am ));
3830 return;
3831 }
3832 if (ty == Ity_I32) {
3833 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3834 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
3835 return;
3836 }
3837 if (ty == Ity_I64) {
3838 HReg rHi, rLo;
3839 X86AMode* am4 = advance4(am);
3840 iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
3841 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
3842 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
3843 return;
3844 }
3845 break;
3846 }
3847
3848 /* --------- TMP --------- */
3849 case Ist_WrTmp: {
3850 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3851 IRType ty = typeOfIRTemp(env->type_env, tmp);
3852
3853          /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
3854             compute it into an AMode and then use LEA.  This usually
3855             produces fewer instructions, often because (for memcheck-
3856             created IR) we get t = address-expression (t is later used
3857             twice), and so doing this naturally turns the
3858             address-expression back into an X86 amode. */
3859 if (ty == Ity_I32
3860 && stmt->Ist.WrTmp.data->tag == Iex_Binop
3861 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
3862 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3863 HReg dst = lookupIRTemp(env, tmp);
3864 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
3865 /* Hmm, iselIntExpr_AMode wimped out and just computed the
3866 value into a register. Just emit a normal reg-reg move
3867 so reg-alloc can coalesce it away in the usual way. */
3868 HReg src = am->Xam.IR.reg;
3869 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
3870 } else {
3871 addInstr(env, X86Instr_Lea32(am,dst));
3872 }
3873 return;
3874 }
3875
3876 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3877 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3878 HReg dst = lookupIRTemp(env, tmp);
3879 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
3880 return;
3881 }
3882 if (ty == Ity_I64) {
3883 HReg rHi, rLo, dstHi, dstLo;
3884 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3885 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
3886 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3887 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3888 return;
3889 }
3890 if (ty == Ity_I1) {
3891 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3892 HReg dst = lookupIRTemp(env, tmp);
3893 addInstr(env, X86Instr_Set32(cond, dst));
3894 return;
3895 }
3896 if (ty == Ity_F64) {
3897 HReg dst = lookupIRTemp(env, tmp);
3898 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3899 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3900 return;
3901 }
3902 if (ty == Ity_F32) {
3903 HReg dst = lookupIRTemp(env, tmp);
3904 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3905 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3906 return;
3907 }
3908 if (ty == Ity_V128) {
3909 HReg dst = lookupIRTemp(env, tmp);
3910 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3911 addInstr(env, mk_vMOVsd_RR(src,dst));
3912 return;
3913 }
3914 break;
3915 }
3916
3917 /* --------- Call to DIRTY helper --------- */
3918 case Ist_Dirty: {
3919 IRType retty;
3920 IRDirty* d = stmt->Ist.Dirty.details;
3921 Bool passBBP = False;
3922
3923 if (d->nFxState == 0)
3924 vassert(!d->needsBBP);
3925
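            /* Pass the guest state (baseblock) pointer to the helper as an
               extra argument only if the helper both declares guest-state
               effects and asks for it. */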
3926 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3927
3928 /* Marshal args, do the call, clear stack. */
3929 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3930
3931 /* Now figure out what to do with the returned value, if any. */
3932 if (d->tmp == IRTemp_INVALID)
3933 /* No return value. Nothing to do. */
3934 return;
3935
3936 retty = typeOfIRTemp(env->type_env, d->tmp);
3937 if (retty == Ity_I64) {
3938 HReg dstHi, dstLo;
3939 /* The returned value is in %edx:%eax. Park it in the
3940 register-pair associated with tmp. */
3941 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
3942 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
3943 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
3944 return;
3945 }
3946 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
3947 /* The returned value is in %eax. Park it in the register
3948 associated with tmp. */
3949 HReg dst = lookupIRTemp(env, d->tmp);
3950 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
3951 return;
3952 }
3953 break;
3954 }
3955
3956 /* --------- MEM FENCE --------- */
3957 case Ist_MBE:
3958 switch (stmt->Ist.MBE.event) {
3959 case Imbe_Fence:
3960 addInstr(env, X86Instr_MFence(env->hwcaps));
3961 return;
3962 default:
3963 break;
3964 }
3965 break;
3966
3967 /* --------- ACAS --------- */
3968 case Ist_CAS:
3969 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3970 /* "normal" singleton CAS */
3971 UChar sz;
3972 IRCAS* cas = stmt->Ist.CAS.details;
3973 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3974 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
3975 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
3976 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3977 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3978 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
3979 vassert(cas->expdHi == NULL);
3980 vassert(cas->dataHi == NULL);
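               /* rOldLo provisionally gets the expected value; if the locked
                  compare-and-swap fails (Z clear after ACAS), overwrite it
                  with the value actually found in memory, left in %eax. */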
3981 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
3982 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
3983 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
3984 switch (ty) {
3985 case Ity_I32: sz = 4; break;
3986 case Ity_I16: sz = 2; break;
3987 case Ity_I8: sz = 1; break;
3988 default: goto unhandled_cas;
3989 }
3990 addInstr(env, X86Instr_ACAS(am, sz));
3991 addInstr(env,
3992 X86Instr_CMov32(Xcc_NZ,
3993 X86RM_Reg(hregX86_EAX()), rOldLo));
3994 return;
3995 } else {
3996 /* double CAS */
3997 IRCAS* cas = stmt->Ist.CAS.details;
3998 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3999 /* only 32-bit allowed in this case */
4000 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4001 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4002 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4003 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4004 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4005 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4006 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4007 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4008 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4009 if (ty != Ity_I32)
4010 goto unhandled_cas;
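               /* Double-width CAS: expected value in %edx:%eax, new value in
                  %ecx:%ebx; if it fails, %edx:%eax holds what was found in
                  memory, so copy that into rOldHi:rOldLo. */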
4011 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4012 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4013 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4014 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4015 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4016 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4017 addInstr(env, X86Instr_DACAS(am));
4018 addInstr(env,
4019 X86Instr_CMov32(Xcc_NZ,
4020 X86RM_Reg(hregX86_EDX()), rOldHi));
4021 addInstr(env,
4022 X86Instr_CMov32(Xcc_NZ,
4023 X86RM_Reg(hregX86_EAX()), rOldLo));
4024 return;
4025 }
4026 unhandled_cas:
4027 break;
4028
4029 /* --------- INSTR MARK --------- */
4030 /* Doesn't generate any executable code ... */
4031 case Ist_IMark:
4032 return;
4033
4034 /* --------- NO-OP --------- */
4035 /* Fairly self-explanatory, wouldn't you say? */
4036 case Ist_NoOp:
4037 return;
4038
4039 /* --------- EXIT --------- */
4040 case Ist_Exit: {
4041 X86RI* dst;
4042 X86CondCode cc;
4043 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4044 vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
4045 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
4046 cc = iselCondCode(env,stmt->Ist.Exit.guard);
4047 addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
4048 return;
4049 }
4050
4051 default: break;
4052 }
4053 stmt_fail:
4054 ppIRStmt(stmt);
4055 vpanic("iselStmt");
4056 }
4057
4058
4059 /*---------------------------------------------------------*/
4060 /*--- ISEL: Basic block terminators (Nexts) ---*/
4061 /*---------------------------------------------------------*/
4062
4063 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
4064 {
4065 X86RI* ri;
4066 if (vex_traceflags & VEX_TRACE_VCODE) {
4067 vex_printf("\n-- goto {");
4068 ppIRJumpKind(jk);
4069 vex_printf("} ");
4070 ppIRExpr(next);
4071 vex_printf("\n");
4072 }
4073 ri = iselIntExpr_RI(env, next);
4074 addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
4075 }
4076
4077
4078 /*---------------------------------------------------------*/
4079 /*--- Insn selector top-level ---*/
4080 /*---------------------------------------------------------*/
4081
4082 /* Translate an entire SB to x86 code. */
4083
4084 HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
4085 VexArchInfo* archinfo_host,
4086 VexAbiInfo* vbi/*UNUSED*/ )
4087 {
4088 Int i, j;
4089 HReg hreg, hregHI;
4090 ISelEnv* env;
4091 UInt hwcaps_host = archinfo_host->hwcaps;
4092
4093 /* sanity ... */
4094 vassert(arch_host == VexArchX86);
4095 vassert(0 == (hwcaps_host
4096 & ~(VEX_HWCAPS_X86_SSE1
4097 | VEX_HWCAPS_X86_SSE2
4098 | VEX_HWCAPS_X86_SSE3
4099 | VEX_HWCAPS_X86_LZCNT)));
4100
4101 /* Make up an initial environment to use. */
4102 env = LibVEX_Alloc(sizeof(ISelEnv));
4103 env->vreg_ctr = 0;
4104
4105 /* Set up output code array. */
4106 env->code = newHInstrArray();
4107
4108 /* Copy BB's type env. */
4109 env->type_env = bb->tyenv;
4110
4111 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4112 change as we go along. */
4113 env->n_vregmap = bb->tyenv->types_used;
4114 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4115 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4116
4117 /* and finally ... */
4118 env->hwcaps = hwcaps_host;
4119
4120 /* For each IR temporary, allocate a suitably-kinded virtual
4121 register. */
4122 j = 0;
4123 for (i = 0; i < env->n_vregmap; i++) {
4124 hregHI = hreg = INVALID_HREG;
4125 switch (bb->tyenv->types[i]) {
4126 case Ity_I1:
4127 case Ity_I8:
4128 case Ity_I16:
4129 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
4130 case Ity_I64: hreg = mkHReg(j++, HRcInt32, True);
4131 hregHI = mkHReg(j++, HRcInt32, True); break;
4132 case Ity_F32:
4133 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
4134 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
4135 default: ppIRType(bb->tyenv->types[i]);
4136 vpanic("iselBB: IRTemp type");
4137 }
4138 env->vregmap[i] = hreg;
4139 env->vregmapHI[i] = hregHI;
4140 }
4141 env->vreg_ctr = j;
4142
4143 /* Ok, finally we can iterate over the statements. */
4144 for (i = 0; i < bb->stmts_used; i++)
4145 iselStmt(env,bb->stmts[i]);
4146
4147 iselNext(env,bb->next,bb->jumpkind);
4148
4149 /* record the number of vregs we used. */
4150 env->code->n_vregs = env->vreg_ctr;
4151 return env->code;
4152 }
4153
4154
4155 /*---------------------------------------------------------------*/
4156 /*--- end host_x86_isel.c ---*/
4157 /*---------------------------------------------------------------*/
4158