1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2012 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_x86_defs.h"
47
48 /* TODO 21 Apr 2005:
49
50 -- (Really an assembler issue) don't emit CMov32 as a cmov
51 insn, since that's expensive on P4 and conditional branch
52 is cheaper if (as we expect) the condition is highly predictable
53
54 -- preserve xmm registers across function calls (by declaring them
55 as trashed by call insns)
56
57 -- preserve x87 ST stack discipline across function calls. Sigh.
58
59 -- Check doHelperCall: if a call is conditional, we cannot safely
60 compute any regparm args directly to registers. Hence, the
61 fast-regparm marshalling should be restricted to unconditional
62 calls only.
63 */
64
65 /*---------------------------------------------------------*/
66 /*--- x87 control word stuff ---*/
67 /*---------------------------------------------------------*/
68
69 /* Vex-generated code expects to run with the FPU set as follows: all
70 exceptions masked, round-to-nearest, precision = 53 bits. This
71 corresponds to a FPU control word value of 0x027F.
72
73 Similarly the SSE control word (%mxcsr) should be 0x1F80.
74
75 %fpucw and %mxcsr should have these values on entry to
76 Vex-generated code, and those values should be unchanged
77 at exit.
78 */
79
80 #define DEFAULT_FPUCW 0x027F
81
82 /* debugging only, do not use */
83 /* define DEFAULT_FPUCW 0x037F */
84
85
86 /*---------------------------------------------------------*/
87 /*--- misc helpers ---*/
88 /*---------------------------------------------------------*/
89
90 /* These are duplicated in guest-x86/toIR.c */
91 static IRExpr* unop ( IROp op, IRExpr* a )
92 {
93 return IRExpr_Unop(op, a);
94 }
95
96 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
97 {
98 return IRExpr_Binop(op, a1, a2);
99 }
100
101 static IRExpr* bind ( Int binder )
102 {
103 return IRExpr_Binder(binder);
104 }
105
106 static Bool isZeroU8 ( IRExpr* e )
107 {
108 return e->tag == Iex_Const
109 && e->Iex.Const.con->tag == Ico_U8
110 && e->Iex.Const.con->Ico.U8 == 0;
111 }
112
113 static Bool isZeroU32 ( IRExpr* e )
114 {
115 return e->tag == Iex_Const
116 && e->Iex.Const.con->tag == Ico_U32
117 && e->Iex.Const.con->Ico.U32 == 0;
118 }
119
120 static Bool isZeroU64 ( IRExpr* e )
121 {
122 return e->tag == Iex_Const
123 && e->Iex.Const.con->tag == Ico_U64
124 && e->Iex.Const.con->Ico.U64 == 0ULL;
125 }
126
127
128 /*---------------------------------------------------------*/
129 /*--- ISelEnv ---*/
130 /*---------------------------------------------------------*/
131
132 /* This carries around:
133
134 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
135 might encounter. This is computed before insn selection starts,
136 and does not change.
137
138 - A mapping from IRTemp to HReg. This tells the insn selector
139 which virtual register(s) are associated with each IRTemp
140 temporary. This is computed before insn selection starts, and
141 does not change. We expect this mapping to map precisely the
142 same set of IRTemps as the type mapping does.
143
144 - vregmap holds the primary register for the IRTemp.
145 - vregmapHI is only used for 64-bit integer-typed
146 IRTemps. It holds the identity of a second
147 32-bit virtual HReg, which holds the high half
148 of the value.
149
150 - The code array, that is, the insns selected so far.
151
152 - A counter, for generating new virtual registers.
153
154 - The host subarchitecture we are selecting insns for.
155 This is set at the start and does not change.
156
157 - A Bool for indicating whether we may generate chain-me
158 instructions for control flow transfers, or whether we must use
159 XAssisted.
160
161 - The maximum guest address of any guest insn in this block.
162 Actually, the address of the highest-addressed byte from any insn
163 in this block. Is set at the start and does not change. This is
164 used for detecting jumps which are definitely forward-edges from
165 this block, and therefore can be made (chained) to the fast entry
166 point of the destination, thereby avoiding the destination's
167 event check.
168
169 Note, this is all (well, mostly) host-independent.
170 */
171
172 typedef
173 struct {
174 /* Constant -- set at the start and do not change. */
175 IRTypeEnv* type_env;
176
177 HReg* vregmap;
178 HReg* vregmapHI;
179 Int n_vregmap;
180
181 UInt hwcaps;
182
183 Bool chainingAllowed;
184 Addr64 max_ga;
185
186 /* These are modified as we go along. */
187 HInstrArray* code;
188 Int vreg_ctr;
189 }
190 ISelEnv;
191
192
193 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
194 {
195 vassert(tmp >= 0);
196 vassert(tmp < env->n_vregmap);
197 return env->vregmap[tmp];
198 }
199
200 static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
201 {
202 vassert(tmp >= 0);
203 vassert(tmp < env->n_vregmap);
204 vassert(env->vregmapHI[tmp] != INVALID_HREG);
205 *vrLO = env->vregmap[tmp];
206 *vrHI = env->vregmapHI[tmp];
207 }
208
209 static void addInstr ( ISelEnv* env, X86Instr* instr )
210 {
211 addHInstr(env->code, instr);
212 if (vex_traceflags & VEX_TRACE_VCODE) {
213 ppX86Instr(instr, False);
214 vex_printf("\n");
215 }
216 }
217
218 static HReg newVRegI ( ISelEnv* env )
219 {
220 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
221 env->vreg_ctr++;
222 return reg;
223 }
224
225 static HReg newVRegF ( ISelEnv* env )
226 {
227 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
228 env->vreg_ctr++;
229 return reg;
230 }
231
232 static HReg newVRegV ( ISelEnv* env )
233 {
234 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
235 env->vreg_ctr++;
236 return reg;
237 }
238
239
240 /*---------------------------------------------------------*/
241 /*--- ISEL: Forward declarations ---*/
242 /*---------------------------------------------------------*/
243
244 /* These are organised as iselXXX and iselXXX_wrk pairs. The
245 iselXXX_wrk do the real work, but are not to be called directly.
246 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
247 checks that all returned registers are virtual. You should not
248 call the _wrk version directly.
249 */
250 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
251 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
252
253 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
254 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
255
256 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
257 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
258
259 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
260 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
261
262 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
263 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
264
265 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
266 ISelEnv* env, IRExpr* e );
267 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
268 ISelEnv* env, IRExpr* e );
269
270 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
271 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
272
273 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
274 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
275
276 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
277 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
278
279 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
280 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
281
282
283 /*---------------------------------------------------------*/
284 /*--- ISEL: Misc helpers ---*/
285 /*---------------------------------------------------------*/
286
287 /* Make an int reg-reg move. */
288
289 static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
290 {
291 vassert(hregClass(src) == HRcInt32);
292 vassert(hregClass(dst) == HRcInt32);
293 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
294 }
295
296
297 /* Make a vector reg-reg move. */
298
299 static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
300 {
301 vassert(hregClass(src) == HRcVec128);
302 vassert(hregClass(dst) == HRcVec128);
303 return X86Instr_SseReRg(Xsse_MOV, src, dst);
304 }
305
306 /* Advance/retreat %esp by n. */
307
308 static void add_to_esp ( ISelEnv* env, Int n )
309 {
310 vassert(n > 0 && n < 256 && (n%4) == 0);
311 addInstr(env,
312 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
313 }
314
315 static void sub_from_esp ( ISelEnv* env, Int n )
316 {
317 vassert(n > 0 && n < 256 && (n%4) == 0);
318 addInstr(env,
319 X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
320 }
321
322
323 /* Given an amode, return one which references 4 bytes further
324 along. */
325
326 static X86AMode* advance4 ( X86AMode* am )
327 {
328 X86AMode* am4 = dopyX86AMode(am);
329 switch (am4->tag) {
330 case Xam_IRRS:
331 am4->Xam.IRRS.imm += 4; break;
332 case Xam_IR:
333 am4->Xam.IR.imm += 4; break;
334 default:
335 vpanic("advance4(x86,host)");
336 }
337 return am4;
338 }
339
340
341 /* Push an arg onto the host stack, in preparation for a call to a
342 helper function of some kind. Returns the number of 32-bit words
343 pushed. */
344
345 static Int pushArg ( ISelEnv* env, IRExpr* arg )
346 {
347 IRType arg_ty = typeOfIRExpr(env->type_env, arg);
348 if (arg_ty == Ity_I32) {
349 addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
350 return 1;
351 } else
352 if (arg_ty == Ity_I64) {
353 HReg rHi, rLo;
354 iselInt64Expr(&rHi, &rLo, env, arg);
355 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
356 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
357 return 2;
358 }
359 ppIRExpr(arg);
360 vpanic("pushArg(x86): can't handle arg of this type");
361 }
362
363
364 /* Complete the call to a helper function, by calling the
365 helper and clearing the args off the stack. */
366
367 static
368 void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
369 IRCallee* cee, Int n_arg_ws )
370 {
371 /* Complication. Need to decide which reg to use as the fn address
372 pointer, in a way that doesn't trash regparm-passed
373 parameters. */
374 vassert(sizeof(void*) == 4);
375
376 addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
377 cee->regparms));
378 if (n_arg_ws > 0)
379 add_to_esp(env, 4*n_arg_ws);
380 }
381
382
383 /* Used only in doHelperCall. See big comment in doHelperCall re
384 handling of regparm args. This function figures out whether
385 evaluation of an expression might require use of a fixed register.
386 If in doubt return True (safe but suboptimal).
387 */
388 static
389 Bool mightRequireFixedRegs ( IRExpr* e )
390 {
391 switch (e->tag) {
392 case Iex_RdTmp: case Iex_Const: case Iex_Get:
393 return False;
394 default:
395 return True;
396 }
397 }
398
399
400 /* Do a complete function call. guard is a Ity_Bit expression
401 indicating whether or not the call happens. If guard==NULL, the
402 call is unconditional. */
403
404 static
405 void doHelperCall ( ISelEnv* env,
406 Bool passBBP,
407 IRExpr* guard, IRCallee* cee, IRExpr** args )
408 {
409 X86CondCode cc;
410 HReg argregs[3];
411 HReg tmpregs[3];
412 Bool danger;
413 Int not_done_yet, n_args, n_arg_ws, stack_limit,
414 i, argreg, argregX;
415
416 /* Marshal args for a call, do the call, and clear the stack.
417 Complexities to consider:
418
419 * if passBBP is True, %ebp (the baseblock pointer) is to be
420 passed as the first arg.
421
422 * If the callee claims regparmness of 1, 2 or 3, we must pass the
423 first 1, 2 or 3 args in registers (EAX, EDX, and ECX
424 respectively). To keep things relatively simple, only args of
425 type I32 may be passed as regparms -- just bomb out if anything
426 else turns up. Clearly this depends on the front ends not
427 trying to pass any other types as regparms.
428 */
429
430 /* 16 Nov 2004: the regparm handling is complicated by the
431 following problem.
432
433 Consider a call to a function with two regparm parameters:
434 f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
435 Suppose code is first generated to compute e1 into %eax. Then,
436 code is generated to compute e2 into %edx. Unfortunately, if
437 the latter code sequence uses %eax, it will trash the value of
438 e1 computed by the former sequence. This could happen if (for
439 example) e2 itself involved a function call. In the code below,
440 args are evaluated right-to-left, not left-to-right, but the
441 principle and the problem are the same.
442
443 One solution is to compute all regparm-bound args into vregs
444 first, and once they are all done, move them to the relevant
445 real regs. This always gives correct code, but it also gives
446 a bunch of vreg-to-rreg moves which are usually redundant but
447 are hard for the register allocator to get rid of.
448
449 A compromise is to first examine all regparm'd argument
450 expressions. If they are all so simple that it is clear
451 they will be evaluated without use of any fixed registers,
452 use the old compute-directly-to-fixed-target scheme. If not,
453 be safe and use the via-vregs scheme.
454
455 Note this requires being able to examine an expression and
456 determine whether or not evaluation of it might use a fixed
457 register. That requires knowledge of how the rest of this
458 insn selector works. Currently just the following 3 are
459 regarded as safe -- hopefully they cover the majority of
460 arguments in practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
461 */
462 vassert(cee->regparms >= 0 && cee->regparms <= 3);
463
464 n_args = n_arg_ws = 0;
465 while (args[n_args]) n_args++;
466
467 not_done_yet = n_args;
468 if (passBBP)
469 not_done_yet++;
470
471 stack_limit = cee->regparms;
472 if (cee->regparms > 0 && passBBP) stack_limit--;
473
474 /* ------ BEGIN marshall all arguments ------ */
475
476 /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
477 for (i = n_args-1; i >= stack_limit; i--) {
478 n_arg_ws += pushArg(env, args[i]);
479 not_done_yet--;
480 }
481
482 /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
483 registers. */
484
485 if (cee->regparms > 0) {
486
487 /* ------ BEGIN deal with regparms ------ */
488
489 /* deal with regparms, not forgetting %ebp if needed. */
490 argregs[0] = hregX86_EAX();
491 argregs[1] = hregX86_EDX();
492 argregs[2] = hregX86_ECX();
493 tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
494
495 argreg = cee->regparms;
496
497 /* In keeping with big comment above, detect potential danger
498 and use the via-vregs scheme if needed. */
499 danger = False;
500 for (i = stack_limit-1; i >= 0; i--) {
501 if (mightRequireFixedRegs(args[i])) {
502 danger = True;
503 break;
504 }
505 }
506
507 if (danger) {
508
509 /* Move via temporaries */
510 argregX = argreg;
511 for (i = stack_limit-1; i >= 0; i--) {
512
513 if (0) {
514 vex_printf("x86 host: register param is complex: ");
515 ppIRExpr(args[i]);
516 vex_printf("\n");
517 }
518
519 argreg--;
520 vassert(argreg >= 0);
521 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
522 tmpregs[argreg] = iselIntExpr_R(env, args[i]);
523 not_done_yet--;
524 }
525 for (i = stack_limit-1; i >= 0; i--) {
526 argregX--;
527 vassert(argregX >= 0);
528 addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
529 }
530
531 } else {
532 /* It's safe to compute all regparm args directly into their
533 target registers. */
534 for (i = stack_limit-1; i >= 0; i--) {
535 argreg--;
536 vassert(argreg >= 0);
537 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
538 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
539 iselIntExpr_RMI(env, args[i]),
540 argregs[argreg]));
541 not_done_yet--;
542 }
543
544 }
545
546 /* Not forgetting %ebp if needed. */
547 if (passBBP) {
548 vassert(argreg == 1);
549 addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
550 not_done_yet--;
551 }
552
553 /* ------ END deal with regparms ------ */
554
555 } else {
556
557 /* No regparms. Heave %ebp on the stack if needed. */
558 if (passBBP) {
559 addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
560 n_arg_ws++;
561 not_done_yet--;
562 }
563
564 }
565
566 vassert(not_done_yet == 0);
567
568 /* ------ END marshall all arguments ------ */
569
570 /* Now we can compute the condition. We can't do it earlier
571 because the argument computations could trash the condition
572 codes. Be a bit clever to handle the common case where the
573 guard is 1:Bit. */
574 cc = Xcc_ALWAYS;
575 if (guard) {
576 if (guard->tag == Iex_Const
577 && guard->Iex.Const.con->tag == Ico_U1
578 && guard->Iex.Const.con->Ico.U1 == True) {
579 /* unconditional -- do nothing */
580 } else {
581 cc = iselCondCode( env, guard );
582 }
583 }
584
585 /* call the helper, and get the args off the stack afterwards. */
586 callHelperAndClearArgs( env, cc, cee, n_arg_ws );
587 }
588
589
590 /* Given a guest-state array descriptor, an index expression and a
591 bias, generate an X86AMode holding the relevant guest state
592 offset. */
593
594 static
595 X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
596 IRExpr* off, Int bias )
597 {
598 HReg tmp, roff;
599 Int elemSz = sizeofIRType(descr->elemTy);
600 Int nElems = descr->nElems;
601 Int shift = 0;
602
603 /* throw out any cases not generated by an x86 front end. In
604 theory there might be a day where we need to handle them -- if
605 we ever run non-x86-guest on x86 host. */
606
607 if (nElems != 8)
608 vpanic("genGuestArrayOffset(x86 host)(1)");
609
610 switch (elemSz) {
611 case 1: shift = 0; break;
612 case 4: shift = 2; break;
613 case 8: shift = 3; break;
614 default: vpanic("genGuestArrayOffset(x86 host)(2)");
615 }
616
617 /* Compute off into a reg, %off. Then return:
618
619 movl %off, %tmp
620 addl $bias, %tmp (if bias != 0)
621 andl $7, %tmp
622 ... base(%ebp, %tmp, shift) ...
623 */
624 tmp = newVRegI(env);
625 roff = iselIntExpr_R(env, off);
626 addInstr(env, mk_iMOVsd_RR(roff, tmp));
627 if (bias != 0) {
628 addInstr(env,
629 X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
630 }
631 addInstr(env,
632 X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
633 return
634 X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
635 }
636
637
638 /* Mess with the FPU's rounding mode: set to the default rounding mode
639 (DEFAULT_FPUCW). */
640 static
641 void set_FPU_rounding_default ( ISelEnv* env )
642 {
643 /* pushl $DEFAULT_FPUCW
644 fldcw 0(%esp)
645 addl $4, %esp
646 */
647 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
648 addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
649 addInstr(env, X86Instr_FpLdCW(zero_esp));
650 add_to_esp(env, 4);
651 }
652
653
654 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
655 expression denoting a value in the range 0 .. 3, indicating a round
656 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
657 the same rounding.
658 */
659 static
660 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
661 {
662 HReg rrm = iselIntExpr_R(env, mode);
663 HReg rrm2 = newVRegI(env);
664 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
665
666 /* movl %rrm, %rrm2
667 andl $3, %rrm2 -- shouldn't be needed; paranoia
668 shll $10, %rrm2
669 orl $DEFAULT_FPUCW, %rrm2
670 pushl %rrm2
671 fldcw 0(%esp)
672 addl $4, %esp
673 */
674 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
675 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
676 addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
677 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
678 addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
679 addInstr(env, X86Instr_FpLdCW(zero_esp));
680 add_to_esp(env, 4);
681 }
682
683
684 /* Generate !src into a new vector register, and be sure that the code
685 is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
686 way to do this.
687 */
688 static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
689 {
690 HReg dst = newVRegV(env);
691 /* Set dst to zero. If dst contains a NaN then all hell might
692 break loose after the comparison. So, first zero it. */
693 addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
694 /* And now make it all 1s ... */
695 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
696 /* Finally, xor 'src' into it. */
697 addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
698 /* Doesn't that just totally suck? */
699 return dst;
700 }
701
702
703 /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
704 after most non-simple FPU operations (simple = +, -, *, / and
705 sqrt).
706
707 This could be done a lot more efficiently if needed, by loading
708 zero and adding it to the value to be rounded (fldz ; faddp?).
709 */
710 static void roundToF64 ( ISelEnv* env, HReg reg )
711 {
712 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
713 sub_from_esp(env, 8);
714 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
715 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
716 add_to_esp(env, 8);
717 }
718
719
720 /*---------------------------------------------------------*/
721 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
722 /*---------------------------------------------------------*/
723
724 /* Select insns for an integer-typed expression, and add them to the
725 code list. Return a reg holding the result. This reg will be a
726 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
727 want to modify it, ask for a new vreg, copy it in there, and modify
728 the copy. The register allocator will do its best to map both
729 vregs to the same real register, so the copies will often disappear
730 later in the game.
731
732 This should handle expressions of 32, 16 and 8-bit type. All
733 results are returned in a 32-bit register. For 16- and 8-bit
734 expressions, the upper 16/24 bits are arbitrary, so you should mask
735 or sign extend partial values if necessary.
736 */
737
738 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
739 {
740 HReg r = iselIntExpr_R_wrk(env, e);
741 /* sanity checks ... */
742 # if 0
743 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
744 # endif
745 vassert(hregClass(r) == HRcInt32);
746 vassert(hregIsVirtual(r));
747 return r;
748 }
749
750 /* DO NOT CALL THIS DIRECTLY ! */
751 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
752 {
753 MatchInfo mi;
754
755 IRType ty = typeOfIRExpr(env->type_env,e);
756 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
757
758 switch (e->tag) {
759
760 /* --------- TEMP --------- */
761 case Iex_RdTmp: {
762 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
763 }
764
765 /* --------- LOAD --------- */
766 case Iex_Load: {
767 HReg dst = newVRegI(env);
768 X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
769
770 /* We can't handle big-endian loads, nor load-linked. */
771 if (e->Iex.Load.end != Iend_LE)
772 goto irreducible;
773
774 if (ty == Ity_I32) {
775 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
776 X86RMI_Mem(amode), dst) );
777 return dst;
778 }
779 if (ty == Ity_I16) {
780 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
781 return dst;
782 }
783 if (ty == Ity_I8) {
784 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
785 return dst;
786 }
787 break;
788 }
789
790 /* --------- TERNARY OP --------- */
791 case Iex_Triop: {
792 IRTriop *triop = e->Iex.Triop.details;
793 /* C3210 flags following FPU partial remainder (fprem), both
794 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
795 if (triop->op == Iop_PRemC3210F64
796 || triop->op == Iop_PRem1C3210F64) {
797 HReg junk = newVRegF(env);
798 HReg dst = newVRegI(env);
799 HReg srcL = iselDblExpr(env, triop->arg2);
800 HReg srcR = iselDblExpr(env, triop->arg3);
801 /* XXXROUNDINGFIXME */
802 /* set roundingmode here */
803 addInstr(env, X86Instr_FpBinary(
804 triop->op==Iop_PRemC3210F64
805 ? Xfp_PREM : Xfp_PREM1,
806 srcL,srcR,junk
807 ));
808 /* The previous pseudo-insn will have left the FPU's C3210
809 flags set correctly. So bag them. */
810 addInstr(env, X86Instr_FpStSW_AX());
811 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
812 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
813 return dst;
814 }
815
816 break;
817 }
818
819 /* --------- BINARY OP --------- */
820 case Iex_Binop: {
821 X86AluOp aluOp;
822 X86ShiftOp shOp;
823
824 /* Pattern: Sub32(0,x) */
825 if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
826 HReg dst = newVRegI(env);
827 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
828 addInstr(env, mk_iMOVsd_RR(reg,dst));
829 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
830 return dst;
831 }
832
833 /* Is it an addition or logical style op? */
834 switch (e->Iex.Binop.op) {
835 case Iop_Add8: case Iop_Add16: case Iop_Add32:
836 aluOp = Xalu_ADD; break;
837 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
838 aluOp = Xalu_SUB; break;
839 case Iop_And8: case Iop_And16: case Iop_And32:
840 aluOp = Xalu_AND; break;
841 case Iop_Or8: case Iop_Or16: case Iop_Or32:
842 aluOp = Xalu_OR; break;
843 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
844 aluOp = Xalu_XOR; break;
845 case Iop_Mul16: case Iop_Mul32:
846 aluOp = Xalu_MUL; break;
847 default:
848 aluOp = Xalu_INVALID; break;
849 }
850 /* For commutative ops we assume any literal
851 values are on the second operand. */
852 if (aluOp != Xalu_INVALID) {
853 HReg dst = newVRegI(env);
854 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
855 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
856 addInstr(env, mk_iMOVsd_RR(reg,dst));
857 addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
858 return dst;
859 }
860 /* Could do better here; forcing the first arg into a reg
861 isn't always clever.
862 -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
863 LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
864 t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
865 movl 0xFFFFFFA0(%vr41),%vr107
866 movl 0xFFFFFFA4(%vr41),%vr108
867 movl %vr107,%vr106
868 xorl %vr108,%vr106
869 movl 0xFFFFFFA8(%vr41),%vr109
870 movl %vr106,%vr105
871 andl %vr109,%vr105
872 movl 0xFFFFFFA0(%vr41),%vr110
873 movl %vr105,%vr104
874 xorl %vr110,%vr104
875 movl %vr104,%vr70
876 */
877
878 /* Perhaps a shift op? */
879 switch (e->Iex.Binop.op) {
880 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
881 shOp = Xsh_SHL; break;
882 case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
883 shOp = Xsh_SHR; break;
884 case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
885 shOp = Xsh_SAR; break;
886 default:
887 shOp = Xsh_INVALID; break;
888 }
889 if (shOp != Xsh_INVALID) {
890 HReg dst = newVRegI(env);
891
892 /* regL = the value to be shifted */
893 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
894 addInstr(env, mk_iMOVsd_RR(regL,dst));
895
896 /* Do any necessary widening for 16/8 bit operands */
897 switch (e->Iex.Binop.op) {
898 case Iop_Shr8:
899 addInstr(env, X86Instr_Alu32R(
900 Xalu_AND, X86RMI_Imm(0xFF), dst));
901 break;
902 case Iop_Shr16:
903 addInstr(env, X86Instr_Alu32R(
904 Xalu_AND, X86RMI_Imm(0xFFFF), dst));
905 break;
906 case Iop_Sar8:
907 addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
908 addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
909 break;
910 case Iop_Sar16:
911 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
912 addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
913 break;
914 default: break;
915 }
916
917 /* Now consider the shift amount. If it's a literal, we
918 can do a much better job than the general case. */
919 if (e->Iex.Binop.arg2->tag == Iex_Const) {
920 /* assert that the IR is well-typed */
921 Int nshift;
922 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
923 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
924 vassert(nshift >= 0);
925 if (nshift > 0)
926 /* Can't allow nshift==0 since that means %cl */
927 addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
928 } else {
929 /* General case; we have to force the amount into %cl. */
930 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
931 addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
932 addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
933 }
934 return dst;
935 }
936
937 /* Handle misc other ops. */
938
939 if (e->Iex.Binop.op == Iop_Max32U) {
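/* Unsigned max: copy arg1 into dst, compare it with arg2, and
   overwrite dst with arg2 (via CMOVB) when dst is below arg2
   unsignedly. */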
940 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
941 HReg dst = newVRegI(env);
942 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
943 addInstr(env, mk_iMOVsd_RR(src1,dst));
944 addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
945 addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
946 return dst;
947 }
948
949 if (e->Iex.Binop.op == Iop_8HLto16) {
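/* 8HLto16(hi,lo) = (hi << 8) | (lo & 0xFF), assembled in a 32-bit
   register; the upper 16 bits of the result are arbitrary. */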
950 HReg hi8 = newVRegI(env);
951 HReg lo8 = newVRegI(env);
952 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
953 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
954 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
955 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
956 addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
957 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
958 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
959 return hi8;
960 }
961
962 if (e->Iex.Binop.op == Iop_16HLto32) {
963 HReg hi16 = newVRegI(env);
964 HReg lo16 = newVRegI(env);
965 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
966 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
967 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
968 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
969 addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
970 addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
971 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
972 return hi16;
973 }
974
975 if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
976 || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
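/* Widening multiply of 8/16-bit operands: extend each operand to
   32 bits by shifting left then right again (SAR for the signed
   variants, SHR for the unsigned ones), then a single 32-bit
   multiply yields the full 16/32-bit product in the low bits. */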
977 HReg a16 = newVRegI(env);
978 HReg b16 = newVRegI(env);
979 HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
980 HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
981 Int shift = (e->Iex.Binop.op == Iop_MullS8
982 || e->Iex.Binop.op == Iop_MullU8)
983 ? 24 : 16;
984 X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
985 || e->Iex.Binop.op == Iop_MullS16)
986 ? Xsh_SAR : Xsh_SHR;
987
988 addInstr(env, mk_iMOVsd_RR(a16s, a16));
989 addInstr(env, mk_iMOVsd_RR(b16s, b16));
990 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
991 addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
992 addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
993 addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
994 addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
995 return b16;
996 }
997
998 if (e->Iex.Binop.op == Iop_CmpF64) {
999 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1000 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1001 HReg dst = newVRegI(env);
1002 addInstr(env, X86Instr_FpCmp(fL,fR,dst));
1003 /* shift this right 8 bits so as to conform to CmpF64
1004 definition. */
1005 addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
1006 return dst;
1007 }
1008
1009 if (e->Iex.Binop.op == Iop_F64toI32S
1010 || e->Iex.Binop.op == Iop_F64toI16S) {
1011 Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
1012 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1013 HReg dst = newVRegI(env);
1014
1015 /* Used several times ... */
1016 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1017
1018 /* rf now holds the value to be converted, and rrm holds the
1019 rounding mode value, encoded as per the IRRoundingMode
1020 enum. The first thing to do is set the FPU's rounding
1021 mode accordingly. */
1022
1023 /* Create a space for the format conversion. */
1024 /* subl $4, %esp */
1025 sub_from_esp(env, 4);
1026
1027 /* Set host rounding mode */
1028 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1029
1030 /* gistw/l %rf, 0(%esp) */
1031 addInstr(env, X86Instr_FpLdStI(False/*store*/,
1032 toUChar(sz), rf, zero_esp));
1033
1034 if (sz == 2) {
1035 /* movzwl 0(%esp), %dst */
1036 addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1037 } else {
1038 /* movl 0(%esp), %dst */
1039 vassert(sz == 4);
1040 addInstr(env, X86Instr_Alu32R(
1041 Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1042 }
1043
1044 /* Restore default FPU rounding. */
1045 set_FPU_rounding_default( env );
1046
1047 /* addl $4, %esp */
1048 add_to_esp(env, 4);
1049 return dst;
1050 }
1051
1052 break;
1053 }
1054
1055 /* --------- UNARY OP --------- */
1056 case Iex_Unop: {
1057
1058 /* 1Uto8(32to1(expr32)) */
1059 if (e->Iex.Unop.op == Iop_1Uto8) {
1060 DECLARE_PATTERN(p_32to1_then_1Uto8);
1061 DEFINE_PATTERN(p_32to1_then_1Uto8,
1062 unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1063 if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1064 IRExpr* expr32 = mi.bindee[0];
1065 HReg dst = newVRegI(env);
1066 HReg src = iselIntExpr_R(env, expr32);
1067 addInstr(env, mk_iMOVsd_RR(src,dst) );
1068 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1069 X86RMI_Imm(1), dst));
1070 return dst;
1071 }
1072 }
1073
1074 /* 8Uto32(LDle(expr32)) */
1075 if (e->Iex.Unop.op == Iop_8Uto32) {
1076 DECLARE_PATTERN(p_LDle8_then_8Uto32);
1077 DEFINE_PATTERN(p_LDle8_then_8Uto32,
1078 unop(Iop_8Uto32,
1079 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1080 if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1081 HReg dst = newVRegI(env);
1082 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1083 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1084 return dst;
1085 }
1086 }
1087
1088 /* 8Sto32(LDle(expr32)) */
1089 if (e->Iex.Unop.op == Iop_8Sto32) {
1090 DECLARE_PATTERN(p_LDle8_then_8Sto32);
1091 DEFINE_PATTERN(p_LDle8_then_8Sto32,
1092 unop(Iop_8Sto32,
1093 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1094 if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1095 HReg dst = newVRegI(env);
1096 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1097 addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1098 return dst;
1099 }
1100 }
1101
1102 /* 16Uto32(LDle(expr32)) */
1103 if (e->Iex.Unop.op == Iop_16Uto32) {
1104 DECLARE_PATTERN(p_LDle16_then_16Uto32);
1105 DEFINE_PATTERN(p_LDle16_then_16Uto32,
1106 unop(Iop_16Uto32,
1107 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1108 if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1109 HReg dst = newVRegI(env);
1110 X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1111 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1112 return dst;
1113 }
1114 }
1115
1116 /* 8Uto32(GET:I8) */
1117 if (e->Iex.Unop.op == Iop_8Uto32) {
1118 if (e->Iex.Unop.arg->tag == Iex_Get) {
1119 HReg dst;
1120 X86AMode* amode;
1121 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1122 dst = newVRegI(env);
1123 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1124 hregX86_EBP());
1125 addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1126 return dst;
1127 }
1128 }
1129
1130 /* 16Uto32(GET:I16) */
1131 if (e->Iex.Unop.op == Iop_16Uto32) {
1132 if (e->Iex.Unop.arg->tag == Iex_Get) {
1133 HReg dst;
1134 X86AMode* amode;
1135 vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1136 dst = newVRegI(env);
1137 amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1138 hregX86_EBP());
1139 addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1140 return dst;
1141 }
1142 }
1143
1144 switch (e->Iex.Unop.op) {
1145 case Iop_8Uto16:
1146 case Iop_8Uto32:
1147 case Iop_16Uto32: {
1148 HReg dst = newVRegI(env);
1149 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1150 UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1151 addInstr(env, mk_iMOVsd_RR(src,dst) );
1152 addInstr(env, X86Instr_Alu32R(Xalu_AND,
1153 X86RMI_Imm(mask), dst));
1154 return dst;
1155 }
1156 case Iop_8Sto16:
1157 case Iop_8Sto32:
1158 case Iop_16Sto32: {
1159 HReg dst = newVRegI(env);
1160 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1161 UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
1162 addInstr(env, mk_iMOVsd_RR(src,dst) );
1163 addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
1164 addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
1165 return dst;
1166 }
1167 case Iop_Not8:
1168 case Iop_Not16:
1169 case Iop_Not32: {
1170 HReg dst = newVRegI(env);
1171 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1172 addInstr(env, mk_iMOVsd_RR(src,dst) );
1173 addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
1174 return dst;
1175 }
1176 case Iop_64HIto32: {
1177 HReg rHi, rLo;
1178 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1179 return rHi; /* and abandon rLo .. poor wee thing :-) */
1180 }
1181 case Iop_64to32: {
1182 HReg rHi, rLo;
1183 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1184 return rLo; /* similar stupid comment to the above ... */
1185 }
1186 case Iop_16HIto8:
1187 case Iop_32HIto16: {
1188 HReg dst = newVRegI(env);
1189 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1190 Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1191 addInstr(env, mk_iMOVsd_RR(src,dst) );
1192 addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1193 return dst;
1194 }
1195 case Iop_1Uto32:
1196 case Iop_1Uto8: {
1197 HReg dst = newVRegI(env);
1198 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1199 addInstr(env, X86Instr_Set32(cond,dst));
1200 return dst;
1201 }
1202 case Iop_1Sto8:
1203 case Iop_1Sto16:
1204 case Iop_1Sto32: {
1205 /* could do better than this, but for now ... */
1206 HReg dst = newVRegI(env);
1207 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1208 addInstr(env, X86Instr_Set32(cond,dst));
1209 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1210 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1211 return dst;
1212 }
1213 case Iop_Ctz32: {
1214 /* Count trailing zeroes, implemented by x86 'bsfl' */
1215 HReg dst = newVRegI(env);
1216 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1217 addInstr(env, X86Instr_Bsfr32(True,src,dst));
1218 return dst;
1219 }
1220 case Iop_Clz32: {
1221 /* Count leading zeroes. Do 'bsrl' to establish the index
1222 of the highest set bit, and subtract that value from
1223 31. */
1224 HReg tmp = newVRegI(env);
1225 HReg dst = newVRegI(env);
1226 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1227 addInstr(env, X86Instr_Bsfr32(False,src,tmp));
1228 addInstr(env, X86Instr_Alu32R(Xalu_MOV,
1229 X86RMI_Imm(31), dst));
1230 addInstr(env, X86Instr_Alu32R(Xalu_SUB,
1231 X86RMI_Reg(tmp), dst));
1232 return dst;
1233 }
1234
1235 case Iop_CmpwNEZ32: {
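/* CmpwNEZ32(x) = (x == 0) ? 0 : 0xFFFFFFFF.  (x | -x) has its
   sign bit set iff x != 0; the arithmetic right shift by 31 then
   copies that bit into every position. */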
1236 HReg dst = newVRegI(env);
1237 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1238 addInstr(env, mk_iMOVsd_RR(src,dst));
1239 addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
1240 addInstr(env, X86Instr_Alu32R(Xalu_OR,
1241 X86RMI_Reg(src), dst));
1242 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1243 return dst;
1244 }
1245 case Iop_Left8:
1246 case Iop_Left16:
1247 case Iop_Left32: {
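/* Leftx(x) = x | -x: zero maps to zero; otherwise every bit from
   the lowest set bit of x upwards becomes 1. */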
1248 HReg dst = newVRegI(env);
1249 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1250 addInstr(env, mk_iMOVsd_RR(src, dst));
1251 addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
1252 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
1253 return dst;
1254 }
1255
1256 case Iop_V128to32: {
1257 HReg dst = newVRegI(env);
1258 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1259 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1260 sub_from_esp(env, 16);
1261 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1262 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1263 add_to_esp(env, 16);
1264 return dst;
1265 }
1266
1267 /* ReinterpF32asI32(e) */
1268 /* Given an IEEE754 single, produce an I32 with the same bit
1269 pattern. Keep stack 8-aligned even though only using 4
1270 bytes. */
1271 case Iop_ReinterpF32asI32: {
1272 HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
1273 HReg dst = newVRegI(env);
1274 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1275 /* paranoia */
1276 set_FPU_rounding_default(env);
1277 /* subl $8, %esp */
1278 sub_from_esp(env, 8);
1279 /* gstF %rf, 0(%esp) */
1280 addInstr(env,
1281 X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
1282 /* movl 0(%esp), %dst */
1283 addInstr(env,
1284 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1285 /* addl $8, %esp */
1286 add_to_esp(env, 8);
1287 return dst;
1288 }
1289
1290 case Iop_16to8:
1291 case Iop_32to8:
1292 case Iop_32to16:
1293 /* These are no-ops. */
1294 return iselIntExpr_R(env, e->Iex.Unop.arg);
1295
1296 default:
1297 break;
1298 }
1299 break;
1300 }
1301
1302 /* --------- GET --------- */
1303 case Iex_Get: {
1304 if (ty == Ity_I32) {
1305 HReg dst = newVRegI(env);
1306 addInstr(env, X86Instr_Alu32R(
1307 Xalu_MOV,
1308 X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1309 hregX86_EBP())),
1310 dst));
1311 return dst;
1312 }
1313 if (ty == Ity_I8 || ty == Ity_I16) {
1314 HReg dst = newVRegI(env);
1315 addInstr(env, X86Instr_LoadEX(
1316 toUChar(ty==Ity_I8 ? 1 : 2),
1317 False,
1318 X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1319 dst));
1320 return dst;
1321 }
1322 break;
1323 }
1324
1325 case Iex_GetI: {
1326 X86AMode* am
1327 = genGuestArrayOffset(
1328 env, e->Iex.GetI.descr,
1329 e->Iex.GetI.ix, e->Iex.GetI.bias );
1330 HReg dst = newVRegI(env);
1331 if (ty == Ity_I8) {
1332 addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1333 return dst;
1334 }
1335 if (ty == Ity_I32) {
1336 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1337 return dst;
1338 }
1339 break;
1340 }
1341
1342 /* --------- CCALL --------- */
1343 case Iex_CCall: {
1344 HReg dst = newVRegI(env);
1345 vassert(ty == e->Iex.CCall.retty);
1346
1347 /* be very restrictive for now. Only 32/64-bit ints allowed
1348 for args, and 32 bits for return type. */
1349 if (e->Iex.CCall.retty != Ity_I32)
1350 goto irreducible;
1351
1352 /* Marshal args, do the call, clear stack. */
1353 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1354
1355 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1356 return dst;
1357 }
1358
1359 /* --------- LITERAL --------- */
1360 /* 32/16/8-bit literals */
1361 case Iex_Const: {
1362 X86RMI* rmi = iselIntExpr_RMI ( env, e );
1363 HReg r = newVRegI(env);
1364 addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
1365 return r;
1366 }
1367
1368 /* --------- MULTIPLEX --------- */
1369 case Iex_Mux0X: {
1370 if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1371 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
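/* Mux0X(cond, expr0, exprX): start with exprX in dst, test the
   low 8 bits of cond, and overwrite dst with expr0 (via CMOVZ)
   when they are zero. */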
1372 X86RM* r8;
1373 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1374 X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1375 HReg dst = newVRegI(env);
1376 addInstr(env, mk_iMOVsd_RR(rX,dst));
1377 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
1378 addInstr(env, X86Instr_Test32(0xFF, r8));
1379 addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
1380 return dst;
1381 }
1382 break;
1383 }
1384
1385 default:
1386 break;
1387 } /* switch (e->tag) */
1388
1389 /* We get here if no pattern matched. */
1390 irreducible:
1391 ppIRExpr(e);
1392 vpanic("iselIntExpr_R: cannot reduce tree");
1393 }
1394
1395
1396 /*---------------------------------------------------------*/
1397 /*--- ISEL: Integer expression auxiliaries ---*/
1398 /*---------------------------------------------------------*/
1399
1400 /* --------------------- AMODEs --------------------- */
1401
1402 /* Return an AMode which computes the value of the specified
1403 expression, possibly also adding insns to the code list as a
1404 result. The expression may only be a 32-bit one.
1405 */
1406
1407 static Bool sane_AMode ( X86AMode* am )
1408 {
1409 switch (am->tag) {
1410 case Xam_IR:
1411 return
1412 toBool( hregClass(am->Xam.IR.reg) == HRcInt32
1413 && (hregIsVirtual(am->Xam.IR.reg)
1414 || am->Xam.IR.reg == hregX86_EBP()) );
1415 case Xam_IRRS:
1416 return
1417 toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
1418 && hregIsVirtual(am->Xam.IRRS.base)
1419 && hregClass(am->Xam.IRRS.index) == HRcInt32
1420 && hregIsVirtual(am->Xam.IRRS.index) );
1421 default:
1422 vpanic("sane_AMode: unknown x86 amode tag");
1423 }
1424 }
1425
1426 static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1427 {
1428 X86AMode* am = iselIntExpr_AMode_wrk(env, e);
1429 vassert(sane_AMode(am));
1430 return am;
1431 }
1432
1433 /* DO NOT CALL THIS DIRECTLY ! */
1434 static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1435 {
1436 IRType ty = typeOfIRExpr(env->type_env,e);
1437 vassert(ty == Ity_I32);
1438
1439 /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
1440 if (e->tag == Iex_Binop
1441 && e->Iex.Binop.op == Iop_Add32
1442 && e->Iex.Binop.arg2->tag == Iex_Const
1443 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
1444 && e->Iex.Binop.arg1->tag == Iex_Binop
1445 && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
1446 && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
1447 && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1448 && e->Iex.Binop.arg1
1449 ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1450 && e->Iex.Binop.arg1
1451 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1452 UInt shift = e->Iex.Binop.arg1
1453 ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1454 UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1455 if (shift == 1 || shift == 2 || shift == 3) {
1456 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
1457 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
1458 ->Iex.Binop.arg2->Iex.Binop.arg1 );
1459 return X86AMode_IRRS(imm32, r1, r2, shift);
1460 }
1461 }
1462
1463 /* Add32(expr1, Shl32(expr2, imm)) */
1464 if (e->tag == Iex_Binop
1465 && e->Iex.Binop.op == Iop_Add32
1466 && e->Iex.Binop.arg2->tag == Iex_Binop
1467 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
1468 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1469 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1470 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1471 if (shift == 1 || shift == 2 || shift == 3) {
1472 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1473 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1474 return X86AMode_IRRS(0, r1, r2, shift);
1475 }
1476 }
1477
1478 /* Add32(expr,i) */
1479 if (e->tag == Iex_Binop
1480 && e->Iex.Binop.op == Iop_Add32
1481 && e->Iex.Binop.arg2->tag == Iex_Const
1482 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
1483 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1484 return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
1485 }
1486
1487 /* Doesn't match anything in particular. Generate it into
1488 a register and use that. */
1489 {
1490 HReg r1 = iselIntExpr_R(env, e);
1491 return X86AMode_IR(0, r1);
1492 }
1493 }
1494
1495
1496 /* --------------------- RMIs --------------------- */
1497
1498 /* Similarly, calculate an expression into an X86RMI operand. As with
1499 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1500
1501 static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1502 {
1503 X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1504 /* sanity checks ... */
1505 switch (rmi->tag) {
1506 case Xrmi_Imm:
1507 return rmi;
1508 case Xrmi_Reg:
1509 vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
1510 vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
1511 return rmi;
1512 case Xrmi_Mem:
1513 vassert(sane_AMode(rmi->Xrmi.Mem.am));
1514 return rmi;
1515 default:
1516 vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
1517 }
1518 }
1519
1520 /* DO NOT CALL THIS DIRECTLY ! */
1521 static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1522 {
1523 IRType ty = typeOfIRExpr(env->type_env,e);
1524 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1525
1526 /* special case: immediate */
1527 if (e->tag == Iex_Const) {
1528 UInt u;
1529 switch (e->Iex.Const.con->tag) {
1530 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1531 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1532 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1533 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1534 }
1535 return X86RMI_Imm(u);
1536 }
1537
1538 /* special case: 32-bit GET */
1539 if (e->tag == Iex_Get && ty == Ity_I32) {
1540 return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
1541 hregX86_EBP()));
1542 }
1543
1544 /* special case: 32-bit load from memory */
1545 if (e->tag == Iex_Load && ty == Ity_I32
1546 && e->Iex.Load.end == Iend_LE) {
1547 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1548 return X86RMI_Mem(am);
1549 }
1550
1551 /* default case: calculate into a register and return that */
1552 {
1553 HReg r = iselIntExpr_R ( env, e );
1554 return X86RMI_Reg(r);
1555 }
1556 }
1557
1558
1559 /* --------------------- RIs --------------------- */
1560
1561 /* Calculate an expression into an X86RI operand. As with
1562 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1563
1564 static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
1565 {
1566 X86RI* ri = iselIntExpr_RI_wrk(env, e);
1567 /* sanity checks ... */
1568 switch (ri->tag) {
1569 case Xri_Imm:
1570 return ri;
1571 case Xri_Reg:
1572 vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
1573 vassert(hregIsVirtual(ri->Xri.Reg.reg));
1574 return ri;
1575 default:
1576 vpanic("iselIntExpr_RI: unknown x86 RI tag");
1577 }
1578 }
1579
1580 /* DO NOT CALL THIS DIRECTLY ! */
1581 static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
1582 {
1583 IRType ty = typeOfIRExpr(env->type_env,e);
1584 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1585
1586 /* special case: immediate */
1587 if (e->tag == Iex_Const) {
1588 UInt u;
1589 switch (e->Iex.Const.con->tag) {
1590 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1591 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1592 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1593 default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
1594 }
1595 return X86RI_Imm(u);
1596 }
1597
1598 /* default case: calculate into a register and return that */
1599 {
1600 HReg r = iselIntExpr_R ( env, e );
1601 return X86RI_Reg(r);
1602 }
1603 }
1604
1605
1606 /* --------------------- RMs --------------------- */
1607
1608 /* Similarly, calculate an expression into an X86RM operand. As with
1609 iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
1610
1611 static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
1612 {
1613 X86RM* rm = iselIntExpr_RM_wrk(env, e);
1614 /* sanity checks ... */
1615 switch (rm->tag) {
1616 case Xrm_Reg:
1617 vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
1618 vassert(hregIsVirtual(rm->Xrm.Reg.reg));
1619 return rm;
1620 case Xrm_Mem:
1621 vassert(sane_AMode(rm->Xrm.Mem.am));
1622 return rm;
1623 default:
1624 vpanic("iselIntExpr_RM: unknown x86 RM tag");
1625 }
1626 }
1627
1628 /* DO NOT CALL THIS DIRECTLY ! */
1629 static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
1630 {
1631 IRType ty = typeOfIRExpr(env->type_env,e);
1632 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1633
1634 /* special case: 32-bit GET */
1635 if (e->tag == Iex_Get && ty == Ity_I32) {
1636 return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
1637 hregX86_EBP()));
1638 }
1639
1640 /* special case: load from memory */
1641
1642 /* default case: calculate into a register and return that */
1643 {
1644 HReg r = iselIntExpr_R ( env, e );
1645 return X86RM_Reg(r);
1646 }
1647 }
1648
1649
1650 /* --------------------- CONDCODE --------------------- */
1651
1652 /* Generate code to evaluate a bit-typed expression, returning the
1653 condition code which would result if the expression had notionally
1654 been evaluated to 1. */
1655
1656 static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1657 {
1658 /* Uh, there's nothing we can sanity check here, unfortunately. */
1659 return iselCondCode_wrk(env,e);
1660 }
1661
1662 /* DO NOT CALL THIS DIRECTLY ! */
1663 static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1664 {
1665 MatchInfo mi;
1666
1667 vassert(e);
1668 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1669
1670 /* var */
1671 if (e->tag == Iex_RdTmp) {
1672 HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1673 /* Test32 doesn't modify r32; so this is OK. */
1674 addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
1675 return Xcc_NZ;
1676 }
1677
1678 /* Constant 1:Bit */
1679 if (e->tag == Iex_Const) {
1680 HReg r;
1681 vassert(e->Iex.Const.con->tag == Ico_U1);
1682 vassert(e->Iex.Const.con->Ico.U1 == True
1683 || e->Iex.Const.con->Ico.U1 == False);
1684 r = newVRegI(env);
1685 addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
1686 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
1687 return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
1688 }
1689
1690 /* Not1(e) */
1691 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1692 /* Generate code for the arg, and negate the test condition */
1693 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1694 }
1695
1696 /* --- patterns rooted at: 32to1 --- */
1697
1698 if (e->tag == Iex_Unop
1699 && e->Iex.Unop.op == Iop_32to1) {
1700 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1701 addInstr(env, X86Instr_Test32(1,rm));
1702 return Xcc_NZ;
1703 }
1704
1705 /* --- patterns rooted at: CmpNEZ8 --- */
1706
1707 /* CmpNEZ8(x) */
1708 if (e->tag == Iex_Unop
1709 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1710 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1711 addInstr(env, X86Instr_Test32(0xFF,rm));
1712 return Xcc_NZ;
1713 }
1714
1715 /* --- patterns rooted at: CmpNEZ16 --- */
1716
1717 /* CmpNEZ16(x) */
1718 if (e->tag == Iex_Unop
1719 && e->Iex.Unop.op == Iop_CmpNEZ16) {
1720 X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
1721 addInstr(env, X86Instr_Test32(0xFFFF,rm));
1722 return Xcc_NZ;
1723 }
1724
1725 /* --- patterns rooted at: CmpNEZ32 --- */
1726
1727 /* CmpNEZ32(And32(x,y)) */
1728 {
1729 DECLARE_PATTERN(p_CmpNEZ32_And32);
1730 DEFINE_PATTERN(p_CmpNEZ32_And32,
1731 unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
1732 if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
1733 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1734 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1735 HReg tmp = newVRegI(env);
1736 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1737 addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
1738 return Xcc_NZ;
1739 }
1740 }
1741
1742 /* CmpNEZ32(Or32(x,y)) */
1743 {
1744 DECLARE_PATTERN(p_CmpNEZ32_Or32);
1745 DEFINE_PATTERN(p_CmpNEZ32_Or32,
1746 unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
1747 if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
1748 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
1749 X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
1750 HReg tmp = newVRegI(env);
1751 addInstr(env, mk_iMOVsd_RR(r0, tmp));
1752 addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
1753 return Xcc_NZ;
1754 }
1755 }
1756
1757 /* CmpNEZ32(GET(..):I32) */
1758 if (e->tag == Iex_Unop
1759 && e->Iex.Unop.op == Iop_CmpNEZ32
1760 && e->Iex.Unop.arg->tag == Iex_Get) {
1761 X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1762 hregX86_EBP());
1763 addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
1764 return Xcc_NZ;
1765 }
1766
1767 /* CmpNEZ32(x) */
1768 if (e->tag == Iex_Unop
1769 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1770 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1771 X86RMI* rmi2 = X86RMI_Imm(0);
1772 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1773 return Xcc_NZ;
1774 }
1775
1776 /* --- patterns rooted at: CmpNEZ64 --- */
1777
1778 /* CmpNEZ64(Or64(x,y)) */
1779 {
1780 DECLARE_PATTERN(p_CmpNEZ64_Or64);
1781 DEFINE_PATTERN(p_CmpNEZ64_Or64,
1782 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
1783 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
1784 HReg hi1, lo1, hi2, lo2;
1785 HReg tmp = newVRegI(env);
1786 iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
1787 addInstr(env, mk_iMOVsd_RR(hi1, tmp));
1788 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
1789 iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
1790 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
1791 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
1792 return Xcc_NZ;
1793 }
1794 }
1795
1796 /* CmpNEZ64(x) */
1797 if (e->tag == Iex_Unop
1798 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1799 HReg hi, lo;
1800 HReg tmp = newVRegI(env);
1801 iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
1802 addInstr(env, mk_iMOVsd_RR(hi, tmp));
1803 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
1804 return Xcc_NZ;
1805 }
1806
1807 /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
1808
1809 /* CmpEQ8 / CmpNE8 */
1810 if (e->tag == Iex_Binop
1811 && (e->Iex.Binop.op == Iop_CmpEQ8
1812 || e->Iex.Binop.op == Iop_CmpNE8
1813 || e->Iex.Binop.op == Iop_CasCmpEQ8
1814 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
1815 if (isZeroU8(e->Iex.Binop.arg2)) {
1816 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1817 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
1818 switch (e->Iex.Binop.op) {
1819 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1820 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1821 default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
1822 }
1823 } else {
1824 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1825 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1826 HReg r = newVRegI(env);
1827 addInstr(env, mk_iMOVsd_RR(r1,r));
1828 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1829 addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
1830 switch (e->Iex.Binop.op) {
1831 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
1832 case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
1833 default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
1834 }
1835 }
1836 }
1837
1838 /* CmpEQ16 / CmpNE16 */
1839 if (e->tag == Iex_Binop
1840 && (e->Iex.Binop.op == Iop_CmpEQ16
1841 || e->Iex.Binop.op == Iop_CmpNE16
1842 || e->Iex.Binop.op == Iop_CasCmpEQ16
1843 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
1844 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1845 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1846 HReg r = newVRegI(env);
1847 addInstr(env, mk_iMOVsd_RR(r1,r));
1848 addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
1849 addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
1850 switch (e->Iex.Binop.op) {
1851 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
1852 case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
1853 default: vpanic("iselCondCode(x86): CmpXX16");
1854 }
1855 }
1856
1857 /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
1858 Saves a "movl %eax, %tmp" compared to the default route. */
1859 if (e->tag == Iex_Binop
1860 && e->Iex.Binop.op == Iop_CmpNE32
1861 && e->Iex.Binop.arg1->tag == Iex_CCall
1862 && e->Iex.Binop.arg2->tag == Iex_Const) {
1863 IRExpr* cal = e->Iex.Binop.arg1;
1864 IRExpr* con = e->Iex.Binop.arg2;
1865 /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
1866 vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
1867 vassert(con->Iex.Const.con->tag == Ico_U32);
1868 /* Marshal args, do the call. */
1869 doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
1870 addInstr(env, X86Instr_Alu32R(Xalu_CMP,
1871 X86RMI_Imm(con->Iex.Const.con->Ico.U32),
1872 hregX86_EAX()));
1873 return Xcc_NZ;
1874 }
1875
1876 /* Cmp*32*(x,y) */
1877 if (e->tag == Iex_Binop
1878 && (e->Iex.Binop.op == Iop_CmpEQ32
1879 || e->Iex.Binop.op == Iop_CmpNE32
1880 || e->Iex.Binop.op == Iop_CmpLT32S
1881 || e->Iex.Binop.op == Iop_CmpLT32U
1882 || e->Iex.Binop.op == Iop_CmpLE32S
1883 || e->Iex.Binop.op == Iop_CmpLE32U
1884 || e->Iex.Binop.op == Iop_CasCmpEQ32
1885 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
1886 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1887 X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
1888 addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
1889 switch (e->Iex.Binop.op) {
1890 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
1891 case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
1892 case Iop_CmpLT32S: return Xcc_L;
1893 case Iop_CmpLT32U: return Xcc_B;
1894 case Iop_CmpLE32S: return Xcc_LE;
1895 case Iop_CmpLE32U: return Xcc_BE;
1896 default: vpanic("iselCondCode(x86): CmpXX32");
1897 }
1898 }
1899
1900 /* CmpNE64 */
1901 if (e->tag == Iex_Binop
1902 && (e->Iex.Binop.op == Iop_CmpNE64
1903 || e->Iex.Binop.op == Iop_CmpEQ64)) {
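      /* XOR the two halves pairwise; the operands are equal iff both
         XOR results are zero, so OR them together and test Z/NZ. */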
1904 HReg hi1, hi2, lo1, lo2;
1905 HReg tHi = newVRegI(env);
1906 HReg tLo = newVRegI(env);
1907 iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
1908 iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
1909 addInstr(env, mk_iMOVsd_RR(hi1, tHi));
1910 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
1911 addInstr(env, mk_iMOVsd_RR(lo1, tLo));
1912 addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
1913 addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
1914 switch (e->Iex.Binop.op) {
1915 case Iop_CmpNE64: return Xcc_NZ;
1916 case Iop_CmpEQ64: return Xcc_Z;
1917 default: vpanic("iselCondCode(x86): CmpXX64");
1918 }
1919 }
1920
1921 ppIRExpr(e);
1922 vpanic("iselCondCode");
1923 }
1924
1925
1926 /*---------------------------------------------------------*/
1927 /*--- ISEL: Integer expressions (64 bit) ---*/
1928 /*---------------------------------------------------------*/
1929
1930 /* Compute a 64-bit value into a register pair, which is returned as
1931 the first two parameters. As with iselIntExpr_R, these may be
1932 either real or virtual regs; in any case they must not be changed
1933 by subsequent code emitted by the caller. */
1934
1935 static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1936 {
1937 iselInt64Expr_wrk(rHi, rLo, env, e);
1938 # if 0
1939 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1940 # endif
1941 vassert(hregClass(*rHi) == HRcInt32);
1942 vassert(hregIsVirtual(*rHi));
1943 vassert(hregClass(*rLo) == HRcInt32);
1944 vassert(hregIsVirtual(*rLo));
1945 }
1946
1947 /* DO NOT CALL THIS DIRECTLY ! */
1948 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1949 {
1950 MatchInfo mi;
1951 HWord fn = 0; /* helper fn for most SIMD64 stuff */
1952 vassert(e);
1953 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1954
1955 /* 64-bit literal */
1956 if (e->tag == Iex_Const) {
1957 ULong w64 = e->Iex.Const.con->Ico.U64;
1958 UInt wHi = toUInt(w64 >> 32);
1959 UInt wLo = toUInt(w64);
1960 HReg tLo = newVRegI(env);
1961 HReg tHi = newVRegI(env);
1962 vassert(e->Iex.Const.con->tag == Ico_U64);
1963 if (wLo == wHi) {
1964 /* Save a precious Int register in this special case. */
1965 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1966 *rHi = tLo;
1967 *rLo = tLo;
1968 } else {
1969 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
1970 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
1971 *rHi = tHi;
1972 *rLo = tLo;
1973 }
1974 return;
1975 }
1976
1977 /* read 64-bit IRTemp */
1978 if (e->tag == Iex_RdTmp) {
1979 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1980 return;
1981 }
1982
1983 /* 64-bit load */
1984 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1985 HReg tLo, tHi;
1986 X86AMode *am0, *am4;
1987 vassert(e->Iex.Load.ty == Ity_I64);
1988 tLo = newVRegI(env);
1989 tHi = newVRegI(env);
1990 am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
1991 am4 = advance4(am0);
1992 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
1993 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
1994 *rHi = tHi;
1995 *rLo = tLo;
1996 return;
1997 }
1998
1999 /* 64-bit GET */
2000 if (e->tag == Iex_Get) {
2001 X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2002 X86AMode* am4 = advance4(am);
2003 HReg tLo = newVRegI(env);
2004 HReg tHi = newVRegI(env);
2005 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2006 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2007 *rHi = tHi;
2008 *rLo = tLo;
2009 return;
2010 }
2011
2012 /* 64-bit GETI */
2013 if (e->tag == Iex_GetI) {
2014 X86AMode* am
2015 = genGuestArrayOffset( env, e->Iex.GetI.descr,
2016 e->Iex.GetI.ix, e->Iex.GetI.bias );
2017 X86AMode* am4 = advance4(am);
2018 HReg tLo = newVRegI(env);
2019 HReg tHi = newVRegI(env);
2020 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2021 addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2022 *rHi = tHi;
2023 *rLo = tLo;
2024 return;
2025 }
2026
2027 /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
2028 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
2029 X86RM* r8;
2030 HReg e0Lo, e0Hi;
2031 HReg tLo = newVRegI(env);
2032 HReg tHi = newVRegI(env);
2033 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2034 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2035 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2036 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2037 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
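      /* Park a 32-bit zero at 0(%esp); if the guard is nonzero,
         conditionally overwrite both result halves with it, giving
         the 0:I64 arm. */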
2038 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2039 addInstr(env, X86Instr_Test32(0xFF, r8));
2040 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
2041 addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
2042 add_to_esp(env, 4);
2043 *rHi = tHi;
2044 *rLo = tLo;
2045 return;
2046 }
2047 /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
2048 if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
2049 X86RM* r8;
2050 HReg e0Lo, e0Hi;
2051 HReg tLo = newVRegI(env);
2052 HReg tHi = newVRegI(env);
2053 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2054 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
2055 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2056 addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
2057 addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
2058 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
2059 addInstr(env, X86Instr_Test32(0xFF, r8));
2060 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
2061 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
2062 add_to_esp(env, 4);
2063 *rHi = tHi;
2064 *rLo = tLo;
2065 return;
2066 }
2067
2068 /* 64-bit Mux0X: Mux0X(g, expr, expr) */
2069 if (e->tag == Iex_Mux0X) {
2070 X86RM* r8;
2071 HReg e0Lo, e0Hi, eXLo, eXHi;
2072 HReg tLo = newVRegI(env);
2073 HReg tHi = newVRegI(env);
2074 iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2075 iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2076 addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2077 addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2078 r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
2079 addInstr(env, X86Instr_Test32(0xFF, r8));
2080 /* This assumes the first cmov32 doesn't trash the condition
2081 codes, so they are still available for the second cmov32 */
2082 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2083 addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2084 *rHi = tHi;
2085 *rLo = tLo;
2086 return;
2087 }
2088
2089 /* --------- BINARY ops --------- */
2090 if (e->tag == Iex_Binop) {
2091 switch (e->Iex.Binop.op) {
2092 /* 32 x 32 -> 64 multiply */
2093 case Iop_MullU32:
2094 case Iop_MullS32: {
2095 /* Get one operand into %eax, and the other into a R/M.
2096 Need to make an educated guess about which operand is
2097 better placed in which. */
2098 HReg tLo = newVRegI(env);
2099 HReg tHi = newVRegI(env);
2100 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
2101 X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2102 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2103 addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
2104 addInstr(env, X86Instr_MulL(syned, rmLeft));
2105 /* Result is now in EDX:EAX. Tell the caller. */
2106 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2107 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2108 *rHi = tHi;
2109 *rLo = tLo;
2110 return;
2111 }
2112
2113 /* 64 x 32 -> (32(rem),32(div)) division */
2114 case Iop_DivModU64to32:
2115 case Iop_DivModS64to32: {
2116 /* Get the 64-bit operand into edx:eax, and the other into
2117 any old R/M. */
2118 HReg sHi, sLo;
2119 HReg tLo = newVRegI(env);
2120 HReg tHi = newVRegI(env);
2121 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
2122 X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2123 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2124 addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
2125 addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
2126 addInstr(env, X86Instr_Div(syned, rmRight));
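            /* After div/idiv the quotient is in %eax and the remainder
               in %edx, so the 64-bit result is assembled as rem:div. */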
2127 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2128 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2129 *rHi = tHi;
2130 *rLo = tLo;
2131 return;
2132 }
2133
2134 /* Or64/And64/Xor64 */
2135 case Iop_Or64:
2136 case Iop_And64:
2137 case Iop_Xor64: {
2138 HReg xLo, xHi, yLo, yHi;
2139 HReg tLo = newVRegI(env);
2140 HReg tHi = newVRegI(env);
2141 X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2142 : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2143 : Xalu_XOR;
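            /* Apply the operation to each 32-bit half independently,
               e.g. for Or64 (AT&T syntax, destination on the right):
                  movl xHi,tHi ; orl yHi,tHi ; movl xLo,tLo ; orl yLo,tLo */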
2144 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2145 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2146 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2147 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2148 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2149 addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2150 *rHi = tHi;
2151 *rLo = tLo;
2152 return;
2153 }
2154
2155 /* Add64/Sub64 */
2156 case Iop_Add64:
2157 if (e->Iex.Binop.arg2->tag == Iex_Const) {
2158 /* special case Add64(e, const) */
2159 ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2160 UInt wHi = toUInt(w64 >> 32);
2161 UInt wLo = toUInt(w64);
2162 HReg tLo = newVRegI(env);
2163 HReg tHi = newVRegI(env);
2164 HReg xLo, xHi;
2165 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
2166 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2167 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2168 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2169 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
2170 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
2171 *rHi = tHi;
2172 *rLo = tLo;
2173 return;
2174 }
2175 /* else fall through to the generic case */
2176 case Iop_Sub64: {
2177 HReg xLo, xHi, yLo, yHi;
2178 HReg tLo = newVRegI(env);
2179 HReg tHi = newVRegI(env);
2180 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2181 addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2182 addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2183 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
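            /* Do the low halves first; ADC/SBB then folds the carry or
               borrow produced into the high halves. */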
2184 if (e->Iex.Binop.op==Iop_Add64) {
2185 addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2186 addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2187 } else {
2188 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2189 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2190 }
2191 *rHi = tHi;
2192 *rLo = tLo;
2193 return;
2194 }
2195
2196 /* 32HLto64(e1,e2) */
2197 case Iop_32HLto64:
2198 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2199 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2200 return;
2201
2202 /* 64-bit shifts */
2203 case Iop_Shl64: {
2204 /* We use the same ingenious scheme as gcc. Put the value
2205 to be shifted into %hi:%lo, and the shift amount into
2206 %cl. Then (dsts on right, a la ATT syntax):
2207
2208 shldl %cl, %lo, %hi -- make %hi be right for the
2209 -- shift amt %cl % 32
2210 shll %cl, %lo -- make %lo be right for the
2211 -- shift amt %cl % 32
2212
2213 Now, if (shift amount % 64) is in the range 32 .. 63,
2214 we have to do a fixup, which puts the result low half
2215 into the result high half, and zeroes the low half:
2216
2217 testl $32, %ecx
2218
2219 cmovnz %lo, %hi
2220 movl $0, %tmp -- sigh; need yet another reg
2221 cmovnz %tmp, %lo
2222 */
2223 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2224 tLo = newVRegI(env);
2225 tHi = newVRegI(env);
2226 tTemp = newVRegI(env);
2227 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2228 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2229 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2230 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2231 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2232 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2233 and those regs are legitimately modifiable. */
2234 addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2235 addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
2236 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2237 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2238 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2239 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2240 *rHi = tHi;
2241 *rLo = tLo;
2242 return;
2243 }
2244
2245 case Iop_Shr64: {
2246 /* We use the same ingenious scheme as gcc. Put the value
2247 to be shifted into %hi:%lo, and the shift amount into
2248 %cl. Then:
2249
2250 shrdl %cl, %hi, %lo -- make %lo be right for the
2251 -- shift amt %cl % 32
2252 shrl %cl, %hi -- make %hi be right for the
2253 -- shift amt %cl % 32
2254
2255 Now, if (shift amount % 64) is in the range 32 .. 63,
2256 we have to do a fixup, which puts the result high half
2257 into the result low half, and zeroes the high half:
2258
2259 testl $32, %ecx
2260
2261 cmovnz %hi, %lo
2262 movl $0, %tmp -- sigh; need yet another reg
2263 cmovnz %tmp, %hi
2264 */
2265 HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2266 tLo = newVRegI(env);
2267 tHi = newVRegI(env);
2268 tTemp = newVRegI(env);
2269 rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2270 iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2271 addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2272 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2273 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2274 /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2275 and those regs are legitimately modifiable. */
2276 addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2277 addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
2278 addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
2279 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2280 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2281 addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2282 *rHi = tHi;
2283 *rLo = tLo;
2284 return;
2285 }
2286
2287 /* F64 -> I64 */
2288 /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2289 case. Unfortunately I see no easy way to avoid the
2290 duplication. */
2291 case Iop_F64toI64S: {
2292 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2293 HReg tLo = newVRegI(env);
2294 HReg tHi = newVRegI(env);
2295
2296 /* Used several times ... */
2297 /* Careful ... this sharing is only safe because
2298 zero_esp/four_esp do not hold any registers which the
2299 register allocator could attempt to swizzle later. */
2300 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2301 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2302
2303 /* rf now holds the value to be converted; the rounding mode,
2304 encoded as per the IRRoundingMode enum, is supplied by arg1.
2305 The first thing to do is set the FPU's rounding mode
2306 accordingly. */
2307
2308 /* Create a space for the format conversion. */
2309 /* subl $8, %esp */
2310 sub_from_esp(env, 8);
2311
2312 /* Set host rounding mode */
2313 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2314
2315 /* gistll %rf, 0(%esp) */
2316 addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2317
2318 /* movl 0(%esp), %dstLo */
2319 /* movl 4(%esp), %dstHi */
2320 addInstr(env, X86Instr_Alu32R(
2321 Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2322 addInstr(env, X86Instr_Alu32R(
2323 Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2324
2325 /* Restore default FPU rounding. */
2326 set_FPU_rounding_default( env );
2327
2328 /* addl $8, %esp */
2329 add_to_esp(env, 8);
2330
2331 *rHi = tHi;
2332 *rLo = tLo;
2333 return;
2334 }
2335
2336 case Iop_Add8x8:
2337 fn = (HWord)h_generic_calc_Add8x8; goto binnish;
2338 case Iop_Add16x4:
2339 fn = (HWord)h_generic_calc_Add16x4; goto binnish;
2340 case Iop_Add32x2:
2341 fn = (HWord)h_generic_calc_Add32x2; goto binnish;
2342
2343 case Iop_Avg8Ux8:
2344 fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
2345 case Iop_Avg16Ux4:
2346 fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
2347
2348 case Iop_CmpEQ8x8:
2349 fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
2350 case Iop_CmpEQ16x4:
2351 fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
2352 case Iop_CmpEQ32x2:
2353 fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
2354
2355 case Iop_CmpGT8Sx8:
2356 fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
2357 case Iop_CmpGT16Sx4:
2358 fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
2359 case Iop_CmpGT32Sx2:
2360 fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
2361
2362 case Iop_InterleaveHI8x8:
2363 fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
2364 case Iop_InterleaveLO8x8:
2365 fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
2366 case Iop_InterleaveHI16x4:
2367 fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
2368 case Iop_InterleaveLO16x4:
2369 fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
2370 case Iop_InterleaveHI32x2:
2371 fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
2372 case Iop_InterleaveLO32x2:
2373 fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
2374 case Iop_CatOddLanes16x4:
2375 fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
2376 case Iop_CatEvenLanes16x4:
2377 fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
2378 case Iop_Perm8x8:
2379 fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
2380
2381 case Iop_Max8Ux8:
2382 fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
2383 case Iop_Max16Sx4:
2384 fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
2385 case Iop_Min8Ux8:
2386 fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
2387 case Iop_Min16Sx4:
2388 fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
2389
2390 case Iop_Mul16x4:
2391 fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
2392 case Iop_Mul32x2:
2393 fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
2394 case Iop_MulHi16Sx4:
2395 fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
2396 case Iop_MulHi16Ux4:
2397 fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
2398
2399 case Iop_QAdd8Sx8:
2400 fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
2401 case Iop_QAdd16Sx4:
2402 fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
2403 case Iop_QAdd8Ux8:
2404 fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
2405 case Iop_QAdd16Ux4:
2406 fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
2407
2408 case Iop_QNarrowBin32Sto16Sx4:
2409 fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
2410 case Iop_QNarrowBin16Sto8Sx8:
2411 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
2412 case Iop_QNarrowBin16Sto8Ux8:
2413 fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
2414 case Iop_NarrowBin16to8x8:
2415 fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
2416 case Iop_NarrowBin32to16x4:
2417 fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
2418
2419 case Iop_QSub8Sx8:
2420 fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
2421 case Iop_QSub16Sx4:
2422 fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
2423 case Iop_QSub8Ux8:
2424 fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
2425 case Iop_QSub16Ux4:
2426 fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
2427
2428 case Iop_Sub8x8:
2429 fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
2430 case Iop_Sub16x4:
2431 fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
2432 case Iop_Sub32x2:
2433 fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
2434
2435 binnish: {
2436 /* Note: the following assumes all helpers are of
2437 signature
2438 ULong fn ( ULong, ULong ), and they are
2439 not marked as regparm functions.
2440 */
2441 HReg xLo, xHi, yLo, yHi;
2442 HReg tLo = newVRegI(env);
2443 HReg tHi = newVRegI(env);
2444 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2445 addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
2446 addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
2447 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2448 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2449 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2450 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2451 add_to_esp(env, 4*4);
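         /* The helper's ULong result comes back in %edx:%eax, per the
            32-bit calling convention. */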
2452 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2453 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2454 *rHi = tHi;
2455 *rLo = tLo;
2456 return;
2457 }
2458
2459 case Iop_ShlN32x2:
2460 fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
2461 case Iop_ShlN16x4:
2462 fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
2463 case Iop_ShlN8x8:
2464 fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
2465 case Iop_ShrN32x2:
2466 fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
2467 case Iop_ShrN16x4:
2468 fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
2469 case Iop_SarN32x2:
2470 fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
2471 case Iop_SarN16x4:
2472 fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
2473 case Iop_SarN8x8:
2474 fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
2475 shifty: {
2476 /* Note: the following assumes all helpers are of
2477 signature
2478 ULong fn ( ULong, UInt ), and they are
2479 not marked as regparm functions.
2480 */
2481 HReg xLo, xHi;
2482 HReg tLo = newVRegI(env);
2483 HReg tHi = newVRegI(env);
2484 X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2485 addInstr(env, X86Instr_Push(y));
2486 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2487 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2488 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2489 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2490 add_to_esp(env, 3*4);
2491 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2492 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2493 *rHi = tHi;
2494 *rLo = tLo;
2495 return;
2496 }
2497
2498 default:
2499 break;
2500 }
2501 } /* if (e->tag == Iex_Binop) */
2502
2503
2504 /* --------- UNARY ops --------- */
2505 if (e->tag == Iex_Unop) {
2506 switch (e->Iex.Unop.op) {
2507
2508 /* 32Sto64(e) */
2509 case Iop_32Sto64: {
2510 HReg tLo = newVRegI(env);
2511 HReg tHi = newVRegI(env);
2512 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2513 addInstr(env, mk_iMOVsd_RR(src,tHi));
2514 addInstr(env, mk_iMOVsd_RR(src,tLo));
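         /* Shifting the sign bit down arithmetically replicates it
            across all 32 bits of tHi (0x00000000 or 0xFFFFFFFF). */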
2515 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
2516 *rHi = tHi;
2517 *rLo = tLo;
2518 return;
2519 }
2520
2521 /* 32Uto64(e) */
2522 case Iop_32Uto64: {
2523 HReg tLo = newVRegI(env);
2524 HReg tHi = newVRegI(env);
2525 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2526 addInstr(env, mk_iMOVsd_RR(src,tLo));
2527 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2528 *rHi = tHi;
2529 *rLo = tLo;
2530 return;
2531 }
2532
2533 /* 16Uto64(e) */
2534 case Iop_16Uto64: {
2535 HReg tLo = newVRegI(env);
2536 HReg tHi = newVRegI(env);
2537 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2538 addInstr(env, mk_iMOVsd_RR(src,tLo));
2539 addInstr(env, X86Instr_Alu32R(Xalu_AND,
2540 X86RMI_Imm(0xFFFF), tLo));
2541 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2542 *rHi = tHi;
2543 *rLo = tLo;
2544 return;
2545 }
2546
2547 /* V128{HI}to64 */
2548 case Iop_V128HIto64:
2549 case Iop_V128to64: {
2550 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
2551 HReg tLo = newVRegI(env);
2552 HReg tHi = newVRegI(env);
2553 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
2554 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
2555 X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
2556 X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
2557 sub_from_esp(env, 16);
2558 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
2559 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2560 X86RMI_Mem(espLO), tLo ));
2561 addInstr(env, X86Instr_Alu32R( Xalu_MOV,
2562 X86RMI_Mem(espHI), tHi ));
2563 add_to_esp(env, 16);
2564 *rHi = tHi;
2565 *rLo = tLo;
2566 return;
2567 }
2568
2569 /* could do better than this, but for now ... */
2570 case Iop_1Sto64: {
2571 HReg tLo = newVRegI(env);
2572 HReg tHi = newVRegI(env);
2573 X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2574 addInstr(env, X86Instr_Set32(cond,tLo));
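         /* tLo is now 0 or 1; SHL 31 then SAR 31 smears bit 0 across
            the whole word, giving 0x00000000 or 0xFFFFFFFF. */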
2575 addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
2576 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
2577 addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2578 *rHi = tHi;
2579 *rLo = tLo;
2580 return;
2581 }
2582
2583 /* Not64(e) */
2584 case Iop_Not64: {
2585 HReg tLo = newVRegI(env);
2586 HReg tHi = newVRegI(env);
2587 HReg sHi, sLo;
2588 iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2589 addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2590 addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2591 addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
2592 addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
2593 *rHi = tHi;
2594 *rLo = tLo;
2595 return;
2596 }
2597
2598 /* Left64(e) */
2599 case Iop_Left64: {
2600 HReg yLo, yHi;
2601 HReg tLo = newVRegI(env);
2602 HReg tHi = newVRegI(env);
2603 /* yHi:yLo = arg */
2604 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2605 /* tLo = 0 - yLo, and set carry */
2606 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
2607 addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2608 /* tHi = 0 - yHi - carry */
2609 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2610 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2611 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2612 back in, so as to give the final result
2613 tHi:tLo = arg | -arg. */
2614 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
2615 addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
2616 *rHi = tHi;
2617 *rLo = tLo;
2618 return;
2619 }
2620
2621 /* --- patterns rooted at: CmpwNEZ64 --- */
2622
2623 /* CmpwNEZ64(e) */
2624 case Iop_CmpwNEZ64: {
2625
2626 DECLARE_PATTERN(p_CmpwNEZ64_Or64);
2627 DEFINE_PATTERN(p_CmpwNEZ64_Or64,
2628 unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
2629 if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
2630 /* CmpwNEZ64(Or64(x,y)) */
2631 HReg xHi,xLo,yHi,yLo;
2632 HReg xBoth = newVRegI(env);
2633 HReg merged = newVRegI(env);
2634 HReg tmp2 = newVRegI(env);
2635
2636 iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
2637 addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
2638 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2639 X86RMI_Reg(xLo),xBoth));
2640
2641 iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
2642 addInstr(env, mk_iMOVsd_RR(yHi,merged));
2643 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2644 X86RMI_Reg(yLo),merged));
2645 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2646 X86RMI_Reg(xBoth),merged));
2647
2648 /* tmp2 = (merged | -merged) >>s 31 */
2649 addInstr(env, mk_iMOVsd_RR(merged,tmp2));
2650 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2651 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2652 X86RMI_Reg(merged), tmp2));
2653 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2654 *rHi = tmp2;
2655 *rLo = tmp2;
2656 return;
2657 } else {
2658 /* CmpwNEZ64(e) */
2659 HReg srcLo, srcHi;
2660 HReg tmp1 = newVRegI(env);
2661 HReg tmp2 = newVRegI(env);
2662 /* srcHi:srcLo = arg */
2663 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2664 /* tmp1 = srcHi | srcLo */
2665 addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
2666 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2667 X86RMI_Reg(srcLo), tmp1));
2668 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2669 addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
2670 addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
2671 addInstr(env, X86Instr_Alu32R(Xalu_OR,
2672 X86RMI_Reg(tmp1), tmp2));
2673 addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
2674 *rHi = tmp2;
2675 *rLo = tmp2;
2676 return;
2677 }
2678 }
2679
2680 /* ReinterpF64asI64(e) */
2681 /* Given an IEEE754 double, produce an I64 with the same bit
2682 pattern. */
2683 case Iop_ReinterpF64asI64: {
2684 HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
2685 HReg tLo = newVRegI(env);
2686 HReg tHi = newVRegI(env);
2687 X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2688 X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2689 /* paranoia */
2690 set_FPU_rounding_default(env);
2691 /* subl $8, %esp */
2692 sub_from_esp(env, 8);
2693 /* gstD %rf, 0(%esp) */
2694 addInstr(env,
2695 X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
2696 /* movl 0(%esp), %tLo */
2697 addInstr(env,
2698 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2699 /* movl 4(%esp), %tHi */
2700 addInstr(env,
2701 X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2702 /* addl $8, %esp */
2703 add_to_esp(env, 8);
2704 *rHi = tHi;
2705 *rLo = tLo;
2706 return;
2707 }
2708
2709 case Iop_CmpNEZ32x2:
2710 fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
2711 case Iop_CmpNEZ16x4:
2712 fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
2713 case Iop_CmpNEZ8x8:
2714 fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
2715 unish: {
2716 /* Note: the following assumes all helpers are of
2717 signature
2718 ULong fn ( ULong ), and they are
2719 not marked as regparm functions.
2720 */
2721 HReg xLo, xHi;
2722 HReg tLo = newVRegI(env);
2723 HReg tHi = newVRegI(env);
2724 iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
2725 addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
2726 addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
2727 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
2728 add_to_esp(env, 2*4);
2729 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2730 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2731 *rHi = tHi;
2732 *rLo = tLo;
2733 return;
2734 }
2735
2736 default:
2737 break;
2738 }
2739 } /* if (e->tag == Iex_Unop) */
2740
2741
2742 /* --------- CCALL --------- */
2743 if (e->tag == Iex_CCall) {
2744 HReg tLo = newVRegI(env);
2745 HReg tHi = newVRegI(env);
2746
2747 /* Marshal args, do the call, clear stack. */
2748 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2749
2750 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2751 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2752 *rHi = tHi;
2753 *rLo = tLo;
2754 return;
2755 }
2756
2757 ppIRExpr(e);
2758 vpanic("iselInt64Expr");
2759 }
2760
2761
2762 /*---------------------------------------------------------*/
2763 /*--- ISEL: Floating point expressions (32 bit) ---*/
2764 /*---------------------------------------------------------*/
2765
2766 /* Nothing interesting here; really just wrappers for
2767 64-bit stuff. */
2768
2769 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2770 {
2771 HReg r = iselFltExpr_wrk( env, e );
2772 # if 0
2773 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2774 # endif
2775 vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
2776 vassert(hregIsVirtual(r));
2777 return r;
2778 }
2779
2780 /* DO NOT CALL THIS DIRECTLY */
2781 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2782 {
2783 IRType ty = typeOfIRExpr(env->type_env,e);
2784 vassert(ty == Ity_F32);
2785
2786 if (e->tag == Iex_RdTmp) {
2787 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2788 }
2789
2790 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2791 X86AMode* am;
2792 HReg res = newVRegF(env);
2793 vassert(e->Iex.Load.ty == Ity_F32);
2794 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2795 addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
2796 return res;
2797 }
2798
2799 if (e->tag == Iex_Binop
2800 && e->Iex.Binop.op == Iop_F64toF32) {
2801 /* Although the result is still held in a standard FPU register,
2802 we need to round it to reflect the loss of accuracy/range
2803 entailed in casting it to a 32-bit float. */
2804 HReg dst = newVRegF(env);
2805 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2806 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2807 addInstr(env, X86Instr_Fp64to32(src,dst));
2808 set_FPU_rounding_default( env );
2809 return dst;
2810 }
2811
2812 if (e->tag == Iex_Get) {
2813 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2814 hregX86_EBP() );
2815 HReg res = newVRegF(env);
2816 addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
2817 return res;
2818 }
2819
2820 if (e->tag == Iex_Unop
2821 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2822 /* Given an I32, produce an IEEE754 float with the same bit
2823 pattern. */
2824 HReg dst = newVRegF(env);
2825 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
2826 /* paranoia */
2827 addInstr(env, X86Instr_Push(rmi));
2828 addInstr(env, X86Instr_FpLdSt(
2829 True/*load*/, 4, dst,
2830 X86AMode_IR(0, hregX86_ESP())));
2831 add_to_esp(env, 4);
2832 return dst;
2833 }
2834
2835 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2836 HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
2837 HReg dst = newVRegF(env);
2838
2839 /* rf now holds the value to be rounded. The first thing to do
2840 is set the FPU's rounding mode accordingly. */
2841
2842 /* Set host rounding mode */
2843 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2844
2845 /* grndint %rf, %dst */
2846 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
2847
2848 /* Restore default FPU rounding. */
2849 set_FPU_rounding_default( env );
2850
2851 return dst;
2852 }
2853
2854 ppIRExpr(e);
2855 vpanic("iselFltExpr_wrk");
2856 }
2857
2858
2859 /*---------------------------------------------------------*/
2860 /*--- ISEL: Floating point expressions (64 bit) ---*/
2861 /*---------------------------------------------------------*/
2862
2863 /* Compute a 64-bit floating point value into a register, the identity
2864 of which is returned. As with iselIntExpr_R, the reg may be either
2865 real or virtual; in any case it must not be changed by subsequent
2866 code emitted by the caller. */
2867
2868 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2869
2870 Type S (1 bit) E (11 bits) F (52 bits)
2871 ---- --------- ----------- -----------
2872 signalling NaN u 2047 (max) .0uuuuu---u
2873 (with at least
2874 one 1 bit)
2875 quiet NaN u 2047 (max) .1uuuuu---u
2876
2877 negative infinity 1 2047 (max) .000000---0
2878
2879 positive infinity 0 2047 (max) .000000---0
2880
2881 negative zero 1 0 .000000---0
2882
2883 positive zero 0 0 .000000---0
2884 */
2885
2886 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2887 {
2888 HReg r = iselDblExpr_wrk( env, e );
2889 # if 0
2890 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2891 # endif
2892 vassert(hregClass(r) == HRcFlt64);
2893 vassert(hregIsVirtual(r));
2894 return r;
2895 }
2896
2897 /* DO NOT CALL THIS DIRECTLY */
2898 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2899 {
2900 IRType ty = typeOfIRExpr(env->type_env,e);
2901 vassert(e);
2902 vassert(ty == Ity_F64);
2903
2904 if (e->tag == Iex_RdTmp) {
2905 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2906 }
2907
2908 if (e->tag == Iex_Const) {
2909 union { UInt u32x2[2]; ULong u64; Double f64; } u;
2910 HReg freg = newVRegF(env);
2911 vassert(sizeof(u) == 8);
2912 vassert(sizeof(u.u64) == 8);
2913 vassert(sizeof(u.f64) == 8);
2914 vassert(sizeof(u.u32x2) == 8);
2915
2916 if (e->Iex.Const.con->tag == Ico_F64) {
2917 u.f64 = e->Iex.Const.con->Ico.F64;
2918 }
2919 else if (e->Iex.Const.con->tag == Ico_F64i) {
2920 u.u64 = e->Iex.Const.con->Ico.F64i;
2921 }
2922 else
2923 vpanic("iselDblExpr(x86): const");
2924
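      /* Push the high 32 bits first, then the low 32 bits, so that the
         little-endian 8-byte image of the constant ends up at 0(%esp),
         ready for the 8-byte FP load below. */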
2925 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
2926 addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
2927 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
2928 X86AMode_IR(0, hregX86_ESP())));
2929 add_to_esp(env, 8);
2930 return freg;
2931 }
2932
2933 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2934 X86AMode* am;
2935 HReg res = newVRegF(env);
2936 vassert(e->Iex.Load.ty == Ity_F64);
2937 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2938 addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
2939 return res;
2940 }
2941
2942 if (e->tag == Iex_Get) {
2943 X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
2944 hregX86_EBP() );
2945 HReg res = newVRegF(env);
2946 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2947 return res;
2948 }
2949
2950 if (e->tag == Iex_GetI) {
2951 X86AMode* am
2952 = genGuestArrayOffset(
2953 env, e->Iex.GetI.descr,
2954 e->Iex.GetI.ix, e->Iex.GetI.bias );
2955 HReg res = newVRegF(env);
2956 addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
2957 return res;
2958 }
2959
2960 if (e->tag == Iex_Triop) {
2961 X86FpOp fpop = Xfp_INVALID;
2962 IRTriop *triop = e->Iex.Triop.details;
2963 switch (triop->op) {
2964 case Iop_AddF64: fpop = Xfp_ADD; break;
2965 case Iop_SubF64: fpop = Xfp_SUB; break;
2966 case Iop_MulF64: fpop = Xfp_MUL; break;
2967 case Iop_DivF64: fpop = Xfp_DIV; break;
2968 case Iop_ScaleF64: fpop = Xfp_SCALE; break;
2969 case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
2970 case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
2971 case Iop_AtanF64: fpop = Xfp_ATAN; break;
2972 case Iop_PRemF64: fpop = Xfp_PREM; break;
2973 case Iop_PRem1F64: fpop = Xfp_PREM1; break;
2974 default: break;
2975 }
2976 if (fpop != Xfp_INVALID) {
2977 HReg res = newVRegF(env);
2978 HReg srcL = iselDblExpr(env, triop->arg2);
2979 HReg srcR = iselDblExpr(env, triop->arg3);
2980 /* XXXROUNDINGFIXME */
2981 /* set roundingmode here */
2982 addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
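         /* Presumably the x87 precision-control setting only constrains
            add/sub/mul/div results, so the remaining ops may carry extra
            precision and are explicitly rounded back to F64 here. */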
2983 if (fpop != Xfp_ADD && fpop != Xfp_SUB
2984 && fpop != Xfp_MUL && fpop != Xfp_DIV)
2985 roundToF64(env, res);
2986 return res;
2987 }
2988 }
2989
2990 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2991 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2992 HReg dst = newVRegF(env);
2993
2994 /* rf now holds the value to be rounded. The first thing to do
2995 is set the FPU's rounding mode accordingly. */
2996
2997 /* Set host rounding mode */
2998 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2999
3000 /* grndint %rf, %dst */
3001 addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
3002
3003 /* Restore default FPU rounding. */
3004 set_FPU_rounding_default( env );
3005
3006 return dst;
3007 }
3008
3009 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3010 HReg dst = newVRegF(env);
3011 HReg rHi,rLo;
3012 iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
3013 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3014 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3015
3016 /* Set host rounding mode */
3017 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
3018
3019 addInstr(env, X86Instr_FpLdStI(
3020 True/*load*/, 8, dst,
3021 X86AMode_IR(0, hregX86_ESP())));
3022
3023 /* Restore default FPU rounding. */
3024 set_FPU_rounding_default( env );
3025
3026 add_to_esp(env, 8);
3027 return dst;
3028 }
3029
3030 if (e->tag == Iex_Binop) {
3031 X86FpOp fpop = Xfp_INVALID;
3032 switch (e->Iex.Binop.op) {
3033 case Iop_SinF64: fpop = Xfp_SIN; break;
3034 case Iop_CosF64: fpop = Xfp_COS; break;
3035 case Iop_TanF64: fpop = Xfp_TAN; break;
3036 case Iop_2xm1F64: fpop = Xfp_2XM1; break;
3037 case Iop_SqrtF64: fpop = Xfp_SQRT; break;
3038 default: break;
3039 }
3040 if (fpop != Xfp_INVALID) {
3041 HReg res = newVRegF(env);
3042 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3043 /* XXXROUNDINGFIXME */
3044 /* set roundingmode here */
3045 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3046 if (fpop != Xfp_SQRT
3047 && fpop != Xfp_NEG && fpop != Xfp_ABS)
3048 roundToF64(env, res);
3049 return res;
3050 }
3051 }
3052
3053 if (e->tag == Iex_Unop) {
3054 X86FpOp fpop = Xfp_INVALID;
3055 switch (e->Iex.Unop.op) {
3056 case Iop_NegF64: fpop = Xfp_NEG; break;
3057 case Iop_AbsF64: fpop = Xfp_ABS; break;
3058 default: break;
3059 }
3060 if (fpop != Xfp_INVALID) {
3061 HReg res = newVRegF(env);
3062 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3063 addInstr(env, X86Instr_FpUnary(fpop,src,res));
3064 if (fpop != Xfp_NEG && fpop != Xfp_ABS)
3065 roundToF64(env, res);
3066 return res;
3067 }
3068 }
3069
3070 if (e->tag == Iex_Unop) {
3071 switch (e->Iex.Unop.op) {
3072 case Iop_I32StoF64: {
3073 HReg dst = newVRegF(env);
3074 HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3075 addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3076 set_FPU_rounding_default(env);
3077 addInstr(env, X86Instr_FpLdStI(
3078 True/*load*/, 4, dst,
3079 X86AMode_IR(0, hregX86_ESP())));
3080 add_to_esp(env, 4);
3081 return dst;
3082 }
3083 case Iop_ReinterpI64asF64: {
3084 /* Given an I64, produce an IEEE754 double with the same
3085 bit pattern. */
3086 HReg dst = newVRegF(env);
3087 HReg rHi, rLo;
3088 iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
3089 /* paranoia */
3090 set_FPU_rounding_default(env);
3091 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3092 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3093 addInstr(env, X86Instr_FpLdSt(
3094 True/*load*/, 8, dst,
3095 X86AMode_IR(0, hregX86_ESP())));
3096 add_to_esp(env, 8);
3097 return dst;
3098 }
3099 case Iop_F32toF64: {
3100 /* this is a no-op */
3101 HReg res = iselFltExpr(env, e->Iex.Unop.arg);
3102 return res;
3103 }
3104 default:
3105 break;
3106 }
3107 }
3108
3109 /* --------- MULTIPLEX --------- */
3110 if (e->tag == Iex_Mux0X) {
3111 if (ty == Ity_F64
3112 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
3113 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3114 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
3115 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
3116 HReg dst = newVRegF(env);
3117 addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
3118 addInstr(env, X86Instr_Test32(0xFF, r8));
3119 addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
3120 return dst;
3121 }
3122 }
3123
3124 ppIRExpr(e);
3125 vpanic("iselDblExpr_wrk");
3126 }
3127
3128
3129 /*---------------------------------------------------------*/
3130 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3131 /*---------------------------------------------------------*/
3132
3133 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3134 {
3135 HReg r = iselVecExpr_wrk( env, e );
3136 # if 0
3137 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3138 # endif
3139 vassert(hregClass(r) == HRcVec128);
3140 vassert(hregIsVirtual(r));
3141 return r;
3142 }
3143
3144
3145 /* DO NOT CALL THIS DIRECTLY */
3146 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3147 {
3148
3149 # define REQUIRE_SSE1 \
3150 do { if (env->hwcaps == 0/*baseline, no sse*/) \
3151 goto vec_fail; \
3152 } while (0)
3153
3154 # define REQUIRE_SSE2 \
3155 do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
3156 goto vec_fail; \
3157 } while (0)
3158
3159 # define SSE2_OR_ABOVE \
3160 (env->hwcaps & VEX_HWCAPS_X86_SSE2)
3161
3162 HWord fn = 0; /* address of helper fn, if required */
3163 MatchInfo mi;
3164 Bool arg1isEReg = False;
3165 X86SseOp op = Xsse_INVALID;
3166 IRType ty = typeOfIRExpr(env->type_env,e);
3167 vassert(e);
3168 vassert(ty == Ity_V128);
3169
3170 REQUIRE_SSE1;
3171
3172 if (e->tag == Iex_RdTmp) {
3173 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3174 }
3175
3176 if (e->tag == Iex_Get) {
3177 HReg dst = newVRegV(env);
3178 addInstr(env, X86Instr_SseLdSt(
3179 True/*load*/,
3180 dst,
3181 X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
3182 )
3183 );
3184 return dst;
3185 }
3186
3187 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3188 HReg dst = newVRegV(env);
3189 X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3190 addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
3191 return dst;
3192 }
3193
3194 if (e->tag == Iex_Const) {
3195 HReg dst = newVRegV(env);
3196 vassert(e->Iex.Const.con->tag == Ico_V128);
3197 addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
3198 return dst;
3199 }
3200
3201 if (e->tag == Iex_Unop) {
3202
3203 if (SSE2_OR_ABOVE) {
3204 /* 64UtoV128(LDle:I64(addr)) */
3205 DECLARE_PATTERN(p_zwiden_load64);
3206 DEFINE_PATTERN(p_zwiden_load64,
3207 unop(Iop_64UtoV128,
3208 IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
3209 if (matchIRExpr(&mi, p_zwiden_load64, e)) {
3210 X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
3211 HReg dst = newVRegV(env);
3212 addInstr(env, X86Instr_SseLdzLO(8, dst, am));
3213 return dst;
3214 }
3215 }
3216
3217 switch (e->Iex.Unop.op) {
3218
3219 case Iop_NotV128: {
3220 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3221 return do_sse_Not128(env, arg);
3222 }
3223
3224 case Iop_CmpNEZ64x2: {
3225 /* We can use SSE2 instructions for this. */
3226 /* Ideally, we want to do a 64Ix2 comparison against zero of
3227 the operand. Problem is no such insn exists. Solution
3228 therefore is to do a 32Ix4 comparison instead, and bitwise-
3229 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3230 let the not'd result of this initial comparison be a:b:c:d.
3231 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3232 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3233 giving the required result.
3234
3235 The required selection sequence is 2,3,0,1, which
3236 according to Intel's documentation means the pshufd
3237 literal value is 0xB1, that is,
3238 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3239 */
3240 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3241 HReg tmp = newVRegV(env);
3242 HReg dst = newVRegV(env);
3243 REQUIRE_SSE2;
3244 addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
3245 addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
3246 tmp = do_sse_Not128(env, tmp);
3247 addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
3248 addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
3249 return dst;
3250 }
3251
3252 case Iop_CmpNEZ32x4: {
3253 /* Sigh, we have to generate lousy code since this has to
3254 work on SSE1 hosts */
3255 /* basically, the idea is: for each lane:
3256 movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
3257 sbbl %r, %r (now %r = 1Sto32(CF))
3258 movl %r, lane
3259 */
3260 Int i;
3261 X86AMode* am;
3262 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3263 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3264 HReg dst = newVRegV(env);
3265 HReg r32 = newVRegI(env);
3266 sub_from_esp(env, 16);
3267 addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
3268 for (i = 0; i < 4; i++) {
3269 am = X86AMode_IR(i*4, hregX86_ESP());
3270 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
3271 addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
3272 addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
3273 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
3274 }
3275 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3276 add_to_esp(env, 16);
3277 return dst;
3278 }
3279
3280 case Iop_CmpNEZ8x16:
3281 case Iop_CmpNEZ16x8: {
3282 /* We can use SSE2 instructions for this. */
3283 HReg arg;
3284 HReg vec0 = newVRegV(env);
3285 HReg vec1 = newVRegV(env);
3286 HReg dst = newVRegV(env);
3287 X86SseOp cmpOp
3288 = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
3289 : Xsse_CMPEQ8;
3290 REQUIRE_SSE2;
3291 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
3292 addInstr(env, mk_vMOVsd_RR(vec0, vec1));
3293 addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
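            /* Comparing vec1 with itself for equality gives all-1s in
               every lane (it was just zeroed, so each lane equals
               itself), yielding an all-ones constant without a memory
               load. */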
3294 /* defer arg computation to here so as to give CMPEQF as long
3295 as possible to complete */
3296 arg = iselVecExpr(env, e->Iex.Unop.arg);
3297 /* vec0 is all 0s; vec1 is all 1s */
3298 addInstr(env, mk_vMOVsd_RR(arg, dst));
3299 /* 16x8 or 8x16 comparison == */
3300 addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
3301 /* invert result */
3302 addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
3303 return dst;
3304 }
3305
3306 case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
3307 case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
3308 case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
3309 do_32Fx4_unary:
3310 {
3311 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3312 HReg dst = newVRegV(env);
3313 addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
3314 return dst;
3315 }
3316
3317 case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3318 case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
3319 case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
3320 do_64Fx2_unary:
3321 {
3322 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3323 HReg dst = newVRegV(env);
3324 REQUIRE_SSE2;
3325 addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
3326 return dst;
3327 }
3328
3329 case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
3330 case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
3331 case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
3332 do_32F0x4_unary:
3333 {
3334 /* A bit subtle. We have to copy the arg to the result
3335 register first, because actually doing the SSE scalar insn
3336 leaves the upper 3/4 of the destination register
3337 unchanged. Whereas the required semantics of these
3338 primops is that the upper 3/4 is simply copied in from the
3339 argument. */
3340 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3341 HReg dst = newVRegV(env);
3342 addInstr(env, mk_vMOVsd_RR(arg, dst));
3343 addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
3344 return dst;
3345 }
3346
3347 case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3348 case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3349 case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
3350 do_64F0x2_unary:
3351 {
3352 /* A bit subtle. We have to copy the arg to the result
3353 register first, because actually doing the SSE scalar insn
3354 leaves the upper half of the destination register
3355 unchanged. Whereas the required semantics of these
3356 primops is that the upper half is simply copied in from the
3357 argument. */
3358 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3359 HReg dst = newVRegV(env);
3360 REQUIRE_SSE2;
3361 addInstr(env, mk_vMOVsd_RR(arg, dst));
3362 addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
3363 return dst;
3364 }
3365
3366 case Iop_32UtoV128: {
3367 HReg dst = newVRegV(env);
3368 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3369 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3370 addInstr(env, X86Instr_Push(rmi));
3371 addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
3372 add_to_esp(env, 4);
3373 return dst;
3374 }
3375
3376 case Iop_64UtoV128: {
3377 HReg rHi, rLo;
3378 HReg dst = newVRegV(env);
3379 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3380 iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
3381 addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
3382 addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
3383 addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
3384 add_to_esp(env, 8);
3385 return dst;
3386 }
3387
3388 default:
3389 break;
3390 } /* switch (e->Iex.Unop.op) */
3391 } /* if (e->tag == Iex_Unop) */
3392
3393 if (e->tag == Iex_Binop) {
3394 switch (e->Iex.Binop.op) {
3395
3396 case Iop_SetV128lo32: {
3397 HReg dst = newVRegV(env);
3398 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3399 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3400 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3401 sub_from_esp(env, 16);
3402 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3403 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
3404 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3405 add_to_esp(env, 16);
3406 return dst;
3407 }
3408
3409 case Iop_SetV128lo64: {
3410 HReg dst = newVRegV(env);
3411 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3412 HReg srcIhi, srcIlo;
3413 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3414 X86AMode* esp4 = advance4(esp0);
3415 iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
3416 sub_from_esp(env, 16);
3417 addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
3418 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
3419 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
3420 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3421 add_to_esp(env, 16);
3422 return dst;
3423 }
3424
3425 case Iop_64HLtoV128: {
3426 HReg r3, r2, r1, r0;
3427 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3428 X86AMode* esp4 = advance4(esp0);
3429 X86AMode* esp8 = advance4(esp4);
3430 X86AMode* esp12 = advance4(esp8);
3431 HReg dst = newVRegV(env);
3432 /* do this via the stack (easy, convenient, etc) */
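              /* Roughly:  subl $16,%esp
                           movl r0,0(%esp) ; movl r1,4(%esp)
                           movl r2,8(%esp) ; movl r3,12(%esp)
                           movups (%esp),dst
                           addl $16,%esp */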
3433 sub_from_esp(env, 16);
3434 /* Do the less significant 64 bits */
3435 iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
3436 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
3437 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
3438 /* Do the more significant 64 bits */
3439 iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
3440 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
3441 addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
3442 /* Fetch result back from stack. */
3443 addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
3444 add_to_esp(env, 16);
3445 return dst;
3446 }
3447
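           /* Vector FP binary ops.  SSE arithmetic is two-address -- the
              instruction overwrites its destination -- so argL is first
              copied into a fresh destination vreg and argR is then combined
              into it. */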
3448 case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
3449 case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
3450 case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
3451 case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
3452 case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
3453 case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
3454 case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
3455 case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
3456 case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
3457 case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
3458 do_32Fx4:
3459 {
3460 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3461 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3462 HReg dst = newVRegV(env);
3463 addInstr(env, mk_vMOVsd_RR(argL, dst));
3464 addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
3465 return dst;
3466 }
3467
3468 case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
3469 case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
3470 case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
3471 case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
3472 case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
3473 case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
3474 case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
3475 case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
3476 case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
3477 case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
3478 do_64Fx2:
3479 {
3480 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3481 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3482 HReg dst = newVRegV(env);
3483 REQUIRE_SSE2;
3484 addInstr(env, mk_vMOVsd_RR(argL, dst));
3485 addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
3486 return dst;
3487 }
3488
3489 case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
3490 case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
3491 case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
3492 case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
3493 case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
3494 case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
3495 case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
3496 case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
3497 case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
3498 case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
3499 do_32F0x4: {
3500 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3501 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3502 HReg dst = newVRegV(env);
3503 addInstr(env, mk_vMOVsd_RR(argL, dst));
3504 addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
3505 return dst;
3506 }
3507
3508 case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
3509 case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
3510 case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
3511 case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
3512 case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
3513 case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
3514 case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
3515 case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
3516 case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
3517 case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
3518 do_64F0x2: {
3519 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3520 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3521 HReg dst = newVRegV(env);
3522 REQUIRE_SSE2;
3523 addInstr(env, mk_vMOVsd_RR(argL, dst));
3524 addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
3525 return dst;
3526 }
3527
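           /* Pack/interleave ops: the IR operand order here is the reverse
              of the x86 operand order, so arg1isEReg is set; do_SseReRg
              below then copies arg2 into the destination and presents arg1
              as the E (source) operand. */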
3528 case Iop_QNarrowBin32Sto16Sx8:
3529 op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3530 case Iop_QNarrowBin16Sto8Sx16:
3531 op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3532 case Iop_QNarrowBin16Sto8Ux16:
3533 op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3534
3535 case Iop_InterleaveHI8x16:
3536 op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3537 case Iop_InterleaveHI16x8:
3538 op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3539 case Iop_InterleaveHI32x4:
3540 op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3541 case Iop_InterleaveHI64x2:
3542 op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3543
3544 case Iop_InterleaveLO8x16:
3545 op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3546 case Iop_InterleaveLO16x8:
3547 op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3548 case Iop_InterleaveLO32x4:
3549 op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3550 case Iop_InterleaveLO64x2:
3551 op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3552
3553 case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
3554 case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
3555 case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
3556 case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
3557 case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
3558 case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
3559 case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
3560 case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
3561 case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
3562 case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
3563 case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
3564 case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
3565 case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
3566 case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
3567 case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
3568 case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
3569 case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
3570 case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
3571 case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
3572 case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
3573 case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
3574 case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
3575 case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
3576 case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
3577 case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
3578 case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
3579 case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
3580 case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
3581 case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
3582 case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
3583 case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
3584 case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
3585 case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
3586 case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
3587 do_SseReRg: {
3588 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3589 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3590 HReg dst = newVRegV(env);
3591 if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
3592 REQUIRE_SSE2;
3593 if (arg1isEReg) {
3594 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3595 addInstr(env, X86Instr_SseReRg(op, arg1, dst));
3596 } else {
3597 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3598 addInstr(env, X86Instr_SseReRg(op, arg2, dst));
3599 }
3600 return dst;
3601 }
3602
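           /* Vector shifts by a scalar amount.  The SSE shift-by-register
              forms take the count from the low 64 bits of the E operand, so
              build a 16-byte stack slot holding [count, 0, 0, 0], load it
              into a scratch vector register, and use the reg-reg form of
              the shift. */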
3603 case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
3604 case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
3605 case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
3606 case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
3607 case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
3608 case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
3609 case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
3610 case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
3611 do_SseShift: {
3612 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3613 X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3614 X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
3615 HReg ereg = newVRegV(env);
3616 HReg dst = newVRegV(env);
3617 REQUIRE_SSE2;
3618 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3619 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3620 addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
3621 addInstr(env, X86Instr_Push(rmi));
3622 addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
3623 addInstr(env, mk_vMOVsd_RR(greg, dst));
3624 addInstr(env, X86Instr_SseReRg(op, ereg, dst));
3625 add_to_esp(env, 16);
3626 return dst;
3627 }
3628
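           /* Narrowing ops with no direct SSE equivalent: fall back to a
              generic helper.  The three pointer arguments are passed in
              registers (%eax = result block, %edx = argL, %ecx = argR,
              i.e. a regparm-3 call); both operands are parked in a
              16-aligned scratch area on the stack and the 16-byte result
              is read back via %r_argp. */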
3629 case Iop_NarrowBin32to16x8:
3630 fn = (HWord)h_generic_calc_NarrowBin32to16x8;
3631 goto do_SseAssistedBinary;
3632 case Iop_NarrowBin16to8x16:
3633 fn = (HWord)h_generic_calc_NarrowBin16to8x16;
3634 goto do_SseAssistedBinary;
3635 do_SseAssistedBinary: {
3636          /* As with the amd64 case (from which this is copied), we
3637             generate pretty bad code. */
3638 vassert(fn != 0);
3639 HReg dst = newVRegV(env);
3640 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3641 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3642 HReg argp = newVRegI(env);
3643 /* subl $112, %esp -- make a space */
3644 sub_from_esp(env, 112);
3645 /* leal 48(%esp), %r_argp -- point into it */
3646 addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
3647 argp));
3648 /* andl $-16, %r_argp -- 16-align the pointer */
3649 addInstr(env, X86Instr_Alu32R(Xalu_AND,
3650 X86RMI_Imm( ~(UInt)15 ),
3651 argp));
3652 /* Prepare 3 arg regs:
3653 leal 0(%r_argp), %eax
3654 leal 16(%r_argp), %edx
3655 leal 32(%r_argp), %ecx
3656 */
3657 addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
3658 hregX86_EAX()));
3659 addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
3660 hregX86_EDX()));
3661 addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
3662 hregX86_ECX()));
3663 /* Store the two args, at (%edx) and (%ecx):
3664 movupd %argL, 0(%edx)
3665 movupd %argR, 0(%ecx)
3666 */
3667 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
3668 X86AMode_IR(0, hregX86_EDX())));
3669 addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
3670 X86AMode_IR(0, hregX86_ECX())));
3671 /* call the helper */
3672 addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
3673 /* fetch the result from memory, using %r_argp, which the
3674 register allocator will keep alive across the call. */
3675 addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
3676 X86AMode_IR(0, argp)));
3677 /* and finally, clear the space */
3678 add_to_esp(env, 112);
3679 return dst;
3680 }
3681
3682 default:
3683 break;
3684 } /* switch (e->Iex.Binop.op) */
3685 } /* if (e->tag == Iex_Binop) */
3686
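        /* Vector Mux0X: start with exprX in the destination, test the low
           8 bits of the condition, and if they are zero overwrite the
           destination with expr0 via a conditional vector move. */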
3687 if (e->tag == Iex_Mux0X) {
3688 X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
3689 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3690 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3691 HReg dst = newVRegV(env);
3692 addInstr(env, mk_vMOVsd_RR(rX,dst));
3693 addInstr(env, X86Instr_Test32(0xFF, r8));
3694 addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
3695 return dst;
3696 }
3697
3698 vec_fail:
3699 vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
3700 LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
3701 ppIRExpr(e);
3702 vpanic("iselVecExpr_wrk");
3703
3704 # undef REQUIRE_SSE1
3705 # undef REQUIRE_SSE2
3706 # undef SSE2_OR_ABOVE
3707 }
3708
3709
3710 /*---------------------------------------------------------*/
3711 /*--- ISEL: Statements ---*/
3712 /*---------------------------------------------------------*/
3713
3714 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3715 {
3716 if (vex_traceflags & VEX_TRACE_VCODE) {
3717 vex_printf("\n-- ");
3718 ppIRStmt(stmt);
3719 vex_printf("\n");
3720 }
3721
3722 switch (stmt->tag) {
3723
3724 /* --------- STORE --------- */
3725 case Ist_Store: {
3726 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3727 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3728 IREndness end = stmt->Ist.Store.end;
3729
3730 if (tya != Ity_I32 || end != Iend_LE)
3731 goto stmt_fail;
3732
3733 if (tyd == Ity_I32) {
3734 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3735 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3736 addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
3737 return;
3738 }
3739 if (tyd == Ity_I8 || tyd == Ity_I16) {
3740 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3741 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3742 addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
3743 r,am ));
3744 return;
3745 }
3746 if (tyd == Ity_F64) {
3747 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3748 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3749 addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
3750 return;
3751 }
3752 if (tyd == Ity_F32) {
3753 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3754 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3755 addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
3756 return;
3757 }
3758 if (tyd == Ity_I64) {
3759 HReg vHi, vLo, rA;
3760 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
3761 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
3762 addInstr(env, X86Instr_Alu32M(
3763 Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
3764 addInstr(env, X86Instr_Alu32M(
3765 Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
3766 return;
3767 }
3768 if (tyd == Ity_V128) {
3769 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3770 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3771 addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
3772 return;
3773 }
3774 break;
3775 }
3776
3777 /* --------- PUT --------- */
3778 case Ist_Put: {
3779 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3780 if (ty == Ity_I32) {
3781             /* We're going to write to memory (the guest state area), so
3782                compute the RHS into an X86RI. */
3783 X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3784 addInstr(env,
3785 X86Instr_Alu32M(
3786 Xalu_MOV,
3787 ri,
3788 X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
3789 ));
3790 return;
3791 }
3792 if (ty == Ity_I8 || ty == Ity_I16) {
3793 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3794 addInstr(env, X86Instr_Store(
3795 toUChar(ty==Ity_I8 ? 1 : 2),
3796 r,
3797 X86AMode_IR(stmt->Ist.Put.offset,
3798 hregX86_EBP())));
3799 return;
3800 }
3801 if (ty == Ity_I64) {
3802 HReg vHi, vLo;
3803 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3804 X86AMode* am4 = advance4(am);
3805 iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
3806 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
3807 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
3808 return;
3809 }
3810 if (ty == Ity_V128) {
3811 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3812 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3813 addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
3814 return;
3815 }
3816 if (ty == Ity_F32) {
3817 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3818 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3819 set_FPU_rounding_default(env); /* paranoia */
3820 addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
3821 return;
3822 }
3823 if (ty == Ity_F64) {
3824 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3825 X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
3826 set_FPU_rounding_default(env); /* paranoia */
3827 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
3828 return;
3829 }
3830 break;
3831 }
3832
3833 /* --------- Indexed PUT --------- */
3834 case Ist_PutI: {
3835 IRPutI *puti = stmt->Ist.PutI.details;
3836
3837 X86AMode* am
3838 = genGuestArrayOffset(
3839 env, puti->descr,
3840 puti->ix, puti->bias );
3841
3842 IRType ty = typeOfIRExpr(env->type_env, puti->data);
3843 if (ty == Ity_F64) {
3844 HReg val = iselDblExpr(env, puti->data);
3845 addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
3846 return;
3847 }
3848 if (ty == Ity_I8) {
3849 HReg r = iselIntExpr_R(env, puti->data);
3850 addInstr(env, X86Instr_Store( 1, r, am ));
3851 return;
3852 }
3853 if (ty == Ity_I32) {
3854 HReg r = iselIntExpr_R(env, puti->data);
3855 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
3856 return;
3857 }
3858 if (ty == Ity_I64) {
3859 HReg rHi, rLo;
3860 X86AMode* am4 = advance4(am);
3861 iselInt64Expr(&rHi, &rLo, env, puti->data);
3862 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
3863 addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
3864 return;
3865 }
3866 break;
3867 }
3868
3869 /* --------- TMP --------- */
3870 case Ist_WrTmp: {
3871 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3872 IRType ty = typeOfIRTemp(env->type_env, tmp);
3873
3874 /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
3875 compute it into an AMode and then use LEA. This usually
3876 produces fewer instructions, often because (for memcheck
3877 created IR) we get t = address-expression, (t is later used
3878 twice) and so doing this naturally turns address-expression
3879 back into an X86 amode. */
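              /* For instance (assuming iselIntExpr_AMode recognises the
                 pattern), t = Add32(t1, Shl32(t2, 2)) can become a single
                    leal 0(%r_t1,%r_t2,4), %r_t
                 and t = Add32(t1, 0x1C) a single
                    leal 0x1C(%r_t1), %r_t */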
3880 if (ty == Ity_I32
3881 && stmt->Ist.WrTmp.data->tag == Iex_Binop
3882 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
3883 X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3884 HReg dst = lookupIRTemp(env, tmp);
3885 if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
3886 /* Hmm, iselIntExpr_AMode wimped out and just computed the
3887 value into a register. Just emit a normal reg-reg move
3888 so reg-alloc can coalesce it away in the usual way. */
3889 HReg src = am->Xam.IR.reg;
3890 addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
3891 } else {
3892 addInstr(env, X86Instr_Lea32(am,dst));
3893 }
3894 return;
3895 }
3896
3897 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3898 X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3899 HReg dst = lookupIRTemp(env, tmp);
3900 addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
3901 return;
3902 }
3903 if (ty == Ity_I64) {
3904 HReg rHi, rLo, dstHi, dstLo;
3905 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3906 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
3907 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3908 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3909 return;
3910 }
3911 if (ty == Ity_I1) {
3912 X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3913 HReg dst = lookupIRTemp(env, tmp);
3914 addInstr(env, X86Instr_Set32(cond, dst));
3915 return;
3916 }
3917 if (ty == Ity_F64) {
3918 HReg dst = lookupIRTemp(env, tmp);
3919 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3920 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3921 return;
3922 }
3923 if (ty == Ity_F32) {
3924 HReg dst = lookupIRTemp(env, tmp);
3925 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3926 addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
3927 return;
3928 }
3929 if (ty == Ity_V128) {
3930 HReg dst = lookupIRTemp(env, tmp);
3931 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3932 addInstr(env, mk_vMOVsd_RR(src,dst));
3933 return;
3934 }
3935 break;
3936 }
3937
3938 /* --------- Call to DIRTY helper --------- */
3939 case Ist_Dirty: {
3940 IRType retty;
3941 IRDirty* d = stmt->Ist.Dirty.details;
3942 Bool passBBP = False;
3943
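        /* The guest state pointer (%ebp, the "BBP") is passed to the
           helper as an extra leading argument only if the helper declares
           that it actually touches guest state (needsBBP with at least one
           FxState entry). */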
3944 if (d->nFxState == 0)
3945 vassert(!d->needsBBP);
3946
3947 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
3948
3949 /* Marshal args, do the call, clear stack. */
3950 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3951
3952 /* Now figure out what to do with the returned value, if any. */
3953 if (d->tmp == IRTemp_INVALID)
3954 /* No return value. Nothing to do. */
3955 return;
3956
3957 retty = typeOfIRTemp(env->type_env, d->tmp);
3958 if (retty == Ity_I64) {
3959 HReg dstHi, dstLo;
3960 /* The returned value is in %edx:%eax. Park it in the
3961 register-pair associated with tmp. */
3962 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
3963 addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
3964 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
3965 return;
3966 }
3967 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
3968 /* The returned value is in %eax. Park it in the register
3969 associated with tmp. */
3970 HReg dst = lookupIRTemp(env, d->tmp);
3971 addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
3972 return;
3973 }
3974 break;
3975 }
3976
3977 /* --------- MEM FENCE --------- */
3978 case Ist_MBE:
3979 switch (stmt->Ist.MBE.event) {
3980 case Imbe_Fence:
3981 addInstr(env, X86Instr_MFence(env->hwcaps));
3982 return;
3983 default:
3984 break;
3985 }
3986 break;
3987
3988 /* --------- ACAS --------- */
3989 case Ist_CAS:
3990 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3991 /* "normal" singleton CAS */
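           /* Strategy: preload the old-value temp and %eax with the
              expected value and %ebx with the new data, then do what is in
              effect a lock cmpxchg{b,w,l} on the address.  If the CAS fails
              (ZF clear), the value actually observed in memory is left in
              %eax, so it is conditionally moved into the old-value temp; on
              success the preloaded expected value is already correct. */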
3992 UChar sz;
3993 IRCAS* cas = stmt->Ist.CAS.details;
3994 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3995 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
3996 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
3997 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
3998 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
3999 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4000 vassert(cas->expdHi == NULL);
4001 vassert(cas->dataHi == NULL);
4002 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4003 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4004 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4005 switch (ty) {
4006 case Ity_I32: sz = 4; break;
4007 case Ity_I16: sz = 2; break;
4008 case Ity_I8: sz = 1; break;
4009 default: goto unhandled_cas;
4010 }
4011 addInstr(env, X86Instr_ACAS(am, sz));
4012 addInstr(env,
4013 X86Instr_CMov32(Xcc_NZ,
4014 X86RM_Reg(hregX86_EAX()), rOldLo));
4015 return;
4016 } else {
4017 /* double CAS */
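           /* Same idea, using what is in effect lock cmpxchg8b: expected
              value in %edx:%eax, new data in %ecx:%ebx.  On failure the
              64-bit value observed in memory is left in %edx:%eax, so both
              halves are conditionally moved into the old-value temps. */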
4018 IRCAS* cas = stmt->Ist.CAS.details;
4019 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4020 /* only 32-bit allowed in this case */
4021 /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
4022 /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
4023 X86AMode* am = iselIntExpr_AMode(env, cas->addr);
4024 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4025 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4026 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4027 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4028 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4029 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4030 if (ty != Ity_I32)
4031 goto unhandled_cas;
4032 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4033 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4034 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
4035 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
4036 addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
4037 addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
4038 addInstr(env, X86Instr_DACAS(am));
4039 addInstr(env,
4040 X86Instr_CMov32(Xcc_NZ,
4041 X86RM_Reg(hregX86_EDX()), rOldHi));
4042 addInstr(env,
4043 X86Instr_CMov32(Xcc_NZ,
4044 X86RM_Reg(hregX86_EAX()), rOldLo));
4045 return;
4046 }
4047 unhandled_cas:
4048 break;
4049
4050 /* --------- INSTR MARK --------- */
4051 /* Doesn't generate any executable code ... */
4052 case Ist_IMark:
4053 return;
4054
4055 /* --------- NO-OP --------- */
4056 /* Fairly self-explanatory, wouldn't you say? */
4057 case Ist_NoOp:
4058 return;
4059
4060 /* --------- EXIT --------- */
4061 case Ist_Exit: {
4062 if (stmt->Ist.Exit.dst->tag != Ico_U32)
4063 vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
4064
4065 X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
4066 X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
4067 hregX86_EBP());
4068
4069 /* Case: boring transfer to known address */
4070 if (stmt->Ist.Exit.jk == Ijk_Boring) {
4071 if (env->chainingAllowed) {
4072 /* .. almost always true .. */
4073 /* Skip the event check at the dst if this is a forwards
4074 edge. */
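               /* (The idea: any loop through generated code must contain a
                  backwards edge, and backwards edges always go via the slow
                  entry point, which performs the event check; so a forwards
                  edge may safely use the destination's fast entry point,
                  which skips the check.) */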
4075 Bool toFastEP
4076 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
4077 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
4078 addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
4079 amEIP, cc, toFastEP));
4080 } else {
4081 /* .. very occasionally .. */
4082 /* We can't use chaining, so ask for an assisted transfer,
4083 as that's the only alternative that is allowable. */
4084 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4085 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
4086 }
4087 return;
4088 }
4089
4090 /* Case: assisted transfer to arbitrary address */
4091 switch (stmt->Ist.Exit.jk) {
4092 /* Keep this list in sync with that in iselNext below */
4093 case Ijk_ClientReq:
4094 case Ijk_EmWarn:
4095 case Ijk_MapFail:
4096 case Ijk_NoDecode:
4097 case Ijk_NoRedir:
4098 case Ijk_SigSEGV:
4099 case Ijk_SigTRAP:
4100 case Ijk_Sys_int128:
4101 case Ijk_Sys_int129:
4102 case Ijk_Sys_int130:
4103 case Ijk_Sys_sysenter:
4104 case Ijk_TInval:
4105 case Ijk_Yield:
4106 {
4107 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
4108 addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
4109 return;
4110 }
4111 default:
4112 break;
4113 }
4114
4115 /* Do we ever expect to see any other kind? */
4116 goto stmt_fail;
4117 }
4118
4119 default: break;
4120 }
4121 stmt_fail:
4122 ppIRStmt(stmt);
4123 vpanic("iselStmt");
4124 }
4125
4126
4127 /*---------------------------------------------------------*/
4128 /*--- ISEL: Basic block terminators (Nexts) ---*/
4129 /*---------------------------------------------------------*/
4130
4131 static void iselNext ( ISelEnv* env,
4132 IRExpr* next, IRJumpKind jk, Int offsIP )
4133 {
4134 if (vex_traceflags & VEX_TRACE_VCODE) {
4135 vex_printf( "\n-- PUT(%d) = ", offsIP);
4136 ppIRExpr( next );
4137 vex_printf( "; exit-");
4138 ppIRJumpKind(jk);
4139 vex_printf( "\n");
4140 }
4141
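        /* Three kinds of transfer are generated below: XDirect for a
           constant destination when chaining is allowed (a patchable,
           chainable jump), XIndir for a computed Boring/Call/Ret
           destination when chaining is allowed, and XAssisted for
           everything else, which hands control to the dispatcher together
           with the jump kind. */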
4142 /* Case: boring transfer to known address */
4143 if (next->tag == Iex_Const) {
4144 IRConst* cdst = next->Iex.Const.con;
4145 vassert(cdst->tag == Ico_U32);
4146 if (jk == Ijk_Boring || jk == Ijk_Call) {
4147 /* Boring transfer to known address */
4148 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4149 if (env->chainingAllowed) {
4150 /* .. almost always true .. */
4151 /* Skip the event check at the dst if this is a forwards
4152 edge. */
4153 Bool toFastEP
4154 = ((Addr64)cdst->Ico.U32) > env->max_ga;
4155 if (0) vex_printf("%s", toFastEP ? "X" : ".");
4156 addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
4157 amEIP, Xcc_ALWAYS,
4158 toFastEP));
4159 } else {
4160 /* .. very occasionally .. */
4161 /* We can't use chaining, so ask for an assisted transfer,
4162 as that's the only alternative that is allowable. */
4163 HReg r = iselIntExpr_R(env, next);
4164 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4165 Ijk_Boring));
4166 }
4167 return;
4168 }
4169 }
4170
4171 /* Case: call/return (==boring) transfer to any address */
4172 switch (jk) {
4173 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
4174 HReg r = iselIntExpr_R(env, next);
4175 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4176 if (env->chainingAllowed) {
4177 addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
4178 } else {
4179 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
4180 Ijk_Boring));
4181 }
4182 return;
4183 }
4184 default:
4185 break;
4186 }
4187
4188 /* Case: assisted transfer to arbitrary address */
4189 switch (jk) {
4190 /* Keep this list in sync with that for Ist_Exit above */
4191 case Ijk_ClientReq:
4192 case Ijk_EmWarn:
4193 case Ijk_MapFail:
4194 case Ijk_NoDecode:
4195 case Ijk_NoRedir:
4196 case Ijk_SigSEGV:
4197 case Ijk_SigTRAP:
4198 case Ijk_Sys_int128:
4199 case Ijk_Sys_int129:
4200 case Ijk_Sys_int130:
4201 case Ijk_Sys_sysenter:
4202 case Ijk_TInval:
4203 case Ijk_Yield:
4204 {
4205 HReg r = iselIntExpr_R(env, next);
4206 X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
4207 addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
4208 return;
4209 }
4210 default:
4211 break;
4212 }
4213
4214 vex_printf( "\n-- PUT(%d) = ", offsIP);
4215 ppIRExpr( next );
4216 vex_printf( "; exit-");
4217 ppIRJumpKind(jk);
4218 vex_printf( "\n");
4219 vassert(0); // are we expecting any other kind?
4220 }
4221
4222
4223 /*---------------------------------------------------------*/
4224 /*--- Insn selector top-level ---*/
4225 /*---------------------------------------------------------*/
4226
4227 /* Translate an entire SB to x86 code. */
4228
4229 HInstrArray* iselSB_X86 ( IRSB* bb,
4230 VexArch arch_host,
4231 VexArchInfo* archinfo_host,
4232 VexAbiInfo* vbi/*UNUSED*/,
4233 Int offs_Host_EvC_Counter,
4234 Int offs_Host_EvC_FailAddr,
4235 Bool chainingAllowed,
4236 Bool addProfInc,
4237 Addr64 max_ga )
4238 {
4239 Int i, j;
4240 HReg hreg, hregHI;
4241 ISelEnv* env;
4242 UInt hwcaps_host = archinfo_host->hwcaps;
4243 X86AMode *amCounter, *amFailAddr;
4244
4245 /* sanity ... */
4246 vassert(arch_host == VexArchX86);
4247 vassert(0 == (hwcaps_host
4248 & ~(VEX_HWCAPS_X86_SSE1
4249 | VEX_HWCAPS_X86_SSE2
4250 | VEX_HWCAPS_X86_SSE3
4251 | VEX_HWCAPS_X86_LZCNT)));
4252 vassert(sizeof(max_ga) == 8);
4253 vassert((max_ga >> 32) == 0);
4254
4255 /* Make up an initial environment to use. */
4256 env = LibVEX_Alloc(sizeof(ISelEnv));
4257 env->vreg_ctr = 0;
4258
4259 /* Set up output code array. */
4260 env->code = newHInstrArray();
4261
4262 /* Copy BB's type env. */
4263 env->type_env = bb->tyenv;
4264
4265 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4266 change as we go along. */
4267 env->n_vregmap = bb->tyenv->types_used;
4268 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4269 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4270
4271 /* and finally ... */
4272 env->chainingAllowed = chainingAllowed;
4273 env->hwcaps = hwcaps_host;
4274 env->max_ga = max_ga;
4275
4276 /* For each IR temporary, allocate a suitably-kinded virtual
4277 register. */
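        /* 32-bit and smaller integer temps get a single HRcInt32 vreg;
           64-bit temps get a pair, low half in vregmap[] and high half in
           vregmapHI[].  F32/F64 temps live in an HRcFlt64 vreg and V128
           temps in an HRcVec128 vreg. */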
4278 j = 0;
4279 for (i = 0; i < env->n_vregmap; i++) {
4280 hregHI = hreg = INVALID_HREG;
4281 switch (bb->tyenv->types[i]) {
4282 case Ity_I1:
4283 case Ity_I8:
4284 case Ity_I16:
4285 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
4286 case Ity_I64: hreg = mkHReg(j++, HRcInt32, True);
4287 hregHI = mkHReg(j++, HRcInt32, True); break;
4288 case Ity_F32:
4289 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
4290 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
4291 default: ppIRType(bb->tyenv->types[i]);
4292 vpanic("iselBB: IRTemp type");
4293 }
4294 env->vregmap[i] = hreg;
4295 env->vregmapHI[i] = hregHI;
4296 }
4297 env->vreg_ctr = j;
4298
4299 /* The very first instruction must be an event check. */
4300 amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
4301 amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
4302 addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
4303
4304 /* Possibly a block counter increment (for profiling). At this
4305 point we don't know the address of the counter, so just pretend
4306 it is zero. It will have to be patched later, but before this
4307       translation is used, by a call to LibVEX_PatchProfInc. */
4308 if (addProfInc) {
4309 addInstr(env, X86Instr_ProfInc());
4310 }
4311
4312 /* Ok, finally we can iterate over the statements. */
4313 for (i = 0; i < bb->stmts_used; i++)
4314 iselStmt(env, bb->stmts[i]);
4315
4316 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4317
4318 /* record the number of vregs we used. */
4319 env->code->n_vregs = env->vreg_ctr;
4320 return env->code;
4321 }
4322
4323
4324 /*---------------------------------------------------------------*/
4325 /*--- end host_x86_isel.c ---*/
4326 /*---------------------------------------------------------------*/
4327