1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2010 OpenWorks LLP
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39
40 #include "ir_match.h"
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h"
45 #include "host_generic_simd128.h"
46 #include "host_amd64_defs.h"
47
48
49 /*---------------------------------------------------------*/
50 /*--- x87/SSE control word stuff ---*/
51 /*---------------------------------------------------------*/
52
53 /* Vex-generated code expects to run with the FPU set as follows: all
54 exceptions masked, round-to-nearest, precision = 53 bits. This
55    corresponds to an FPU control word value of 0x027F.
56
57 Similarly the SSE control word (%mxcsr) should be 0x1F80.
58
59 %fpucw and %mxcsr should have these values on entry to
60    Vex-generated code, and those values should be
61 unchanged at exit.
62 */
63
64 #define DEFAULT_FPUCW 0x027F
65
66 #define DEFAULT_MXCSR 0x1F80
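
/* For reference, a rough decode of these two constants, following the
   standard x87 and SSE control-register layouts:

     0x027F : bits 5..0  = 111111b -> all six x87 exceptions masked
              bits 9..8  = 10b     -> precision control = 53-bit mantissa
              bits 11..10 = 00b    -> rounding control = round-to-nearest

     0x1F80 : bits 12..7  = 111111b -> all six SSE exceptions masked
              bits 14..13 = 00b     -> rounding control = round-to-nearest
              FZ (bit 15) and DAZ (bit 6) both clear
*/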
67
68 /* debugging only, do not use */
69 /* define DEFAULT_FPUCW 0x037F */
70
71
72 /*---------------------------------------------------------*/
73 /*--- misc helpers ---*/
74 /*---------------------------------------------------------*/
75
76 /* These are duplicated in guest-amd64/toIR.c */
77 static IRExpr* unop ( IROp op, IRExpr* a )
78 {
79 return IRExpr_Unop(op, a);
80 }
81
82 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
83 {
84 return IRExpr_Binop(op, a1, a2);
85 }
86
87 static IRExpr* bind ( Int binder )
88 {
89 return IRExpr_Binder(binder);
90 }
91
92
93 /*---------------------------------------------------------*/
94 /*--- ISelEnv ---*/
95 /*---------------------------------------------------------*/
96
97 /* This carries around:
98
99 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
100 might encounter. This is computed before insn selection starts,
101 and does not change.
102
103 - A mapping from IRTemp to HReg. This tells the insn selector
104 which virtual register is associated with each IRTemp
105 temporary. This is computed before insn selection starts, and
106 does not change. We expect this mapping to map precisely the
107 same set of IRTemps as the type mapping does.
108
109 - vregmap holds the primary register for the IRTemp.
110 - vregmapHI is only used for 128-bit integer-typed
111 IRTemps. It holds the identity of a second
112 64-bit virtual HReg, which holds the high half
113 of the value.
114
115 - The code array, that is, the insns selected so far.
116
117 - A counter, for generating new virtual registers.
118
119 - The host subarchitecture we are selecting insns for.
120 This is set at the start and does not change.
121
122 Note, this is all host-independent. (JRS 20050201: well, kinda
123 ... not completely. Compare with ISelEnv for X86.)
124 */
125
126 typedef
127 struct {
128 IRTypeEnv* type_env;
129
130 HReg* vregmap;
131 HReg* vregmapHI;
132 Int n_vregmap;
133
134 HInstrArray* code;
135
136 Int vreg_ctr;
137
138 UInt hwcaps;
139 }
140 ISelEnv;
141
142
143 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
144 {
145 vassert(tmp >= 0);
146 vassert(tmp < env->n_vregmap);
147 return env->vregmap[tmp];
148 }
149
150 static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
151 ISelEnv* env, IRTemp tmp )
152 {
153 vassert(tmp >= 0);
154 vassert(tmp < env->n_vregmap);
155 vassert(env->vregmapHI[tmp] != INVALID_HREG);
156 *vrLO = env->vregmap[tmp];
157 *vrHI = env->vregmapHI[tmp];
158 }
159
160 static void addInstr ( ISelEnv* env, AMD64Instr* instr )
161 {
162 addHInstr(env->code, instr);
163 if (vex_traceflags & VEX_TRACE_VCODE) {
164 ppAMD64Instr(instr, True);
165 vex_printf("\n");
166 }
167 }
168
169 static HReg newVRegI ( ISelEnv* env )
170 {
171 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174 }
175
176 //.. static HReg newVRegF ( ISelEnv* env )
177 //.. {
178 //.. HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
179 //.. env->vreg_ctr++;
180 //.. return reg;
181 //.. }
182
183 static HReg newVRegV ( ISelEnv* env )
184 {
185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186 env->vreg_ctr++;
187 return reg;
188 }
189
190
191 /*---------------------------------------------------------*/
192 /*--- ISEL: Forward declarations ---*/
193 /*---------------------------------------------------------*/
194
195 /* These are organised as iselXXX and iselXXX_wrk pairs. The
196 iselXXX_wrk do the real work, but are not to be called directly.
197 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
198 checks that all returned registers are virtual. You should not
199 call the _wrk version directly.
200 */
201 static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
202 static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
203
204 static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
205 static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
206
207 static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
208 static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
209
210 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
211 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
212
213 static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
214 static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
215
216 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
217 ISelEnv* env, IRExpr* e );
218 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
219 ISelEnv* env, IRExpr* e );
220
221 static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
222 static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
223
224 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
225 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
226
227 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
228 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
229
230 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
231 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
232
233
234 /*---------------------------------------------------------*/
235 /*--- ISEL: Misc helpers ---*/
236 /*---------------------------------------------------------*/
237
238 static Bool sane_AMode ( AMD64AMode* am )
239 {
240 switch (am->tag) {
241 case Aam_IR:
242 return
243 toBool( hregClass(am->Aam.IR.reg) == HRcInt64
244 && (hregIsVirtual(am->Aam.IR.reg)
245 || am->Aam.IR.reg == hregAMD64_RBP()) );
246 case Aam_IRRS:
247 return
248 toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
249 && hregIsVirtual(am->Aam.IRRS.base)
250 && hregClass(am->Aam.IRRS.index) == HRcInt64
251 && hregIsVirtual(am->Aam.IRRS.index) );
252 default:
253 vpanic("sane_AMode: unknown amd64 amode tag");
254 }
255 }
256
257
258 /* Can the lower 32 bits be signedly widened to produce the whole
259 64-bit value? In other words, are the top 33 bits either all 0 or
260 all 1 ? */
261 static Bool fitsIn32Bits ( ULong x )
262 {
263 Long y0 = (Long)x;
264 Long y1 = y0;
265 y1 <<= 32;
266 y1 >>=/*s*/ 32;
267 return toBool(x == y1);
268 }
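
/* Two worked examples for fitsIn32Bits: 0xFFFFFFFF80000000 fits, since it
   is the sign extension of its low 32 bits (0x80000000); whereas
   0x0000000080000000 does not, because sign-extending 0x80000000 gives
   0xFFFFFFFF80000000, which differs from the original value. */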
269
270 /* Is this a 64-bit zero expression? */
271
272 static Bool isZeroU64 ( IRExpr* e )
273 {
274 return e->tag == Iex_Const
275 && e->Iex.Const.con->tag == Ico_U64
276 && e->Iex.Const.con->Ico.U64 == 0ULL;
277 }
278
279 static Bool isZeroU32 ( IRExpr* e )
280 {
281 return e->tag == Iex_Const
282 && e->Iex.Const.con->tag == Ico_U32
283 && e->Iex.Const.con->Ico.U32 == 0;
284 }
285
286 /* Make an int reg-reg move. */
287
288 static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
289 {
290 vassert(hregClass(src) == HRcInt64);
291 vassert(hregClass(dst) == HRcInt64);
292 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
293 }
294
295 /* Make a vector reg-reg move. */
296
297 static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
298 {
299 vassert(hregClass(src) == HRcVec128);
300 vassert(hregClass(dst) == HRcVec128);
301 return AMD64Instr_SseReRg(Asse_MOV, src, dst);
302 }
303
304 /* Advance/retreat %rsp by n. */
305
306 static void add_to_rsp ( ISelEnv* env, Int n )
307 {
308 vassert(n > 0 && n < 256 && (n%8) == 0);
309 addInstr(env,
310 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
311 hregAMD64_RSP()));
312 }
313
314 static void sub_from_rsp ( ISelEnv* env, Int n )
315 {
316 vassert(n > 0 && n < 256 && (n%8) == 0);
317 addInstr(env,
318 AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
319 hregAMD64_RSP()));
320 }
321
322 /* Push 64-bit constants on the stack. */
323 static void push_uimm64( ISelEnv* env, ULong uimm64 )
324 {
325 /* If uimm64 can be expressed as the sign extension of its
326 lower 32 bits, we can do it the easy way. */
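   /* ("Easy" because pushq takes at most a 32-bit immediate, which the
      CPU sign-extends to 64 bits.  So, for instance, 0xFFFFFFFFFFFFFFF0
      can go via the single-Push path below, whereas something like
      0x1122334455667788 needs the Imm64-into-a-temp route.) */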
327 Long simm64 = (Long)uimm64;
328 if ( simm64 == ((simm64 << 32) >> 32) ) {
329 addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
330 } else {
331 HReg tmp = newVRegI(env);
332 addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
333 addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
334 }
335 }
336
337 //.. /* Given an amode, return one which references 4 bytes further
338 //.. along. */
339 //..
340 //.. static X86AMode* advance4 ( X86AMode* am )
341 //.. {
342 //.. X86AMode* am4 = dopyX86AMode(am);
343 //.. switch (am4->tag) {
344 //.. case Xam_IRRS:
345 //.. am4->Xam.IRRS.imm += 4; break;
346 //.. case Xam_IR:
347 //.. am4->Xam.IR.imm += 4; break;
348 //.. default:
349 //.. vpanic("advance4(x86,host)");
350 //.. }
351 //.. return am4;
352 //.. }
353 //..
354 //..
355 //.. /* Push an arg onto the host stack, in preparation for a call to a
356 //.. helper function of some kind. Returns the number of 32-bit words
357 //.. pushed. */
358 //..
359 //.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
360 //.. {
361 //.. IRType arg_ty = typeOfIRExpr(env->type_env, arg);
362 //.. if (arg_ty == Ity_I32) {
363 //.. addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
364 //.. return 1;
365 //.. } else
366 //.. if (arg_ty == Ity_I64) {
367 //.. HReg rHi, rLo;
368 //.. iselInt64Expr(&rHi, &rLo, env, arg);
369 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
370 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
371 //.. return 2;
372 //.. }
373 //.. ppIRExpr(arg);
374 //.. vpanic("pushArg(x86): can't handle arg of this type");
375 //.. }
376
377
378 /* Used only in doHelperCall. If possible, produce a single
379 instruction which computes 'e' into 'dst'. If not possible, return
380 NULL. */
381
382 static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
383 HReg dst,
384 IRExpr* e )
385 {
386 vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);
387
388 if (e->tag == Iex_Const) {
389 vassert(e->Iex.Const.con->tag == Ico_U64);
390 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
391 return AMD64Instr_Alu64R(
392 Aalu_MOV,
393 AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
394 dst
395 );
396 } else {
397 return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
398 }
399 }
400
401 if (e->tag == Iex_RdTmp) {
402 HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
403 return mk_iMOVsd_RR(src, dst);
404 }
405
406 if (e->tag == Iex_Get) {
407 vassert(e->Iex.Get.ty == Ity_I64);
408 return AMD64Instr_Alu64R(
409 Aalu_MOV,
410 AMD64RMI_Mem(
411 AMD64AMode_IR(e->Iex.Get.offset,
412 hregAMD64_RBP())),
413 dst);
414 }
415
416 if (e->tag == Iex_Unop
417 && e->Iex.Unop.op == Iop_32Uto64
418 && e->Iex.Unop.arg->tag == Iex_RdTmp) {
419 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
420 return AMD64Instr_MovxLQ(False, src, dst);
421 }
422
423 if (0) { ppIRExpr(e); vex_printf("\n"); }
424
425 return NULL;
426 }
427
428
429 /* Do a complete function call. guard is a Ity_Bit expression
430 indicating whether or not the call happens. If guard==NULL, the
431 call is unconditional. */
432
433 static
434 void doHelperCall ( ISelEnv* env,
435 Bool passBBP,
436 IRExpr* guard, IRCallee* cee, IRExpr** args )
437 {
438 AMD64CondCode cc;
439 HReg argregs[6];
440 HReg tmpregs[6];
441 AMD64Instr* fastinstrs[6];
442 Int n_args, i, argreg;
443
444 /* Marshal args for a call and do the call.
445
446 If passBBP is True, %rbp (the baseblock pointer) is to be passed
447 as the first arg.
448
449 This function only deals with a tiny set of possibilities, which
450 cover all helpers in practice. The restrictions are that only
451 arguments in registers are supported, hence only 6x64 integer
452 bits in total can be passed. In fact the only supported arg
453 type is I64.
454
455 Generating code which is both efficient and correct when
456 parameters are to be passed in registers is difficult, for the
457 reasons elaborated in detail in comments attached to
458 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
459 of the method described in those comments.
460
461 The problem is split into two cases: the fast scheme and the
462 slow scheme. In the fast scheme, arguments are computed
463 directly into the target (real) registers. This is only safe
464 when we can be sure that computation of each argument will not
465 trash any real registers set by computation of any other
466 argument.
467
468 In the slow scheme, all args are first computed into vregs, and
469 once they are all done, they are moved to the relevant real
470 regs. This always gives correct code, but it also gives a bunch
471 of vreg-to-rreg moves which are usually redundant but are hard
472 for the register allocator to get rid of.
473
474 To decide which scheme to use, all argument expressions are
475 first examined. If they are all so simple that it is clear they
476 will be evaluated without use of any fixed registers, use the
477 fast scheme, else use the slow scheme. Note also that only
478 unconditional calls may use the fast scheme, since having to
479 compute a condition expression could itself trash real
480 registers.
481
482 Note this requires being able to examine an expression and
483 determine whether or not evaluation of it might use a fixed
484 register. That requires knowledge of how the rest of this insn
485 selector works. Currently just the following 3 are regarded as
486 safe -- hopefully they cover the majority of arguments in
487 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
488 */
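
   /* As a purely illustrative (hypothetical) example: for a call such as
      foo(t7, GET:I64(16), 0x42:I64), every argument is an IRExpr_RdTmp,
      IRExpr_Get or IRExpr_Const, so each one can be computed by a single
      instruction straight into %rdi/%rsi/%rdx and the fast scheme applies.
      If any argument were, say, an Add64, we would fall back to the slow
      scheme below. */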
489
490 /* Note that the cee->regparms field is meaningless on AMD64 host
491 (since there is only one calling convention) and so we always
492 ignore it. */
493
494 n_args = 0;
495 for (i = 0; args[i]; i++)
496 n_args++;
497
498 if (6 < n_args + (passBBP ? 1 : 0))
499 vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
500
501 argregs[0] = hregAMD64_RDI();
502 argregs[1] = hregAMD64_RSI();
503 argregs[2] = hregAMD64_RDX();
504 argregs[3] = hregAMD64_RCX();
505 argregs[4] = hregAMD64_R8();
506 argregs[5] = hregAMD64_R9();
507
508 tmpregs[0] = tmpregs[1] = tmpregs[2] =
509 tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;
510
511 fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
512 fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;
513
514 /* First decide which scheme (slow or fast) is to be used. First
515 assume the fast scheme, and select slow if any contraindications
516 (wow) appear. */
517
518 if (guard) {
519 if (guard->tag == Iex_Const
520 && guard->Iex.Const.con->tag == Ico_U1
521 && guard->Iex.Const.con->Ico.U1 == True) {
522 /* unconditional */
523 } else {
524 /* Not manifestly unconditional -- be conservative. */
525 goto slowscheme;
526 }
527 }
528
529 /* Ok, let's try for the fast scheme. If it doesn't pan out, we'll
530 use the slow scheme. Because this is tentative, we can't call
531       addInstr (that is, commit to) any instructions until we've
532 handled all the arguments. So park the resulting instructions
533 in a buffer and emit that if we're successful. */
534
535 /* FAST SCHEME */
536 argreg = 0;
537 if (passBBP) {
538 fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
539 argreg++;
540 }
541
542 for (i = 0; i < n_args; i++) {
543 vassert(argreg < 6);
544 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
545 fastinstrs[argreg]
546 = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
547 if (fastinstrs[argreg] == NULL)
548 goto slowscheme;
549 argreg++;
550 }
551
552 /* Looks like we're in luck. Emit the accumulated instructions and
553 move on to doing the call itself. */
554 vassert(argreg <= 6);
555 for (i = 0; i < argreg; i++)
556 addInstr(env, fastinstrs[i]);
557
558 /* Fast scheme only applies for unconditional calls. Hence: */
559 cc = Acc_ALWAYS;
560
561 goto handle_call;
562
563
564 /* SLOW SCHEME; move via temporaries */
565 slowscheme:
566 #if 0
567 if (n_args > 0) {for (i = 0; args[i]; i++) {
568 ppIRExpr(args[i]); vex_printf(" "); }
569 vex_printf("\n");}
570 #endif
571 argreg = 0;
572
573 if (passBBP) {
574 /* This is pretty stupid; better to move directly to rdi
575 after the rest of the args are done. */
576 tmpregs[argreg] = newVRegI(env);
577 addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
578 argreg++;
579 }
580
581 for (i = 0; i < n_args; i++) {
582 vassert(argreg < 6);
583 vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
584 tmpregs[argreg] = iselIntExpr_R(env, args[i]);
585 argreg++;
586 }
587
588 /* Now we can compute the condition. We can't do it earlier
589 because the argument computations could trash the condition
590 codes. Be a bit clever to handle the common case where the
591 guard is 1:Bit. */
592 cc = Acc_ALWAYS;
593 if (guard) {
594 if (guard->tag == Iex_Const
595 && guard->Iex.Const.con->tag == Ico_U1
596 && guard->Iex.Const.con->Ico.U1 == True) {
597 /* unconditional -- do nothing */
598 } else {
599 cc = iselCondCode( env, guard );
600 }
601 }
602
603 /* Move the args to their final destinations. */
604 for (i = 0; i < argreg; i++) {
605 /* None of these insns, including any spill code that might
606 be generated, may alter the condition codes. */
607 addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
608 }
609
610
611 /* Finally, the call itself. */
612 handle_call:
613 addInstr(env, AMD64Instr_Call(
614 cc,
615 Ptr_to_ULong(cee->addr),
616 n_args + (passBBP ? 1 : 0)
617 )
618 );
619 }
620
621
622 /* Given a guest-state array descriptor, an index expression and a
623 bias, generate an AMD64AMode holding the relevant guest state
624 offset. */
625
626 static
627 AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
628 IRExpr* off, Int bias )
629 {
630 HReg tmp, roff;
631 Int elemSz = sizeofIRType(descr->elemTy);
632 Int nElems = descr->nElems;
633
634 /* Throw out any cases not generated by an amd64 front end. In
635 theory there might be a day where we need to handle them -- if
636 we ever run non-amd64-guest on amd64 host. */
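
   /* (In practice the amd64 front end seems to use GetI/PutI only for the
      guest's 8-entry x87 register and tag-word arrays, with 8-byte and
      1-byte elements respectively -- hence the narrow check below.) */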
637
638 if (nElems != 8 || (elemSz != 1 && elemSz != 8))
639 vpanic("genGuestArrayOffset(amd64 host)");
640
641 /* Compute off into a reg, %off. Then return:
642
643 movq %off, %tmp
644 addq $bias, %tmp (if bias != 0)
645         andq $7, %tmp
646 ... base(%rbp, %tmp, shift) ...
647 */
648 tmp = newVRegI(env);
649 roff = iselIntExpr_R(env, off);
650 addInstr(env, mk_iMOVsd_RR(roff, tmp));
651 if (bias != 0) {
652 /* Make sure the bias is sane, in the sense that there are
653 no significant bits above bit 30 in it. */
654 vassert(-10000 < bias && bias < 10000);
655 addInstr(env,
656 AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
657 }
658 addInstr(env,
659 AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
660 vassert(elemSz == 1 || elemSz == 8);
661 return
662 AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
663 elemSz==8 ? 3 : 0);
664 }
665
666
667 /* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
668 static
669 void set_SSE_rounding_default ( ISelEnv* env )
670 {
671 /* pushq $DEFAULT_MXCSR
672 ldmxcsr 0(%rsp)
673 addq $8, %rsp
674 */
675 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
676 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
677 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
678 add_to_rsp(env, 8);
679 }
680
681 /* Mess with the FPU's rounding mode: set to the default rounding mode
682 (DEFAULT_FPUCW). */
683 static
684 void set_FPU_rounding_default ( ISelEnv* env )
685 {
686 /* movq $DEFAULT_FPUCW, -8(%rsp)
687       fldcw -8(%rsp)
688 */
689 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
690 addInstr(env, AMD64Instr_Alu64M(
691 Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
692 addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
693 }
694
695
696 /* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
697 expression denoting a value in the range 0 .. 3, indicating a round
698 mode encoded as per type IRRoundingMode. Set the SSE machinery to
699 have the same rounding.
700 */
701 static
702 void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
703 {
704 /* Note: this sequence only makes sense because DEFAULT_MXCSR has
705 both rounding bits == 0. If that wasn't the case, we couldn't
706 create a new rounding field simply by ORing the new value into
707 place. */
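
   /* (The rounding-control field of %mxcsr occupies bits 14:13, which is
      why the mode is shifted left by 13 below; DEFAULT_MXCSR has that
      field equal to zero, i.e. round-to-nearest.) */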
708
709 /* movq $3, %reg
710 andq [[mode]], %reg -- shouldn't be needed; paranoia
711 shlq $13, %reg
712 orq $DEFAULT_MXCSR, %reg
713 pushq %reg
714       ldmxcsr 0(%rsp)
715 addq $8, %rsp
716 */
717 HReg reg = newVRegI(env);
718 AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
719 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
720 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
721 iselIntExpr_RMI(env, mode), reg));
722 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
723 addInstr(env, AMD64Instr_Alu64R(
724 Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
725 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
726 addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
727 add_to_rsp(env, 8);
728 }
729
730
731 /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
732 expression denoting a value in the range 0 .. 3, indicating a round
733 mode encoded as per type IRRoundingMode. Set the x87 FPU to have
734 the same rounding.
735 */
736 static
737 void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
738 {
739 HReg rrm = iselIntExpr_R(env, mode);
740 HReg rrm2 = newVRegI(env);
741 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
742
743 /* movq %rrm, %rrm2
744 andq $3, %rrm2 -- shouldn't be needed; paranoia
745 shlq $10, %rrm2
746 orq $DEFAULT_FPUCW, %rrm2
747 movq %rrm2, -8(%rsp)
748       fldcw -8(%rsp)
749 */
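   /* (The x87 rounding-control field lives in bits 11:10 of the control
      word, hence the shift by 10; DEFAULT_FPUCW has that field zero, i.e.
      round-to-nearest, so the OR simply drops the new mode in.) */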
750 addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
751 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
752 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
753 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
754 AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
755 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
756 AMD64RI_Reg(rrm2), m8_rsp));
757 addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
758 }
759
760
761 /* Generate all-zeroes into a new vector register.
762 */
763 static HReg generate_zeroes_V128 ( ISelEnv* env )
764 {
765 HReg dst = newVRegV(env);
766 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
767 return dst;
768 }
769
770 /* Generate all-ones into a new vector register.
771 */
772 static HReg generate_ones_V128 ( ISelEnv* env )
773 {
774 HReg dst = newVRegV(env);
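   /* Comparing a register for equality against itself succeeds in every
      lane, so this sets the whole register to all-ones regardless of its
      previous contents -- the usual SSE idiom for a constant of all 1s. */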
775 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
776 return dst;
777 }
778
779
780 /* Generate !src into a new vector register. Amazing that there isn't
781 a less crappy way to do this.
782 */
783 static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
784 {
785 HReg dst = generate_ones_V128(env);
786 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
787 return dst;
788 }
789
790
791 /* Expand the given byte into a 64-bit word, by cloning each bit
792 8 times. */
793 static ULong bitmask8_to_bytemask64 ( UShort w8 )
794 {
795 vassert(w8 == (w8 & 0xFF));
796 ULong w64 = 0;
797 Int i;
798 for (i = 0; i < 8; i++) {
799 if (w8 & (1<<i))
800 w64 |= (0xFFULL << (8 * i));
801 }
802 return w64;
803 }
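
/* Example: bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL, since
   0xA5 has bits 0, 2, 5 and 7 set. */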
804
805
806 //.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
807 //.. after most non-simple FPU operations (simple = +, -, *, / and
808 //.. sqrt).
809 //..
810 //.. This could be done a lot more efficiently if needed, by loading
811 //.. zero and adding it to the value to be rounded (fldz ; faddp?).
812 //.. */
813 //.. static void roundToF64 ( ISelEnv* env, HReg reg )
814 //.. {
815 //.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
816 //.. sub_from_esp(env, 8);
817 //.. addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
818 //.. addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
819 //.. add_to_esp(env, 8);
820 //.. }
821
822
823 /*---------------------------------------------------------*/
824 /*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
825 /*---------------------------------------------------------*/
826
827 /* Select insns for an integer-typed expression, and add them to the
828 code list. Return a reg holding the result. This reg will be a
829 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
830 want to modify it, ask for a new vreg, copy it in there, and modify
831 the copy. The register allocator will do its best to map both
832 vregs to the same real register, so the copies will often disappear
833 later in the game.
834
835 This should handle expressions of 64, 32, 16 and 8-bit type. All
836 results are returned in a 64-bit register. For 32-, 16- and 8-bit
837    expressions, the upper 32/48/56 bits are arbitrary, so you should
838 mask or sign extend partial values if necessary.
839 */
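
/* For instance, selecting for an Ity_I8 expression such as Add8(t1,t2)
   yields a 64-bit vreg whose low 8 bits hold the sum; bits 63:8 are junk,
   and it is the caller's job to mask or extend them if they matter. */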
840
841 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
842 {
843 HReg r = iselIntExpr_R_wrk(env, e);
844 /* sanity checks ... */
845 # if 0
846 vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
847 # endif
848 vassert(hregClass(r) == HRcInt64);
849 vassert(hregIsVirtual(r));
850 return r;
851 }
852
853 /* DO NOT CALL THIS DIRECTLY ! */
854 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
855 {
856 /* Used for unary/binary SIMD64 ops. */
857 HWord fn = 0;
858 Bool second_is_UInt;
859
860 MatchInfo mi;
861 DECLARE_PATTERN(p_1Uto8_64to1);
862 DECLARE_PATTERN(p_LDle8_then_8Uto64);
863 DECLARE_PATTERN(p_LDle16_then_16Uto64);
864
865 IRType ty = typeOfIRExpr(env->type_env,e);
866    vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
867
868 switch (e->tag) {
869
870 /* --------- TEMP --------- */
871 case Iex_RdTmp: {
872 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
873 }
874
875 /* --------- LOAD --------- */
876 case Iex_Load: {
877 HReg dst = newVRegI(env);
878 AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
879
880 /* We can't handle big-endian loads, nor load-linked. */
881 if (e->Iex.Load.end != Iend_LE)
882 goto irreducible;
883
884 if (ty == Ity_I64) {
885 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
886 AMD64RMI_Mem(amode), dst) );
887 return dst;
888 }
889 if (ty == Ity_I32) {
890 addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
891 return dst;
892 }
893 if (ty == Ity_I16) {
894 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
895 return dst;
896 }
897 if (ty == Ity_I8) {
898 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
899 return dst;
900 }
901 break;
902 }
903
904 /* --------- BINARY OP --------- */
905 case Iex_Binop: {
906 AMD64AluOp aluOp;
907 AMD64ShiftOp shOp;
908
909 /* Pattern: Sub64(0,x) */
910 /* and: Sub32(0,x) */
911 if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
912 || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
913 HReg dst = newVRegI(env);
914 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
915 addInstr(env, mk_iMOVsd_RR(reg,dst));
916 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
917 return dst;
918 }
919
920 /* Is it an addition or logical style op? */
921 switch (e->Iex.Binop.op) {
922 case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
923 aluOp = Aalu_ADD; break;
924 case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
925 aluOp = Aalu_SUB; break;
926 case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
927 aluOp = Aalu_AND; break;
928 case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
929 aluOp = Aalu_OR; break;
930 case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
931 aluOp = Aalu_XOR; break;
932 case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
933 aluOp = Aalu_MUL; break;
934 default:
935 aluOp = Aalu_INVALID; break;
936 }
937 /* For commutative ops we assume any literal
938 values are on the second operand. */
939 if (aluOp != Aalu_INVALID) {
940 HReg dst = newVRegI(env);
941 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
942 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
943 addInstr(env, mk_iMOVsd_RR(reg,dst));
944 addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
945 return dst;
946 }
947
948 /* Perhaps a shift op? */
949 switch (e->Iex.Binop.op) {
950 case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
951 shOp = Ash_SHL; break;
952 case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
953 shOp = Ash_SHR; break;
954 case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
955 shOp = Ash_SAR; break;
956 default:
957 shOp = Ash_INVALID; break;
958 }
959 if (shOp != Ash_INVALID) {
960 HReg dst = newVRegI(env);
961
962 /* regL = the value to be shifted */
963 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
964 addInstr(env, mk_iMOVsd_RR(regL,dst));
965
966 /* Do any necessary widening for 32/16/8 bit operands */
967 switch (e->Iex.Binop.op) {
968 case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
969 break;
970 case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
971 break;
972 case Iop_Shr8:
973 addInstr(env, AMD64Instr_Alu64R(
974 Aalu_AND, AMD64RMI_Imm(0xFF), dst));
975 break;
976 case Iop_Shr16:
977 addInstr(env, AMD64Instr_Alu64R(
978 Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
979 break;
980 case Iop_Shr32:
981 addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
982 break;
983 case Iop_Sar8:
984 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
985 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
986 break;
987 case Iop_Sar16:
988 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
989 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
990 break;
991 case Iop_Sar32:
992 addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
993 break;
994 default:
995 ppIROp(e->Iex.Binop.op);
996 vassert(0);
997 }
998
999 /* Now consider the shift amount. If it's a literal, we
1000 can do a much better job than the general case. */
1001 if (e->Iex.Binop.arg2->tag == Iex_Const) {
1002 /* assert that the IR is well-typed */
1003 Int nshift;
1004 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1005 nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1006 vassert(nshift >= 0);
1007 if (nshift > 0)
1008 /* Can't allow nshift==0 since that means %cl */
1009 addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
1010 } else {
1011 /* General case; we have to force the amount into %cl. */
1012 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1013 addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
1014 addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
1015 }
1016 return dst;
1017 }
1018
1019 /* Deal with 64-bit SIMD binary ops */
1020 second_is_UInt = False;
1021 switch (e->Iex.Binop.op) {
1022 case Iop_Add8x8:
1023 fn = (HWord)h_generic_calc_Add8x8; break;
1024 case Iop_Add16x4:
1025 fn = (HWord)h_generic_calc_Add16x4; break;
1026 case Iop_Add32x2:
1027 fn = (HWord)h_generic_calc_Add32x2; break;
1028
1029 case Iop_Avg8Ux8:
1030 fn = (HWord)h_generic_calc_Avg8Ux8; break;
1031 case Iop_Avg16Ux4:
1032 fn = (HWord)h_generic_calc_Avg16Ux4; break;
1033
1034 case Iop_CmpEQ8x8:
1035 fn = (HWord)h_generic_calc_CmpEQ8x8; break;
1036 case Iop_CmpEQ16x4:
1037 fn = (HWord)h_generic_calc_CmpEQ16x4; break;
1038 case Iop_CmpEQ32x2:
1039 fn = (HWord)h_generic_calc_CmpEQ32x2; break;
1040
1041 case Iop_CmpGT8Sx8:
1042 fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
1043 case Iop_CmpGT16Sx4:
1044 fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
1045 case Iop_CmpGT32Sx2:
1046 fn = (HWord)h_generic_calc_CmpGT32Sx2; break;
1047
1048 case Iop_InterleaveHI8x8:
1049 fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
1050 case Iop_InterleaveLO8x8:
1051 fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
1052 case Iop_InterleaveHI16x4:
1053 fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
1054 case Iop_InterleaveLO16x4:
1055 fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
1056 case Iop_InterleaveHI32x2:
1057 fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
1058 case Iop_InterleaveLO32x2:
1059 fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
1060 case Iop_CatOddLanes16x4:
1061 fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
1062 case Iop_CatEvenLanes16x4:
1063 fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
1064 case Iop_Perm8x8:
1065 fn = (HWord)h_generic_calc_Perm8x8; break;
1066
1067 case Iop_Max8Ux8:
1068 fn = (HWord)h_generic_calc_Max8Ux8; break;
1069 case Iop_Max16Sx4:
1070 fn = (HWord)h_generic_calc_Max16Sx4; break;
1071 case Iop_Min8Ux8:
1072 fn = (HWord)h_generic_calc_Min8Ux8; break;
1073 case Iop_Min16Sx4:
1074 fn = (HWord)h_generic_calc_Min16Sx4; break;
1075
1076 case Iop_Mul16x4:
1077 fn = (HWord)h_generic_calc_Mul16x4; break;
1078 case Iop_Mul32x2:
1079 fn = (HWord)h_generic_calc_Mul32x2; break;
1080 case Iop_MulHi16Sx4:
1081 fn = (HWord)h_generic_calc_MulHi16Sx4; break;
1082 case Iop_MulHi16Ux4:
1083 fn = (HWord)h_generic_calc_MulHi16Ux4; break;
1084
1085 case Iop_QAdd8Sx8:
1086 fn = (HWord)h_generic_calc_QAdd8Sx8; break;
1087 case Iop_QAdd16Sx4:
1088 fn = (HWord)h_generic_calc_QAdd16Sx4; break;
1089 case Iop_QAdd8Ux8:
1090 fn = (HWord)h_generic_calc_QAdd8Ux8; break;
1091 case Iop_QAdd16Ux4:
1092 fn = (HWord)h_generic_calc_QAdd16Ux4; break;
1093
1094 case Iop_QNarrow32Sx2:
1095 fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
1096 case Iop_QNarrow16Sx4:
1097 fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
1098 case Iop_QNarrow16Ux4:
1099 fn = (HWord)h_generic_calc_QNarrow16Ux4; break;
1100
1101 case Iop_QSub8Sx8:
1102 fn = (HWord)h_generic_calc_QSub8Sx8; break;
1103 case Iop_QSub16Sx4:
1104 fn = (HWord)h_generic_calc_QSub16Sx4; break;
1105 case Iop_QSub8Ux8:
1106 fn = (HWord)h_generic_calc_QSub8Ux8; break;
1107 case Iop_QSub16Ux4:
1108 fn = (HWord)h_generic_calc_QSub16Ux4; break;
1109
1110 case Iop_Sub8x8:
1111 fn = (HWord)h_generic_calc_Sub8x8; break;
1112 case Iop_Sub16x4:
1113 fn = (HWord)h_generic_calc_Sub16x4; break;
1114 case Iop_Sub32x2:
1115 fn = (HWord)h_generic_calc_Sub32x2; break;
1116
1117 case Iop_ShlN32x2:
1118 fn = (HWord)h_generic_calc_ShlN32x2;
1119 second_is_UInt = True;
1120 break;
1121 case Iop_ShlN16x4:
1122 fn = (HWord)h_generic_calc_ShlN16x4;
1123 second_is_UInt = True;
1124 break;
1125 case Iop_ShlN8x8:
1126 fn = (HWord)h_generic_calc_ShlN8x8;
1127 second_is_UInt = True;
1128 break;
1129 case Iop_ShrN32x2:
1130 fn = (HWord)h_generic_calc_ShrN32x2;
1131 second_is_UInt = True;
1132 break;
1133 case Iop_ShrN16x4:
1134 fn = (HWord)h_generic_calc_ShrN16x4;
1135 second_is_UInt = True;
1136 break;
1137 case Iop_SarN32x2:
1138 fn = (HWord)h_generic_calc_SarN32x2;
1139 second_is_UInt = True;
1140 break;
1141 case Iop_SarN16x4:
1142 fn = (HWord)h_generic_calc_SarN16x4;
1143 second_is_UInt = True;
1144 break;
1145 case Iop_SarN8x8:
1146 fn = (HWord)h_generic_calc_SarN8x8;
1147 second_is_UInt = True;
1148 break;
1149
1150 default:
1151 fn = (HWord)0; break;
1152 }
1153 if (fn != (HWord)0) {
1154 /* Note: the following assumes all helpers are of signature
1155 ULong fn ( ULong, ULong ), and they are
1156 not marked as regparm functions.
1157 */
1158 HReg dst = newVRegI(env);
1159 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1160 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1161 if (second_is_UInt)
1162 addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
1163 addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
1164 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
1165 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
1166 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1167 return dst;
1168 }
1169
1170 /* Handle misc other ops. */
1171
1172 if (e->Iex.Binop.op == Iop_Max32U) {
1173 /* This generates a truly rotten piece of code. Just as well
1174 it doesn't happen very often. */
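         /* The trick: shifting both operands left by 32 discards any junk
            in their upper halves and lets an ordinary unsigned 64-bit
            compare act as an unsigned 32-bit compare; the conditional move
            below then picks whichever original value is larger. */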
1175 HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1176 HReg src1L = newVRegI(env);
1177 HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
1178 HReg src2L = newVRegI(env);
1179 HReg dst = newVRegI(env);
1180 addInstr(env, mk_iMOVsd_RR(src1,dst));
1181 addInstr(env, mk_iMOVsd_RR(src1,src1L));
1182 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
1183 addInstr(env, mk_iMOVsd_RR(src2,src2L));
1184 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
1185 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
1186 addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
1187 return dst;
1188 }
1189
1190 if (e->Iex.Binop.op == Iop_DivModS64to32
1191 || e->Iex.Binop.op == Iop_DivModU64to32) {
1192 /* 64 x 32 -> (32(rem),32(div)) division */
1193 /* Get the 64-bit operand into edx:eax, and the other into
1194 any old R/M. */
1195 HReg rax = hregAMD64_RAX();
1196 HReg rdx = hregAMD64_RDX();
1197 HReg dst = newVRegI(env);
1198 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
1199 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
1200 /* Compute the left operand into a reg, and then
1201 put the top half in edx and the bottom in eax. */
1202 HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1203 addInstr(env, mk_iMOVsd_RR(left64, rdx));
1204 addInstr(env, mk_iMOVsd_RR(left64, rax));
1205 addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
1206 addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
1207 addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
1208 addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
1209 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
1210 addInstr(env, mk_iMOVsd_RR(rax, dst));
1211 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
1212 return dst;
1213 }
1214
1215 if (e->Iex.Binop.op == Iop_32HLto64) {
1216 HReg hi32 = newVRegI(env);
1217 HReg lo32 = newVRegI(env);
1218 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1219 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1220 addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
1221 addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
1222 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
1223 addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
1224 addInstr(env, AMD64Instr_Alu64R(
1225 Aalu_OR, AMD64RMI_Reg(lo32), hi32));
1226 return hi32;
1227 }
1228
1229 if (e->Iex.Binop.op == Iop_16HLto32) {
1230 HReg hi16 = newVRegI(env);
1231 HReg lo16 = newVRegI(env);
1232 HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1233 HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1234 addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
1235 addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
1236 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
1237 addInstr(env, AMD64Instr_Alu64R(
1238 Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
1239 addInstr(env, AMD64Instr_Alu64R(
1240 Aalu_OR, AMD64RMI_Reg(lo16), hi16));
1241 return hi16;
1242 }
1243
1244 if (e->Iex.Binop.op == Iop_8HLto16) {
1245 HReg hi8 = newVRegI(env);
1246 HReg lo8 = newVRegI(env);
1247 HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1248 HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1249 addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
1250 addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
1251 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
1252 addInstr(env, AMD64Instr_Alu64R(
1253 Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
1254 addInstr(env, AMD64Instr_Alu64R(
1255 Aalu_OR, AMD64RMI_Reg(lo8), hi8));
1256 return hi8;
1257 }
1258
1259 if (e->Iex.Binop.op == Iop_MullS32
1260 || e->Iex.Binop.op == Iop_MullS16
1261 || e->Iex.Binop.op == Iop_MullS8
1262 || e->Iex.Binop.op == Iop_MullU32
1263 || e->Iex.Binop.op == Iop_MullU16
1264 || e->Iex.Binop.op == Iop_MullU8) {
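         /* These widening multiplies are handled by sign- or zero-extending
            both narrow operands to 64 bits (shift up, then shift back down
            arithmetically or logically) and doing an ordinary 64-bit
            multiply; the low 2N bits of that product are exactly the full
            N x N -> 2N result. */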
1265 HReg a32 = newVRegI(env);
1266 HReg b32 = newVRegI(env);
1267 HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1268 HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1269 Int shift = 0;
1270 AMD64ShiftOp shr_op = Ash_SHR;
1271 switch (e->Iex.Binop.op) {
1272 case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
1273 case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
1274 case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break;
1275 case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
1276 case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
1277 case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break;
1278 default: vassert(0);
1279 }
1280
1281 addInstr(env, mk_iMOVsd_RR(a32s, a32));
1282 addInstr(env, mk_iMOVsd_RR(b32s, b32));
1283 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
1284 addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
1285 addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
1286 addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
1287 addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
1288 return b32;
1289 }
1290
1291 if (e->Iex.Binop.op == Iop_CmpF64) {
1292 HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
1293 HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
1294 HReg dst = newVRegI(env);
1295 addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
1296 /* Mask out irrelevant parts of the result so as to conform
1297 to the CmpF64 definition. */
1298 addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
1299 return dst;
1300 }
1301
1302 if (e->Iex.Binop.op == Iop_F64toI32S
1303 || e->Iex.Binop.op == Iop_F64toI64S) {
1304 Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
1305 HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1306 HReg dst = newVRegI(env);
1307 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
1308 addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
1309 set_SSE_rounding_default(env);
1310 return dst;
1311 }
1312
1313 //.. if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
1314 //.. Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
1315 //.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
1316 //.. HReg dst = newVRegI(env);
1317 //..
1318 //.. /* Used several times ... */
1319 //.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
1320 //..
1321 //.. /* rf now holds the value to be converted, and rrm holds the
1322 //.. rounding mode value, encoded as per the IRRoundingMode
1323 //.. enum. The first thing to do is set the FPU's rounding
1324 //.. mode accordingly. */
1325 //..
1326 //.. /* Create a space for the format conversion. */
1327 //.. /* subl $4, %esp */
1328 //.. sub_from_esp(env, 4);
1329 //..
1330 //.. /* Set host rounding mode */
1331 //.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
1332 //..
1333 //.. /* gistw/l %rf, 0(%esp) */
1334 //.. addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
1335 //..
1336 //.. if (sz == 2) {
1337 //.. /* movzwl 0(%esp), %dst */
1338 //.. addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
1339 //.. } else {
1340 //.. /* movl 0(%esp), %dst */
1341 //.. vassert(sz == 4);
1342 //.. addInstr(env, X86Instr_Alu32R(
1343 //.. Xalu_MOV, X86RMI_Mem(zero_esp), dst));
1344 //.. }
1345 //..
1346 //.. /* Restore default FPU rounding. */
1347 //.. set_FPU_rounding_default( env );
1348 //..
1349 //.. /* addl $4, %esp */
1350 //.. add_to_esp(env, 4);
1351 //.. return dst;
1352 //.. }
1353 //..
1354 //.. /* C3210 flags following FPU partial remainder (fprem), both
1355 //.. IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1356 //.. if (e->Iex.Binop.op == Iop_PRemC3210F64
1357 //.. || e->Iex.Binop.op == Iop_PRem1C3210F64) {
1358 //.. HReg junk = newVRegF(env);
1359 //.. HReg dst = newVRegI(env);
1360 //.. HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
1361 //.. HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
1362 //.. addInstr(env, X86Instr_FpBinary(
1363 //.. e->Iex.Binop.op==Iop_PRemC3210F64
1364 //.. ? Xfp_PREM : Xfp_PREM1,
1365 //.. srcL,srcR,junk
1366 //.. ));
1367 //.. /* The previous pseudo-insn will have left the FPU's C3210
1368 //.. flags set correctly. So bag them. */
1369 //.. addInstr(env, X86Instr_FpStSW_AX());
1370 //.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1371 //.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1372 //.. return dst;
1373 //.. }
1374
1375 break;
1376 }
1377
1378 /* --------- UNARY OP --------- */
1379 case Iex_Unop: {
1380
1381 /* 1Uto8(64to1(expr64)) */
1382 {
1383 DEFINE_PATTERN( p_1Uto8_64to1,
1384 unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
1385 if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
1386 IRExpr* expr64 = mi.bindee[0];
1387 HReg dst = newVRegI(env);
1388 HReg src = iselIntExpr_R(env, expr64);
1389 addInstr(env, mk_iMOVsd_RR(src,dst) );
1390 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1391 AMD64RMI_Imm(1), dst));
1392 return dst;
1393 }
1394 }
1395
1396 /* 8Uto64(LDle(expr64)) */
1397 {
1398 DEFINE_PATTERN(p_LDle8_then_8Uto64,
1399 unop(Iop_8Uto64,
1400 IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1401 if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
1402 HReg dst = newVRegI(env);
1403 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1404 addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
1405 return dst;
1406 }
1407 }
1408
1409 /* 16Uto64(LDle(expr64)) */
1410 {
1411 DEFINE_PATTERN(p_LDle16_then_16Uto64,
1412 unop(Iop_16Uto64,
1413 IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1414 if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
1415 HReg dst = newVRegI(env);
1416 AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1417 addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
1418 return dst;
1419 }
1420 }
1421
1422 switch (e->Iex.Unop.op) {
1423 case Iop_32Uto64:
1424 case Iop_32Sto64: {
1425 HReg dst = newVRegI(env);
1426 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1427 addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
1428 src, dst) );
1429 return dst;
1430 }
1431 case Iop_128HIto64: {
1432 HReg rHi, rLo;
1433 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1434 return rHi; /* and abandon rLo */
1435 }
1436 case Iop_128to64: {
1437 HReg rHi, rLo;
1438 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1439 return rLo; /* and abandon rHi */
1440 }
1441 case Iop_8Uto16:
1442 case Iop_8Uto32:
1443 case Iop_8Uto64:
1444 case Iop_16Uto64:
1445 case Iop_16Uto32: {
1446 HReg dst = newVRegI(env);
1447 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1448 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
1449 || e->Iex.Unop.op==Iop_16Uto64 );
1450 UInt mask = srcIs16 ? 0xFFFF : 0xFF;
1451 addInstr(env, mk_iMOVsd_RR(src,dst) );
1452 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
1453 AMD64RMI_Imm(mask), dst));
1454 return dst;
1455 }
1456 case Iop_8Sto16:
1457 case Iop_8Sto64:
1458 case Iop_8Sto32:
1459 case Iop_16Sto32:
1460 case Iop_16Sto64: {
1461 HReg dst = newVRegI(env);
1462 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
1464 || e->Iex.Unop.op==Iop_16Sto64 );
1465 UInt amt = srcIs16 ? 48 : 56;
1466 addInstr(env, mk_iMOVsd_RR(src,dst) );
1467 addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
1468 addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
1469 return dst;
1470 }
1471 case Iop_Not8:
1472 case Iop_Not16:
1473 case Iop_Not32:
1474 case Iop_Not64: {
1475 HReg dst = newVRegI(env);
1476 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1477 addInstr(env, mk_iMOVsd_RR(src,dst) );
1478 addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
1479 return dst;
1480 }
1481 //.. case Iop_64HIto32: {
1482 //.. HReg rHi, rLo;
1483 //.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1484 //.. return rHi; /* and abandon rLo .. poor wee thing :-) */
1485 //.. }
1486 //.. case Iop_64to32: {
1487 //.. HReg rHi, rLo;
1488 //.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1489 //.. return rLo; /* similar stupid comment to the above ... */
1490 //.. }
1491 case Iop_16HIto8:
1492 case Iop_32HIto16:
1493 case Iop_64HIto32: {
1494 HReg dst = newVRegI(env);
1495 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1496 Int shift = 0;
1497 switch (e->Iex.Unop.op) {
1498 case Iop_16HIto8: shift = 8; break;
1499 case Iop_32HIto16: shift = 16; break;
1500 case Iop_64HIto32: shift = 32; break;
1501 default: vassert(0);
1502 }
1503 addInstr(env, mk_iMOVsd_RR(src,dst) );
1504 addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
1505 return dst;
1506 }
1507 case Iop_1Uto64:
1508 case Iop_1Uto32:
1509 case Iop_1Uto8: {
1510 HReg dst = newVRegI(env);
1511 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1512 addInstr(env, AMD64Instr_Set64(cond,dst));
1513 return dst;
1514 }
1515 case Iop_1Sto8:
1516 case Iop_1Sto16:
1517 case Iop_1Sto32:
1518 case Iop_1Sto64: {
1519 /* could do better than this, but for now ... */
1520 HReg dst = newVRegI(env);
1521 AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1522 addInstr(env, AMD64Instr_Set64(cond,dst));
1523 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
1524 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1525 return dst;
1526 }
1527 case Iop_Ctz64: {
1528 /* Count trailing zeroes, implemented by amd64 'bsfq' */
1529 HReg dst = newVRegI(env);
1530 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1531 addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
1532 return dst;
1533 }
1534 case Iop_Clz64: {
1535 /* Count leading zeroes. Do 'bsrq' to establish the index
1536 of the highest set bit, and subtract that value from
1537 63. */
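            /* (E.g. for input 1, bsrq returns index 0, giving 63 - 0 = 63.) */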
1538 HReg tmp = newVRegI(env);
1539 HReg dst = newVRegI(env);
1540 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1541 addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
1542 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
1543 AMD64RMI_Imm(63), dst));
1544 addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
1545 AMD64RMI_Reg(tmp), dst));
1546 return dst;
1547 }
1548
1549 case Iop_CmpwNEZ64: {
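            /* The idea: for any nonzero x, at least one of x and -x has its
               top bit set, so (x | -x) >>s 63 is all ones; for x == 0 it is
               zero.  Hence dst ends up as ~0 if the arg is nonzero, else 0. */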
1550 HReg dst = newVRegI(env);
1551 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1552 addInstr(env, mk_iMOVsd_RR(src,dst));
1553 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1554 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1555 AMD64RMI_Reg(src), dst));
1556 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1557 return dst;
1558 }
1559
1560 case Iop_CmpwNEZ32: {
1561 HReg src = newVRegI(env);
1562 HReg dst = newVRegI(env);
1563 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1564 addInstr(env, mk_iMOVsd_RR(pre,src));
1565 addInstr(env, AMD64Instr_MovxLQ(False, src, src));
1566 addInstr(env, mk_iMOVsd_RR(src,dst));
1567 addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
1568 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
1569 AMD64RMI_Reg(src), dst));
1570 addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
1571 return dst;
1572 }
1573
1574 case Iop_Left8:
1575 case Iop_Left16:
1576 case Iop_Left32:
1577 case Iop_Left64: {
1578 HReg dst = newVRegI(env);
1579 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1580 addInstr(env, mk_iMOVsd_RR(src, dst));
1581 addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
1582 addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
1583 return dst;
1584 }
1585
1586 case Iop_V128to32: {
1587 HReg dst = newVRegI(env);
1588 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1589 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
1590 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
1591 addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
1592 return dst;
1593 }
1594
1595 /* V128{HI}to64 */
1596 case Iop_V128HIto64:
1597 case Iop_V128to64: {
1598 Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
1599 HReg dst = newVRegI(env);
1600 HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1601 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
1602 AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
1603 sub_from_rsp(env, 16);
1604 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
1605 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
1606 AMD64RMI_Mem(rspN), dst ));
1607 add_to_rsp(env, 16);
1608 return dst;
1609 }
1610
1611 /* ReinterpF64asI64(e) */
1612 /* Given an IEEE754 double, produce an I64 with the same bit
1613 pattern. */
1614 case Iop_ReinterpF64asI64: {
1615 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1616 HReg dst = newVRegI(env);
1617 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1618 /* paranoia */
1619 set_SSE_rounding_default(env);
1620 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
1621 addInstr(env, AMD64Instr_Alu64R(
1622 Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
1623 return dst;
1624 }
1625
1626 /* ReinterpF32asI32(e) */
1627 /* Given an IEEE754 single, produce an I64 with the same bit
1628 pattern in the lower half. */
1629 case Iop_ReinterpF32asI32: {
1630 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1631 HReg dst = newVRegI(env);
1632 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1633 /* paranoia */
1634 set_SSE_rounding_default(env);
1635 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
1636 addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
1637 return dst;
1638 }
1639
1640 case Iop_16to8:
1641 case Iop_32to8:
1642 case Iop_64to8:
1643 case Iop_32to16:
1644 case Iop_64to16:
1645 case Iop_64to32:
1646 /* These are no-ops. */
1647 return iselIntExpr_R(env, e->Iex.Unop.arg);
1648
1649 default:
1650 break;
1651 }
1652
1653 /* Deal with unary 64-bit SIMD ops. */
1654 switch (e->Iex.Unop.op) {
1655 case Iop_CmpNEZ32x2:
1656 fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
1657 case Iop_CmpNEZ16x4:
1658 fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
1659 case Iop_CmpNEZ8x8:
1660 fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
1661 default:
1662 fn = (HWord)0; break;
1663 }
1664 if (fn != (HWord)0) {
1665 /* Note: the following assumes all helpers are of
1666 signature
1667 ULong fn ( ULong ), and they are
1668 not marked as regparm functions.
1669 */
1670 HReg dst = newVRegI(env);
1671 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1672 addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
1673 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
1674 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1675 return dst;
1676 }
1677
1678 break;
1679 }
1680
1681 /* --------- GET --------- */
1682 case Iex_Get: {
1683 if (ty == Ity_I64) {
1684 HReg dst = newVRegI(env);
1685 addInstr(env, AMD64Instr_Alu64R(
1686 Aalu_MOV,
1687 AMD64RMI_Mem(
1688 AMD64AMode_IR(e->Iex.Get.offset,
1689 hregAMD64_RBP())),
1690 dst));
1691 return dst;
1692 }
1693 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
1694 HReg dst = newVRegI(env);
1695 addInstr(env, AMD64Instr_LoadEX(
1696 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
1697 False,
1698 AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
1699 dst));
1700 return dst;
1701 }
1702 break;
1703 }
1704
1705 case Iex_GetI: {
1706 AMD64AMode* am
1707 = genGuestArrayOffset(
1708 env, e->Iex.GetI.descr,
1709 e->Iex.GetI.ix, e->Iex.GetI.bias );
1710 HReg dst = newVRegI(env);
1711 if (ty == Ity_I8) {
1712 addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
1713 return dst;
1714 }
1715 if (ty == Ity_I64) {
1716 addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
1717 return dst;
1718 }
1719 break;
1720 }
1721
1722 /* --------- CCALL --------- */
1723 case Iex_CCall: {
1724 HReg dst = newVRegI(env);
1725 vassert(ty == e->Iex.CCall.retty);
1726
1727 /* be very restrictive for now. Only 64-bit ints allowed
1728 for args, and 64 or 32 bits for return type. */
1729 if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
1730 goto irreducible;
1731
1732 /* Marshal args, do the call. */
1733 doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1734
1735 /* Move to dst, and zero out the top 32 bits if the result type is
1736 Ity_I32. Probably overkill, but still .. */
1737 if (e->Iex.CCall.retty == Ity_I64)
1738 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
1739 else
1740 addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
1741
1742 return dst;
1743 }
1744
1745 /* --------- LITERAL --------- */
1746 /* 64/32/16/8-bit literals */
1747 case Iex_Const:
1748 if (ty == Ity_I64) {
1749 HReg r = newVRegI(env);
1750 addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
1751 return r;
1752 } else {
1753 AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
1754 HReg r = newVRegI(env);
1755 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
1756 return r;
1757 }
1758
1759 /* --------- MULTIPLEX --------- */
1760 case Iex_Mux0X: {
1761 if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
1762 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
1763 HReg r8;
1764 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1765 AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
1766 HReg dst = newVRegI(env);
1767 addInstr(env, mk_iMOVsd_RR(rX,dst));
1768 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
1769 addInstr(env, AMD64Instr_Test64(0xFF, r8));
1770 addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
1771 return dst;
1772 }
1773 break;
1774 }
1775
1776 /* --------- TERNARY OP --------- */
1777 case Iex_Triop: {
1778 /* C3210 flags following FPU partial remainder (fprem), both
1779 IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1780 if (e->Iex.Triop.op == Iop_PRemC3210F64
1781 || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1782 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
1783 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
1784 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
1785 HReg dst = newVRegI(env);
1786 addInstr(env, AMD64Instr_A87Free(2));
1787
1788 /* one arg -> top of x87 stack */
1789 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
1790 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
1791
1792 /* other arg -> top of x87 stack */
1793 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
1794 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
1795
1796 switch (e->Iex.Triop.op) {
1797 case Iop_PRemC3210F64:
1798 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
1799 break;
1800 case Iop_PRem1C3210F64:
1801 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
1802 break;
1803 default:
1804 vassert(0);
1805 }
1806 /* Ignore the result, and instead make off with the FPU's
1807 C3210 flags (in the status word). */
1808 addInstr(env, AMD64Instr_A87StSW(m8_rsp));
1809 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
1810 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
1811 return dst;
1812 }
1813 break;
1814 }
1815
1816 default:
1817 break;
1818 } /* switch (e->tag) */
1819
1820 /* We get here if no pattern matched. */
1821 irreducible:
1822 ppIRExpr(e);
1823 vpanic("iselIntExpr_R(amd64): cannot reduce tree");
1824 }
1825
1826
1827 /*---------------------------------------------------------*/
1828 /*--- ISEL: Integer expression auxiliaries ---*/
1829 /*---------------------------------------------------------*/
1830
1831 /* --------------------- AMODEs --------------------- */
1832
1833 /* Return an AMode which computes the value of the specified
1834 expression, possibly also adding insns to the code list as a
1835    result.  The expression may only be a 64-bit one.
1836 */
1837
1838 static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
1839 {
1840 AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
1841 vassert(sane_AMode(am));
1842 return am;
1843 }
1844
1845 /* DO NOT CALL THIS DIRECTLY ! */
1846 static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
1847 {
1848 MatchInfo mi;
1849 DECLARE_PATTERN(p_complex);
1850 IRType ty = typeOfIRExpr(env->type_env,e);
1851 vassert(ty == Ity_I64);
1852
1853 /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
1854 /* bind0 bind1 bind2 bind3 */
1855 DEFINE_PATTERN(p_complex,
1856 binop( Iop_Add64,
1857 binop( Iop_Add64,
1858 bind(0),
1859 binop(Iop_Shl64, bind(1), bind(2))
1860 ),
1861 bind(3)
1862 )
1863 );
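   /* Worked example (a sketch): the IR tree
         Add64( Add64(t1, Shl64(t2, 0x3:I8)), 0x20:I64 )
      matches p_complex with shift == 3 and offset == 0x20, and the
      code below turns it into AMD64AMode_IRRS(0x20, r1, r2, 3), i.e.
      the amd64 addressing form 0x20(%r1,%r2,8). */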
1864 if (matchIRExpr(&mi, p_complex, e)) {
1865 IRExpr* expr1 = mi.bindee[0];
1866 IRExpr* expr2 = mi.bindee[1];
1867 IRExpr* imm8 = mi.bindee[2];
1868 IRExpr* simm32 = mi.bindee[3];
1869 if (imm8->tag == Iex_Const
1870 && imm8->Iex.Const.con->tag == Ico_U8
1871 && imm8->Iex.Const.con->Ico.U8 < 4
1872 /* imm8 is OK, now check simm32 */
1873 && simm32->tag == Iex_Const
1874 && simm32->Iex.Const.con->tag == Ico_U64
1875 && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
1876 UInt shift = imm8->Iex.Const.con->Ico.U8;
1877 UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
1878 HReg r1 = iselIntExpr_R(env, expr1);
1879 HReg r2 = iselIntExpr_R(env, expr2);
1880 vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
1881 return AMD64AMode_IRRS(offset, r1, r2, shift);
1882 }
1883 }
1884
1885 /* Add64(expr1, Shl64(expr2, imm)) */
1886 if (e->tag == Iex_Binop
1887 && e->Iex.Binop.op == Iop_Add64
1888 && e->Iex.Binop.arg2->tag == Iex_Binop
1889 && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
1890 && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
1891 && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
1892 UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1893 if (shift == 1 || shift == 2 || shift == 3) {
1894 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1895 HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
1896 return AMD64AMode_IRRS(0, r1, r2, shift);
1897 }
1898 }
1899
1900 /* Add64(expr,i) */
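   /* E.g. (a sketch) Add64(t1, 0x40:I64) becomes the amode 0x40(%r1),
      provided the constant fits in a signed 32-bit displacement. */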
1901 if (e->tag == Iex_Binop
1902 && e->Iex.Binop.op == Iop_Add64
1903 && e->Iex.Binop.arg2->tag == Iex_Const
1904 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
1905 && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
1906 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
1907 return AMD64AMode_IR(
1908 toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
1909 r1
1910 );
1911 }
1912
1913 /* Doesn't match anything in particular. Generate it into
1914 a register and use that. */
1915 {
1916 HReg r1 = iselIntExpr_R(env, e);
1917 return AMD64AMode_IR(0, r1);
1918 }
1919 }
1920
1921
1922 /* --------------------- RMIs --------------------- */
1923
1924 /* Similarly, calculate an expression into an AMD64RMI operand.  As with
1925    iselIntExpr_R, the expression can have type 64, 32, 16 or 8 bits. */
1926
1927 static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
1928 {
1929 AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
1930 /* sanity checks ... */
1931 switch (rmi->tag) {
1932 case Armi_Imm:
1933 return rmi;
1934 case Armi_Reg:
1935 vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
1936 vassert(hregIsVirtual(rmi->Armi.Reg.reg));
1937 return rmi;
1938 case Armi_Mem:
1939 vassert(sane_AMode(rmi->Armi.Mem.am));
1940 return rmi;
1941 default:
1942 vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
1943 }
1944 }
1945
1946 /* DO NOT CALL THIS DIRECTLY ! */
1947 static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
1948 {
1949 IRType ty = typeOfIRExpr(env->type_env,e);
1950 vassert(ty == Ity_I64 || ty == Ity_I32
1951 || ty == Ity_I16 || ty == Ity_I8);
1952
1953 /* special case: immediate 64/32/16/8 */
1954 if (e->tag == Iex_Const) {
1955 switch (e->Iex.Const.con->tag) {
1956 case Ico_U64:
1957 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
1958 return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
1959 }
1960 break;
1961 case Ico_U32:
1962             return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
1963          case Ico_U16:
1964             return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
1965          case Ico_U8:
1966             return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
1967 default:
1968 vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
1969 }
1970 }
1971
1972 /* special case: 64-bit GET */
1973 if (e->tag == Iex_Get && ty == Ity_I64) {
1974 return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
1975 hregAMD64_RBP()));
1976 }
1977
1978 /* special case: 64-bit load from memory */
1979 if (e->tag == Iex_Load && ty == Ity_I64
1980 && e->Iex.Load.end == Iend_LE) {
1981 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
1982 return AMD64RMI_Mem(am);
1983 }
1984
1985 /* default case: calculate into a register and return that */
1986 {
1987 HReg r = iselIntExpr_R ( env, e );
1988 return AMD64RMI_Reg(r);
1989 }
1990 }
1991
1992
1993 /* --------------------- RIs --------------------- */
1994
1995 /* Calculate an expression into an AMD64RI operand. As with
1996 iselIntExpr_R, the expression can have type 64, 32, 16 or 8
1997 bits. */
1998
1999 static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
2000 {
2001 AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
2002 /* sanity checks ... */
2003 switch (ri->tag) {
2004 case Ari_Imm:
2005 return ri;
2006 case Ari_Reg:
2007 vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
2008 vassert(hregIsVirtual(ri->Ari.Reg.reg));
2009 return ri;
2010 default:
2011 vpanic("iselIntExpr_RI: unknown amd64 RI tag");
2012 }
2013 }
2014
2015 /* DO NOT CALL THIS DIRECTLY ! */
2016 static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
2017 {
2018 IRType ty = typeOfIRExpr(env->type_env,e);
2019 vassert(ty == Ity_I64 || ty == Ity_I32
2020 || ty == Ity_I16 || ty == Ity_I8);
2021
2022 /* special case: immediate */
2023 if (e->tag == Iex_Const) {
2024 switch (e->Iex.Const.con->tag) {
2025 case Ico_U64:
2026 if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
2027 return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
2028 }
2029 break;
2030 case Ico_U32:
2031 return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
2032 case Ico_U16:
2033 return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
2034 case Ico_U8:
2035 return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
2036 default:
2037             vpanic("iselIntExpr_RI.Iex_Const(amd64)");
2038 }
2039 }
2040
2041 /* default case: calculate into a register and return that */
2042 {
2043 HReg r = iselIntExpr_R ( env, e );
2044 return AMD64RI_Reg(r);
2045 }
2046 }
2047
2048
2049 /* --------------------- RMs --------------------- */
2050
2051 /* Similarly, calculate an expression into an AMD64RM operand. As
2052 with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
2053 bits. */
2054
2055 static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
2056 {
2057 AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
2058 /* sanity checks ... */
2059 switch (rm->tag) {
2060 case Arm_Reg:
2061 vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
2062 vassert(hregIsVirtual(rm->Arm.Reg.reg));
2063 return rm;
2064 case Arm_Mem:
2065 vassert(sane_AMode(rm->Arm.Mem.am));
2066 return rm;
2067 default:
2068 vpanic("iselIntExpr_RM: unknown amd64 RM tag");
2069 }
2070 }
2071
2072 /* DO NOT CALL THIS DIRECTLY ! */
2073 static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
2074 {
2075 IRType ty = typeOfIRExpr(env->type_env,e);
2076 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
2077
2078 /* special case: 64-bit GET */
2079 if (e->tag == Iex_Get && ty == Ity_I64) {
2080 return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
2081 hregAMD64_RBP()));
2082 }
2083
2084    /* special case: load from memory -- not handled here; we simply
         fall through to the default case below. */
2085
2086 /* default case: calculate into a register and return that */
2087 {
2088 HReg r = iselIntExpr_R ( env, e );
2089 return AMD64RM_Reg(r);
2090 }
2091 }
2092
2093
2094 /* --------------------- CONDCODE --------------------- */
2095
2096 /* Generate code to evaluate a bit-typed expression, returning the
2097    condition code which would be set if the expression had
2098    notionally evaluated to 1. */
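/* For instance (a sketch), CmpLT64U(x,y) below is compiled to a cmpq of
   the y operand against the register holding x, and Acc_B is returned,
   so the caller can emit a conditional jump, set or cmov keyed on that
   condition. */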
2099
2100 static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
2101 {
2102 /* Uh, there's nothing we can sanity check here, unfortunately. */
2103 return iselCondCode_wrk(env,e);
2104 }
2105
2106 /* DO NOT CALL THIS DIRECTLY ! */
2107 static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
2108 {
2109 MatchInfo mi;
2110
2111 vassert(e);
2112 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
2113
2114 /* var */
2115 if (e->tag == Iex_RdTmp) {
2116 HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
2117 HReg dst = newVRegI(env);
2118 addInstr(env, mk_iMOVsd_RR(r64,dst));
2119 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
2120 return Acc_NZ;
2121 }
2122
2123 /* Constant 1:Bit */
2124 if (e->tag == Iex_Const) {
2125 HReg r;
2126 vassert(e->Iex.Const.con->tag == Ico_U1);
2127 vassert(e->Iex.Const.con->Ico.U1 == True
2128 || e->Iex.Const.con->Ico.U1 == False);
2129 r = newVRegI(env);
2130 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
2131 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
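      /* xor r,r always produces zero, so ZF is now set: Acc_Z is an
         always-true condition and Acc_NZ an always-false one. */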
2132 return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
2133 }
2134
2135 /* Not1(...) */
2136 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
2137 /* Generate code for the arg, and negate the test condition */
2138 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
2139 }
2140
2141 /* --- patterns rooted at: 64to1 --- */
2142
2143 /* 64to1 */
2144 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
2145 HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
2146 addInstr(env, AMD64Instr_Test64(1,reg));
2147 return Acc_NZ;
2148 }
2149
2150 /* --- patterns rooted at: CmpNEZ8 --- */
2151
2152 /* CmpNEZ8(x) */
2153 if (e->tag == Iex_Unop
2154 && e->Iex.Unop.op == Iop_CmpNEZ8) {
2155 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
2156 addInstr(env, AMD64Instr_Test64(0xFF,r));
2157 return Acc_NZ;
2158 }
2159
2160 /* --- patterns rooted at: CmpNEZ16 --- */
2161
2162 /* CmpNEZ16(x) */
2163 if (e->tag == Iex_Unop
2164 && e->Iex.Unop.op == Iop_CmpNEZ16) {
2165 HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
2166 addInstr(env, AMD64Instr_Test64(0xFFFF,r));
2167 return Acc_NZ;
2168 }
2169
2170 /* --- patterns rooted at: CmpNEZ32 --- */
2171
2172 /* CmpNEZ32(x) */
2173 if (e->tag == Iex_Unop
2174 && e->Iex.Unop.op == Iop_CmpNEZ32) {
2175 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2176 HReg tmp = newVRegI(env);
2177 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2178 addInstr(env, AMD64Instr_MovxLQ(False, r1, tmp));
2179 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
2180 return Acc_NZ;
2181 }
2182
2183 /* --- patterns rooted at: CmpNEZ64 --- */
2184
2185 /* CmpNEZ64(Or64(x,y)) */
2186 {
2187 DECLARE_PATTERN(p_CmpNEZ64_Or64);
2188 DEFINE_PATTERN(p_CmpNEZ64_Or64,
2189 unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
2190 if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
2191 HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
2192 AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
2193 HReg tmp = newVRegI(env);
2194 addInstr(env, mk_iMOVsd_RR(r0, tmp));
2195 addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
2196 return Acc_NZ;
2197 }
2198 }
2199
2200 /* CmpNEZ64(x) */
2201 if (e->tag == Iex_Unop
2202 && e->Iex.Unop.op == Iop_CmpNEZ64) {
2203 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
2204 AMD64RMI* rmi2 = AMD64RMI_Imm(0);
2205 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2206 return Acc_NZ;
2207 }
2208
2209 /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
2210
2211 /* CmpEQ8 / CmpNE8 */
2212 if (e->tag == Iex_Binop
2213 && (e->Iex.Binop.op == Iop_CmpEQ8
2214 || e->Iex.Binop.op == Iop_CmpNE8
2215 || e->Iex.Binop.op == Iop_CasCmpEQ8
2216 || e->Iex.Binop.op == Iop_CasCmpNE8)) {
2217 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2218 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2219 HReg r = newVRegI(env);
2220 addInstr(env, mk_iMOVsd_RR(r1,r));
2221 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2222 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
2223 switch (e->Iex.Binop.op) {
2224 case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
2225 case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
2226 default: vpanic("iselCondCode(amd64): CmpXX8");
2227 }
2228 }
2229
2230 /* CmpEQ16 / CmpNE16 */
2231 if (e->tag == Iex_Binop
2232 && (e->Iex.Binop.op == Iop_CmpEQ16
2233 || e->Iex.Binop.op == Iop_CmpNE16
2234 || e->Iex.Binop.op == Iop_CasCmpEQ16
2235 || e->Iex.Binop.op == Iop_CasCmpNE16)) {
2236 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2237 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2238 HReg r = newVRegI(env);
2239 addInstr(env, mk_iMOVsd_RR(r1,r));
2240 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
2241 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
2242 switch (e->Iex.Binop.op) {
2243 case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
2244 case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
2245 default: vpanic("iselCondCode(amd64): CmpXX16");
2246 }
2247 }
2248
2249 /* CmpEQ32 / CmpNE32 */
2250 if (e->tag == Iex_Binop
2251 && (e->Iex.Binop.op == Iop_CmpEQ32
2252 || e->Iex.Binop.op == Iop_CmpNE32
2253 || e->Iex.Binop.op == Iop_CasCmpEQ32
2254 || e->Iex.Binop.op == Iop_CasCmpNE32)) {
2255 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2256 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2257 HReg r = newVRegI(env);
2258 addInstr(env, mk_iMOVsd_RR(r1,r));
2259 addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
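      /* The shift below discards bits 32..63 of the xor result, so ZF
         ends up set iff the two 32-bit values were equal. */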
2260 addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
2261 switch (e->Iex.Binop.op) {
2262 case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
2263 case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
2264 default: vpanic("iselCondCode(amd64): CmpXX32");
2265 }
2266 }
2267
2268 /* Cmp*64*(x,y) */
2269 if (e->tag == Iex_Binop
2270 && (e->Iex.Binop.op == Iop_CmpEQ64
2271 || e->Iex.Binop.op == Iop_CmpNE64
2272 || e->Iex.Binop.op == Iop_CmpLT64S
2273 || e->Iex.Binop.op == Iop_CmpLT64U
2274 || e->Iex.Binop.op == Iop_CmpLE64S
2275 || e->Iex.Binop.op == Iop_CmpLE64U
2276 || e->Iex.Binop.op == Iop_CasCmpEQ64
2277 || e->Iex.Binop.op == Iop_CasCmpNE64)) {
2278 HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
2279 AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
2280 addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
2281 switch (e->Iex.Binop.op) {
2282 case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
2283 case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
2284 case Iop_CmpLT64S: return Acc_L;
2285 case Iop_CmpLT64U: return Acc_B;
2286 case Iop_CmpLE64S: return Acc_LE;
2287 case Iop_CmpLE64U: return Acc_BE;
2288 default: vpanic("iselCondCode(amd64): CmpXX64");
2289 }
2290 }
2291
2292 ppIRExpr(e);
2293 vpanic("iselCondCode(amd64)");
2294 }
2295
2296
2297 /*---------------------------------------------------------*/
2298 /*--- ISEL: Integer expressions (128 bit) ---*/
2299 /*---------------------------------------------------------*/
2300
2301 /* Compute a 128-bit value into a register pair, which is returned as
2302    the first two parameters.  As with iselIntExpr_R, both regs will
2303    be virtual, and they must not be changed by subsequent code
2304    emitted by the caller.  */
2305
2306 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2307 ISelEnv* env, IRExpr* e )
2308 {
2309 iselInt128Expr_wrk(rHi, rLo, env, e);
2310 # if 0
2311 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2312 # endif
2313 vassert(hregClass(*rHi) == HRcInt64);
2314 vassert(hregIsVirtual(*rHi));
2315 vassert(hregClass(*rLo) == HRcInt64);
2316 vassert(hregIsVirtual(*rLo));
2317 }
2318
2319 /* DO NOT CALL THIS DIRECTLY ! */
2320 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2321 ISelEnv* env, IRExpr* e )
2322 {
2323 //.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
2324 vassert(e);
2325 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2326
2327 //.. /* 64-bit literal */
2328 //.. if (e->tag == Iex_Const) {
2329 //.. ULong w64 = e->Iex.Const.con->Ico.U64;
2330 //.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
2331 //.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
2332 //.. HReg tLo = newVRegI(env);
2333 //.. HReg tHi = newVRegI(env);
2334 //.. vassert(e->Iex.Const.con->tag == Ico_U64);
2335 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
2336 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
2337 //.. *rHi = tHi;
2338 //.. *rLo = tLo;
2339 //.. return;
2340 //.. }
2341
2342 /* read 128-bit IRTemp */
2343 if (e->tag == Iex_RdTmp) {
2344 lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
2345 return;
2346 }
2347
2348 //.. /* 64-bit load */
2349 //.. if (e->tag == Iex_LDle) {
2350 //.. HReg tLo, tHi;
2351 //.. X86AMode *am0, *am4;
2352 //.. vassert(e->Iex.LDle.ty == Ity_I64);
2353 //.. tLo = newVRegI(env);
2354 //.. tHi = newVRegI(env);
2355 //.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
2356 //.. am4 = advance4(am0);
2357 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
2358 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2359 //.. *rHi = tHi;
2360 //.. *rLo = tLo;
2361 //.. return;
2362 //.. }
2363 //..
2364 //.. /* 64-bit GET */
2365 //.. if (e->tag == Iex_Get) {
2366 //.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
2367 //.. X86AMode* am4 = advance4(am);
2368 //.. HReg tLo = newVRegI(env);
2369 //.. HReg tHi = newVRegI(env);
2370 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2371 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2372 //.. *rHi = tHi;
2373 //.. *rLo = tLo;
2374 //.. return;
2375 //.. }
2376 //..
2377 //.. /* 64-bit GETI */
2378 //.. if (e->tag == Iex_GetI) {
2379 //.. X86AMode* am
2380 //.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
2381 //.. e->Iex.GetI.ix, e->Iex.GetI.bias );
2382 //.. X86AMode* am4 = advance4(am);
2383 //.. HReg tLo = newVRegI(env);
2384 //.. HReg tHi = newVRegI(env);
2385 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
2386 //.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
2387 //.. *rHi = tHi;
2388 //.. *rLo = tLo;
2389 //.. return;
2390 //.. }
2391 //..
2392 //.. /* 64-bit Mux0X */
2393 //.. if (e->tag == Iex_Mux0X) {
2394 //.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
2395 //.. HReg tLo = newVRegI(env);
2396 //.. HReg tHi = newVRegI(env);
2397 //.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
2398 //.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
2399 //.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
2400 //.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
2401 //.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2402 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
2403 //.. /* This assumes the first cmov32 doesn't trash the condition
2404 //.. codes, so they are still available for the second cmov32 */
2405 //.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
2406 //.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
2407 //.. *rHi = tHi;
2408 //.. *rLo = tLo;
2409 //.. return;
2410 //.. }
2411
2412 /* --------- BINARY ops --------- */
2413 if (e->tag == Iex_Binop) {
2414 switch (e->Iex.Binop.op) {
2415 /* 64 x 64 -> 128 multiply */
2416 case Iop_MullU64:
2417 case Iop_MullS64: {
2418             /* Get one operand into %rax, and the other into a R/M.
2419                We have to guess which operand is better off in %rax
2420                and which as the R/M. */
2421 HReg tLo = newVRegI(env);
2422 HReg tHi = newVRegI(env);
2423 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2424 AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
2425 HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
2426 addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
2427 addInstr(env, AMD64Instr_MulL(syned, rmLeft));
2428 /* Result is now in RDX:RAX. Tell the caller. */
2429 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2430 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2431 *rHi = tHi;
2432 *rLo = tLo;
2433 return;
2434 }
2435
2436 /* 128 x 64 -> (64(rem),64(div)) division */
2437 case Iop_DivModU128to64:
2438 case Iop_DivModS128to64: {
2439 /* Get the 128-bit operand into rdx:rax, and the other into
2440 any old R/M. */
2441 HReg sHi, sLo;
2442 HReg tLo = newVRegI(env);
2443 HReg tHi = newVRegI(env);
2444 Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
2445 AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
2446 iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2447 addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
2448 addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
2449 addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
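            /* div/idiv divides rdx:rax by the operand, leaving the
               quotient in rax and the remainder in rdx; copy both out. */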
2450 addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
2451 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
2452 *rHi = tHi;
2453 *rLo = tLo;
2454 return;
2455 }
2456
2457 /* 64HLto128(e1,e2) */
2458 case Iop_64HLto128:
2459 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2460 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2461 return;
2462
2463 //.. /* Or64/And64/Xor64 */
2464 //.. case Iop_Or64:
2465 //.. case Iop_And64:
2466 //.. case Iop_Xor64: {
2467 //.. HReg xLo, xHi, yLo, yHi;
2468 //.. HReg tLo = newVRegI(env);
2469 //.. HReg tHi = newVRegI(env);
2470 //.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
2471 //.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
2472 //.. : Xalu_XOR;
2473 //.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2474 //.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2475 //.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2476 //.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2477 //.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
2478 //.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
2479 //.. *rHi = tHi;
2480 //.. *rLo = tLo;
2481 //.. return;
2482 //.. }
2483 //..
2484 //.. /* Add64/Sub64 */
2485 //.. case Iop_Add64:
2486 //.. case Iop_Sub64: {
2487 //.. HReg xLo, xHi, yLo, yHi;
2488 //.. HReg tLo = newVRegI(env);
2489 //.. HReg tHi = newVRegI(env);
2490 //.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2491 //.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
2492 //.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
2493 //.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2494 //.. if (e->Iex.Binop.op==Iop_Add64) {
2495 //.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
2496 //.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
2497 //.. } else {
2498 //.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
2499 //.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
2500 //.. }
2501 //.. *rHi = tHi;
2502 //.. *rLo = tLo;
2503 //.. return;
2504 //.. }
2505 //..
2506 //.. /* 32HLto64(e1,e2) */
2507 //.. case Iop_32HLto64:
2508 //.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2509 //.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2510 //.. return;
2511 //..
2512 //.. /* 64-bit shifts */
2513 //.. case Iop_Shl64: {
2514 //.. /* We use the same ingenious scheme as gcc. Put the value
2515 //.. to be shifted into %hi:%lo, and the shift amount into
2516 //.. %cl. Then (dsts on right, a la ATT syntax):
2517 //..
2518 //.. shldl %cl, %lo, %hi -- make %hi be right for the
2519 //.. -- shift amt %cl % 32
2520 //.. shll %cl, %lo -- make %lo be right for the
2521 //.. -- shift amt %cl % 32
2522 //..
2523 //.. Now, if (shift amount % 64) is in the range 32 .. 63,
2524 //.. we have to do a fixup, which puts the result low half
2525 //.. into the result high half, and zeroes the low half:
2526 //..
2527 //.. testl $32, %ecx
2528 //..
2529 //.. cmovnz %lo, %hi
2530 //.. movl $0, %tmp -- sigh; need yet another reg
2531 //.. cmovnz %tmp, %lo
2532 //.. */
2533 //.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2534 //.. tLo = newVRegI(env);
2535 //.. tHi = newVRegI(env);
2536 //.. tTemp = newVRegI(env);
2537 //.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2538 //.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2539 //.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2540 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2541 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2542 //.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2543 //.. and those regs are legitimately modifiable. */
2544 //.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
2545 //.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
2546 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2547 //.. X86RM_Reg(hregX86_ECX())));
2548 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
2549 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2550 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
2551 //.. *rHi = tHi;
2552 //.. *rLo = tLo;
2553 //.. return;
2554 //.. }
2555 //..
2556 //.. case Iop_Shr64: {
2557 //.. /* We use the same ingenious scheme as gcc. Put the value
2558 //.. to be shifted into %hi:%lo, and the shift amount into
2559 //.. %cl. Then:
2560 //..
2561 //.. shrdl %cl, %hi, %lo -- make %lo be right for the
2562 //.. -- shift amt %cl % 32
2563 //.. shrl %cl, %hi -- make %hi be right for the
2564 //.. -- shift amt %cl % 32
2565 //..
2566 //.. Now, if (shift amount % 64) is in the range 32 .. 63,
2567 //.. we have to do a fixup, which puts the result high half
2568 //.. into the result low half, and zeroes the high half:
2569 //..
2570 //.. testl $32, %ecx
2571 //..
2572 //.. cmovnz %hi, %lo
2573 //.. movl $0, %tmp -- sigh; need yet another reg
2574 //.. cmovnz %tmp, %hi
2575 //.. */
2576 //.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
2577 //.. tLo = newVRegI(env);
2578 //.. tHi = newVRegI(env);
2579 //.. tTemp = newVRegI(env);
2580 //.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
2581 //.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
2582 //.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
2583 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2584 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2585 //.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
2586 //.. and those regs are legitimately modifiable. */
2587 //.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
2588 //.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
2589 //.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
2590 //.. X86RM_Reg(hregX86_ECX())));
2591 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
2592 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
2593 //.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
2594 //.. *rHi = tHi;
2595 //.. *rLo = tLo;
2596 //.. return;
2597 //.. }
2598 //..
2599 //.. /* F64 -> I64 */
2600 //.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
2601 //.. case. Unfortunately I see no easy way to avoid the
2602 //.. duplication. */
2603 //.. case Iop_F64toI64: {
2604 //.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
2605 //.. HReg tLo = newVRegI(env);
2606 //.. HReg tHi = newVRegI(env);
2607 //..
2608 //.. /* Used several times ... */
2609 //.. /* Careful ... this sharing is only safe because
2610 //.. zero_esp/four_esp do not hold any registers which the
2611 //.. register allocator could attempt to swizzle later. */
2612 //.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
2613 //.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
2614 //..
2615 //.. /* rf now holds the value to be converted, and rrm holds
2616 //.. the rounding mode value, encoded as per the
2617 //.. IRRoundingMode enum. The first thing to do is set the
2618 //.. FPU's rounding mode accordingly. */
2619 //..
2620 //.. /* Create a space for the format conversion. */
2621 //.. /* subl $8, %esp */
2622 //.. sub_from_esp(env, 8);
2623 //..
2624 //.. /* Set host rounding mode */
2625 //.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2626 //..
2627 //.. /* gistll %rf, 0(%esp) */
2628 //.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
2629 //..
2630 //.. /* movl 0(%esp), %dstLo */
2631 //.. /* movl 4(%esp), %dstHi */
2632 //.. addInstr(env, X86Instr_Alu32R(
2633 //.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
2634 //.. addInstr(env, X86Instr_Alu32R(
2635 //.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
2636 //..
2637 //.. /* Restore default FPU rounding. */
2638 //.. set_FPU_rounding_default( env );
2639 //..
2640 //.. /* addl $8, %esp */
2641 //.. add_to_esp(env, 8);
2642 //..
2643 //.. *rHi = tHi;
2644 //.. *rLo = tLo;
2645 //.. return;
2646 //.. }
2647 //..
2648 default:
2649 break;
2650 }
2651 } /* if (e->tag == Iex_Binop) */
2652
2653
2654 //.. /* --------- UNARY ops --------- */
2655 //.. if (e->tag == Iex_Unop) {
2656 //.. switch (e->Iex.Unop.op) {
2657 //..
2658 //.. /* 32Sto64(e) */
2659 //.. case Iop_32Sto64: {
2660 //.. HReg tLo = newVRegI(env);
2661 //.. HReg tHi = newVRegI(env);
2662 //.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2663 //.. addInstr(env, mk_iMOVsd_RR(src,tHi));
2664 //.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2665 //.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
2666 //.. *rHi = tHi;
2667 //.. *rLo = tLo;
2668 //.. return;
2669 //.. }
2670 //..
2671 //.. /* 32Uto64(e) */
2672 //.. case Iop_32Uto64: {
2673 //.. HReg tLo = newVRegI(env);
2674 //.. HReg tHi = newVRegI(env);
2675 //.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2676 //.. addInstr(env, mk_iMOVsd_RR(src,tLo));
2677 //.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
2678 //.. *rHi = tHi;
2679 //.. *rLo = tLo;
2680 //.. return;
2681 //.. }
2682
2683 //.. /* could do better than this, but for now ... */
2684 //.. case Iop_1Sto64: {
2685 //.. HReg tLo = newVRegI(env);
2686 //.. HReg tHi = newVRegI(env);
2687 //.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2688 //.. addInstr(env, X86Instr_Set32(cond,tLo));
2689 //.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
2690 //.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
2691 //.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
2692 //.. *rHi = tHi;
2693 //.. *rLo = tLo;
2694 //.. return;
2695 //.. }
2696 //..
2697 //.. /* Not64(e) */
2698 //.. case Iop_Not64: {
2699 //.. HReg tLo = newVRegI(env);
2700 //.. HReg tHi = newVRegI(env);
2701 //.. HReg sHi, sLo;
2702 //.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
2703 //.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
2704 //.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
2705 //.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
2706 //.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
2707 //.. *rHi = tHi;
2708 //.. *rLo = tLo;
2709 //.. return;
2710 //.. }
2711 //..
2712 //.. default:
2713 //.. break;
2714 //.. }
2715 //.. } /* if (e->tag == Iex_Unop) */
2716 //..
2717 //..
2718 //.. /* --------- CCALL --------- */
2719 //.. if (e->tag == Iex_CCall) {
2720 //.. HReg tLo = newVRegI(env);
2721 //.. HReg tHi = newVRegI(env);
2722 //..
2723 //.. /* Marshal args, do the call, clear stack. */
2724 //.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
2725 //..
2726 //.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
2727 //.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
2728 //.. *rHi = tHi;
2729 //.. *rLo = tLo;
2730 //.. return;
2731 //.. }
2732
2733 ppIRExpr(e);
2734 vpanic("iselInt128Expr");
2735 }
2736
2737
2738 /*---------------------------------------------------------*/
2739 /*--- ISEL: Floating point expressions (32 bit) ---*/
2740 /*---------------------------------------------------------*/
2741
2742 /* Nothing interesting here; really just wrappers for
2743 64-bit stuff. */
2744
2745 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
2746 {
2747 HReg r = iselFltExpr_wrk( env, e );
2748 # if 0
2749 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2750 # endif
2751 vassert(hregClass(r) == HRcVec128);
2752 vassert(hregIsVirtual(r));
2753 return r;
2754 }
2755
2756 /* DO NOT CALL THIS DIRECTLY */
2757 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
2758 {
2759 IRType ty = typeOfIRExpr(env->type_env,e);
2760 vassert(ty == Ity_F32);
2761
2762 if (e->tag == Iex_RdTmp) {
2763 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2764 }
2765
2766 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2767 AMD64AMode* am;
2768 HReg res = newVRegV(env);
2769 vassert(e->Iex.Load.ty == Ity_F32);
2770 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2771 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
2772 return res;
2773 }
2774
2775 if (e->tag == Iex_Binop
2776 && e->Iex.Binop.op == Iop_F64toF32) {
2777 /* Although the result is still held in a standard SSE register,
2778 we need to round it to reflect the loss of accuracy/range
2779 entailed in casting it to a 32-bit float. */
2780 HReg dst = newVRegV(env);
2781 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
2782 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
2783 addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
2784 set_SSE_rounding_default( env );
2785 return dst;
2786 }
2787
2788 if (e->tag == Iex_Get) {
2789 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2790 hregAMD64_RBP() );
2791 HReg res = newVRegV(env);
2792 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
2793 return res;
2794 }
2795
2796 if (e->tag == Iex_Unop
2797 && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
2798 /* Given an I32, produce an IEEE754 float with the same bit
2799 pattern. */
2800 HReg dst = newVRegV(env);
2801 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2802 AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
2803 addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
2804 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
2805 return dst;
2806 }
2807
2808 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
2809 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2810 HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
2811 HReg dst = newVRegV(env);
2812
2813       /* arg now holds the value to be rounded.  The first thing to do
2814          is set the FPU's rounding mode accordingly. */
2815
2816 /* Set host x87 rounding mode */
2817 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2818
2819 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
2820 addInstr(env, AMD64Instr_A87Free(1));
2821 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
2822 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2823 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
2824 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
2825
2826 /* Restore default x87 rounding. */
2827 set_FPU_rounding_default( env );
2828
2829 return dst;
2830 }
2831
2832 ppIRExpr(e);
2833 vpanic("iselFltExpr_wrk");
2834 }
2835
2836
2837 /*---------------------------------------------------------*/
2838 /*--- ISEL: Floating point expressions (64 bit) ---*/
2839 /*---------------------------------------------------------*/
2840
2841 /* Compute a 64-bit floating point value into the lower half of an xmm
2842 register, the identity of which is returned. As with
2843 iselIntExpr_R, the returned reg will be virtual, and it must not be
2844 changed by subsequent code emitted by the caller.
2845 */
2846
2847 /* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
2848
2849 Type S (1 bit) E (11 bits) F (52 bits)
2850 ---- --------- ----------- -----------
2851 signalling NaN u 2047 (max) .0uuuuu---u
2852 (with at least
2853 one 1 bit)
2854 quiet NaN u 2047 (max) .1uuuuu---u
2855
2856 negative infinity 1 2047 (max) .000000---0
2857
2858 positive infinity 0 2047 (max) .000000---0
2859
2860 negative zero 1 0 .000000---0
2861
2862 positive zero 0 0 .000000---0
2863 */
2864
2865 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2866 {
2867 HReg r = iselDblExpr_wrk( env, e );
2868 # if 0
2869 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2870 # endif
2871 vassert(hregClass(r) == HRcVec128);
2872 vassert(hregIsVirtual(r));
2873 return r;
2874 }
2875
2876 /* DO NOT CALL THIS DIRECTLY */
2877 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2878 {
2879 IRType ty = typeOfIRExpr(env->type_env,e);
2880 vassert(e);
2881 vassert(ty == Ity_F64);
2882
2883 if (e->tag == Iex_RdTmp) {
2884 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2885 }
2886
2887 if (e->tag == Iex_Const) {
2888 union { ULong u64; Double f64; } u;
2889 HReg res = newVRegV(env);
2890 HReg tmp = newVRegI(env);
2891 vassert(sizeof(u) == 8);
2892 vassert(sizeof(u.u64) == 8);
2893 vassert(sizeof(u.f64) == 8);
2894
2895 if (e->Iex.Const.con->tag == Ico_F64) {
2896 u.f64 = e->Iex.Const.con->Ico.F64;
2897 }
2898 else if (e->Iex.Const.con->tag == Ico_F64i) {
2899 u.u64 = e->Iex.Const.con->Ico.F64i;
2900 }
2901 else
2902 vpanic("iselDblExpr(amd64): const");
2903
2904 addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
2905 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
2906 addInstr(env, AMD64Instr_SseLdSt(
2907 True/*load*/, 8, res,
2908 AMD64AMode_IR(0, hregAMD64_RSP())
2909 ));
2910 add_to_rsp(env, 8);
2911 return res;
2912 }
2913
2914 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2915 AMD64AMode* am;
2916 HReg res = newVRegV(env);
2917 vassert(e->Iex.Load.ty == Ity_F64);
2918 am = iselIntExpr_AMode(env, e->Iex.Load.addr);
2919 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2920 return res;
2921 }
2922
2923 if (e->tag == Iex_Get) {
2924 AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
2925 hregAMD64_RBP() );
2926 HReg res = newVRegV(env);
2927 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2928 return res;
2929 }
2930
2931 if (e->tag == Iex_GetI) {
2932 AMD64AMode* am
2933 = genGuestArrayOffset(
2934 env, e->Iex.GetI.descr,
2935 e->Iex.GetI.ix, e->Iex.GetI.bias );
2936 HReg res = newVRegV(env);
2937 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
2938 return res;
2939 }
2940
2941 if (e->tag == Iex_Triop) {
2942 AMD64SseOp op = Asse_INVALID;
2943 switch (e->Iex.Triop.op) {
2944 case Iop_AddF64: op = Asse_ADDF; break;
2945 case Iop_SubF64: op = Asse_SUBF; break;
2946 case Iop_MulF64: op = Asse_MULF; break;
2947 case Iop_DivF64: op = Asse_DIVF; break;
2948 default: break;
2949 }
2950 if (op != Asse_INVALID) {
2951 HReg dst = newVRegV(env);
2952 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
2953 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
2954 addInstr(env, mk_vMOVsd_RR(argL, dst));
2955 /* XXXROUNDINGFIXME */
2956 /* set roundingmode here */
2957 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
2958 return dst;
2959 }
2960 }
2961
2962 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
2963 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2964 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
2965 HReg dst = newVRegV(env);
2966
2967       /* arg now holds the value to be rounded.  The first thing to do
2968          is set the FPU's rounding mode accordingly. */
2969
2970 /* Set host x87 rounding mode */
2971 set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
2972
2973 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
2974 addInstr(env, AMD64Instr_A87Free(1));
2975 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
2976 addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
2977 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
2978 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
2979
2980 /* Restore default x87 rounding. */
2981 set_FPU_rounding_default( env );
2982
2983 return dst;
2984 }
2985
2986 if (e->tag == Iex_Triop
2987 && (e->Iex.Triop.op == Iop_ScaleF64
2988 || e->Iex.Triop.op == Iop_AtanF64
2989 || e->Iex.Triop.op == Iop_Yl2xF64
2990 || e->Iex.Triop.op == Iop_Yl2xp1F64
2991 || e->Iex.Triop.op == Iop_PRemF64
2992 || e->Iex.Triop.op == Iop_PRem1F64)
2993 ) {
2994 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
2995 HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
2996 HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
2997 HReg dst = newVRegV(env);
2998 Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
2999 || e->Iex.Triop.op == Iop_PRemF64
3000 || e->Iex.Triop.op == Iop_PRem1F64);
3001 addInstr(env, AMD64Instr_A87Free(2));
3002
3003 /* one arg -> top of x87 stack */
3004 addInstr(env, AMD64Instr_SseLdSt(
3005 False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
3006 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3007
3008 /* other arg -> top of x87 stack */
3009 addInstr(env, AMD64Instr_SseLdSt(
3010 False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
3011 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3012
3013 /* do it */
3014 /* XXXROUNDINGFIXME */
3015 /* set roundingmode here */
3016 switch (e->Iex.Triop.op) {
3017 case Iop_ScaleF64:
3018 addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
3019 break;
3020 case Iop_AtanF64:
3021 addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
3022 break;
3023 case Iop_Yl2xF64:
3024 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
3025 break;
3026 case Iop_Yl2xp1F64:
3027 addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
3028 break;
3029 case Iop_PRemF64:
3030 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
3031 break;
3032 case Iop_PRem1F64:
3033 addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
3034 break;
3035 default:
3036 vassert(0);
3037 }
3038
3039 /* save result */
3040 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3041 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3042 return dst;
3043 }
3044
3045 if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
3046 HReg dst = newVRegV(env);
3047 HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
3048 set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
3049 addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
3050 set_SSE_rounding_default( env );
3051 return dst;
3052 }
3053
3054 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
3055 HReg dst = newVRegV(env);
3056 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
3057 set_SSE_rounding_default( env );
3058 addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
3059 return dst;
3060 }
3061
3062 if (e->tag == Iex_Unop
3063 && (e->Iex.Unop.op == Iop_NegF64
3064 || e->Iex.Unop.op == Iop_AbsF64)) {
3065 /* Sigh ... very rough code. Could do much better. */
3066       /* Get the 128-bit literal 00---0 10---0 into a register
3067          and xor (NegF64) or and-not (AbsF64) it with the value.  */
3068 HReg r1 = newVRegI(env);
3069 HReg dst = newVRegV(env);
3070 HReg tmp = newVRegV(env);
3071 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3072 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3073 addInstr(env, mk_vMOVsd_RR(src,tmp));
3074 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3075 addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
3076 addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
3077 addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
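      /* dst now holds a 128-bit constant with only bit 63 of the low
         64-bit lane set -- the sign-bit position of the F64 sitting in
         the low half of tmp. */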
3078
3079 if (e->Iex.Unop.op == Iop_NegF64)
3080 addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
3081 else
3082 addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
3083
3084 add_to_rsp(env, 16);
3085 return dst;
3086 }
3087
3088 if (e->tag == Iex_Binop) {
3089 A87FpOp fpop = Afp_INVALID;
3090 switch (e->Iex.Binop.op) {
3091 case Iop_SqrtF64: fpop = Afp_SQRT; break;
3092 case Iop_SinF64: fpop = Afp_SIN; break;
3093 case Iop_CosF64: fpop = Afp_COS; break;
3094 case Iop_TanF64: fpop = Afp_TAN; break;
3095 case Iop_2xm1F64: fpop = Afp_2XM1; break;
3096 default: break;
3097 }
3098 if (fpop != Afp_INVALID) {
3099 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3100 HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
3101 HReg dst = newVRegV(env);
3102 Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
3103 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
3104 addInstr(env, AMD64Instr_A87Free(nNeeded));
3105 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
3106 /* XXXROUNDINGFIXME */
3107 /* set roundingmode here */
3108 addInstr(env, AMD64Instr_A87FpOp(fpop));
3109 if (e->Iex.Binop.op==Iop_TanF64) {
3110 /* get rid of the extra 1.0 that fptan pushes */
3111 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3112 }
3113 addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
3114 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3115 return dst;
3116 }
3117 }
3118
3119 if (e->tag == Iex_Unop) {
3120 switch (e->Iex.Unop.op) {
3121 //.. case Iop_I32toF64: {
3122 //.. HReg dst = newVRegF(env);
3123 //.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
3124 //.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
3125 //.. set_FPU_rounding_default(env);
3126 //.. addInstr(env, X86Instr_FpLdStI(
3127 //.. True/*load*/, 4, dst,
3128 //.. X86AMode_IR(0, hregX86_ESP())));
3129 //.. add_to_esp(env, 4);
3130 //.. return dst;
3131 //.. }
3132 case Iop_ReinterpI64asF64: {
3133 /* Given an I64, produce an IEEE754 double with the same
3134 bit pattern. */
3135 AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
3136 HReg dst = newVRegV(env);
3137 AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
3138 /* paranoia */
3139 set_SSE_rounding_default(env);
3140 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
3141 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
3142 return dst;
3143 }
3144 case Iop_F32toF64: {
3145 HReg f32;
3146 HReg f64 = newVRegV(env);
3147 /* this shouldn't be necessary, but be paranoid ... */
3148 set_SSE_rounding_default(env);
3149 f32 = iselFltExpr(env, e->Iex.Unop.arg);
3150 addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
3151 return f64;
3152 }
3153 default:
3154 break;
3155 }
3156 }
3157
3158 /* --------- MULTIPLEX --------- */
3159 if (e->tag == Iex_Mux0X) {
3160 HReg r8, rX, r0, dst;
3161 vassert(ty == Ity_F64);
3162 vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
3163 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3164 rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
3165 r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
3166 dst = newVRegV(env);
3167 addInstr(env, mk_vMOVsd_RR(rX,dst));
3168 addInstr(env, AMD64Instr_Test64(0xFF, r8));
3169 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3170 return dst;
3171 }
3172
3173 ppIRExpr(e);
3174 vpanic("iselDblExpr_wrk");
3175 }
3176
3177
3178 /*---------------------------------------------------------*/
3179 /*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
3180 /*---------------------------------------------------------*/
3181
3182 static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
3183 {
3184 HReg r = iselVecExpr_wrk( env, e );
3185 # if 0
3186 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3187 # endif
3188 vassert(hregClass(r) == HRcVec128);
3189 vassert(hregIsVirtual(r));
3190 return r;
3191 }
3192
3193
3194 /* DO NOT CALL THIS DIRECTLY */
3195 static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
3196 {
3197 HWord fn = 0; /* address of helper fn, if required */
3198 Bool arg1isEReg = False;
3199 AMD64SseOp op = Asse_INVALID;
3200 IRType ty = typeOfIRExpr(env->type_env,e);
3201 vassert(e);
3202 vassert(ty == Ity_V128);
3203
3204 if (e->tag == Iex_RdTmp) {
3205 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3206 }
3207
3208 if (e->tag == Iex_Get) {
3209 HReg dst = newVRegV(env);
3210 addInstr(env, AMD64Instr_SseLdSt(
3211 True/*load*/,
3212 16,
3213 dst,
3214 AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
3215 )
3216 );
3217 return dst;
3218 }
3219
3220 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3221 HReg dst = newVRegV(env);
3222 AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
3223 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
3224 return dst;
3225 }
3226
3227 if (e->tag == Iex_Const) {
3228 HReg dst = newVRegV(env);
3229 vassert(e->Iex.Const.con->tag == Ico_V128);
3230 switch (e->Iex.Const.con->Ico.V128) {
3231 case 0x0000:
3232 dst = generate_zeroes_V128(env);
3233 break;
3234 case 0xFFFF:
3235 dst = generate_ones_V128(env);
3236 break;
3237 default: {
3238 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3239 /* do push_uimm64 twice, first time for the high-order half. */
3240 push_uimm64(env, bitmask8_to_bytemask64(
3241 (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
3242 ));
3243 push_uimm64(env, bitmask8_to_bytemask64(
3244 (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
3245 ));
3246 addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
3247 add_to_rsp(env, 16);
3248 break;
3249 }
3250 }
3251 return dst;
3252 }
3253
3254 if (e->tag == Iex_Unop) {
3255 switch (e->Iex.Unop.op) {
3256
3257 case Iop_NotV128: {
3258 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3259 return do_sse_NotV128(env, arg);
3260 }
3261
3262 case Iop_CmpNEZ64x2: {
3263 /* We can use SSE2 instructions for this. */
3264 /* Ideally, we want to do a 64Ix2 comparison against zero of
3265 the operand. Problem is no such insn exists. Solution
3266 therefore is to do a 32Ix4 comparison instead, and bitwise-
3267 negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
3268 let the not'd result of this initial comparison be a:b:c:d.
3269 What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
3270 pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
3271 giving the required result.
3272
3273 The required selection sequence is 2,3,0,1, which
3274 according to Intel's documentation means the pshufd
3275 literal value is 0xB1, that is,
3276 (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
3277 */
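         /* That is 0x80 | 0x30 | 0x00 | 0x01 = 0xB1. */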
3278 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3279 HReg tmp = generate_zeroes_V128(env);
3280 HReg dst = newVRegV(env);
3281 addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
3282 tmp = do_sse_NotV128(env, tmp);
3283 addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
3284 addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
3285 return dst;
3286 }
3287
3288 case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
3289 case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
3290 case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
3291 do_CmpNEZ_vector:
3292 {
3293 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3294 HReg tmp = newVRegV(env);
3295 HReg zero = generate_zeroes_V128(env);
3296 HReg dst;
3297 addInstr(env, mk_vMOVsd_RR(arg, tmp));
3298 addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
3299 dst = do_sse_NotV128(env, tmp);
3300 return dst;
3301 }
3302
3303 case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
3304 case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
3305 case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
3306 do_32Fx4_unary:
3307 {
3308 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3309 HReg dst = newVRegV(env);
3310 addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
3311 return dst;
3312 }
3313
3314 //.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
3315 //.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
3316 case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
3317 do_64Fx2_unary:
3318 {
3319 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3320 HReg dst = newVRegV(env);
3321 addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
3322 return dst;
3323 }
3324
3325 case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
3326 case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
3327 case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
3328 do_32F0x4_unary:
3329 {
3330 /* A bit subtle. We have to copy the arg to the result
3331 register first, because actually doing the SSE scalar insn
3332 leaves the upper 3/4 of the destination register
3333 unchanged. Whereas the required semantics of these
3334 primops is that the upper 3/4 is simply copied in from the
3335 argument. */
3336 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3337 HReg dst = newVRegV(env);
3338 addInstr(env, mk_vMOVsd_RR(arg, dst));
3339 addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
3340 return dst;
3341 }
3342
3343 //.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
3344 //.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
3345 case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
3346 do_64F0x2_unary:
3347 {
3348 /* A bit subtle. We have to copy the arg to the result
3349 register first, because actually doing the SSE scalar insn
3350 leaves the upper half of the destination register
3351             unchanged, whereas the required semantics of these
3352 primops is that the upper half is simply copied in from the
3353 argument. */
3354 HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
3355 HReg dst = newVRegV(env);
3356 addInstr(env, mk_vMOVsd_RR(arg, dst));
3357 addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
3358 return dst;
3359 }
3360
3361 case Iop_32UtoV128: {
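               /* Store the value just below %rsp, then use a 4-byte
                  SseLdzLO to load it into the low lane of dst, zeroing the
                  remaining 96 bits. */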
3362 HReg dst = newVRegV(env);
3363 AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
3364 AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
3365 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
3366 addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
3367 return dst;
3368 }
3369
3370 case Iop_64UtoV128: {
3371 HReg dst = newVRegV(env);
3372 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3373 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
3374 addInstr(env, AMD64Instr_Push(rmi));
3375 addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
3376 add_to_rsp(env, 8);
3377 return dst;
3378 }
3379
3380 default:
3381 break;
3382 } /* switch (e->Iex.Unop.op) */
3383 } /* if (e->tag == Iex_Unop) */
3384
3385 if (e->tag == Iex_Binop) {
3386 switch (e->Iex.Binop.op) {
3387
3388 case Iop_SetV128lo64: {
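               /* Spill the vector to -16(%rsp), overwrite its low 8 bytes
                  with the integer value, then reload the whole 16 bytes. */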
3389 HReg dst = newVRegV(env);
3390 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3391 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3392 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3393 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3394 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
3395 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3396 return dst;
3397 }
3398
3399 case Iop_SetV128lo32: {
3400 HReg dst = newVRegV(env);
3401 HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
3402 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3403 AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
3404 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
3405 addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
3406 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
3407 return dst;
3408 }
3409
3410 case Iop_64HLtoV128: {
3411 AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
3412 HReg dst = newVRegV(env);
3413 /* do this via the stack (easy, convenient, etc) */
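               /* arg1 is the high 64 bits and arg2 the low 64 bits.
                  Pushing arg1 first leaves the low half at 0(%rsp) and the
                  high half at 8(%rsp), so the 16-byte load places each
                  half in the correct lane. */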
3414 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
3415 addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
3416 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
3417 add_to_rsp(env, 16);
3418 return dst;
3419 }
3420
3421 case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
3422 case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
3423 case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
3424 case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
3425 case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
3426 case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
3427 case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
3428 case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
3429 case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
3430 case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
3431 do_32Fx4:
3432 {
3433 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3434 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3435 HReg dst = newVRegV(env);
3436 addInstr(env, mk_vMOVsd_RR(argL, dst));
3437 addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
3438 return dst;
3439 }
3440
3441 case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
3442 case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
3443 case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
3444 case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
3445 case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
3446 case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
3447 case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
3448 case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
3449 case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
3450 case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
3451 do_64Fx2:
3452 {
3453 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3454 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3455 HReg dst = newVRegV(env);
3456 addInstr(env, mk_vMOVsd_RR(argL, dst));
3457 addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
3458 return dst;
3459 }
3460
3461 case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
3462 case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
3463 case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
3464 case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
3465 case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
3466 case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
3467 case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
3468 case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
3469 case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
3470 case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
3471 do_32F0x4: {
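               /* The scalar-low ("F0x4") forms operate only on the low
                  32-bit lane; the upper three lanes of dst are taken from
                  argL via the initial register-to-register copy. */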
3472 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3473 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3474 HReg dst = newVRegV(env);
3475 addInstr(env, mk_vMOVsd_RR(argL, dst));
3476 addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
3477 return dst;
3478 }
3479
3480 case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
3481 case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
3482 case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
3483 case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
3484 case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
3485 case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
3486 case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
3487 case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
3488 case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
3489 case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
3490 do_64F0x2: {
3491 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3492 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3493 HReg dst = newVRegV(env);
3494 addInstr(env, mk_vMOVsd_RR(argL, dst));
3495 addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
3496 return dst;
3497 }
3498
3499 case Iop_QNarrow32Sx4:
3500 op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
3501 case Iop_QNarrow16Sx8:
3502 op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
3503 case Iop_QNarrow16Ux8:
3504 op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
3505
3506 case Iop_InterleaveHI8x16:
3507 op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
3508 case Iop_InterleaveHI16x8:
3509 op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
3510 case Iop_InterleaveHI32x4:
3511 op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
3512 case Iop_InterleaveHI64x2:
3513 op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
3514
3515 case Iop_InterleaveLO8x16:
3516 op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
3517 case Iop_InterleaveLO16x8:
3518 op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
3519 case Iop_InterleaveLO32x4:
3520 op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
3521 case Iop_InterleaveLO64x2:
3522 op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
3523
3524 case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
3525 case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
3526 case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
3527 case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
3528 case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
3529 case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
3530 case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
3531 case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
3532 case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
3533 case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
3534 case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
3535 case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
3536 case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
3537 case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
3538 case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
3539 case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
3540 case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
3541 case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
3542 case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
3543 case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
3544 case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
3545 case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
3546 case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
3547 case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
3548 case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
3549 case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
3550 case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
3551 case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
3552 case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
3553 case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
3554 case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
3555 case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
3556 case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
3557 case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
3558 do_SseReRg: {
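               /* Two-operand SSE instructions overwrite their G
                  (destination) operand, so one argument is first copied
                  into dst.  arg1isEReg is set for the non-commutative
                  pack/interleave cases above, making arg1 the E (source)
                  operand while dst starts out as a copy of arg2. */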
3559 HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
3560 HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
3561 HReg dst = newVRegV(env);
3562 if (arg1isEReg) {
3563 addInstr(env, mk_vMOVsd_RR(arg2, dst));
3564 addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
3565 } else {
3566 addInstr(env, mk_vMOVsd_RR(arg1, dst));
3567 addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
3568 }
3569 return dst;
3570 }
3571
3572 case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
3573 case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
3574 case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
3575 case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
3576 case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
3577 case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
3578 case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
3579 case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
3580 do_SseShift: {
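               /* SSE shift-by-register instructions take the shift amount
                  from an XMM register.  Push a 64-bit zero and then the
                  amount, so that 0(%rsp) holds the amount in its low half
                  and zero above; load those 16 bytes into ereg, copy greg
                  into dst, do the register-form shift, and finally pop the
                  16 bytes back off the stack. */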
3581 HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
3582 AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
3583 AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
3584 HReg ereg = newVRegV(env);
3585 HReg dst = newVRegV(env);
3586 addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
3587 addInstr(env, AMD64Instr_Push(rmi));
3588 addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
3589 addInstr(env, mk_vMOVsd_RR(greg, dst));
3590 addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
3591 add_to_rsp(env, 16);
3592 return dst;
3593 }
3594
3595 case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4;
3596 goto do_SseAssistedBinary;
3597 case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4;
3598 goto do_SseAssistedBinary;
3599 case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4;
3600 goto do_SseAssistedBinary;
3601 case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4;
3602 goto do_SseAssistedBinary;
3603 case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4;
3604 goto do_SseAssistedBinary;
3605 case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8;
3606 goto do_SseAssistedBinary;
3607 case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8;
3608 goto do_SseAssistedBinary;
3609 case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16;
3610 goto do_SseAssistedBinary;
3611 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
3612 goto do_SseAssistedBinary;
3613 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
3614 goto do_SseAssistedBinary;
3615 do_SseAssistedBinary: {
3616          /* Inefficient helper-call code is what we're generating
3617             here.  Oh well. */
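               /* Scheme: carve 112 bytes off the stack and 16-align a
                  pointer into that area.  The helper receives three
                  pointers in the usual SysV AMD64 argument registers:
                  %rdi -> result slot, %rsi -> copy of argL, %rdx -> copy
                  of argR.  The result is read back from 0(%r_argp) after
                  the call. */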
3618 vassert(fn != 0);
3619 HReg dst = newVRegV(env);
3620 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3621 HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
3622 HReg argp = newVRegI(env);
3623          /* subq $112, %rsp         -- make some space */
3624 sub_from_rsp(env, 112);
3625 /* leaq 48(%rsp), %r_argp -- point into it */
3626 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3627 argp));
3628 /* andq $-16, %r_argp -- 16-align the pointer */
3629 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3630 AMD64RMI_Imm( ~(UInt)15 ),
3631 argp));
3632 /* Prepare 3 arg regs:
3633 leaq 0(%r_argp), %rdi
3634 leaq 16(%r_argp), %rsi
3635 leaq 32(%r_argp), %rdx
3636 */
3637 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3638 hregAMD64_RDI()));
3639 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3640 hregAMD64_RSI()));
3641 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
3642 hregAMD64_RDX()));
3643 /* Store the two args, at (%rsi) and (%rdx):
3644 movupd %argL, 0(%rsi)
3645 movupd %argR, 0(%rdx)
3646 */
3647 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3648 AMD64AMode_IR(0, hregAMD64_RSI())));
3649 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
3650 AMD64AMode_IR(0, hregAMD64_RDX())));
3651 /* call the helper */
3652 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
3653 /* fetch the result from memory, using %r_argp, which the
3654 register allocator will keep alive across the call. */
3655 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3656 AMD64AMode_IR(0, argp)));
3657 /* and finally, clear the space */
3658 add_to_rsp(env, 112);
3659 return dst;
3660 }
3661
3662 case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
3663 goto do_SseAssistedVectorAndScalar;
3664 case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
3665 goto do_SseAssistedVectorAndScalar;
3666 do_SseAssistedVectorAndScalar: {
3667          /* Inefficient helper-call code is what we're generating
3668             here.  Oh well. */
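               /* Same scheme as do_SseAssistedBinary, except the second
                  argument is a scalar and is passed directly in %rdx
                  rather than through memory. */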
3669 vassert(fn != 0);
3670 HReg dst = newVRegV(env);
3671 HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
3672 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3673 HReg argp = newVRegI(env);
3674          /* subq $112, %rsp         -- make some space */
3675 sub_from_rsp(env, 112);
3676 /* leaq 48(%rsp), %r_argp -- point into it */
3677 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
3678 argp));
3679 /* andq $-16, %r_argp -- 16-align the pointer */
3680 addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
3681 AMD64RMI_Imm( ~(UInt)15 ),
3682 argp));
3683          /* Prepare the 2 pointer args (result at %rdi, vector arg at %rsi):
3684 leaq 0(%r_argp), %rdi
3685 leaq 16(%r_argp), %rsi
3686 */
3687 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
3688 hregAMD64_RDI()));
3689 addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
3690 hregAMD64_RSI()));
3691 /* Store the vector arg, at (%rsi):
3692 movupd %argL, 0(%rsi)
3693 */
3694 addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
3695 AMD64AMode_IR(0, hregAMD64_RSI())));
3696 /* And get the scalar value into rdx */
3697 addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));
3698
3699 /* call the helper */
3700 addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
3701 /* fetch the result from memory, using %r_argp, which the
3702 register allocator will keep alive across the call. */
3703 addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
3704 AMD64AMode_IR(0, argp)));
3705 /* and finally, clear the space */
3706 add_to_rsp(env, 112);
3707 return dst;
3708 }
3709
3710 default:
3711 break;
3712 } /* switch (e->Iex.Binop.op) */
3713 } /* if (e->tag == Iex_Binop) */
3714
3715 if (e->tag == Iex_Mux0X) {
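            /* cond is evaluated into r8.  dst starts as a copy of exprX;
               if the tested low byte of cond is zero, the SSE conditional
               move overwrites dst with expr0, giving Mux0X semantics. */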
3716 HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
3717 HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
3718 HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
3719 HReg dst = newVRegV(env);
3720 addInstr(env, mk_vMOVsd_RR(rX,dst));
3721 addInstr(env, AMD64Instr_Test64(0xFF, r8));
3722 addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
3723 return dst;
3724 }
3725
3726 //vec_fail:
3727 vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
3728 LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
3729 ppIRExpr(e);
3730 vpanic("iselVecExpr_wrk");
3731 }
3732
3733
3734 /*---------------------------------------------------------*/
3735 /*--- ISEL: Statements ---*/
3736 /*---------------------------------------------------------*/
3737
3738 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3739 {
3740 if (vex_traceflags & VEX_TRACE_VCODE) {
3741 vex_printf("\n-- ");
3742 ppIRStmt(stmt);
3743 vex_printf("\n");
3744 }
3745
3746 switch (stmt->tag) {
3747
3748 /* --------- STORE --------- */
3749 case Ist_Store: {
3750 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3751 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3752 IREndness end = stmt->Ist.Store.end;
3753
3754 if (tya != Ity_I64 || end != Iend_LE)
3755 goto stmt_fail;
3756
3757 if (tyd == Ity_I64) {
3758 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3759 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
3760 addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
3761 return;
3762 }
3763 if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
3764 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3765 HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
3766 addInstr(env, AMD64Instr_Store(
3767 toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
3768 r,am));
3769 return;
3770 }
3771 if (tyd == Ity_F64) {
3772 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3773 HReg r = iselDblExpr(env, stmt->Ist.Store.data);
3774 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
3775 return;
3776 }
3777 if (tyd == Ity_F32) {
3778 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3779 HReg r = iselFltExpr(env, stmt->Ist.Store.data);
3780 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
3781 return;
3782 }
3783 if (tyd == Ity_V128) {
3784 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
3785 HReg r = iselVecExpr(env, stmt->Ist.Store.data);
3786 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
3787 return;
3788 }
3789 break;
3790 }
3791
3792 /* --------- PUT --------- */
3793 case Ist_Put: {
3794 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3795 if (ty == Ity_I64) {
3796 /* We're going to write to memory, so compute the RHS into an
3797 AMD64RI. */
3798 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
3799 addInstr(env,
3800 AMD64Instr_Alu64M(
3801 Aalu_MOV,
3802 ri,
3803 AMD64AMode_IR(stmt->Ist.Put.offset,
3804 hregAMD64_RBP())
3805 ));
3806 return;
3807 }
3808 if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
3809 HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
3810 addInstr(env, AMD64Instr_Store(
3811 toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
3812 r,
3813 AMD64AMode_IR(stmt->Ist.Put.offset,
3814 hregAMD64_RBP())));
3815 return;
3816 }
3817 if (ty == Ity_V128) {
3818 HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
3819 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
3820 hregAMD64_RBP());
3821 addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
3822 return;
3823 }
3824 if (ty == Ity_F32) {
3825 HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
3826 AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
3827 set_SSE_rounding_default(env); /* paranoia */
3828 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
3829 return;
3830 }
3831 if (ty == Ity_F64) {
3832 HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
3833 AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
3834 hregAMD64_RBP() );
3835 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
3836 return;
3837 }
3838 break;
3839 }
3840
3841 /* --------- Indexed PUT --------- */
3842 case Ist_PutI: {
3843 AMD64AMode* am
3844 = genGuestArrayOffset(
3845 env, stmt->Ist.PutI.descr,
3846 stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
3847
3848 IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
3849 if (ty == Ity_F64) {
3850 HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
3851 addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
3852 return;
3853 }
3854 if (ty == Ity_I8) {
3855 HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
3856 addInstr(env, AMD64Instr_Store( 1, r, am ));
3857 return;
3858 }
3859 if (ty == Ity_I64) {
3860 AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
3861 addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
3862 return;
3863 }
3864 break;
3865 }
3866
3867 /* --------- TMP --------- */
3868 case Ist_WrTmp: {
3869 IRTemp tmp = stmt->Ist.WrTmp.tmp;
3870 IRType ty = typeOfIRTemp(env->type_env, tmp);
3871
3872          /* Optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
3873             compute it into an AMode and then use LEA.  This usually
3874             produces fewer instructions, often because (for
3875             Memcheck-created IR) we get t = address-expression, with t
3876             later used twice; doing this naturally turns the address
3877             expression back into an AMD64 amode. */
3878 if (ty == Ity_I64
3879 && stmt->Ist.WrTmp.data->tag == Iex_Binop
3880 && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
3881 AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
3882 HReg dst = lookupIRTemp(env, tmp);
3883 if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
3884                /* iselIntExpr_AMode gave up and just computed the
3885                   value into a register.  Emit a normal reg-reg move
3886                   so reg-alloc can coalesce it away in the usual way. */
3887 HReg src = am->Aam.IR.reg;
3888 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
3889 } else {
3890 addInstr(env, AMD64Instr_Lea64(am,dst));
3891 }
3892 return;
3893 }
3894
3895 if (ty == Ity_I64 || ty == Ity_I32
3896 || ty == Ity_I16 || ty == Ity_I8) {
3897 AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
3898 HReg dst = lookupIRTemp(env, tmp);
3899 addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
3900 return;
3901 }
3902 if (ty == Ity_I128) {
3903 HReg rHi, rLo, dstHi, dstLo;
3904 iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
3905 lookupIRTemp128( &dstHi, &dstLo, env, tmp);
3906 addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
3907 addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
3908 return;
3909 }
3910 if (ty == Ity_I1) {
3911 AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
3912 HReg dst = lookupIRTemp(env, tmp);
3913 addInstr(env, AMD64Instr_Set64(cond, dst));
3914 return;
3915 }
3916 if (ty == Ity_F64) {
3917 HReg dst = lookupIRTemp(env, tmp);
3918 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3919 addInstr(env, mk_vMOVsd_RR(src, dst));
3920 return;
3921 }
3922 if (ty == Ity_F32) {
3923 HReg dst = lookupIRTemp(env, tmp);
3924 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3925 addInstr(env, mk_vMOVsd_RR(src, dst));
3926 return;
3927 }
3928 if (ty == Ity_V128) {
3929 HReg dst = lookupIRTemp(env, tmp);
3930 HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
3931 addInstr(env, mk_vMOVsd_RR(src, dst));
3932 return;
3933 }
3934 break;
3935 }
3936
3937 /* --------- Call to DIRTY helper --------- */
3938 case Ist_Dirty: {
3939 IRType retty;
3940 IRDirty* d = stmt->Ist.Dirty.details;
3941 Bool passBBP = False;
3942
3943 if (d->nFxState == 0)
3944 vassert(!d->needsBBP);
3945
3946 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
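         /* If the helper accesses guest state (needsBBP), the guest state
            (baseblock) pointer is also passed to it; doHelperCall arranges
            that when passBBP is True. */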
3947
3948 /* Marshal args, do the call, clear stack. */
3949 doHelperCall( env, passBBP, d->guard, d->cee, d->args );
3950
3951 /* Now figure out what to do with the returned value, if any. */
3952 if (d->tmp == IRTemp_INVALID)
3953 /* No return value. Nothing to do. */
3954 return;
3955
3956 retty = typeOfIRTemp(env->type_env, d->tmp);
3957 if (retty == Ity_I64 || retty == Ity_I32
3958 || retty == Ity_I16 || retty == Ity_I8) {
3959 /* The returned value is in %rax. Park it in the register
3960 associated with tmp. */
3961 HReg dst = lookupIRTemp(env, d->tmp);
3962 addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
3963 return;
3964 }
3965 break;
3966 }
3967
3968 /* --------- MEM FENCE --------- */
3969 case Ist_MBE:
3970 switch (stmt->Ist.MBE.event) {
3971 case Imbe_Fence:
3972 addInstr(env, AMD64Instr_MFence());
3973 return;
3974 default:
3975 break;
3976 }
3977 break;
3978
3979 /* --------- ACAS --------- */
3980 case Ist_CAS:
3981 if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
3982 /* "normal" singleton CAS */
3983 UChar sz;
3984 IRCAS* cas = stmt->Ist.CAS.details;
3985 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
3986 /* get: cas->expd into %rax, and cas->data into %rbx */
3987 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
3988 HReg rData = iselIntExpr_R(env, cas->dataLo);
3989 HReg rExpd = iselIntExpr_R(env, cas->expdLo);
3990 HReg rOld = lookupIRTemp(env, cas->oldLo);
3991 vassert(cas->expdHi == NULL);
3992 vassert(cas->dataHi == NULL);
3993 addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
3994 addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
3995 addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
3996 switch (ty) {
3997 case Ity_I64: sz = 8; break;
3998 case Ity_I32: sz = 4; break;
3999 case Ity_I16: sz = 2; break;
4000 case Ity_I8: sz = 1; break;
4001 default: goto unhandled_cas;
4002 }
4003 addInstr(env, AMD64Instr_ACAS(am, sz));
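               /* cmpxchg sets ZF if the swap happened.  On failure %rax
                  holds the value actually found in memory, so copy it into
                  rOld; on success rOld already holds the expected value,
                  which equals the old value. */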
4004 addInstr(env, AMD64Instr_CMov64(
4005 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
4006 return;
4007 } else {
4008 /* double CAS */
4009 UChar sz;
4010 IRCAS* cas = stmt->Ist.CAS.details;
4011 IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
4012 /* only 32-bit and 64-bit allowed in this case */
4013 /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
4014 /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
4015 AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
4016 HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
4017 HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
4018 HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
4019 HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
4020 HReg rOldHi = lookupIRTemp(env, cas->oldHi);
4021 HReg rOldLo = lookupIRTemp(env, cas->oldLo);
4022 switch (ty) {
4023 case Ity_I64:
4024 if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
4025 goto unhandled_cas; /* we'd have to generate
4026 cmpxchg16b, but the host
4027 doesn't support that */
4028 sz = 8;
4029 break;
4030 case Ity_I32:
4031 sz = 4;
4032 break;
4033 default:
4034 goto unhandled_cas;
4035 }
4036 addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
4037 addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
4038 addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
4039 addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
4040 addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
4041 addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
4042 addInstr(env, AMD64Instr_DACAS(am, sz));
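               /* As in the singleton case: on failure ZF is clear and
                  %rdx:%rax holds the value found in memory, so copy it
                  into rOldHi:rOldLo. */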
4043 addInstr(env,
4044 AMD64Instr_CMov64(
4045 Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
4046 addInstr(env,
4047 AMD64Instr_CMov64(
4048 Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
4049 return;
4050 }
4051 unhandled_cas:
4052 break;
4053
4054 /* --------- INSTR MARK --------- */
4055 /* Doesn't generate any executable code ... */
4056 case Ist_IMark:
4057 return;
4058
4059 /* --------- ABI HINT --------- */
4060 /* These have no meaning (denotation in the IR) and so we ignore
4061 them ... if any actually made it this far. */
4062 case Ist_AbiHint:
4063 return;
4064
4065 /* --------- NO-OP --------- */
4066 case Ist_NoOp:
4067 return;
4068
4069 /* --------- EXIT --------- */
4070 case Ist_Exit: {
4071 AMD64RI* dst;
4072 AMD64CondCode cc;
4073 if (stmt->Ist.Exit.dst->tag != Ico_U64)
4074 vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
4075 dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
4076 cc = iselCondCode(env,stmt->Ist.Exit.guard);
4077 addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
4078 return;
4079 }
4080
4081 default: break;
4082 }
4083 stmt_fail:
4084 ppIRStmt(stmt);
4085 vpanic("iselStmt(amd64)");
4086 }
4087
4088
4089 /*---------------------------------------------------------*/
4090 /*--- ISEL: Basic block terminators (Nexts) ---*/
4091 /*---------------------------------------------------------*/
4092
4093 static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
4094 {
4095 AMD64RI* ri;
4096 if (vex_traceflags & VEX_TRACE_VCODE) {
4097 vex_printf("\n-- goto {");
4098 ppIRJumpKind(jk);
4099 vex_printf("} ");
4100 ppIRExpr(next);
4101 vex_printf("\n");
4102 }
4103 ri = iselIntExpr_RI(env, next);
4104 addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
4105 }
4106
4107
4108 /*---------------------------------------------------------*/
4109 /*--- Insn selector top-level ---*/
4110 /*---------------------------------------------------------*/
4111
4112 /* Translate an entire SB to amd64 code. */
4113
4114 HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host,
4115 VexArchInfo* archinfo_host,
4116 VexAbiInfo* vbi/*UNUSED*/ )
4117 {
4118 Int i, j;
4119 HReg hreg, hregHI;
4120 ISelEnv* env;
4121 UInt hwcaps_host = archinfo_host->hwcaps;
4122
4123 /* sanity ... */
4124 vassert(arch_host == VexArchAMD64);
4125 vassert(0 == (hwcaps_host
4126 & ~(VEX_HWCAPS_AMD64_SSE3
4127 | VEX_HWCAPS_AMD64_CX16
4128 | VEX_HWCAPS_AMD64_LZCNT)));
4129
4130 /* Make up an initial environment to use. */
4131 env = LibVEX_Alloc(sizeof(ISelEnv));
4132 env->vreg_ctr = 0;
4133
4134 /* Set up output code array. */
4135 env->code = newHInstrArray();
4136
4137 /* Copy BB's type env. */
4138 env->type_env = bb->tyenv;
4139
4140 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
4141 change as we go along. */
4142 env->n_vregmap = bb->tyenv->types_used;
4143 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
4144 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
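    /* vregmapHI is used only for the upper halves of Ity_I128
       temporaries; see the kind-selection loop below. */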
4145
4146 /* and finally ... */
4147 env->hwcaps = hwcaps_host;
4148
4149 /* For each IR temporary, allocate a suitably-kinded virtual
4150 register. */
4151 j = 0;
4152 for (i = 0; i < env->n_vregmap; i++) {
4153 hregHI = hreg = INVALID_HREG;
4154 switch (bb->tyenv->types[i]) {
4155 case Ity_I1:
4156 case Ity_I8:
4157 case Ity_I16:
4158 case Ity_I32:
4159 case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break;
4160 case Ity_I128: hreg = mkHReg(j++, HRcInt64, True);
4161 hregHI = mkHReg(j++, HRcInt64, True); break;
4162 case Ity_F32:
4163 case Ity_F64:
4164 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
4165 default: ppIRType(bb->tyenv->types[i]);
4166 vpanic("iselBB(amd64): IRTemp type");
4167 }
4168 env->vregmap[i] = hreg;
4169 env->vregmapHI[i] = hregHI;
4170 }
4171 env->vreg_ctr = j;
4172
4173 /* Ok, finally we can iterate over the statements. */
4174 for (i = 0; i < bb->stmts_used; i++)
4175 if (bb->stmts[i])
4176 iselStmt(env,bb->stmts[i]);
4177
4178 iselNext(env,bb->next,bb->jumpkind);
4179
4180 /* record the number of vregs we used. */
4181 env->code->n_vregs = env->vreg_ctr;
4182 return env->code;
4183 }
4184
4185
4186 /*---------------------------------------------------------------*/
4187 /*--- end host_amd64_isel.c ---*/
4188 /*---------------------------------------------------------------*/
4189