1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2004-2010 OpenWorks LLP
11 info@open-works.net
12
13 NEON support is
14 Copyright (C) 2010-2010 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
17
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
22
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
32
33 The GNU General Public License is contained in the file COPYING.
34 */
35
36 #include "libvex_basictypes.h"
37 #include "libvex_ir.h"
38 #include "libvex.h"
39 #include "ir_match.h"
40
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "host_generic_regs.h"
44 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
45 #include "host_arm_defs.h"
46
47
48 /*---------------------------------------------------------*/
49 /*--- ARMvfp control word stuff ---*/
50 /*---------------------------------------------------------*/
51
/* Vex-generated code assumes the following FPU configuration: all
   exceptions masked, round-to-nearest, non-vector mode, the NZCV
   flags cleared, and FZ (flush to zero) disabled.  As it happens,
   that configuration is exactly an FPSCR value of zero.

   Consequently fpscr should be zero whenever Vex-generated code is
   entered, and should be unchanged when it exits (or, at the very
   least, its bottom 28 bits should be zero).
*/

#define DEFAULT_FPSCR 0x0
63
64
65 /*---------------------------------------------------------*/
66 /*--- ISelEnv ---*/
67 /*---------------------------------------------------------*/
68
69 /* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
82 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
87 - The name of the vreg in which we stash a copy of the link reg, so
88 helper functions don't kill it.
89
90 - The code array, that is, the insns selected so far.
91
92 - A counter, for generating new virtual registers.
93
94 - The host hardware capabilities word. This is set at the start
95 and does not change.
96
97 Note, this is all host-independent. */
98
99 typedef
100 struct {
101 IRTypeEnv* type_env;
102
103 HReg* vregmap;
104 HReg* vregmapHI;
105 Int n_vregmap;
106
107 HReg savedLR;
108
109 HInstrArray* code;
110
111 Int vreg_ctr;
112
113 UInt hwcaps;
114 }
115 ISelEnv;
116
lookupIRTemp(ISelEnv * env,IRTemp tmp)117 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
118 {
119 vassert(tmp >= 0);
120 vassert(tmp < env->n_vregmap);
121 return env->vregmap[tmp];
122 }
123
/* Fetch the register pair recorded for a 64-bit IRTemp: the low half
   comes from env->vregmap and the high half from env->vregmapHI.
   Asserts that 'tmp' is in range and that a high-half register has
   actually been recorded for it. */
static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
{
   HReg hiHalf, loHalf;
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   hiHalf = env->vregmapHI[tmp];
   vassert(hiHalf != INVALID_HREG);
   loHalf = env->vregmap[tmp];
   *vrLO = loHalf;
   *vrHI = hiHalf;
}
132
/* Append 'instr' to the code being built in 'env'.  When the
   VEX_TRACE_VCODE trace flag is set, the instruction is also printed,
   followed by a newline. */
static void addInstr ( ISelEnv* env, ARMInstr* instr )
{
   addHInstr(env->code, instr);

   if ((vex_traceflags & VEX_TRACE_VCODE) != 0) {
      ppARMInstr(instr);
      vex_printf("\n");
   }

#  if 0
   /* Disabled debugging aid: print NEON instructions regardless of
      the trace flags. */
   switch (instr->tag) {
      case ARMin_NUnary:  case ARMin_NBinary:
      case ARMin_NUnaryS: case ARMin_NBinaryS:
      case ARMin_NDual:   case ARMin_NShift:
         ppARMInstr(instr);
         vex_printf("\n");
         break;
      default:
         break;
   }
#  endif
}
149
newVRegI(ISelEnv * env)150 static HReg newVRegI ( ISelEnv* env )
151 {
152 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
153 env->vreg_ctr++;
154 return reg;
155 }
156
newVRegD(ISelEnv * env)157 static HReg newVRegD ( ISelEnv* env )
158 {
159 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
160 env->vreg_ctr++;
161 return reg;
162 }
163
newVRegF(ISelEnv * env)164 static HReg newVRegF ( ISelEnv* env )
165 {
166 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
167 env->vreg_ctr++;
168 return reg;
169 }
170
newVRegV(ISelEnv * env)171 static HReg newVRegV ( ISelEnv* env )
172 {
173 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
174 env->vreg_ctr++;
175 return reg;
176 }
177
178 /* These are duplicated in guest_arm_toIR.c */
unop(IROp op,IRExpr * a)179 static IRExpr* unop ( IROp op, IRExpr* a )
180 {
181 return IRExpr_Unop(op, a);
182 }
183
/* Shorthand for building a binary-operation IR expression. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   IRExpr* built = IRExpr_Binop(op, a1, a2);
   return built;
}
188
bind(Int binder)189 static IRExpr* bind ( Int binder )
190 {
191 return IRExpr_Binder(binder);
192 }
193
194
195 /*---------------------------------------------------------*/
196 /*--- ISEL: Forward declarations ---*/
197 /*---------------------------------------------------------*/
198
199 /* These are organised as iselXXX and iselXXX_wrk pairs. The
200 iselXXX_wrk do the real work, but are not to be called directly.
201 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
202 checks that all returned registers are virtual. You should not
203 call the _wrk version directly.
204 */
205 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
207
208 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
210
211 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
213
214 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
215 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
216
217 static ARMRI84* iselIntExpr_RI84_wrk
218 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219 static ARMRI84* iselIntExpr_RI84
220 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
221
222 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
223 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
224
225 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
226 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
227
228 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
229 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
230
231 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
232 ISelEnv* env, IRExpr* e );
233 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
234 ISelEnv* env, IRExpr* e );
235
236 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
237 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
238
239 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
240 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
241
242 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
243 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
244
245 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
246 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
247
248 /*---------------------------------------------------------*/
249 /*--- ISEL: Misc helpers ---*/
250 /*---------------------------------------------------------*/
251
/* Rotate the 32-bit value 'x' right by 'sh' bits.  'sh' must be less
   than 32.  A rotation by zero is handled separately so that no
   shift by 32 is ever performed. */
static UInt ROR32 ( UInt x, UInt sh ) {
   /* 'sh' is unsigned, so only the upper bound needs checking. */
   vassert(sh < 32);
   return sh == 0 ? x
                  : ((x >> sh) | (x << (32 - sh)));
}
259
260 /* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261 form, and if so return the components. */
fitsIn8x4(UInt * u8,UInt * u4,UInt u)262 static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
263 {
264 UInt i;
265 for (i = 0; i < 16; i++) {
266 if (0 == (u & 0xFFFFFF00)) {
267 *u8 = u;
268 *u4 = i;
269 return True;
270 }
271 u = ROR32(u, 30);
272 }
273 vassert(i == 16);
274 return False;
275 }
276
277 /* Make a int reg-reg move. */
mk_iMOVds_RR(HReg dst,HReg src)278 static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
279 {
280 vassert(hregClass(src) == HRcInt32);
281 vassert(hregClass(dst) == HRcInt32);
282 return ARMInstr_Mov(dst, ARMRI84_R(src));
283 }
284
285 /* Set the VFP unit's rounding mode to default (round to nearest). */
set_VFP_rounding_default(ISelEnv * env)286 static void set_VFP_rounding_default ( ISelEnv* env )
287 {
288 /* mov rTmp, #DEFAULT_FPSCR
289 fmxr fpscr, rTmp
290 */
291 HReg rTmp = newVRegI(env);
292 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
293 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
294 }
295
296 /* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297 expression denoting a value in the range 0 .. 3, indicating a round
298 mode encoded as per type IRRoundingMode. Set FPSCR to have the
299 same rounding.
300 */
301 static
set_VFP_rounding_mode(ISelEnv * env,IRExpr * mode)302 void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
303 {
304 /* This isn't simple, because 'mode' carries an IR rounding
305 encoding, and we need to translate that to an ARMvfp one:
306 The IR encoding:
307 00 to nearest (the default)
308 10 to +infinity
309 01 to -infinity
310 11 to zero
311 The ARMvfp encoding:
312 00 to nearest
313 01 to +infinity
314 10 to -infinity
315 11 to zero
316 Easy enough to do; just swap the two bits.
317 */
318 HReg irrm = iselIntExpr_R(env, mode);
319 HReg tL = newVRegI(env);
320 HReg tR = newVRegI(env);
321 HReg t3 = newVRegI(env);
322 /* tL = irrm << 1;
323 tR = irrm >> 1; if we're lucky, these will issue together
324 tL &= 2;
325 tR &= 1; ditto
326 t3 = tL | tR;
327 t3 <<= 22;
328 fmxr fpscr, t3
329 */
330 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
331 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
332 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
333 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
334 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
335 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
336 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
337 }
338
339
340 /*---------------------------------------------------------*/
341 /*--- ISEL: Function call helpers ---*/
342 /*---------------------------------------------------------*/
343
344 /* Used only in doHelperCall. See big comment in doHelperCall re
345 handling of register-parameter args. This function figures out
346 whether evaluation of an expression might require use of a fixed
347 register. If in doubt return True (safe but suboptimal).
348 */
349 static
mightRequireFixedRegs(IRExpr * e)350 Bool mightRequireFixedRegs ( IRExpr* e )
351 {
352 switch (e->tag) {
353 case Iex_RdTmp: case Iex_Const: case Iex_Get:
354 return False;
355 default:
356 return True;
357 }
358 }
359
360
361 /* Do a complete function call. guard is a Ity_Bit expression
362 indicating whether or not the call happens. If guard==NULL, the
363 call is unconditional. Returns True iff it managed to handle this
364 combination of arg/return types, else returns False. */
365
366 static
doHelperCall(ISelEnv * env,Bool passBBP,IRExpr * guard,IRCallee * cee,IRExpr ** args)367 Bool doHelperCall ( ISelEnv* env,
368 Bool passBBP,
369 IRExpr* guard, IRCallee* cee, IRExpr** args )
370 {
371 ARMCondCode cc;
372 HReg argregs[ARM_N_ARGREGS];
373 HReg tmpregs[ARM_N_ARGREGS];
374 Bool go_fast;
375 Int n_args, i, nextArgReg;
376 ULong target;
377
378 vassert(ARM_N_ARGREGS == 4);
379
380 /* Marshal args for a call and do the call.
381
382 If passBBP is True, r8 (the baseblock pointer) is to be passed
383 as the first arg.
384
385 This function only deals with a tiny set of possibilities, which
386 cover all helpers in practice. The restrictions are that only
387 arguments in registers are supported, hence only ARM_N_REGPARMS
388 x 32 integer bits in total can be passed. In fact the only
389 supported arg types are I32 and I64.
390
391 Generating code which is both efficient and correct when
392 parameters are to be passed in registers is difficult, for the
393 reasons elaborated in detail in comments attached to
394 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
395 of the method described in those comments.
396
397 The problem is split into two cases: the fast scheme and the
398 slow scheme. In the fast scheme, arguments are computed
399 directly into the target (real) registers. This is only safe
400 when we can be sure that computation of each argument will not
401 trash any real registers set by computation of any other
402 argument.
403
404 In the slow scheme, all args are first computed into vregs, and
405 once they are all done, they are moved to the relevant real
406 regs. This always gives correct code, but it also gives a bunch
407 of vreg-to-rreg moves which are usually redundant but are hard
408 for the register allocator to get rid of.
409
410 To decide which scheme to use, all argument expressions are
411 first examined. If they are all so simple that it is clear they
412 will be evaluated without use of any fixed registers, use the
413 fast scheme, else use the slow scheme. Note also that only
414 unconditional calls may use the fast scheme, since having to
415 compute a condition expression could itself trash real
416 registers.
417
418 Note this requires being able to examine an expression and
419 determine whether or not evaluation of it might use a fixed
420 register. That requires knowledge of how the rest of this insn
421 selector works. Currently just the following 3 are regarded as
422 safe -- hopefully they cover the majority of arguments in
423 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
424 */
425
426 /* Note that the cee->regparms field is meaningless on ARM hosts
427 (since there is only one calling convention) and so we always
428 ignore it. */
429
430 n_args = 0;
431 for (i = 0; args[i]; i++)
432 n_args++;
433
434 argregs[0] = hregARM_R0();
435 argregs[1] = hregARM_R1();
436 argregs[2] = hregARM_R2();
437 argregs[3] = hregARM_R3();
438
439 tmpregs[0] = tmpregs[1] = tmpregs[2] =
440 tmpregs[3] = INVALID_HREG;
441
442 /* First decide which scheme (slow or fast) is to be used. First
443 assume the fast scheme, and select slow if any contraindications
444 (wow) appear. */
445
446 go_fast = True;
447
448 if (guard) {
449 if (guard->tag == Iex_Const
450 && guard->Iex.Const.con->tag == Ico_U1
451 && guard->Iex.Const.con->Ico.U1 == True) {
452 /* unconditional */
453 } else {
454 /* Not manifestly unconditional -- be conservative. */
455 go_fast = False;
456 }
457 }
458
459 if (go_fast) {
460 for (i = 0; i < n_args; i++) {
461 if (mightRequireFixedRegs(args[i])) {
462 go_fast = False;
463 break;
464 }
465 }
466 }
467 /* At this point the scheme to use has been established. Generate
468 code to get the arg values into the argument rregs. If we run
469 out of arg regs, give up. */
470
471 if (go_fast) {
472
473 /* FAST SCHEME */
474 nextArgReg = 0;
475 if (passBBP) {
476 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
477 hregARM_R8() ));
478 nextArgReg++;
479 }
480
481 for (i = 0; i < n_args; i++) {
482 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
483 if (nextArgReg >= ARM_N_ARGREGS)
484 return False; /* out of argregs */
485 if (aTy == Ity_I32) {
486 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
487 iselIntExpr_R(env, args[i]) ));
488 nextArgReg++;
489 }
490 else if (aTy == Ity_I64) {
491 /* 64-bit args must be passed in an a reg-pair of the form
492 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
493 On a little-endian host, the less significant word is
494 passed in the lower-numbered register. */
495 if (nextArgReg & 1) {
496 if (nextArgReg >= ARM_N_ARGREGS)
497 return False; /* out of argregs */
498 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
499 nextArgReg++;
500 }
501 if (nextArgReg >= ARM_N_ARGREGS)
502 return False; /* out of argregs */
503 HReg raHi, raLo;
504 iselInt64Expr(&raHi, &raLo, env, args[i]);
505 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
506 nextArgReg++;
507 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
508 nextArgReg++;
509 }
510 else
511 return False; /* unhandled arg type */
512 }
513
514 /* Fast scheme only applies for unconditional calls. Hence: */
515 cc = ARMcc_AL;
516
517 } else {
518
519 /* SLOW SCHEME; move via temporaries */
520 nextArgReg = 0;
521
522 if (passBBP) {
523 /* This is pretty stupid; better to move directly to r0
524 after the rest of the args are done. */
525 tmpregs[nextArgReg] = newVRegI(env);
526 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
527 hregARM_R8() ));
528 nextArgReg++;
529 }
530
531 for (i = 0; i < n_args; i++) {
532 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
533 if (nextArgReg >= ARM_N_ARGREGS)
534 return False; /* out of argregs */
535 if (aTy == Ity_I32) {
536 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
537 nextArgReg++;
538 }
539 else if (aTy == Ity_I64) {
540 /* Same comment applies as in the Fast-scheme case. */
541 if (nextArgReg & 1)
542 nextArgReg++;
543 if (nextArgReg + 1 >= ARM_N_ARGREGS)
544 return False; /* out of argregs */
545 HReg raHi, raLo;
546 iselInt64Expr(&raHi, &raLo, env, args[i]);
547 tmpregs[nextArgReg] = raLo;
548 nextArgReg++;
549 tmpregs[nextArgReg] = raHi;
550 nextArgReg++;
551 }
552 }
553
554 /* Now we can compute the condition. We can't do it earlier
555 because the argument computations could trash the condition
556 codes. Be a bit clever to handle the common case where the
557 guard is 1:Bit. */
558 cc = ARMcc_AL;
559 if (guard) {
560 if (guard->tag == Iex_Const
561 && guard->Iex.Const.con->tag == Ico_U1
562 && guard->Iex.Const.con->Ico.U1 == True) {
563 /* unconditional -- do nothing */
564 } else {
565 cc = iselCondCode( env, guard );
566 }
567 }
568
569 /* Move the args to their final destinations. */
570 for (i = 0; i < nextArgReg; i++) {
571 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
572 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
573 continue;
574 }
575 /* None of these insns, including any spill code that might
576 be generated, may alter the condition codes. */
577 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
578 }
579
580 }
581
582 /* Should be assured by checks above */
583 vassert(nextArgReg <= ARM_N_ARGREGS);
584
585 target = (HWord)Ptr_to_ULong(cee->addr);
586
587 /* nextArgReg doles out argument registers. Since these are
588 assigned in the order r0, r1, r2, r3, its numeric value at this
589 point, which must be between 0 and 4 inclusive, is going to be
590 equal to the number of arg regs in use for the call. Hence bake
591 that number into the call (we'll need to know it when doing
592 register allocation, to know what regs the call reads.)
593
594 There is a bit of a twist -- harmless but worth recording.
595 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
596 the first arg in r0 and the second in r3:r2, but r1 isn't used.
597 We nevertheless have nextArgReg==4 and bake that into the call
598 instruction. This will mean the register allocator wil believe
599 this insn reads r1 when in fact it doesn't. But that's
600 harmless; it just artificially extends the live range of r1
601 unnecessarily. The best fix would be to put into the
602 instruction, a bitmask indicating which of r0/1/2/3 carry live
603 values. But that's too much hassle. */
604
605 /* Finally, the call itself. */
606 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
607
608 return True; /* success */
609 }
610
611
612 /*---------------------------------------------------------*/
613 /*--- ISEL: Integer expressions (32/16/8 bit) ---*/
614 /*---------------------------------------------------------*/
615
616 /* Select insns for an integer-typed expression, and add them to the
617 code list. Return a reg holding the result. This reg will be a
618 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
619 want to modify it, ask for a new vreg, copy it in there, and modify
620 the copy. The register allocator will do its best to map both
621 vregs to the same real register, so the copies will often disappear
622 later in the game.
623
624 This should handle expressions of 32, 16 and 8-bit type. All
625 results are returned in a 32-bit register. For 16- and 8-bit
626 expressions, the upper 16/24 bits are arbitrary, so you should mask
627 or sign extend partial values if necessary.
628 */
629
630 /* --------------------- AMode1 --------------------- */
631
632 /* Return an AMode1 which computes the value of the specified
633 expression, possibly also adding insns to the code list as a
634 result. The expression may only be a 32-bit one.
635 */
636
sane_AMode1(ARMAMode1 * am)637 static Bool sane_AMode1 ( ARMAMode1* am )
638 {
639 switch (am->tag) {
640 case ARMam1_RI:
641 return
642 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
643 && (hregIsVirtual(am->ARMam1.RI.reg)
644 || am->ARMam1.RI.reg == hregARM_R8())
645 && am->ARMam1.RI.simm13 >= -4095
646 && am->ARMam1.RI.simm13 <= 4095 );
647 case ARMam1_RRS:
648 return
649 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
650 && hregIsVirtual(am->ARMam1.RRS.base)
651 && hregClass(am->ARMam1.RRS.index) == HRcInt32
652 && hregIsVirtual(am->ARMam1.RRS.index)
653 && am->ARMam1.RRS.shift >= 0
654 && am->ARMam1.RRS.shift <= 3 );
655 default:
656 vpanic("sane_AMode: unknown ARM AMode1 tag");
657 }
658 }
659
iselIntExpr_AMode1(ISelEnv * env,IRExpr * e)660 static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
661 {
662 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
663 vassert(sane_AMode1(am));
664 return am;
665 }
666
iselIntExpr_AMode1_wrk(ISelEnv * env,IRExpr * e)667 static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
668 {
669 IRType ty = typeOfIRExpr(env->type_env,e);
670 vassert(ty == Ity_I32);
671
672 /* FIXME: add RRS matching */
673
674 /* {Add32,Sub32}(expr,simm13) */
675 if (e->tag == Iex_Binop
676 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
677 && e->Iex.Binop.arg2->tag == Iex_Const
678 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
679 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
680 if (simm >= -4095 && simm <= 4095) {
681 HReg reg;
682 if (e->Iex.Binop.op == Iop_Sub32)
683 simm = -simm;
684 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
685 return ARMAMode1_RI(reg, simm);
686 }
687 }
688
689 /* Doesn't match anything in particular. Generate it into
690 a register and use that. */
691 {
692 HReg reg = iselIntExpr_R(env, e);
693 return ARMAMode1_RI(reg, 0);
694 }
695
696 }
697
698
699 /* --------------------- AMode2 --------------------- */
700
701 /* Return an AMode2 which computes the value of the specified
702 expression, possibly also adding insns to the code list as a
703 result. The expression may only be a 32-bit one.
704 */
705
sane_AMode2(ARMAMode2 * am)706 static Bool sane_AMode2 ( ARMAMode2* am )
707 {
708 switch (am->tag) {
709 case ARMam2_RI:
710 return
711 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
712 && hregIsVirtual(am->ARMam2.RI.reg)
713 && am->ARMam2.RI.simm9 >= -255
714 && am->ARMam2.RI.simm9 <= 255 );
715 case ARMam2_RR:
716 return
717 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
718 && hregIsVirtual(am->ARMam2.RR.base)
719 && hregClass(am->ARMam2.RR.index) == HRcInt32
720 && hregIsVirtual(am->ARMam2.RR.index) );
721 default:
722 vpanic("sane_AMode: unknown ARM AMode2 tag");
723 }
724 }
725
iselIntExpr_AMode2(ISelEnv * env,IRExpr * e)726 static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
727 {
728 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
729 vassert(sane_AMode2(am));
730 return am;
731 }
732
iselIntExpr_AMode2_wrk(ISelEnv * env,IRExpr * e)733 static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
734 {
735 IRType ty = typeOfIRExpr(env->type_env,e);
736 vassert(ty == Ity_I32);
737
738 /* FIXME: add RR matching */
739
740 /* {Add32,Sub32}(expr,simm8) */
741 if (e->tag == Iex_Binop
742 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
743 && e->Iex.Binop.arg2->tag == Iex_Const
744 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
745 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
746 if (simm >= -255 && simm <= 255) {
747 HReg reg;
748 if (e->Iex.Binop.op == Iop_Sub32)
749 simm = -simm;
750 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
751 return ARMAMode2_RI(reg, simm);
752 }
753 }
754
755 /* Doesn't match anything in particular. Generate it into
756 a register and use that. */
757 {
758 HReg reg = iselIntExpr_R(env, e);
759 return ARMAMode2_RI(reg, 0);
760 }
761
762 }
763
764
765 /* --------------------- AModeV --------------------- */
766
767 /* Return an AModeV which computes the value of the specified
768 expression, possibly also adding insns to the code list as a
769 result. The expression may only be a 32-bit one.
770 */
771
sane_AModeV(ARMAModeV * am)772 static Bool sane_AModeV ( ARMAModeV* am )
773 {
774 return toBool( hregClass(am->reg) == HRcInt32
775 && hregIsVirtual(am->reg)
776 && am->simm11 >= -1020 && am->simm11 <= 1020
777 && 0 == (am->simm11 & 3) );
778 }
779
iselIntExpr_AModeV(ISelEnv * env,IRExpr * e)780 static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
781 {
782 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
783 vassert(sane_AModeV(am));
784 return am;
785 }
786
iselIntExpr_AModeV_wrk(ISelEnv * env,IRExpr * e)787 static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
788 {
789 IRType ty = typeOfIRExpr(env->type_env,e);
790 vassert(ty == Ity_I32);
791
792 /* {Add32,Sub32}(expr, simm8 << 2) */
793 if (e->tag == Iex_Binop
794 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
795 && e->Iex.Binop.arg2->tag == Iex_Const
796 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
797 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
798 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
799 HReg reg;
800 if (e->Iex.Binop.op == Iop_Sub32)
801 simm = -simm;
802 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
803 return mkARMAModeV(reg, simm);
804 }
805 }
806
807 /* Doesn't match anything in particular. Generate it into
808 a register and use that. */
809 {
810 HReg reg = iselIntExpr_R(env, e);
811 return mkARMAModeV(reg, 0);
812 }
813
814 }
815
816 /* -------------------- AModeN -------------------- */
817
iselIntExpr_AModeN(ISelEnv * env,IRExpr * e)818 static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
819 {
820 return iselIntExpr_AModeN_wrk(env, e);
821 }
822
iselIntExpr_AModeN_wrk(ISelEnv * env,IRExpr * e)823 static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
824 {
825 HReg reg = iselIntExpr_R(env, e);
826 return mkARMAModeN_R(reg);
827 }
828
829
830 /* --------------------- RI84 --------------------- */
831
832 /* Select instructions to generate 'e' into a RI84. If mayInv is
833 true, then the caller will also accept an I84 form that denotes
834 'not e'. In this case didInv may not be NULL, and *didInv is set
835 to True. This complication is so as to allow generation of an RI84
836 which is suitable for use in either an AND or BIC instruction,
837 without knowing (before this call) which one.
838 */
iselIntExpr_RI84(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)839 static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
840 ISelEnv* env, IRExpr* e )
841 {
842 ARMRI84* ri;
843 if (mayInv)
844 vassert(didInv != NULL);
845 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
846 /* sanity checks ... */
847 switch (ri->tag) {
848 case ARMri84_I84:
849 return ri;
850 case ARMri84_R:
851 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
852 vassert(hregIsVirtual(ri->ARMri84.R.reg));
853 return ri;
854 default:
855 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
856 }
857 }
858
859 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI84_wrk(Bool * didInv,Bool mayInv,ISelEnv * env,IRExpr * e)860 static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
861 ISelEnv* env, IRExpr* e )
862 {
863 IRType ty = typeOfIRExpr(env->type_env,e);
864 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
865
866 if (didInv) *didInv = False;
867
868 /* special case: immediate */
869 if (e->tag == Iex_Const) {
870 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
871 switch (e->Iex.Const.con->tag) {
872 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
873 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
874 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
875 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
876 }
877 if (fitsIn8x4(&u8, &u4, u)) {
878 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
879 }
880 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
881 vassert(didInv);
882 *didInv = True;
883 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
884 }
885 /* else fail, fall through to default case */
886 }
887
888 /* default case: calculate into a register and return that */
889 {
890 HReg r = iselIntExpr_R ( env, e );
891 return ARMRI84_R(r);
892 }
893 }
894
895
896 /* --------------------- RI5 --------------------- */
897
898 /* Select instructions to generate 'e' into a RI5. */
899
iselIntExpr_RI5(ISelEnv * env,IRExpr * e)900 static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
901 {
902 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
903 /* sanity checks ... */
904 switch (ri->tag) {
905 case ARMri5_I5:
906 return ri;
907 case ARMri5_R:
908 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
909 vassert(hregIsVirtual(ri->ARMri5.R.reg));
910 return ri;
911 default:
912 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
913 }
914 }
915
916 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI5_wrk(ISelEnv * env,IRExpr * e)917 static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
918 {
919 IRType ty = typeOfIRExpr(env->type_env,e);
920 vassert(ty == Ity_I32 || ty == Ity_I8);
921
922 /* special case: immediate */
923 if (e->tag == Iex_Const) {
924 UInt u; /* both invalid */
925 switch (e->Iex.Const.con->tag) {
926 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
927 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
928 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
929 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
930 }
931 if (u >= 1 && u <= 31) {
932 return ARMRI5_I5(u);
933 }
934 /* else fail, fall through to default case */
935 }
936
937 /* default case: calculate into a register and return that */
938 {
939 HReg r = iselIntExpr_R ( env, e );
940 return ARMRI5_R(r);
941 }
942 }
943
944
945 /* ------------------- CondCode ------------------- */
946
947 /* Generate code to evaluated a bit-typed expression, returning the
948 condition code which would correspond when the expression would
949 notionally have returned 1. */
950
iselCondCode(ISelEnv * env,IRExpr * e)951 static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
952 {
953 ARMCondCode cc = iselCondCode_wrk(env,e);
954 vassert(cc != ARMcc_NV);
955 return cc;
956 }
957
iselCondCode_wrk(ISelEnv * env,IRExpr * e)958 static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
959 {
960 vassert(e);
961 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
962
963 /* var */
964 if (e->tag == Iex_RdTmp) {
965 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
966 /* CmpOrTst doesn't modify rTmp; so this is OK. */
967 ARMRI84* one = ARMRI84_I84(1,0);
968 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
969 return ARMcc_NE;
970 }
971
972 /* Not1(e) */
973 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
974 /* Generate code for the arg, and negate the test condition */
975 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
976 }
977
978 /* --- patterns rooted at: 32to1 --- */
979
980 if (e->tag == Iex_Unop
981 && e->Iex.Unop.op == Iop_32to1) {
982 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
983 ARMRI84* one = ARMRI84_I84(1,0);
984 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
985 return ARMcc_NE;
986 }
987
988 /* --- patterns rooted at: CmpNEZ8 --- */
989
990 if (e->tag == Iex_Unop
991 && e->Iex.Unop.op == Iop_CmpNEZ8) {
992 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
993 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
994 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
995 return ARMcc_NE;
996 }
997
998 /* --- patterns rooted at: CmpNEZ32 --- */
999
1000 if (e->tag == Iex_Unop
1001 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1002 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1003 ARMRI84* zero = ARMRI84_I84(0,0);
1004 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1005 return ARMcc_NE;
1006 }
1007
1008 /* --- patterns rooted at: CmpNEZ64 --- */
1009
1010 if (e->tag == Iex_Unop
1011 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1012 HReg tHi, tLo;
1013 HReg tmp = newVRegI(env);
1014 ARMRI84* zero = ARMRI84_I84(0,0);
1015 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1016 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1017 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1018 return ARMcc_NE;
1019 }
1020
1021 /* --- Cmp*32*(x,y) --- */
1022 if (e->tag == Iex_Binop
1023 && (e->Iex.Binop.op == Iop_CmpEQ32
1024 || e->Iex.Binop.op == Iop_CmpNE32
1025 || e->Iex.Binop.op == Iop_CmpLT32S
1026 || e->Iex.Binop.op == Iop_CmpLT32U
1027 || e->Iex.Binop.op == Iop_CmpLE32S
1028 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1029 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1030 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1031 env, e->Iex.Binop.arg2);
1032 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1033 switch (e->Iex.Binop.op) {
1034 case Iop_CmpEQ32: return ARMcc_EQ;
1035 case Iop_CmpNE32: return ARMcc_NE;
1036 case Iop_CmpLT32S: return ARMcc_LT;
1037 case Iop_CmpLT32U: return ARMcc_LO;
1038 case Iop_CmpLE32S: return ARMcc_LE;
1039 case Iop_CmpLE32U: return ARMcc_LS;
1040 default: vpanic("iselCondCode(arm): CmpXX32");
1041 }
1042 }
1043
1044 /* --- CasCmpEQ* --- */
1045 /* Ist_Cas has a dummy argument to compare with, so comparison is
1046 always true. */
1047 if (e->tag == Iex_Binop
1048 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1049 || e->Iex.Binop.op == Iop_CasCmpEQ16
1050 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1051 return ARMcc_AL;
1052 }
1053
1054 ppIRExpr(e);
1055 vpanic("iselCondCode");
1056 }
1057
1058
1059 /* --------------------- Reg --------------------- */
1060
iselIntExpr_R(ISelEnv * env,IRExpr * e)1061 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1062 {
1063 HReg r = iselIntExpr_R_wrk(env, e);
1064 /* sanity checks ... */
1065 # if 0
1066 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1067 # endif
1068 vassert(hregClass(r) == HRcInt32);
1069 vassert(hregIsVirtual(r));
1070 return r;
1071 }
1072
1073 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,IRExpr * e)1074 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1075 {
1076 IRType ty = typeOfIRExpr(env->type_env,e);
1077 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1078 // vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1079
1080 switch (e->tag) {
1081
1082 /* --------- TEMP --------- */
1083 case Iex_RdTmp: {
1084 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1085 }
1086
1087 /* --------- LOAD --------- */
1088 case Iex_Load: {
1089 HReg dst = newVRegI(env);
1090
1091 if (e->Iex.Load.end != Iend_LE)
1092 goto irreducible;
1093
1094 if (ty == Ity_I32) {
1095 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1096 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1097 return dst;
1098 }
1099 if (ty == Ity_I16) {
1100 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1101 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1102 dst, amode));
1103 return dst;
1104 }
1105 if (ty == Ity_I8) {
1106 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1107 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1108 return dst;
1109 }
1110
1111 //zz if (ty == Ity_I16) {
1112 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1113 //zz return dst;
1114 //zz }
1115 //zz if (ty == Ity_I8) {
1116 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1117 //zz return dst;
1118 //zz }
1119 break;
1120 }
1121
1122 //zz /* --------- TERNARY OP --------- */
1123 //zz case Iex_Triop: {
1124 //zz /* C3210 flags following FPU partial remainder (fprem), both
1125 //zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1126 //zz if (e->Iex.Triop.op == Iop_PRemC3210F64
1127 //zz || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1128 //zz HReg junk = newVRegF(env);
1129 //zz HReg dst = newVRegI(env);
1130 //zz HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1131 //zz HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1132 //zz /* XXXROUNDINGFIXME */
1133 //zz /* set roundingmode here */
1134 //zz addInstr(env, X86Instr_FpBinary(
1135 //zz e->Iex.Binop.op==Iop_PRemC3210F64
1136 //zz ? Xfp_PREM : Xfp_PREM1,
1137 //zz srcL,srcR,junk
1138 //zz ));
1139 //zz /* The previous pseudo-insn will have left the FPU's C3210
1140 //zz flags set correctly. So bag them. */
1141 //zz addInstr(env, X86Instr_FpStSW_AX());
1142 //zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1143 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1144 //zz return dst;
1145 //zz }
1146 //zz
1147 //zz break;
1148 //zz }
1149
1150 /* --------- BINARY OP --------- */
1151 case Iex_Binop: {
1152
1153 ARMAluOp aop = 0; /* invalid */
1154 ARMShiftOp sop = 0; /* invalid */
1155
1156 /* ADD/SUB/AND/OR/XOR */
1157 switch (e->Iex.Binop.op) {
1158 case Iop_And32: {
1159 Bool didInv = False;
1160 HReg dst = newVRegI(env);
1161 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1162 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1163 env, e->Iex.Binop.arg2);
1164 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1165 dst, argL, argR));
1166 return dst;
1167 }
1168 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1169 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1170 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1171 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1172 std_binop: {
1173 HReg dst = newVRegI(env);
1174 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1176 env, e->Iex.Binop.arg2);
1177 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1178 return dst;
1179 }
1180 default: break;
1181 }
1182
1183 /* SHL/SHR/SAR */
1184 switch (e->Iex.Binop.op) {
1185 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1186 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1187 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1188 sh_binop: {
1189 HReg dst = newVRegI(env);
1190 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1191 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1192 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1193 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1194 return dst;
1195 }
1196 default: break;
1197 }
1198
1199 /* MUL */
1200 if (e->Iex.Binop.op == Iop_Mul32) {
1201 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1202 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1203 HReg dst = newVRegI(env);
1204 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1205 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1206 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1207 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1208 return dst;
1209 }
1210
1211 /* Handle misc other ops. */
1212
1213 if (e->Iex.Binop.op == Iop_Max32U) {
1214 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216 HReg dst = newVRegI(env);
1217 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1218 ARMRI84_R(argR)));
1219 addInstr(env, mk_iMOVds_RR(dst, argL));
1220 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1221 return dst;
1222 }
1223
1224 if (e->Iex.Binop.op == Iop_CmpF64) {
1225 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1226 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1227 HReg dst = newVRegI(env);
1228 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1229 FMSTAT, so we can examine the results directly. */
1230 addInstr(env, ARMInstr_VCmpD(dL, dR));
1231 /* Create in dst, the IRCmpF64Result encoded result. */
1232 addInstr(env, ARMInstr_Imm32(dst, 0));
1233 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1234 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1235 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1236 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1237 return dst;
1238 }
1239
1240 if (e->Iex.Binop.op == Iop_F64toI32S
1241 || e->Iex.Binop.op == Iop_F64toI32U) {
1242 /* Wretched uglyness all round, due to having to deal
1243 with rounding modes. Oh well. */
1244 /* FIXME: if arg1 is a constant indicating round-to-zero,
1245 then we could skip all this arsing around with FPSCR and
1246 simply emit FTO{S,U}IZD. */
1247 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1248 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1249 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1250 /* FTO{S,U}ID valF, valD */
1251 HReg valF = newVRegF(env);
1252 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1253 valF, valD));
1254 set_VFP_rounding_default(env);
1255 /* VMOV dst, valF */
1256 HReg dst = newVRegI(env);
1257 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1258 return dst;
1259 }
1260
1261 if (e->Iex.Binop.op == Iop_GetElem8x8
1262 || e->Iex.Binop.op == Iop_GetElem16x4
1263 || e->Iex.Binop.op == Iop_GetElem32x2) {
1264 HReg res = newVRegI(env);
1265 HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1266 UInt index, size;
1267 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1268 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1269 vpanic("ARM target supports GetElem with constant "
1270 "second argument only\n");
1271 }
1272 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1273 switch (e->Iex.Binop.op) {
1274 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1275 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1276 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1277 default: vassert(0);
1278 }
1279 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1280 mkARMNRS(ARMNRS_Reg, res, 0),
1281 mkARMNRS(ARMNRS_Scalar, arg, index),
1282 size, False));
1283 return res;
1284 }
1285
1286 if (e->Iex.Binop.op == Iop_GetElem8x16
1287 || e->Iex.Binop.op == Iop_GetElem16x8
1288 || e->Iex.Binop.op == Iop_GetElem32x4) {
1289 HReg res = newVRegI(env);
1290 HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1291 UInt index, size;
1292 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1293 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1294 vpanic("ARM target supports GetElem with constant "
1295 "second argument only\n");
1296 }
1297 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1298 switch (e->Iex.Binop.op) {
1299 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1300 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1301 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1302 default: vassert(0);
1303 }
1304 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1305 mkARMNRS(ARMNRS_Reg, res, 0),
1306 mkARMNRS(ARMNRS_Scalar, arg, index),
1307 size, True));
1308 return res;
1309 }
1310
1311 /* All cases involving host-side helper calls. */
1312 void* fn = NULL;
1313 switch (e->Iex.Binop.op) {
1314 case Iop_Add16x2:
1315 fn = &h_generic_calc_Add16x2; break;
1316 case Iop_Sub16x2:
1317 fn = &h_generic_calc_Sub16x2; break;
1318 case Iop_HAdd16Ux2:
1319 fn = &h_generic_calc_HAdd16Ux2; break;
1320 case Iop_HAdd16Sx2:
1321 fn = &h_generic_calc_HAdd16Sx2; break;
1322 case Iop_HSub16Ux2:
1323 fn = &h_generic_calc_HSub16Ux2; break;
1324 case Iop_HSub16Sx2:
1325 fn = &h_generic_calc_HSub16Sx2; break;
1326 case Iop_QAdd16Sx2:
1327 fn = &h_generic_calc_QAdd16Sx2; break;
1328 case Iop_QSub16Sx2:
1329 fn = &h_generic_calc_QSub16Sx2; break;
1330 case Iop_Add8x4:
1331 fn = &h_generic_calc_Add8x4; break;
1332 case Iop_Sub8x4:
1333 fn = &h_generic_calc_Sub8x4; break;
1334 case Iop_HAdd8Ux4:
1335 fn = &h_generic_calc_HAdd8Ux4; break;
1336 case Iop_HAdd8Sx4:
1337 fn = &h_generic_calc_HAdd8Sx4; break;
1338 case Iop_HSub8Ux4:
1339 fn = &h_generic_calc_HSub8Ux4; break;
1340 case Iop_HSub8Sx4:
1341 fn = &h_generic_calc_HSub8Sx4; break;
1342 case Iop_QAdd8Sx4:
1343 fn = &h_generic_calc_QAdd8Sx4; break;
1344 case Iop_QAdd8Ux4:
1345 fn = &h_generic_calc_QAdd8Ux4; break;
1346 case Iop_QSub8Sx4:
1347 fn = &h_generic_calc_QSub8Sx4; break;
1348 case Iop_QSub8Ux4:
1349 fn = &h_generic_calc_QSub8Ux4; break;
1350 case Iop_Sad8Ux4:
1351 fn = &h_generic_calc_Sad8Ux4; break;
1352 default:
1353 break;
1354 }
1355
1356 if (fn) {
1357 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1358 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1359 HReg res = newVRegI(env);
1360 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1361 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1362 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1363 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1364 return res;
1365 }
1366
1367 break;
1368 }
1369
1370 /* --------- UNARY OP --------- */
1371 case Iex_Unop: {
1372
1373 //zz /* 1Uto8(32to1(expr32)) */
1374 //zz if (e->Iex.Unop.op == Iop_1Uto8) {
1375 //zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1376 //zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1377 //zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1378 //zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1379 //zz IRExpr* expr32 = mi.bindee[0];
1380 //zz HReg dst = newVRegI(env);
1381 //zz HReg src = iselIntExpr_R(env, expr32);
1382 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1383 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1384 //zz X86RMI_Imm(1), dst));
1385 //zz return dst;
1386 //zz }
1387 //zz }
1388 //zz
1389 //zz /* 8Uto32(LDle(expr32)) */
1390 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1391 //zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1392 //zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1393 //zz unop(Iop_8Uto32,
1394 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1395 //zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1396 //zz HReg dst = newVRegI(env);
1397 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1399 //zz return dst;
1400 //zz }
1401 //zz }
1402 //zz
1403 //zz /* 8Sto32(LDle(expr32)) */
1404 //zz if (e->Iex.Unop.op == Iop_8Sto32) {
1405 //zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1406 //zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1407 //zz unop(Iop_8Sto32,
1408 //zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1409 //zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1410 //zz HReg dst = newVRegI(env);
1411 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1412 //zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1413 //zz return dst;
1414 //zz }
1415 //zz }
1416 //zz
1417 //zz /* 16Uto32(LDle(expr32)) */
1418 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1419 //zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1420 //zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1421 //zz unop(Iop_16Uto32,
1422 //zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1423 //zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1424 //zz HReg dst = newVRegI(env);
1425 //zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1426 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1427 //zz return dst;
1428 //zz }
1429 //zz }
1430 //zz
1431 //zz /* 8Uto32(GET:I8) */
1432 //zz if (e->Iex.Unop.op == Iop_8Uto32) {
1433 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1434 //zz HReg dst;
1435 //zz X86AMode* amode;
1436 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1437 //zz dst = newVRegI(env);
1438 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1439 //zz hregX86_EBP());
1440 //zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1441 //zz return dst;
1442 //zz }
1443 //zz }
1444 //zz
1445 //zz /* 16to32(GET:I16) */
1446 //zz if (e->Iex.Unop.op == Iop_16Uto32) {
1447 //zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1448 //zz HReg dst;
1449 //zz X86AMode* amode;
1450 //zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1451 //zz dst = newVRegI(env);
1452 //zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1453 //zz hregX86_EBP());
1454 //zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1455 //zz return dst;
1456 //zz }
1457 //zz }
1458
1459 switch (e->Iex.Unop.op) {
1460 case Iop_8Uto32: {
1461 HReg dst = newVRegI(env);
1462 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1464 dst, src, ARMRI84_I84(0xFF,0)));
1465 return dst;
1466 }
1467 //zz case Iop_8Uto16:
1468 //zz case Iop_8Uto32:
1469 //zz case Iop_16Uto32: {
1470 //zz HReg dst = newVRegI(env);
1471 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1472 //zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1473 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1474 //zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1475 //zz X86RMI_Imm(mask), dst));
1476 //zz return dst;
1477 //zz }
1478 //zz case Iop_8Sto16:
1479 //zz case Iop_8Sto32:
1480 case Iop_16Uto32: {
1481 HReg dst = newVRegI(env);
1482 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1483 ARMRI5* amt = ARMRI5_I5(16);
1484 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1485 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1486 return dst;
1487 }
1488 case Iop_8Sto32:
1489 case Iop_16Sto32: {
1490 HReg dst = newVRegI(env);
1491 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1492 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1493 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1494 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1495 return dst;
1496 }
1497 //zz case Iop_Not8:
1498 //zz case Iop_Not16:
1499 case Iop_Not32: {
1500 HReg dst = newVRegI(env);
1501 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1502 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1503 return dst;
1504 }
1505 case Iop_64HIto32: {
1506 HReg rHi, rLo;
1507 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1508 return rHi; /* and abandon rLo .. poor wee thing :-) */
1509 }
1510 case Iop_64to32: {
1511 HReg rHi, rLo;
1512 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1513 return rLo; /* similar stupid comment to the above ... */
1514 }
1515 case Iop_64to8: {
1516 HReg rHi, rLo;
1517 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1518 HReg tHi = newVRegI(env);
1519 HReg tLo = newVRegI(env);
1520 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1521 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1522 rHi = tHi;
1523 rLo = tLo;
1524 } else {
1525 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1526 }
1527 return rLo;
1528 }
1529 //zz case Iop_16HIto8:
1530 //zz case Iop_32HIto16: {
1531 //zz HReg dst = newVRegI(env);
1532 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1533 //zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1534 //zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1535 //zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1536 //zz return dst;
1537 //zz }
1538 case Iop_1Uto32:
1539 case Iop_1Uto8: {
1540 HReg dst = newVRegI(env);
1541 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1542 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1543 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1544 return dst;
1545 }
1546
1547 case Iop_1Sto32: {
1548 HReg dst = newVRegI(env);
1549 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1550 ARMRI5* amt = ARMRI5_I5(31);
1551 /* This is really rough. We could do much better here;
1552 perhaps mvn{cond} dst, #0 as the second insn?
1553 (same applies to 1Sto64) */
1554 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1557 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1558 return dst;
1559 }
1560
1561
1562 //zz case Iop_1Sto8:
1563 //zz case Iop_1Sto16:
1564 //zz case Iop_1Sto32: {
1565 //zz /* could do better than this, but for now ... */
1566 //zz HReg dst = newVRegI(env);
1567 //zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1568 //zz addInstr(env, X86Instr_Set32(cond,dst));
1569 //zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1570 //zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1571 //zz return dst;
1572 //zz }
1573 //zz case Iop_Ctz32: {
1574 //zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1575 //zz HReg dst = newVRegI(env);
1576 //zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1577 //zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1578 //zz return dst;
1579 //zz }
1580 case Iop_Clz32: {
1581 /* Count leading zeroes; easy on ARM. */
1582 HReg dst = newVRegI(env);
1583 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1584 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1585 return dst;
1586 }
1587
1588 case Iop_CmpwNEZ32: {
1589 HReg dst = newVRegI(env);
1590 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1591 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1592 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1593 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1594 return dst;
1595 }
1596
1597 case Iop_Left32: {
1598 HReg dst = newVRegI(env);
1599 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1600 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1601 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1602 return dst;
1603 }
1604
1605 //zz case Iop_V128to32: {
1606 //zz HReg dst = newVRegI(env);
1607 //zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1608 //zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1609 //zz sub_from_esp(env, 16);
1610 //zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1611 //zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1612 //zz add_to_esp(env, 16);
1613 //zz return dst;
1614 //zz }
1615 //zz
1616 case Iop_ReinterpF32asI32: {
1617 HReg dst = newVRegI(env);
1618 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1619 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1620 return dst;
1621 }
1622
1623 //zz
1624 //zz case Iop_16to8:
1625 case Iop_32to8:
1626 case Iop_32to16:
1627 /* These are no-ops. */
1628 return iselIntExpr_R(env, e->Iex.Unop.arg);
1629
1630 default:
1631 break;
1632 }
1633
1634 /* All Unop cases involving host-side helper calls. */
1635 void* fn = NULL;
1636 switch (e->Iex.Unop.op) {
1637 case Iop_CmpNEZ16x2:
1638 fn = &h_generic_calc_CmpNEZ16x2; break;
1639 case Iop_CmpNEZ8x4:
1640 fn = &h_generic_calc_CmpNEZ8x4; break;
1641 default:
1642 break;
1643 }
1644
1645 if (fn) {
1646 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1647 HReg res = newVRegI(env);
1648 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1649 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1650 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1651 return res;
1652 }
1653
1654 break;
1655 }
1656
1657 /* --------- GET --------- */
1658 case Iex_Get: {
1659 if (ty == Ity_I32
1660 && 0 == (e->Iex.Get.offset & 3)
1661 && e->Iex.Get.offset < 4096-4) {
1662 HReg dst = newVRegI(env);
1663 addInstr(env, ARMInstr_LdSt32(
1664 True/*isLoad*/,
1665 dst,
1666 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1667 return dst;
1668 }
1669 //zz if (ty == Ity_I8 || ty == Ity_I16) {
1670 //zz HReg dst = newVRegI(env);
1671 //zz addInstr(env, X86Instr_LoadEX(
1672 //zz toUChar(ty==Ity_I8 ? 1 : 2),
1673 //zz False,
1674 //zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1675 //zz dst));
1676 //zz return dst;
1677 //zz }
1678 break;
1679 }
1680
1681 //zz case Iex_GetI: {
1682 //zz X86AMode* am
1683 //zz = genGuestArrayOffset(
1684 //zz env, e->Iex.GetI.descr,
1685 //zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1686 //zz HReg dst = newVRegI(env);
1687 //zz if (ty == Ity_I8) {
1688 //zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1689 //zz return dst;
1690 //zz }
1691 //zz if (ty == Ity_I32) {
1692 //zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1693 //zz return dst;
1694 //zz }
1695 //zz break;
1696 //zz }
1697
1698 /* --------- CCALL --------- */
1699 case Iex_CCall: {
1700 HReg dst = newVRegI(env);
1701 vassert(ty == e->Iex.CCall.retty);
1702
1703 /* be very restrictive for now. Only 32/64-bit ints allowed
1704 for args, and 32 bits for return type. */
1705 if (e->Iex.CCall.retty != Ity_I32)
1706 goto irreducible;
1707
1708 /* Marshal args, do the call, clear stack. */
1709 Bool ok = doHelperCall( env, False,
1710 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1711 if (ok) {
1712 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1713 return dst;
1714 }
1715 /* else fall through; will hit the irreducible: label */
1716 }
1717
1718 /* --------- LITERAL --------- */
1719 /* 32 literals */
1720 case Iex_Const: {
1721 UInt u = 0;
1722 HReg dst = newVRegI(env);
1723 switch (e->Iex.Const.con->tag) {
1724 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1725 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1726 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1727 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1728 }
1729 addInstr(env, ARMInstr_Imm32(dst, u));
1730 return dst;
1731 }
1732
1733 /* --------- MULTIPLEX --------- */
1734 case Iex_Mux0X: {
1735 IRExpr* cond = e->Iex.Mux0X.cond;
1736
1737 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1738 if (ty == Ity_I32
1739 && cond->tag == Iex_Unop
1740 && cond->Iex.Unop.op == Iop_32to8
1741 && cond->Iex.Unop.arg->tag == Iex_Unop
1742 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1743 ARMCondCode cc;
1744 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1745 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1746 HReg dst = newVRegI(env);
1747 addInstr(env, mk_iMOVds_RR(dst, rX));
1748 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1749 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1750 return dst;
1751 }
1752
1753 /* Mux0X(cond, expr0, exprX) (general case) */
1754 if (ty == Ity_I32) {
1755 HReg r8;
1756 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758 HReg dst = newVRegI(env);
1759 addInstr(env, mk_iMOVds_RR(dst, rX));
1760 r8 = iselIntExpr_R(env, cond);
1761 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1762 ARMRI84_I84(0xFF,0)));
1763 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1764 return dst;
1765 }
1766 break;
1767 }
1768
1769 default:
1770 break;
1771 } /* switch (e->tag) */
1772
1773 /* We get here if no pattern matched. */
1774 irreducible:
1775 ppIRExpr(e);
1776 vpanic("iselIntExpr_R: cannot reduce tree");
1777 }
1778
1779
1780 /* -------------------- 64-bit -------------------- */
1781
/* Compute a 64-bit value into a register pair, which is returned
   through the first two parameters (rHi and rLo).  As with
   iselIntExpr_R, both results are checked to be virtual registers of
   class HRcInt32; they must not be changed by subsequent code
   emitted by the caller. */
1786
/* Checked entry point for 64-bit selection.  Delegates to
   iselInt64Expr_wrk, which fills in *rHi and *rLo, and then asserts
   that each half is a virtual register of class HRcInt32. */
static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
{
   iselInt64Expr_wrk(rHi, rLo, env, e);

   /* Debug aid: enable to print every expression passing through. */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif

   /* high half */
   vassert(hregClass(*rHi) == HRcInt32);
   vassert(hregIsVirtual(*rHi));

   /* low half */
   vassert(hregClass(*rLo) == HRcInt32);
   vassert(hregIsVirtual(*rLo));
}
1798
1799 /* DO NOT CALL THIS DIRECTLY ! */
iselInt64Expr_wrk(HReg * rHi,HReg * rLo,ISelEnv * env,IRExpr * e)1800 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1801 {
1802 vassert(e);
1803 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1804
1805 /* 64-bit literal */
1806 if (e->tag == Iex_Const) {
1807 ULong w64 = e->Iex.Const.con->Ico.U64;
1808 UInt wHi = toUInt(w64 >> 32);
1809 UInt wLo = toUInt(w64);
1810 HReg tHi = newVRegI(env);
1811 HReg tLo = newVRegI(env);
1812 vassert(e->Iex.Const.con->tag == Ico_U64);
1813 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1814 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1815 *rHi = tHi;
1816 *rLo = tLo;
1817 return;
1818 }
1819
1820 /* read 64-bit IRTemp */
1821 if (e->tag == Iex_RdTmp) {
1822 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1823 HReg tHi = newVRegI(env);
1824 HReg tLo = newVRegI(env);
1825 HReg tmp = iselNeon64Expr(env, e);
1826 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1827 *rHi = tHi;
1828 *rLo = tLo;
1829 } else {
1830 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1831 }
1832 return;
1833 }
1834
1835 /* 64-bit load */
1836 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1837 HReg tLo, tHi, rA;
1838 vassert(e->Iex.Load.ty == Ity_I64);
1839 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1840 tHi = newVRegI(env);
1841 tLo = newVRegI(env);
1842 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1843 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1844 *rHi = tHi;
1845 *rLo = tLo;
1846 return;
1847 }
1848
1849 /* 64-bit GET */
1850 if (e->tag == Iex_Get) {
1851 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1852 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1853 HReg tHi = newVRegI(env);
1854 HReg tLo = newVRegI(env);
1855 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1856 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1857 *rHi = tHi;
1858 *rLo = tLo;
1859 return;
1860 }
1861
1862 /* --------- BINARY ops --------- */
1863 if (e->tag == Iex_Binop) {
1864 switch (e->Iex.Binop.op) {
1865
1866 /* 32 x 32 -> 64 multiply */
1867 case Iop_MullS32:
1868 case Iop_MullU32: {
1869 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1870 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1871 HReg tHi = newVRegI(env);
1872 HReg tLo = newVRegI(env);
1873 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1874 ? ARMmul_SX : ARMmul_ZX;
1875 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1876 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1877 addInstr(env, ARMInstr_Mul(mop));
1878 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1879 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1880 *rHi = tHi;
1881 *rLo = tLo;
1882 return;
1883 }
1884
1885 case Iop_Or64: {
1886 HReg xLo, xHi, yLo, yHi;
1887 HReg tHi = newVRegI(env);
1888 HReg tLo = newVRegI(env);
1889 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1890 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1891 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1892 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1893 *rHi = tHi;
1894 *rLo = tLo;
1895 return;
1896 }
1897
1898 case Iop_Add64: {
1899 HReg xLo, xHi, yLo, yHi;
1900 HReg tHi = newVRegI(env);
1901 HReg tLo = newVRegI(env);
1902 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1905 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1906 *rHi = tHi;
1907 *rLo = tLo;
1908 return;
1909 }
1910
1911 /* 32HLto64(e1,e2) */
1912 case Iop_32HLto64: {
1913 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1914 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1915 return;
1916 }
1917
1918 default:
1919 break;
1920 }
1921 }
1922
1923 /* --------- UNARY ops --------- */
1924 if (e->tag == Iex_Unop) {
1925 switch (e->Iex.Unop.op) {
1926
1927 /* ReinterpF64asI64 */
1928 case Iop_ReinterpF64asI64: {
1929 HReg dstHi = newVRegI(env);
1930 HReg dstLo = newVRegI(env);
1931 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1932 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1933 *rHi = dstHi;
1934 *rLo = dstLo;
1935 return;
1936 }
1937
1938 /* Left64(e) */
1939 case Iop_Left64: {
1940 HReg yLo, yHi;
1941 HReg tHi = newVRegI(env);
1942 HReg tLo = newVRegI(env);
1943 HReg zero = newVRegI(env);
1944 /* yHi:yLo = arg */
1945 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1946 /* zero = 0 */
1947 addInstr(env, ARMInstr_Imm32(zero, 0));
1948 /* tLo = 0 - yLo, and set carry */
1949 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1950 tLo, zero, ARMRI84_R(yLo)));
1951 /* tHi = 0 - yHi - carry */
1952 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1953 tHi, zero, ARMRI84_R(yHi)));
1954 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1955 back in, so as to give the final result
1956 tHi:tLo = arg | -arg. */
1957 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1958 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1959 *rHi = tHi;
1960 *rLo = tLo;
1961 return;
1962 }
1963
1964 /* CmpwNEZ64(e) */
1965 case Iop_CmpwNEZ64: {
1966 HReg srcLo, srcHi;
1967 HReg tmp1 = newVRegI(env);
1968 HReg tmp2 = newVRegI(env);
1969 /* srcHi:srcLo = arg */
1970 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1971 /* tmp1 = srcHi | srcLo */
1972 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1973 tmp1, srcHi, ARMRI84_R(srcLo)));
1974 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1975 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1976 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1977 tmp2, tmp2, ARMRI84_R(tmp1)));
1978 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1979 tmp2, tmp2, ARMRI5_I5(31)));
1980 *rHi = tmp2;
1981 *rLo = tmp2;
1982 return;
1983 }
1984
1985 case Iop_1Sto64: {
1986 HReg dst = newVRegI(env);
1987 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1988 ARMRI5* amt = ARMRI5_I5(31);
1989 /* This is really rough. We could do much better here;
1990 perhaps mvn{cond} dst, #0 as the second insn?
1991 (same applies to 1Sto32) */
1992 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1993 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1994 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1995 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1996 *rHi = dst;
1997 *rLo = dst;
1998 return;
1999 }
2000
2001 default:
2002 break;
2003 }
2004 } /* if (e->tag == Iex_Unop) */
2005
2006 /* --------- MULTIPLEX --------- */
2007 if (e->tag == Iex_Mux0X) {
2008 IRType ty8;
2009 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2010 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2011 vassert(ty8 == Ity_I8);
2012 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2013 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2014 dstHi = newVRegI(env);
2015 dstLo = newVRegI(env);
2016 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2017 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2018 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2019 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2020 ARMRI84_I84(0xFF,0)));
2021 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2022 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2023 *rHi = dstHi;
2024 *rLo = dstLo;
2025 return;
2026 }
2027
2028 /* It is convenient sometimes to call iselInt64Expr even when we
2029 have NEON support (e.g. in do_helper_call we need 64-bit
2030 arguments as 2 x 32 regs). */
2031 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
2032 HReg tHi = newVRegI(env);
2033 HReg tLo = newVRegI(env);
2034 HReg tmp = iselNeon64Expr(env, e);
2035 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2036 *rHi = tHi;
2037 *rLo = tLo;
2038 return ;
2039 }
2040
2041 ppIRExpr(e);
2042 vpanic("iselInt64Expr");
2043 }
2044
2045
2046 /*---------------------------------------------------------*/
2047 /*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2048 /*---------------------------------------------------------*/
2049
iselNeon64Expr(ISelEnv * env,IRExpr * e)2050 static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2051 {
2052 HReg r = iselNeon64Expr_wrk( env, e );
2053 vassert(hregClass(r) == HRcFlt64);
2054 vassert(hregIsVirtual(r));
2055 return r;
2056 }
2057
2058 /* DO NOT CALL THIS DIRECTLY */
iselNeon64Expr_wrk(ISelEnv * env,IRExpr * e)2059 static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2060 {
2061 IRType ty = typeOfIRExpr(env->type_env, e);
2062 MatchInfo mi;
2063 vassert(e);
2064 vassert(ty == Ity_I64);
2065
2066 if (e->tag == Iex_RdTmp) {
2067 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2068 }
2069
2070 if (e->tag == Iex_Const) {
2071 HReg rLo, rHi;
2072 HReg res = newVRegD(env);
2073 iselInt64Expr(&rHi, &rLo, env, e);
2074 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2075 return res;
2076 }
2077
2078 /* 64-bit load */
2079 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2080 HReg res = newVRegD(env);
2081 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2082 vassert(ty == Ity_I64);
2083 addInstr(env, ARMInstr_NLdStD(True, res, am));
2084 return res;
2085 }
2086
2087 /* 64-bit GET */
2088 if (e->tag == Iex_Get) {
2089 HReg addr = newVRegI(env);
2090 HReg res = newVRegD(env);
2091 vassert(ty == Ity_I64);
2092 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2093 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2094 return res;
2095 }
2096
2097 /* --------- BINARY ops --------- */
2098 if (e->tag == Iex_Binop) {
2099 switch (e->Iex.Binop.op) {
2100
2101 /* 32 x 32 -> 64 multiply */
2102 case Iop_MullS32:
2103 case Iop_MullU32: {
2104 HReg rLo, rHi;
2105 HReg res = newVRegD(env);
2106 iselInt64Expr(&rHi, &rLo, env, e);
2107 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2108 return res;
2109 }
2110
2111 case Iop_And64: {
2112 HReg res = newVRegD(env);
2113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2115 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2116 res, argL, argR, 4, False));
2117 return res;
2118 }
2119 case Iop_Or64: {
2120 HReg res = newVRegD(env);
2121 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2122 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2123 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2124 res, argL, argR, 4, False));
2125 return res;
2126 }
2127 case Iop_Xor64: {
2128 HReg res = newVRegD(env);
2129 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2130 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2131 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2132 res, argL, argR, 4, False));
2133 return res;
2134 }
2135
2136 /* 32HLto64(e1,e2) */
2137 case Iop_32HLto64: {
2138 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140 HReg res = newVRegD(env);
2141 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2142 return res;
2143 }
2144
2145 case Iop_Add8x8:
2146 case Iop_Add16x4:
2147 case Iop_Add32x2:
2148 case Iop_Add64: {
2149 HReg res = newVRegD(env);
2150 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2151 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2152 UInt size;
2153 switch (e->Iex.Binop.op) {
2154 case Iop_Add8x8: size = 0; break;
2155 case Iop_Add16x4: size = 1; break;
2156 case Iop_Add32x2: size = 2; break;
2157 case Iop_Add64: size = 3; break;
2158 default: vassert(0);
2159 }
2160 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2161 res, argL, argR, size, False));
2162 return res;
2163 }
2164 case Iop_Add32Fx2: {
2165 HReg res = newVRegD(env);
2166 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2167 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2168 UInt size = 0;
2169 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2170 res, argL, argR, size, False));
2171 return res;
2172 }
2173 case Iop_Recps32Fx2: {
2174 HReg res = newVRegD(env);
2175 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2176 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2177 UInt size = 0;
2178 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2179 res, argL, argR, size, False));
2180 return res;
2181 }
2182 case Iop_Rsqrts32Fx2: {
2183 HReg res = newVRegD(env);
2184 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2185 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2186 UInt size = 0;
2187 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2188 res, argL, argR, size, False));
2189 return res;
2190 }
2191 case Iop_InterleaveOddLanes8x8:
2192 case Iop_InterleaveOddLanes16x4:
2193 case Iop_InterleaveLO32x2:
2194 case Iop_InterleaveEvenLanes8x8:
2195 case Iop_InterleaveEvenLanes16x4:
2196 case Iop_InterleaveHI32x2: {
2197 HReg tmp = newVRegD(env);
2198 HReg res = newVRegD(env);
2199 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2200 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2201 UInt size;
2202 UInt is_lo;
2203 switch (e->Iex.Binop.op) {
2204 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2205 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2206 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2207 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2208 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2209 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2210 default: vassert(0);
2211 }
2212 if (is_lo) {
2213 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2214 tmp, argL, 4, False));
2215 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2216 res, argR, 4, False));
2217 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2218 res, tmp, size, False));
2219 } else {
2220 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2221 tmp, argR, 4, False));
2222 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2223 res, argL, 4, False));
2224 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2225 tmp, res, size, False));
2226 }
2227 return res;
2228 }
2229 case Iop_InterleaveHI8x8:
2230 case Iop_InterleaveHI16x4:
2231 case Iop_InterleaveLO8x8:
2232 case Iop_InterleaveLO16x4: {
2233 HReg tmp = newVRegD(env);
2234 HReg res = newVRegD(env);
2235 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2236 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2237 UInt size;
2238 UInt is_lo;
2239 switch (e->Iex.Binop.op) {
2240 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2241 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2242 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2243 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2244 default: vassert(0);
2245 }
2246 if (is_lo) {
2247 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248 tmp, argL, 4, False));
2249 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2250 res, argR, 4, False));
2251 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2252 res, tmp, size, False));
2253 } else {
2254 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255 tmp, argR, 4, False));
2256 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2257 res, argL, 4, False));
2258 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2259 tmp, res, size, False));
2260 }
2261 return res;
2262 }
2263 case Iop_CatOddLanes8x8:
2264 case Iop_CatOddLanes16x4:
2265 case Iop_CatEvenLanes8x8:
2266 case Iop_CatEvenLanes16x4: {
2267 HReg tmp = newVRegD(env);
2268 HReg res = newVRegD(env);
2269 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2270 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2271 UInt size;
2272 UInt is_lo;
2273 switch (e->Iex.Binop.op) {
2274 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2275 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2276 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2277 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2278 default: vassert(0);
2279 }
2280 if (is_lo) {
2281 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282 tmp, argL, 4, False));
2283 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2284 res, argR, 4, False));
2285 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2286 res, tmp, size, False));
2287 } else {
2288 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289 tmp, argR, 4, False));
2290 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2291 res, argL, 4, False));
2292 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2293 tmp, res, size, False));
2294 }
2295 return res;
2296 }
2297 case Iop_QAdd8Ux8:
2298 case Iop_QAdd16Ux4:
2299 case Iop_QAdd32Ux2:
2300 case Iop_QAdd64Ux1: {
2301 HReg res = newVRegD(env);
2302 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2303 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2304 UInt size;
2305 switch (e->Iex.Binop.op) {
2306 case Iop_QAdd8Ux8: size = 0; break;
2307 case Iop_QAdd16Ux4: size = 1; break;
2308 case Iop_QAdd32Ux2: size = 2; break;
2309 case Iop_QAdd64Ux1: size = 3; break;
2310 default: vassert(0);
2311 }
2312 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2313 res, argL, argR, size, False));
2314 return res;
2315 }
2316 case Iop_QAdd8Sx8:
2317 case Iop_QAdd16Sx4:
2318 case Iop_QAdd32Sx2:
2319 case Iop_QAdd64Sx1: {
2320 HReg res = newVRegD(env);
2321 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2322 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2323 UInt size;
2324 switch (e->Iex.Binop.op) {
2325 case Iop_QAdd8Sx8: size = 0; break;
2326 case Iop_QAdd16Sx4: size = 1; break;
2327 case Iop_QAdd32Sx2: size = 2; break;
2328 case Iop_QAdd64Sx1: size = 3; break;
2329 default: vassert(0);
2330 }
2331 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2332 res, argL, argR, size, False));
2333 return res;
2334 }
2335 case Iop_Sub8x8:
2336 case Iop_Sub16x4:
2337 case Iop_Sub32x2:
2338 case Iop_Sub64: {
2339 HReg res = newVRegD(env);
2340 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2341 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2342 UInt size;
2343 switch (e->Iex.Binop.op) {
2344 case Iop_Sub8x8: size = 0; break;
2345 case Iop_Sub16x4: size = 1; break;
2346 case Iop_Sub32x2: size = 2; break;
2347 case Iop_Sub64: size = 3; break;
2348 default: vassert(0);
2349 }
2350 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2351 res, argL, argR, size, False));
2352 return res;
2353 }
2354 case Iop_Sub32Fx2: {
2355 HReg res = newVRegD(env);
2356 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358 UInt size = 0;
2359 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2360 res, argL, argR, size, False));
2361 return res;
2362 }
2363 case Iop_QSub8Ux8:
2364 case Iop_QSub16Ux4:
2365 case Iop_QSub32Ux2:
2366 case Iop_QSub64Ux1: {
2367 HReg res = newVRegD(env);
2368 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2370 UInt size;
2371 switch (e->Iex.Binop.op) {
2372 case Iop_QSub8Ux8: size = 0; break;
2373 case Iop_QSub16Ux4: size = 1; break;
2374 case Iop_QSub32Ux2: size = 2; break;
2375 case Iop_QSub64Ux1: size = 3; break;
2376 default: vassert(0);
2377 }
2378 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2379 res, argL, argR, size, False));
2380 return res;
2381 }
2382 case Iop_QSub8Sx8:
2383 case Iop_QSub16Sx4:
2384 case Iop_QSub32Sx2:
2385 case Iop_QSub64Sx1: {
2386 HReg res = newVRegD(env);
2387 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389 UInt size;
2390 switch (e->Iex.Binop.op) {
2391 case Iop_QSub8Sx8: size = 0; break;
2392 case Iop_QSub16Sx4: size = 1; break;
2393 case Iop_QSub32Sx2: size = 2; break;
2394 case Iop_QSub64Sx1: size = 3; break;
2395 default: vassert(0);
2396 }
2397 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2398 res, argL, argR, size, False));
2399 return res;
2400 }
2401 case Iop_Max8Ux8:
2402 case Iop_Max16Ux4:
2403 case Iop_Max32Ux2: {
2404 HReg res = newVRegD(env);
2405 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2406 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2407 UInt size;
2408 switch (e->Iex.Binop.op) {
2409 case Iop_Max8Ux8: size = 0; break;
2410 case Iop_Max16Ux4: size = 1; break;
2411 case Iop_Max32Ux2: size = 2; break;
2412 default: vassert(0);
2413 }
2414 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2415 res, argL, argR, size, False));
2416 return res;
2417 }
2418 case Iop_Max8Sx8:
2419 case Iop_Max16Sx4:
2420 case Iop_Max32Sx2: {
2421 HReg res = newVRegD(env);
2422 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424 UInt size;
2425 switch (e->Iex.Binop.op) {
2426 case Iop_Max8Sx8: size = 0; break;
2427 case Iop_Max16Sx4: size = 1; break;
2428 case Iop_Max32Sx2: size = 2; break;
2429 default: vassert(0);
2430 }
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2432 res, argL, argR, size, False));
2433 return res;
2434 }
2435 case Iop_Min8Ux8:
2436 case Iop_Min16Ux4:
2437 case Iop_Min32Ux2: {
2438 HReg res = newVRegD(env);
2439 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441 UInt size;
2442 switch (e->Iex.Binop.op) {
2443 case Iop_Min8Ux8: size = 0; break;
2444 case Iop_Min16Ux4: size = 1; break;
2445 case Iop_Min32Ux2: size = 2; break;
2446 default: vassert(0);
2447 }
2448 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2449 res, argL, argR, size, False));
2450 return res;
2451 }
2452 case Iop_Min8Sx8:
2453 case Iop_Min16Sx4:
2454 case Iop_Min32Sx2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458 UInt size;
2459 switch (e->Iex.Binop.op) {
2460 case Iop_Min8Sx8: size = 0; break;
2461 case Iop_Min16Sx4: size = 1; break;
2462 case Iop_Min32Sx2: size = 2; break;
2463 default: vassert(0);
2464 }
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2466 res, argL, argR, size, False));
2467 return res;
2468 }
2469 case Iop_Sar8x8:
2470 case Iop_Sar16x4:
2471 case Iop_Sar32x2: {
2472 HReg res = newVRegD(env);
2473 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475 HReg argR2 = newVRegD(env);
2476 HReg zero = newVRegD(env);
2477 UInt size;
2478 switch (e->Iex.Binop.op) {
2479 case Iop_Sar8x8: size = 0; break;
2480 case Iop_Sar16x4: size = 1; break;
2481 case Iop_Sar32x2: size = 2; break;
2482 case Iop_Sar64: size = 3; break;
2483 default: vassert(0);
2484 }
2485 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2486 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2487 argR2, zero, argR, size, False));
2488 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2489 res, argL, argR2, size, False));
2490 return res;
2491 }
2492 case Iop_Sal8x8:
2493 case Iop_Sal16x4:
2494 case Iop_Sal32x2:
2495 case Iop_Sal64x1: {
2496 HReg res = newVRegD(env);
2497 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2499 UInt size;
2500 switch (e->Iex.Binop.op) {
2501 case Iop_Sal8x8: size = 0; break;
2502 case Iop_Sal16x4: size = 1; break;
2503 case Iop_Sal32x2: size = 2; break;
2504 case Iop_Sal64x1: size = 3; break;
2505 default: vassert(0);
2506 }
2507 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2508 res, argL, argR, size, False));
2509 return res;
2510 }
2511 case Iop_Shr8x8:
2512 case Iop_Shr16x4:
2513 case Iop_Shr32x2: {
2514 HReg res = newVRegD(env);
2515 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2516 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2517 HReg argR2 = newVRegD(env);
2518 HReg zero = newVRegD(env);
2519 UInt size;
2520 switch (e->Iex.Binop.op) {
2521 case Iop_Shr8x8: size = 0; break;
2522 case Iop_Shr16x4: size = 1; break;
2523 case Iop_Shr32x2: size = 2; break;
2524 default: vassert(0);
2525 }
2526 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2527 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2528 argR2, zero, argR, size, False));
2529 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2530 res, argL, argR2, size, False));
2531 return res;
2532 }
2533 case Iop_Shl8x8:
2534 case Iop_Shl16x4:
2535 case Iop_Shl32x2: {
2536 HReg res = newVRegD(env);
2537 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2538 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2539 UInt size;
2540 switch (e->Iex.Binop.op) {
2541 case Iop_Shl8x8: size = 0; break;
2542 case Iop_Shl16x4: size = 1; break;
2543 case Iop_Shl32x2: size = 2; break;
2544 default: vassert(0);
2545 }
2546 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547 res, argL, argR, size, False));
2548 return res;
2549 }
2550 case Iop_QShl8x8:
2551 case Iop_QShl16x4:
2552 case Iop_QShl32x2:
2553 case Iop_QShl64x1: {
2554 HReg res = newVRegD(env);
2555 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2556 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2557 UInt size;
2558 switch (e->Iex.Binop.op) {
2559 case Iop_QShl8x8: size = 0; break;
2560 case Iop_QShl16x4: size = 1; break;
2561 case Iop_QShl32x2: size = 2; break;
2562 case Iop_QShl64x1: size = 3; break;
2563 default: vassert(0);
2564 }
2565 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2566 res, argL, argR, size, False));
2567 return res;
2568 }
2569 case Iop_QSal8x8:
2570 case Iop_QSal16x4:
2571 case Iop_QSal32x2:
2572 case Iop_QSal64x1: {
2573 HReg res = newVRegD(env);
2574 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2575 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2576 UInt size;
2577 switch (e->Iex.Binop.op) {
2578 case Iop_QSal8x8: size = 0; break;
2579 case Iop_QSal16x4: size = 1; break;
2580 case Iop_QSal32x2: size = 2; break;
2581 case Iop_QSal64x1: size = 3; break;
2582 default: vassert(0);
2583 }
2584 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2585 res, argL, argR, size, False));
2586 return res;
2587 }
2588 case Iop_QShlN8x8:
2589 case Iop_QShlN16x4:
2590 case Iop_QShlN32x2:
2591 case Iop_QShlN64x1: {
2592 HReg res = newVRegD(env);
2593 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2594 UInt size, imm;
2595 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2596 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2597 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2598 "second argument only\n");
2599 }
2600 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2601 switch (e->Iex.Binop.op) {
2602 case Iop_QShlN8x8: size = 8 | imm; break;
2603 case Iop_QShlN16x4: size = 16 | imm; break;
2604 case Iop_QShlN32x2: size = 32 | imm; break;
2605 case Iop_QShlN64x1: size = 64 | imm; break;
2606 default: vassert(0);
2607 }
2608 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2609 res, argL, size, False));
2610 return res;
2611 }
2612 case Iop_QShlN8Sx8:
2613 case Iop_QShlN16Sx4:
2614 case Iop_QShlN32Sx2:
2615 case Iop_QShlN64Sx1: {
2616 HReg res = newVRegD(env);
2617 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2618 UInt size, imm;
2619 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2620 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2621 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2622 "second argument only\n");
2623 }
2624 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2625 switch (e->Iex.Binop.op) {
2626 case Iop_QShlN8Sx8: size = 8 | imm; break;
2627 case Iop_QShlN16Sx4: size = 16 | imm; break;
2628 case Iop_QShlN32Sx2: size = 32 | imm; break;
2629 case Iop_QShlN64Sx1: size = 64 | imm; break;
2630 default: vassert(0);
2631 }
2632 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2633 res, argL, size, False));
2634 return res;
2635 }
2636 case Iop_QSalN8x8:
2637 case Iop_QSalN16x4:
2638 case Iop_QSalN32x2:
2639 case Iop_QSalN64x1: {
2640 HReg res = newVRegD(env);
2641 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2642 UInt size, imm;
2643 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2644 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2645 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2646 "second argument only\n");
2647 }
2648 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2649 switch (e->Iex.Binop.op) {
2650 case Iop_QSalN8x8: size = 8 | imm; break;
2651 case Iop_QSalN16x4: size = 16 | imm; break;
2652 case Iop_QSalN32x2: size = 32 | imm; break;
2653 case Iop_QSalN64x1: size = 64 | imm; break;
2654 default: vassert(0);
2655 }
2656 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2657 res, argL, size, False));
2658 return res;
2659 }
2660 case Iop_ShrN8x8:
2661 case Iop_ShrN16x4:
2662 case Iop_ShrN32x2:
2663 case Iop_Shr64: {
2664 HReg res = newVRegD(env);
2665 HReg tmp = newVRegD(env);
2666 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2667 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2668 HReg argR2 = newVRegI(env);
2669 UInt size;
2670 switch (e->Iex.Binop.op) {
2671 case Iop_ShrN8x8: size = 0; break;
2672 case Iop_ShrN16x4: size = 1; break;
2673 case Iop_ShrN32x2: size = 2; break;
2674 case Iop_Shr64: size = 3; break;
2675 default: vassert(0);
2676 }
2677 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2678 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2679 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2680 res, argL, tmp, size, False));
2681 return res;
2682 }
2683 case Iop_ShlN8x8:
2684 case Iop_ShlN16x4:
2685 case Iop_ShlN32x2:
2686 case Iop_Shl64: {
2687 HReg res = newVRegD(env);
2688 HReg tmp = newVRegD(env);
2689 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2691 UInt size;
2692 switch (e->Iex.Binop.op) {
2693 case Iop_ShlN8x8: size = 0; break;
2694 case Iop_ShlN16x4: size = 1; break;
2695 case Iop_ShlN32x2: size = 2; break;
2696 case Iop_Shl64: size = 3; break;
2697 default: vassert(0);
2698 }
2699 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2700 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2701 res, argL, tmp, size, False));
2702 return res;
2703 }
2704 case Iop_SarN8x8:
2705 case Iop_SarN16x4:
2706 case Iop_SarN32x2:
2707 case Iop_Sar64: {
2708 HReg res = newVRegD(env);
2709 HReg tmp = newVRegD(env);
2710 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2711 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2712 HReg argR2 = newVRegI(env);
2713 UInt size;
2714 switch (e->Iex.Binop.op) {
2715 case Iop_SarN8x8: size = 0; break;
2716 case Iop_SarN16x4: size = 1; break;
2717 case Iop_SarN32x2: size = 2; break;
2718 case Iop_Sar64: size = 3; break;
2719 default: vassert(0);
2720 }
2721 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2722 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2723 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2724 res, argL, tmp, size, False));
2725 return res;
2726 }
2727 case Iop_CmpGT8Ux8:
2728 case Iop_CmpGT16Ux4:
2729 case Iop_CmpGT32Ux2: {
2730 HReg res = newVRegD(env);
2731 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2732 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2733 UInt size;
2734 switch (e->Iex.Binop.op) {
2735 case Iop_CmpGT8Ux8: size = 0; break;
2736 case Iop_CmpGT16Ux4: size = 1; break;
2737 case Iop_CmpGT32Ux2: size = 2; break;
2738 default: vassert(0);
2739 }
2740 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2741 res, argL, argR, size, False));
2742 return res;
2743 }
2744 case Iop_CmpGT8Sx8:
2745 case Iop_CmpGT16Sx4:
2746 case Iop_CmpGT32Sx2: {
2747 HReg res = newVRegD(env);
2748 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750 UInt size;
2751 switch (e->Iex.Binop.op) {
2752 case Iop_CmpGT8Sx8: size = 0; break;
2753 case Iop_CmpGT16Sx4: size = 1; break;
2754 case Iop_CmpGT32Sx2: size = 2; break;
2755 default: vassert(0);
2756 }
2757 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2758 res, argL, argR, size, False));
2759 return res;
2760 }
2761 case Iop_CmpEQ8x8:
2762 case Iop_CmpEQ16x4:
2763 case Iop_CmpEQ32x2: {
2764 HReg res = newVRegD(env);
2765 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767 UInt size;
2768 switch (e->Iex.Binop.op) {
2769 case Iop_CmpEQ8x8: size = 0; break;
2770 case Iop_CmpEQ16x4: size = 1; break;
2771 case Iop_CmpEQ32x2: size = 2; break;
2772 default: vassert(0);
2773 }
2774 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2775 res, argL, argR, size, False));
2776 return res;
2777 }
2778 case Iop_Mul8x8:
2779 case Iop_Mul16x4:
2780 case Iop_Mul32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 UInt size = 0;
2785 switch(e->Iex.Binop.op) {
2786 case Iop_Mul8x8: size = 0; break;
2787 case Iop_Mul16x4: size = 1; break;
2788 case Iop_Mul32x2: size = 2; break;
2789 default: vassert(0);
2790 }
2791 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2792 res, argL, argR, size, False));
2793 return res;
2794 }
2795 case Iop_Mul32Fx2: {
2796 HReg res = newVRegD(env);
2797 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2799 UInt size = 0;
2800 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2801 res, argL, argR, size, False));
2802 return res;
2803 }
2804 case Iop_QDMulHi16Sx4:
2805 case Iop_QDMulHi32Sx2: {
2806 HReg res = newVRegD(env);
2807 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2809 UInt size = 0;
2810 switch(e->Iex.Binop.op) {
2811 case Iop_QDMulHi16Sx4: size = 1; break;
2812 case Iop_QDMulHi32Sx2: size = 2; break;
2813 default: vassert(0);
2814 }
2815 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2816 res, argL, argR, size, False));
2817 return res;
2818 }
2819
2820 case Iop_QRDMulHi16Sx4:
2821 case Iop_QRDMulHi32Sx2: {
2822 HReg res = newVRegD(env);
2823 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2825 UInt size = 0;
2826 switch(e->Iex.Binop.op) {
2827 case Iop_QRDMulHi16Sx4: size = 1; break;
2828 case Iop_QRDMulHi32Sx2: size = 2; break;
2829 default: vassert(0);
2830 }
2831 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2832 res, argL, argR, size, False));
2833 return res;
2834 }
2835
2836 case Iop_PwAdd8x8:
2837 case Iop_PwAdd16x4:
2838 case Iop_PwAdd32x2: {
2839 HReg res = newVRegD(env);
2840 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842 UInt size = 0;
2843 switch(e->Iex.Binop.op) {
2844 case Iop_PwAdd8x8: size = 0; break;
2845 case Iop_PwAdd16x4: size = 1; break;
2846 case Iop_PwAdd32x2: size = 2; break;
2847 default: vassert(0);
2848 }
2849 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2850 res, argL, argR, size, False));
2851 return res;
2852 }
2853 case Iop_PwAdd32Fx2: {
2854 HReg res = newVRegD(env);
2855 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2857 UInt size = 0;
2858 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2859 res, argL, argR, size, False));
2860 return res;
2861 }
2862 case Iop_PwMin8Ux8:
2863 case Iop_PwMin16Ux4:
2864 case Iop_PwMin32Ux2: {
2865 HReg res = newVRegD(env);
2866 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2867 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2868 UInt size = 0;
2869 switch(e->Iex.Binop.op) {
2870 case Iop_PwMin8Ux8: size = 0; break;
2871 case Iop_PwMin16Ux4: size = 1; break;
2872 case Iop_PwMin32Ux2: size = 2; break;
2873 default: vassert(0);
2874 }
2875 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2876 res, argL, argR, size, False));
2877 return res;
2878 }
2879 case Iop_PwMin8Sx8:
2880 case Iop_PwMin16Sx4:
2881 case Iop_PwMin32Sx2: {
2882 HReg res = newVRegD(env);
2883 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885 UInt size = 0;
2886 switch(e->Iex.Binop.op) {
2887 case Iop_PwMin8Sx8: size = 0; break;
2888 case Iop_PwMin16Sx4: size = 1; break;
2889 case Iop_PwMin32Sx2: size = 2; break;
2890 default: vassert(0);
2891 }
2892 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2893 res, argL, argR, size, False));
2894 return res;
2895 }
2896 case Iop_PwMax8Ux8:
2897 case Iop_PwMax16Ux4:
2898 case Iop_PwMax32Ux2: {
2899 HReg res = newVRegD(env);
2900 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902 UInt size = 0;
2903 switch(e->Iex.Binop.op) {
2904 case Iop_PwMax8Ux8: size = 0; break;
2905 case Iop_PwMax16Ux4: size = 1; break;
2906 case Iop_PwMax32Ux2: size = 2; break;
2907 default: vassert(0);
2908 }
2909 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2910 res, argL, argR, size, False));
2911 return res;
2912 }
2913 case Iop_PwMax8Sx8:
2914 case Iop_PwMax16Sx4:
2915 case Iop_PwMax32Sx2: {
2916 HReg res = newVRegD(env);
2917 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919 UInt size = 0;
2920 switch(e->Iex.Binop.op) {
2921 case Iop_PwMax8Sx8: size = 0; break;
2922 case Iop_PwMax16Sx4: size = 1; break;
2923 case Iop_PwMax32Sx2: size = 2; break;
2924 default: vassert(0);
2925 }
2926 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2927 res, argL, argR, size, False));
2928 return res;
2929 }
2930 case Iop_Perm8x8: {
2931 HReg res = newVRegD(env);
2932 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2935 res, argL, argR, 0, False));
2936 return res;
2937 }
2938 case Iop_PolynomialMul8x8: {
2939 HReg res = newVRegD(env);
2940 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2942 UInt size = 0;
2943 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2944 res, argL, argR, size, False));
2945 return res;
2946 }
2947 case Iop_Max32Fx2: {
2948 HReg res = newVRegD(env);
2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2952 res, argL, argR, 2, False));
2953 return res;
2954 }
2955 case Iop_Min32Fx2: {
2956 HReg res = newVRegD(env);
2957 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2960 res, argL, argR, 2, False));
2961 return res;
2962 }
2963 case Iop_PwMax32Fx2: {
2964 HReg res = newVRegD(env);
2965 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2968 res, argL, argR, 2, False));
2969 return res;
2970 }
2971 case Iop_PwMin32Fx2: {
2972 HReg res = newVRegD(env);
2973 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2974 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2975 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2976 res, argL, argR, 2, False));
2977 return res;
2978 }
2979 case Iop_CmpGT32Fx2: {
2980 HReg res = newVRegD(env);
2981 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2984 res, argL, argR, 2, False));
2985 return res;
2986 }
2987 case Iop_CmpGE32Fx2: {
2988 HReg res = newVRegD(env);
2989 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
2992 res, argL, argR, 2, False));
2993 return res;
2994 }
2995 case Iop_CmpEQ32Fx2: {
2996 HReg res = newVRegD(env);
2997 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3000 res, argL, argR, 2, False));
3001 return res;
3002 }
3003 case Iop_F32ToFixed32Ux2_RZ:
3004 case Iop_F32ToFixed32Sx2_RZ:
3005 case Iop_Fixed32UToF32x2_RN:
3006 case Iop_Fixed32SToF32x2_RN: {
3007 HReg res = newVRegD(env);
3008 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3009 ARMNeonUnOp op;
3010 UInt imm6;
3011 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3012 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3013 vpanic("ARM supports FP <-> Fixed conversion with constant "
3014 "second argument less than 33 only\n");
3015 }
3016 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3017 vassert(imm6 <= 32 && imm6 > 0);
3018 imm6 = 64 - imm6;
3019 switch(e->Iex.Binop.op) {
3020 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3021 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3022 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3023 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3024 default: vassert(0);
3025 }
3026 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3027 return res;
3028 }
3029 /*
3030 FIXME: is this here or not?
3031 case Iop_VDup8x8:
3032 case Iop_VDup16x4:
3033 case Iop_VDup32x2: {
3034 HReg res = newVRegD(env);
3035 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3036 UInt index;
3037 UInt imm4;
3038 UInt size = 0;
3039 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3040 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3041 vpanic("ARM supports Iop_VDup with constant "
3042 "second argument less than 16 only\n");
3043 }
3044 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3045 switch(e->Iex.Binop.op) {
3046 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3047 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3048 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3049 default: vassert(0);
3050 }
3051 if (imm4 >= 16) {
3052 vpanic("ARM supports Iop_VDup with constant "
3053 "second argument less than 16 only\n");
3054 }
3055 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3056 res, argL, imm4, False));
3057 return res;
3058 }
3059 */
3060 default:
3061 break;
3062 }
3063 }
3064
3065 /* --------- UNARY ops --------- */
3066 if (e->tag == Iex_Unop) {
3067 switch (e->Iex.Unop.op) {
3068
3069 /* ReinterpF64asI64 */
3070 case Iop_ReinterpF64asI64:
3071 /* Left64(e) */
3072 case Iop_Left64:
3073 /* CmpwNEZ64(e) */
3074 //case Iop_CmpwNEZ64:
3075 case Iop_1Sto64: {
3076 HReg rLo, rHi;
3077 HReg res = newVRegD(env);
3078 iselInt64Expr(&rHi, &rLo, env, e);
3079 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3080 return res;
3081 }
3082 case Iop_Not64: {
3083 DECLARE_PATTERN(p_veqz_8x8);
3084 DECLARE_PATTERN(p_veqz_16x4);
3085 DECLARE_PATTERN(p_veqz_32x2);
3086 DECLARE_PATTERN(p_vcge_8sx8);
3087 DECLARE_PATTERN(p_vcge_16sx4);
3088 DECLARE_PATTERN(p_vcge_32sx2);
3089 DECLARE_PATTERN(p_vcge_8ux8);
3090 DECLARE_PATTERN(p_vcge_16ux4);
3091 DECLARE_PATTERN(p_vcge_32ux2);
3092 DEFINE_PATTERN(p_veqz_8x8,
3093 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3094 DEFINE_PATTERN(p_veqz_16x4,
3095 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3096 DEFINE_PATTERN(p_veqz_32x2,
3097 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3098 DEFINE_PATTERN(p_vcge_8sx8,
3099 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3100 DEFINE_PATTERN(p_vcge_16sx4,
3101 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3102 DEFINE_PATTERN(p_vcge_32sx2,
3103 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3104 DEFINE_PATTERN(p_vcge_8ux8,
3105 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3106 DEFINE_PATTERN(p_vcge_16ux4,
3107 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3108 DEFINE_PATTERN(p_vcge_32ux2,
3109 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3110 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3111 HReg res = newVRegD(env);
3112 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3113 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3114 return res;
3115 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3116 HReg res = newVRegD(env);
3117 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3118 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3119 return res;
3120 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3121 HReg res = newVRegD(env);
3122 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3123 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3124 return res;
3125 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3126 HReg res = newVRegD(env);
3127 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3128 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3129 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3130 res, argL, argR, 0, False));
3131 return res;
3132 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3133 HReg res = newVRegD(env);
3134 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3135 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3136 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3137 res, argL, argR, 1, False));
3138 return res;
3139 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3140 HReg res = newVRegD(env);
3141 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3142 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3143 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3144 res, argL, argR, 2, False));
3145 return res;
3146 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3147 HReg res = newVRegD(env);
3148 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3149 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3150 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3151 res, argL, argR, 0, False));
3152 return res;
3153 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3154 HReg res = newVRegD(env);
3155 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3156 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3157 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3158 res, argL, argR, 1, False));
3159 return res;
3160 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3161 HReg res = newVRegD(env);
3162 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3163 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3164 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3165 res, argL, argR, 2, False));
3166 return res;
3167 } else {
3168 HReg res = newVRegD(env);
3169 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3170 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3171 return res;
3172 }
3173 }
3174 case Iop_Dup8x8:
3175 case Iop_Dup16x4:
3176 case Iop_Dup32x2: {
3177 HReg res, arg;
3178 UInt size;
3179 DECLARE_PATTERN(p_vdup_8x8);
3180 DECLARE_PATTERN(p_vdup_16x4);
3181 DECLARE_PATTERN(p_vdup_32x2);
3182 DEFINE_PATTERN(p_vdup_8x8,
3183 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3184 DEFINE_PATTERN(p_vdup_16x4,
3185 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3186 DEFINE_PATTERN(p_vdup_32x2,
3187 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3188 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3189 UInt index;
3190 UInt imm4;
3191 if (mi.bindee[1]->tag == Iex_Const &&
3192 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3193 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3194 imm4 = (index << 1) + 1;
3195 if (index < 8) {
3196 res = newVRegD(env);
3197 arg = iselNeon64Expr(env, mi.bindee[0]);
3198 addInstr(env, ARMInstr_NUnaryS(
3199 ARMneon_VDUP,
3200 mkARMNRS(ARMNRS_Reg, res, 0),
3201 mkARMNRS(ARMNRS_Scalar, arg, index),
3202 imm4, False
3203 ));
3204 return res;
3205 }
3206 }
3207 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3208 UInt index;
3209 UInt imm4;
3210 if (mi.bindee[1]->tag == Iex_Const &&
3211 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3212 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3213 imm4 = (index << 2) + 2;
3214 if (index < 4) {
3215 res = newVRegD(env);
3216 arg = iselNeon64Expr(env, mi.bindee[0]);
3217 addInstr(env, ARMInstr_NUnaryS(
3218 ARMneon_VDUP,
3219 mkARMNRS(ARMNRS_Reg, res, 0),
3220 mkARMNRS(ARMNRS_Scalar, arg, index),
3221 imm4, False
3222 ));
3223 return res;
3224 }
3225 }
3226 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3227 UInt index;
3228 UInt imm4;
3229 if (mi.bindee[1]->tag == Iex_Const &&
3230 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3231 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3232 imm4 = (index << 3) + 4;
3233 if (index < 2) {
3234 res = newVRegD(env);
3235 arg = iselNeon64Expr(env, mi.bindee[0]);
3236 addInstr(env, ARMInstr_NUnaryS(
3237 ARMneon_VDUP,
3238 mkARMNRS(ARMNRS_Reg, res, 0),
3239 mkARMNRS(ARMNRS_Scalar, arg, index),
3240 imm4, False
3241 ));
3242 return res;
3243 }
3244 }
3245 }
3246 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3247 res = newVRegD(env);
3248 switch (e->Iex.Unop.op) {
3249 case Iop_Dup8x8: size = 0; break;
3250 case Iop_Dup16x4: size = 1; break;
3251 case Iop_Dup32x2: size = 2; break;
3252 default: vassert(0);
3253 }
3254 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3255 return res;
3256 }
3257 case Iop_Abs8x8:
3258 case Iop_Abs16x4:
3259 case Iop_Abs32x2: {
3260 HReg res = newVRegD(env);
3261 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3262 UInt size = 0;
3263 switch(e->Iex.Binop.op) {
3264 case Iop_Abs8x8: size = 0; break;
3265 case Iop_Abs16x4: size = 1; break;
3266 case Iop_Abs32x2: size = 2; break;
3267 default: vassert(0);
3268 }
3269 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3270 return res;
3271 }
3272 case Iop_Reverse64_8x8:
3273 case Iop_Reverse64_16x4:
3274 case Iop_Reverse64_32x2: {
3275 HReg res = newVRegD(env);
3276 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3277 UInt size = 0;
3278 switch(e->Iex.Binop.op) {
3279 case Iop_Reverse64_8x8: size = 0; break;
3280 case Iop_Reverse64_16x4: size = 1; break;
3281 case Iop_Reverse64_32x2: size = 2; break;
3282 default: vassert(0);
3283 }
3284 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3285 res, arg, size, False));
3286 return res;
3287 }
3288 case Iop_Reverse32_8x8:
3289 case Iop_Reverse32_16x4: {
3290 HReg res = newVRegD(env);
3291 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3292 UInt size = 0;
3293 switch(e->Iex.Binop.op) {
3294 case Iop_Reverse32_8x8: size = 0; break;
3295 case Iop_Reverse32_16x4: size = 1; break;
3296 default: vassert(0);
3297 }
3298 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3299 res, arg, size, False));
3300 return res;
3301 }
3302 case Iop_Reverse16_8x8: {
3303 HReg res = newVRegD(env);
3304 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305 UInt size = 0;
3306 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3307 res, arg, size, False));
3308 return res;
3309 }
3310 case Iop_CmpwNEZ64: {
3311 HReg x_lsh = newVRegD(env);
3312 HReg x_rsh = newVRegD(env);
3313 HReg lsh_amt = newVRegD(env);
3314 HReg rsh_amt = newVRegD(env);
3315 HReg zero = newVRegD(env);
3316 HReg tmp = newVRegD(env);
3317 HReg tmp2 = newVRegD(env);
3318 HReg res = newVRegD(env);
3319 HReg x = newVRegD(env);
3320 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3321 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3322 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3323 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3324 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3325 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3326 rsh_amt, zero, lsh_amt, 2, False));
3327 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3328 x_lsh, x, lsh_amt, 3, False));
3329 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3330 x_rsh, x, rsh_amt, 3, False));
3331 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3332 tmp, x_lsh, x_rsh, 0, False));
3333 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3334 res, tmp, x, 0, False));
3335 return res;
3336 }
3337 case Iop_CmpNEZ8x8:
3338 case Iop_CmpNEZ16x4:
3339 case Iop_CmpNEZ32x2: {
3340 HReg res = newVRegD(env);
3341 HReg tmp = newVRegD(env);
3342 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3343 UInt size;
3344 switch (e->Iex.Unop.op) {
3345 case Iop_CmpNEZ8x8: size = 0; break;
3346 case Iop_CmpNEZ16x4: size = 1; break;
3347 case Iop_CmpNEZ32x2: size = 2; break;
3348 default: vassert(0);
3349 }
3350 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3351 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3352 return res;
3353 }
3354 case Iop_Shorten16x8:
3355 case Iop_Shorten32x4:
3356 case Iop_Shorten64x2: {
3357 HReg res = newVRegD(env);
3358 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3359 UInt size = 0;
3360 switch(e->Iex.Binop.op) {
3361 case Iop_Shorten16x8: size = 0; break;
3362 case Iop_Shorten32x4: size = 1; break;
3363 case Iop_Shorten64x2: size = 2; break;
3364 default: vassert(0);
3365 }
3366 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3367 res, arg, size, False));
3368 return res;
3369 }
3370 case Iop_QShortenS16Sx8:
3371 case Iop_QShortenS32Sx4:
3372 case Iop_QShortenS64Sx2: {
3373 HReg res = newVRegD(env);
3374 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3375 UInt size = 0;
3376 switch(e->Iex.Binop.op) {
3377 case Iop_QShortenS16Sx8: size = 0; break;
3378 case Iop_QShortenS32Sx4: size = 1; break;
3379 case Iop_QShortenS64Sx2: size = 2; break;
3380 default: vassert(0);
3381 }
3382 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3383 res, arg, size, False));
3384 return res;
3385 }
3386 case Iop_QShortenU16Sx8:
3387 case Iop_QShortenU32Sx4:
3388 case Iop_QShortenU64Sx2: {
3389 HReg res = newVRegD(env);
3390 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3391 UInt size = 0;
3392 switch(e->Iex.Binop.op) {
3393 case Iop_QShortenU16Sx8: size = 0; break;
3394 case Iop_QShortenU32Sx4: size = 1; break;
3395 case Iop_QShortenU64Sx2: size = 2; break;
3396 default: vassert(0);
3397 }
3398 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3399 res, arg, size, False));
3400 return res;
3401 }
3402 case Iop_QShortenU16Ux8:
3403 case Iop_QShortenU32Ux4:
3404 case Iop_QShortenU64Ux2: {
3405 HReg res = newVRegD(env);
3406 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3407 UInt size = 0;
3408 switch(e->Iex.Binop.op) {
3409 case Iop_QShortenU16Ux8: size = 0; break;
3410 case Iop_QShortenU32Ux4: size = 1; break;
3411 case Iop_QShortenU64Ux2: size = 2; break;
3412 default: vassert(0);
3413 }
3414 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3415 res, arg, size, False));
3416 return res;
3417 }
3418 case Iop_PwAddL8Sx8:
3419 case Iop_PwAddL16Sx4:
3420 case Iop_PwAddL32Sx2: {
3421 HReg res = newVRegD(env);
3422 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3423 UInt size = 0;
3424 switch(e->Iex.Binop.op) {
3425 case Iop_PwAddL8Sx8: size = 0; break;
3426 case Iop_PwAddL16Sx4: size = 1; break;
3427 case Iop_PwAddL32Sx2: size = 2; break;
3428 default: vassert(0);
3429 }
3430 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3431 res, arg, size, False));
3432 return res;
3433 }
3434 case Iop_PwAddL8Ux8:
3435 case Iop_PwAddL16Ux4:
3436 case Iop_PwAddL32Ux2: {
3437 HReg res = newVRegD(env);
3438 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3439 UInt size = 0;
3440 switch(e->Iex.Binop.op) {
3441 case Iop_PwAddL8Ux8: size = 0; break;
3442 case Iop_PwAddL16Ux4: size = 1; break;
3443 case Iop_PwAddL32Ux2: size = 2; break;
3444 default: vassert(0);
3445 }
3446 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3447 res, arg, size, False));
3448 return res;
3449 }
3450 case Iop_Cnt8x8: {
3451 HReg res = newVRegD(env);
3452 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3453 UInt size = 0;
3454 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3455 res, arg, size, False));
3456 return res;
3457 }
3458 case Iop_Clz8Sx8:
3459 case Iop_Clz16Sx4:
3460 case Iop_Clz32Sx2: {
3461 HReg res = newVRegD(env);
3462 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3463 UInt size = 0;
3464 switch(e->Iex.Binop.op) {
3465 case Iop_Clz8Sx8: size = 0; break;
3466 case Iop_Clz16Sx4: size = 1; break;
3467 case Iop_Clz32Sx2: size = 2; break;
3468 default: vassert(0);
3469 }
3470 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3471 res, arg, size, False));
3472 return res;
3473 }
3474 case Iop_Cls8Sx8:
3475 case Iop_Cls16Sx4:
3476 case Iop_Cls32Sx2: {
3477 HReg res = newVRegD(env);
3478 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3479 UInt size = 0;
3480 switch(e->Iex.Binop.op) {
3481 case Iop_Cls8Sx8: size = 0; break;
3482 case Iop_Cls16Sx4: size = 1; break;
3483 case Iop_Cls32Sx2: size = 2; break;
3484 default: vassert(0);
3485 }
3486 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3487 res, arg, size, False));
3488 return res;
3489 }
3490 case Iop_FtoI32Sx2_RZ: {
3491 HReg res = newVRegD(env);
3492 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3493 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3494 res, arg, 2, False));
3495 return res;
3496 }
3497 case Iop_FtoI32Ux2_RZ: {
3498 HReg res = newVRegD(env);
3499 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3500 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3501 res, arg, 2, False));
3502 return res;
3503 }
3504 case Iop_I32StoFx2: {
3505 HReg res = newVRegD(env);
3506 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3507 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3508 res, arg, 2, False));
3509 return res;
3510 }
3511 case Iop_I32UtoFx2: {
3512 HReg res = newVRegD(env);
3513 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3514 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3515 res, arg, 2, False));
3516 return res;
3517 }
3518 case Iop_F32toF16x4: {
3519 HReg res = newVRegD(env);
3520 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3521 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3522 res, arg, 2, False));
3523 return res;
3524 }
3525 case Iop_Recip32Fx2: {
3526 HReg res = newVRegD(env);
3527 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3529 res, argL, 0, False));
3530 return res;
3531 }
3532 case Iop_Recip32x2: {
3533 HReg res = newVRegD(env);
3534 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3535 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3536 res, argL, 0, False));
3537 return res;
3538 }
3539 case Iop_Abs32Fx2: {
3540 DECLARE_PATTERN(p_vabd_32fx2);
3541 DEFINE_PATTERN(p_vabd_32fx2,
3542 unop(Iop_Abs32Fx2,
3543 binop(Iop_Sub32Fx2,
3544 bind(0),
3545 bind(1))));
3546 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3547 HReg res = newVRegD(env);
3548 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3549 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3550 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3551 res, argL, argR, 0, False));
3552 return res;
3553 } else {
3554 HReg res = newVRegD(env);
3555 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3556 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3557 res, arg, 0, False));
3558 return res;
3559 }
3560 }
3561 case Iop_Rsqrte32Fx2: {
3562 HReg res = newVRegD(env);
3563 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3565 res, arg, 0, False));
3566 return res;
3567 }
3568 case Iop_Rsqrte32x2: {
3569 HReg res = newVRegD(env);
3570 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3571 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3572 res, arg, 0, False));
3573 return res;
3574 }
3575 case Iop_Neg32Fx2: {
3576 HReg res = newVRegD(env);
3577 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3578 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3579 res, arg, 0, False));
3580 return res;
3581 }
3582 default:
3583 break;
3584 }
3585 } /* if (e->tag == Iex_Unop) */
3586
3587 if (e->tag == Iex_Triop) {
3588 switch (e->Iex.Triop.op) {
3589 case Iop_Extract64: {
3590 HReg res = newVRegD(env);
3591 HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3592 HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3593 UInt imm4;
3594 if (e->Iex.Triop.arg3->tag != Iex_Const ||
3595 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3596 vpanic("ARM target supports Iop_Extract64 with constant "
3597 "third argument less than 16 only\n");
3598 }
3599 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3600 if (imm4 >= 8) {
3601 vpanic("ARM target supports Iop_Extract64 with constant "
3602 "third argument less than 16 only\n");
3603 }
3604 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3605 res, argL, argR, imm4, False));
3606 return res;
3607 }
3608 case Iop_SetElem8x8:
3609 case Iop_SetElem16x4:
3610 case Iop_SetElem32x2: {
3611 HReg res = newVRegD(env);
3612 HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3613 HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3614 UInt index, size;
3615 if (e->Iex.Triop.arg2->tag != Iex_Const ||
3616 typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3617 vpanic("ARM target supports SetElem with constant "
3618 "second argument only\n");
3619 }
3620 index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3621 switch (e->Iex.Triop.op) {
3622 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3623 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3624 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3625 default: vassert(0);
3626 }
3627 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3628 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3629 mkARMNRS(ARMNRS_Scalar, res, index),
3630 mkARMNRS(ARMNRS_Reg, arg, 0),
3631 size, False));
3632 return res;
3633 }
3634 default:
3635 break;
3636 }
3637 }
3638
3639 /* --------- MULTIPLEX --------- */
3640 if (e->tag == Iex_Mux0X) {
3641 HReg rLo, rHi;
3642 HReg res = newVRegD(env);
3643 iselInt64Expr(&rHi, &rLo, env, e);
3644 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3645 return res;
3646 }
3647
3648 ppIRExpr(e);
3649 vpanic("iselNeon64Expr");
3650 }
3651
iselNeonExpr(ISelEnv * env,IRExpr * e)3652 static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3653 {
3654 HReg r = iselNeonExpr_wrk( env, e );
3655 vassert(hregClass(r) == HRcVec128);
3656 vassert(hregIsVirtual(r));
3657 return r;
3658 }
3659
3660 /* DO NOT CALL THIS DIRECTLY */
iselNeonExpr_wrk(ISelEnv * env,IRExpr * e)3661 static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3662 {
3663 IRType ty = typeOfIRExpr(env->type_env, e);
3664 MatchInfo mi;
3665 vassert(e);
3666 vassert(ty == Ity_V128);
3667
3668 if (e->tag == Iex_RdTmp) {
3669 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3670 }
3671
3672 if (e->tag == Iex_Const) {
3673 /* At the moment there should be no 128-bit constants in IR for ARM
3674 generated during disassemble. They are represented as Iop_64HLtoV128
3675 binary operation and are handled among binary ops. */
3676 /* But zero can be created by valgrind internal optimizer */
3677 if (e->Iex.Const.con->Ico.V128 == 0) {
3678 HReg res = newVRegV(env);
3679 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3680 return res;
3681 }
3682 ppIRExpr(e);
3683 vpanic("128-bit constant is not implemented");
3684 }
3685
3686 if (e->tag == Iex_Load) {
3687 HReg res = newVRegV(env);
3688 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3689 vassert(ty == Ity_V128);
3690 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3691 return res;
3692 }
3693
3694 if (e->tag == Iex_Get) {
3695 HReg addr = newVRegI(env);
3696 HReg res = newVRegV(env);
3697 vassert(ty == Ity_V128);
3698 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3699 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3700 return res;
3701 }
3702
3703 if (e->tag == Iex_Unop) {
3704 switch (e->Iex.Unop.op) {
3705 case Iop_NotV128: {
3706 DECLARE_PATTERN(p_veqz_8x16);
3707 DECLARE_PATTERN(p_veqz_16x8);
3708 DECLARE_PATTERN(p_veqz_32x4);
3709 DECLARE_PATTERN(p_vcge_8sx16);
3710 DECLARE_PATTERN(p_vcge_16sx8);
3711 DECLARE_PATTERN(p_vcge_32sx4);
3712 DECLARE_PATTERN(p_vcge_8ux16);
3713 DECLARE_PATTERN(p_vcge_16ux8);
3714 DECLARE_PATTERN(p_vcge_32ux4);
3715 DEFINE_PATTERN(p_veqz_8x16,
3716 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3717 DEFINE_PATTERN(p_veqz_16x8,
3718 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3719 DEFINE_PATTERN(p_veqz_32x4,
3720 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3721 DEFINE_PATTERN(p_vcge_8sx16,
3722 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3723 DEFINE_PATTERN(p_vcge_16sx8,
3724 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3725 DEFINE_PATTERN(p_vcge_32sx4,
3726 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3727 DEFINE_PATTERN(p_vcge_8ux16,
3728 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3729 DEFINE_PATTERN(p_vcge_16ux8,
3730 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3731 DEFINE_PATTERN(p_vcge_32ux4,
3732 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3733 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3734 HReg res = newVRegV(env);
3735 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3736 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3737 return res;
3738 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3739 HReg res = newVRegV(env);
3740 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3741 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3742 return res;
3743 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3744 HReg res = newVRegV(env);
3745 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3746 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3747 return res;
3748 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3749 HReg res = newVRegV(env);
3750 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3751 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3752 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3753 res, argL, argR, 0, True));
3754 return res;
3755 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3756 HReg res = newVRegV(env);
3757 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3758 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3759 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3760 res, argL, argR, 1, True));
3761 return res;
3762 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3763 HReg res = newVRegV(env);
3764 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3765 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3766 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3767 res, argL, argR, 2, True));
3768 return res;
3769 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3770 HReg res = newVRegV(env);
3771 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3772 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3773 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3774 res, argL, argR, 0, True));
3775 return res;
3776 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3777 HReg res = newVRegV(env);
3778 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3779 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3780 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3781 res, argL, argR, 1, True));
3782 return res;
3783 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3784 HReg res = newVRegV(env);
3785 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3786 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3787 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3788 res, argL, argR, 2, True));
3789 return res;
3790 } else {
3791 HReg res = newVRegV(env);
3792 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3793 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3794 return res;
3795 }
3796 }
3797 case Iop_Dup8x16:
3798 case Iop_Dup16x8:
3799 case Iop_Dup32x4: {
3800 HReg res, arg;
3801 UInt size;
3802 DECLARE_PATTERN(p_vdup_8x16);
3803 DECLARE_PATTERN(p_vdup_16x8);
3804 DECLARE_PATTERN(p_vdup_32x4);
3805 DEFINE_PATTERN(p_vdup_8x16,
3806 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3807 DEFINE_PATTERN(p_vdup_16x8,
3808 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3809 DEFINE_PATTERN(p_vdup_32x4,
3810 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3811 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3812 UInt index;
3813 UInt imm4;
3814 if (mi.bindee[1]->tag == Iex_Const &&
3815 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3816 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3817 imm4 = (index << 1) + 1;
3818 if (index < 8) {
3819 res = newVRegV(env);
3820 arg = iselNeon64Expr(env, mi.bindee[0]);
3821 addInstr(env, ARMInstr_NUnaryS(
3822 ARMneon_VDUP,
3823 mkARMNRS(ARMNRS_Reg, res, 0),
3824 mkARMNRS(ARMNRS_Scalar, arg, index),
3825 imm4, True
3826 ));
3827 return res;
3828 }
3829 }
3830 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3831 UInt index;
3832 UInt imm4;
3833 if (mi.bindee[1]->tag == Iex_Const &&
3834 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836 imm4 = (index << 2) + 2;
3837 if (index < 4) {
3838 res = newVRegV(env);
3839 arg = iselNeon64Expr(env, mi.bindee[0]);
3840 addInstr(env, ARMInstr_NUnaryS(
3841 ARMneon_VDUP,
3842 mkARMNRS(ARMNRS_Reg, res, 0),
3843 mkARMNRS(ARMNRS_Scalar, arg, index),
3844 imm4, True
3845 ));
3846 return res;
3847 }
3848 }
3849 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3850 UInt index;
3851 UInt imm4;
3852 if (mi.bindee[1]->tag == Iex_Const &&
3853 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855 imm4 = (index << 3) + 4;
3856 if (index < 2) {
3857 res = newVRegV(env);
3858 arg = iselNeon64Expr(env, mi.bindee[0]);
3859 addInstr(env, ARMInstr_NUnaryS(
3860 ARMneon_VDUP,
3861 mkARMNRS(ARMNRS_Reg, res, 0),
3862 mkARMNRS(ARMNRS_Scalar, arg, index),
3863 imm4, True
3864 ));
3865 return res;
3866 }
3867 }
3868 }
3869 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3870 res = newVRegV(env);
3871 switch (e->Iex.Unop.op) {
3872 case Iop_Dup8x16: size = 0; break;
3873 case Iop_Dup16x8: size = 1; break;
3874 case Iop_Dup32x4: size = 2; break;
3875 default: vassert(0);
3876 }
3877 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3878 return res;
3879 }
3880 case Iop_Abs8x16:
3881 case Iop_Abs16x8:
3882 case Iop_Abs32x4: {
3883 HReg res = newVRegV(env);
3884 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3885 UInt size = 0;
3886 switch(e->Iex.Binop.op) {
3887 case Iop_Abs8x16: size = 0; break;
3888 case Iop_Abs16x8: size = 1; break;
3889 case Iop_Abs32x4: size = 2; break;
3890 default: vassert(0);
3891 }
3892 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3893 return res;
3894 }
3895 case Iop_Reverse64_8x16:
3896 case Iop_Reverse64_16x8:
3897 case Iop_Reverse64_32x4: {
3898 HReg res = newVRegV(env);
3899 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3900 UInt size = 0;
3901 switch(e->Iex.Binop.op) {
3902 case Iop_Reverse64_8x16: size = 0; break;
3903 case Iop_Reverse64_16x8: size = 1; break;
3904 case Iop_Reverse64_32x4: size = 2; break;
3905 default: vassert(0);
3906 }
3907 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3908 res, arg, size, True));
3909 return res;
3910 }
3911 case Iop_Reverse32_8x16:
3912 case Iop_Reverse32_16x8: {
3913 HReg res = newVRegV(env);
3914 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3915 UInt size = 0;
3916 switch(e->Iex.Binop.op) {
3917 case Iop_Reverse32_8x16: size = 0; break;
3918 case Iop_Reverse32_16x8: size = 1; break;
3919 default: vassert(0);
3920 }
3921 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3922 res, arg, size, True));
3923 return res;
3924 }
3925 case Iop_Reverse16_8x16: {
3926 HReg res = newVRegV(env);
3927 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3928 UInt size = 0;
3929 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3930 res, arg, size, True));
3931 return res;
3932 }
3933 case Iop_CmpNEZ64x2: {
3934 HReg x_lsh = newVRegV(env);
3935 HReg x_rsh = newVRegV(env);
3936 HReg lsh_amt = newVRegV(env);
3937 HReg rsh_amt = newVRegV(env);
3938 HReg zero = newVRegV(env);
3939 HReg tmp = newVRegV(env);
3940 HReg tmp2 = newVRegV(env);
3941 HReg res = newVRegV(env);
3942 HReg x = newVRegV(env);
3943 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3945 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3946 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3947 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3948 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3949 rsh_amt, zero, lsh_amt, 2, True));
3950 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3951 x_lsh, x, lsh_amt, 3, True));
3952 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3953 x_rsh, x, rsh_amt, 3, True));
3954 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3955 tmp, x_lsh, x_rsh, 0, True));
3956 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3957 res, tmp, x, 0, True));
3958 return res;
3959 }
3960 case Iop_CmpNEZ8x16:
3961 case Iop_CmpNEZ16x8:
3962 case Iop_CmpNEZ32x4: {
3963 HReg res = newVRegV(env);
3964 HReg tmp = newVRegV(env);
3965 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3966 UInt size;
3967 switch (e->Iex.Unop.op) {
3968 case Iop_CmpNEZ8x16: size = 0; break;
3969 case Iop_CmpNEZ16x8: size = 1; break;
3970 case Iop_CmpNEZ32x4: size = 2; break;
3971 default: vassert(0);
3972 }
3973 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3974 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3975 return res;
3976 }
3977 case Iop_Longen8Ux8:
3978 case Iop_Longen16Ux4:
3979 case Iop_Longen32Ux2: {
3980 HReg res = newVRegV(env);
3981 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3982 UInt size;
3983 switch (e->Iex.Unop.op) {
3984 case Iop_Longen8Ux8: size = 0; break;
3985 case Iop_Longen16Ux4: size = 1; break;
3986 case Iop_Longen32Ux2: size = 2; break;
3987 default: vassert(0);
3988 }
3989 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
3990 res, arg, size, True));
3991 return res;
3992 }
3993 case Iop_Longen8Sx8:
3994 case Iop_Longen16Sx4:
3995 case Iop_Longen32Sx2: {
3996 HReg res = newVRegV(env);
3997 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3998 UInt size;
3999 switch (e->Iex.Unop.op) {
4000 case Iop_Longen8Sx8: size = 0; break;
4001 case Iop_Longen16Sx4: size = 1; break;
4002 case Iop_Longen32Sx2: size = 2; break;
4003 default: vassert(0);
4004 }
4005 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4006 res, arg, size, True));
4007 return res;
4008 }
4009 case Iop_PwAddL8Sx16:
4010 case Iop_PwAddL16Sx8:
4011 case Iop_PwAddL32Sx4: {
4012 HReg res = newVRegV(env);
4013 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4014 UInt size = 0;
4015 switch(e->Iex.Binop.op) {
4016 case Iop_PwAddL8Sx16: size = 0; break;
4017 case Iop_PwAddL16Sx8: size = 1; break;
4018 case Iop_PwAddL32Sx4: size = 2; break;
4019 default: vassert(0);
4020 }
4021 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4022 res, arg, size, True));
4023 return res;
4024 }
4025 case Iop_PwAddL8Ux16:
4026 case Iop_PwAddL16Ux8:
4027 case Iop_PwAddL32Ux4: {
4028 HReg res = newVRegV(env);
4029 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030 UInt size = 0;
4031 switch(e->Iex.Binop.op) {
4032 case Iop_PwAddL8Ux16: size = 0; break;
4033 case Iop_PwAddL16Ux8: size = 1; break;
4034 case Iop_PwAddL32Ux4: size = 2; break;
4035 default: vassert(0);
4036 }
4037 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4038 res, arg, size, True));
4039 return res;
4040 }
4041 case Iop_Cnt8x16: {
4042 HReg res = newVRegV(env);
4043 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4044 UInt size = 0;
4045 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4046 return res;
4047 }
4048 case Iop_Clz8Sx16:
4049 case Iop_Clz16Sx8:
4050 case Iop_Clz32Sx4: {
4051 HReg res = newVRegV(env);
4052 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4053 UInt size = 0;
4054 switch(e->Iex.Binop.op) {
4055 case Iop_Clz8Sx16: size = 0; break;
4056 case Iop_Clz16Sx8: size = 1; break;
4057 case Iop_Clz32Sx4: size = 2; break;
4058 default: vassert(0);
4059 }
4060 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4061 return res;
4062 }
4063 case Iop_Cls8Sx16:
4064 case Iop_Cls16Sx8:
4065 case Iop_Cls32Sx4: {
4066 HReg res = newVRegV(env);
4067 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4068 UInt size = 0;
4069 switch(e->Iex.Binop.op) {
4070 case Iop_Cls8Sx16: size = 0; break;
4071 case Iop_Cls16Sx8: size = 1; break;
4072 case Iop_Cls32Sx4: size = 2; break;
4073 default: vassert(0);
4074 }
4075 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4076 return res;
4077 }
4078 case Iop_FtoI32Sx4_RZ: {
4079 HReg res = newVRegV(env);
4080 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4081 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4082 res, arg, 2, True));
4083 return res;
4084 }
4085 case Iop_FtoI32Ux4_RZ: {
4086 HReg res = newVRegV(env);
4087 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4088 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4089 res, arg, 2, True));
4090 return res;
4091 }
4092 case Iop_I32StoFx4: {
4093 HReg res = newVRegV(env);
4094 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4096 res, arg, 2, True));
4097 return res;
4098 }
4099 case Iop_I32UtoFx4: {
4100 HReg res = newVRegV(env);
4101 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4103 res, arg, 2, True));
4104 return res;
4105 }
4106 case Iop_F16toF32x4: {
4107 HReg res = newVRegV(env);
4108 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4109 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4110 res, arg, 2, True));
4111 return res;
4112 }
4113 case Iop_Recip32Fx4: {
4114 HReg res = newVRegV(env);
4115 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4116 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4117 res, argL, 0, True));
4118 return res;
4119 }
4120 case Iop_Recip32x4: {
4121 HReg res = newVRegV(env);
4122 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4123 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4124 res, argL, 0, True));
4125 return res;
4126 }
4127 case Iop_Abs32Fx4: {
4128 DECLARE_PATTERN(p_vabd_32fx4);
4129 DEFINE_PATTERN(p_vabd_32fx4,
4130 unop(Iop_Abs32Fx4,
4131 binop(Iop_Sub32Fx4,
4132 bind(0),
4133 bind(1))));
4134 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4135 HReg res = newVRegV(env);
4136 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4137 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4138 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4139 res, argL, argR, 0, True));
4140 return res;
4141 } else {
4142 HReg res = newVRegV(env);
4143 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4144 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4145 res, argL, 0, True));
4146 return res;
4147 }
4148 }
4149 case Iop_Rsqrte32Fx4: {
4150 HReg res = newVRegV(env);
4151 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4153 res, argL, 0, True));
4154 return res;
4155 }
4156 case Iop_Rsqrte32x4: {
4157 HReg res = newVRegV(env);
4158 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4160 res, argL, 0, True));
4161 return res;
4162 }
4163 case Iop_Neg32Fx4: {
4164 HReg res = newVRegV(env);
4165 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4166 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4167 res, arg, 0, True));
4168 return res;
4169 }
4170 /* ... */
4171 default:
4172 break;
4173 }
4174 }
4175
4176 if (e->tag == Iex_Binop) {
4177 switch (e->Iex.Binop.op) {
4178 case Iop_64HLtoV128:
4179 /* Try to match into single "VMOV reg, imm" instruction */
4180 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4181 e->Iex.Binop.arg2->tag == Iex_Const &&
4182 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4183 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4184 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4185 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4186 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4187 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4188 if (imm) {
4189 HReg res = newVRegV(env);
4190 addInstr(env, ARMInstr_NeonImm(res, imm));
4191 return res;
4192 }
4193 if ((imm64 >> 32) == 0LL &&
4194 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4195 HReg tmp1 = newVRegV(env);
4196 HReg tmp2 = newVRegV(env);
4197 HReg res = newVRegV(env);
4198 if (imm->type < 10) {
4199 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4200 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4201 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4202 res, tmp1, tmp2, 4, True));
4203 return res;
4204 }
4205 }
4206 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4207 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4208 HReg tmp1 = newVRegV(env);
4209 HReg tmp2 = newVRegV(env);
4210 HReg res = newVRegV(env);
4211 if (imm->type < 10) {
4212 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4213 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4214 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4215 res, tmp1, tmp2, 4, True));
4216 return res;
4217 }
4218 }
4219 }
4220 /* Does not match "VMOV Reg, Imm" form */
4221 goto neon_expr_bad;
4222 case Iop_AndV128: {
4223 HReg res = newVRegV(env);
4224 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4225 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4226 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4227 res, argL, argR, 4, True));
4228 return res;
4229 }
4230 case Iop_OrV128: {
4231 HReg res = newVRegV(env);
4232 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4233 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4234 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4235 res, argL, argR, 4, True));
4236 return res;
4237 }
4238 case Iop_XorV128: {
4239 HReg res = newVRegV(env);
4240 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4241 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4242 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4243 res, argL, argR, 4, True));
4244 return res;
4245 }
4246 case Iop_Add8x16:
4247 case Iop_Add16x8:
4248 case Iop_Add32x4:
4249 case Iop_Add64x2: {
4250 /*
4251 FIXME: remove this if not used
4252 DECLARE_PATTERN(p_vrhadd_32sx4);
4253 ULong one = (1LL << 32) | 1LL;
4254 DEFINE_PATTERN(p_vrhadd_32sx4,
4255 binop(Iop_Add32x4,
4256 binop(Iop_Add32x4,
4257 binop(Iop_SarN32x4,
4258 bind(0),
4259 mkU8(1)),
4260 binop(Iop_SarN32x4,
4261 bind(1),
4262 mkU8(1))),
4263 binop(Iop_SarN32x4,
4264 binop(Iop_Add32x4,
4265 binop(Iop_Add32x4,
4266 binop(Iop_AndV128,
4267 bind(0),
4268 mkU128(one)),
4269 binop(Iop_AndV128,
4270 bind(1),
4271 mkU128(one))),
4272 mkU128(one)),
4273 mkU8(1))));
4274 */
4275 HReg res = newVRegV(env);
4276 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4277 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4278 UInt size;
4279 switch (e->Iex.Binop.op) {
4280 case Iop_Add8x16: size = 0; break;
4281 case Iop_Add16x8: size = 1; break;
4282 case Iop_Add32x4: size = 2; break;
4283 case Iop_Add64x2: size = 3; break;
4284 default:
4285 ppIROp(e->Iex.Binop.op);
4286 vpanic("Illegal element size in VADD");
4287 }
4288 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4289 res, argL, argR, size, True));
4290 return res;
4291 }
4292 case Iop_Add32Fx4: {
4293 HReg res = newVRegV(env);
4294 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4295 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4296 UInt size = 0;
4297 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4298 res, argL, argR, size, True));
4299 return res;
4300 }
4301 case Iop_Recps32Fx4: {
4302 HReg res = newVRegV(env);
4303 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4304 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4305 UInt size = 0;
4306 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4307 res, argL, argR, size, True));
4308 return res;
4309 }
4310 case Iop_Rsqrts32Fx4: {
4311 HReg res = newVRegV(env);
4312 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4313 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4314 UInt size = 0;
4315 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4316 res, argL, argR, size, True));
4317 return res;
4318 }
4319 case Iop_InterleaveEvenLanes8x16:
4320 case Iop_InterleaveEvenLanes16x8:
4321 case Iop_InterleaveEvenLanes32x4:
4322 case Iop_InterleaveOddLanes8x16:
4323 case Iop_InterleaveOddLanes16x8:
4324 case Iop_InterleaveOddLanes32x4: {
4325 HReg tmp = newVRegV(env);
4326 HReg res = newVRegV(env);
4327 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4328 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4329 UInt size;
4330 UInt is_lo;
4331 switch (e->Iex.Binop.op) {
4332 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4333 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4334 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4335 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4336 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4337 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4338 default:
4339 ppIROp(e->Iex.Binop.op);
4340 vpanic("Illegal element size in VTRN");
4341 }
4342 if (is_lo) {
4343 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4344 tmp, argL, 4, True));
4345 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4346 res, argR, 4, True));
4347 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4348 res, tmp, size, True));
4349 } else {
4350 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4351 tmp, argR, 4, True));
4352 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4353 res, argL, 4, True));
4354 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4355 tmp, res, size, True));
4356 }
4357 return res;
4358 }
4359 case Iop_InterleaveHI8x16:
4360 case Iop_InterleaveHI16x8:
4361 case Iop_InterleaveHI32x4:
4362 case Iop_InterleaveLO8x16:
4363 case Iop_InterleaveLO16x8:
4364 case Iop_InterleaveLO32x4: {
4365 HReg tmp = newVRegV(env);
4366 HReg res = newVRegV(env);
4367 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4368 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4369 UInt size;
4370 UInt is_lo;
4371 switch (e->Iex.Binop.op) {
4372 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4373 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4374 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4375 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4376 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4377 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4378 default:
4379 ppIROp(e->Iex.Binop.op);
4380 vpanic("Illegal element size in VZIP");
4381 }
4382 if (is_lo) {
4383 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4384 tmp, argL, 4, True));
4385 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4386 res, argR, 4, True));
4387 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4388 res, tmp, size, True));
4389 } else {
4390 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4391 tmp, argR, 4, True));
4392 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4393 res, argL, 4, True));
4394 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4395 tmp, res, size, True));
4396 }
4397 return res;
4398 }
4399 case Iop_CatOddLanes8x16:
4400 case Iop_CatOddLanes16x8:
4401 case Iop_CatOddLanes32x4:
4402 case Iop_CatEvenLanes8x16:
4403 case Iop_CatEvenLanes16x8:
4404 case Iop_CatEvenLanes32x4: {
4405 HReg tmp = newVRegV(env);
4406 HReg res = newVRegV(env);
4407 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4408 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4409 UInt size;
4410 UInt is_lo;
4411 switch (e->Iex.Binop.op) {
4412 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4413 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4414 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4415 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4416 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4417 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4418 default:
4419 ppIROp(e->Iex.Binop.op);
4420 vpanic("Illegal element size in VUZP");
4421 }
4422 if (is_lo) {
4423 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4424 tmp, argL, 4, True));
4425 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4426 res, argR, 4, True));
4427 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4428 res, tmp, size, True));
4429 } else {
4430 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4431 tmp, argR, 4, True));
4432 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4433 res, argL, 4, True));
4434 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4435 tmp, res, size, True));
4436 }
4437 return res;
4438 }
4439 case Iop_QAdd8Ux16:
4440 case Iop_QAdd16Ux8:
4441 case Iop_QAdd32Ux4:
4442 case Iop_QAdd64Ux2: {
4443 HReg res = newVRegV(env);
4444 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4445 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4446 UInt size;
4447 switch (e->Iex.Binop.op) {
4448 case Iop_QAdd8Ux16: size = 0; break;
4449 case Iop_QAdd16Ux8: size = 1; break;
4450 case Iop_QAdd32Ux4: size = 2; break;
4451 case Iop_QAdd64Ux2: size = 3; break;
4452 default:
4453 ppIROp(e->Iex.Binop.op);
4454 vpanic("Illegal element size in VQADDU");
4455 }
4456 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4457 res, argL, argR, size, True));
4458 return res;
4459 }
4460 case Iop_QAdd8Sx16:
4461 case Iop_QAdd16Sx8:
4462 case Iop_QAdd32Sx4:
4463 case Iop_QAdd64Sx2: {
4464 HReg res = newVRegV(env);
4465 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4466 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4467 UInt size;
4468 switch (e->Iex.Binop.op) {
4469 case Iop_QAdd8Sx16: size = 0; break;
4470 case Iop_QAdd16Sx8: size = 1; break;
4471 case Iop_QAdd32Sx4: size = 2; break;
4472 case Iop_QAdd64Sx2: size = 3; break;
4473 default:
4474 ppIROp(e->Iex.Binop.op);
4475 vpanic("Illegal element size in VQADDS");
4476 }
4477 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4478 res, argL, argR, size, True));
4479 return res;
4480 }
4481 case Iop_Sub8x16:
4482 case Iop_Sub16x8:
4483 case Iop_Sub32x4:
4484 case Iop_Sub64x2: {
4485 HReg res = newVRegV(env);
4486 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4487 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4488 UInt size;
4489 switch (e->Iex.Binop.op) {
4490 case Iop_Sub8x16: size = 0; break;
4491 case Iop_Sub16x8: size = 1; break;
4492 case Iop_Sub32x4: size = 2; break;
4493 case Iop_Sub64x2: size = 3; break;
4494 default:
4495 ppIROp(e->Iex.Binop.op);
4496 vpanic("Illegal element size in VSUB");
4497 }
4498 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4499 res, argL, argR, size, True));
4500 return res;
4501 }
4502 case Iop_Sub32Fx4: {
4503 HReg res = newVRegV(env);
4504 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4505 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4506 UInt size = 0;
4507 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4508 res, argL, argR, size, True));
4509 return res;
4510 }
4511 case Iop_QSub8Ux16:
4512 case Iop_QSub16Ux8:
4513 case Iop_QSub32Ux4:
4514 case Iop_QSub64Ux2: {
4515 HReg res = newVRegV(env);
4516 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4517 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4518 UInt size;
4519 switch (e->Iex.Binop.op) {
4520 case Iop_QSub8Ux16: size = 0; break;
4521 case Iop_QSub16Ux8: size = 1; break;
4522 case Iop_QSub32Ux4: size = 2; break;
4523 case Iop_QSub64Ux2: size = 3; break;
4524 default:
4525 ppIROp(e->Iex.Binop.op);
4526 vpanic("Illegal element size in VQSUBU");
4527 }
4528 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4529 res, argL, argR, size, True));
4530 return res;
4531 }
4532 case Iop_QSub8Sx16:
4533 case Iop_QSub16Sx8:
4534 case Iop_QSub32Sx4:
4535 case Iop_QSub64Sx2: {
4536 HReg res = newVRegV(env);
4537 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4538 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4539 UInt size;
4540 switch (e->Iex.Binop.op) {
4541 case Iop_QSub8Sx16: size = 0; break;
4542 case Iop_QSub16Sx8: size = 1; break;
4543 case Iop_QSub32Sx4: size = 2; break;
4544 case Iop_QSub64Sx2: size = 3; break;
4545 default:
4546 ppIROp(e->Iex.Binop.op);
4547 vpanic("Illegal element size in VQSUBS");
4548 }
4549 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4550 res, argL, argR, size, True));
4551 return res;
4552 }
4553 case Iop_Max8Ux16:
4554 case Iop_Max16Ux8:
4555 case Iop_Max32Ux4: {
4556 HReg res = newVRegV(env);
4557 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4558 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4559 UInt size;
4560 switch (e->Iex.Binop.op) {
4561 case Iop_Max8Ux16: size = 0; break;
4562 case Iop_Max16Ux8: size = 1; break;
4563 case Iop_Max32Ux4: size = 2; break;
4564 default: vpanic("Illegal element size in VMAXU");
4565 }
4566 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4567 res, argL, argR, size, True));
4568 return res;
4569 }
4570 case Iop_Max8Sx16:
4571 case Iop_Max16Sx8:
4572 case Iop_Max32Sx4: {
4573 HReg res = newVRegV(env);
4574 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4575 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4576 UInt size;
4577 switch (e->Iex.Binop.op) {
4578 case Iop_Max8Sx16: size = 0; break;
4579 case Iop_Max16Sx8: size = 1; break;
4580 case Iop_Max32Sx4: size = 2; break;
4581 default: vpanic("Illegal element size in VMAXU");
4582 }
4583 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4584 res, argL, argR, size, True));
4585 return res;
4586 }
4587 case Iop_Min8Ux16:
4588 case Iop_Min16Ux8:
4589 case Iop_Min32Ux4: {
4590 HReg res = newVRegV(env);
4591 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4592 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4593 UInt size;
4594 switch (e->Iex.Binop.op) {
4595 case Iop_Min8Ux16: size = 0; break;
4596 case Iop_Min16Ux8: size = 1; break;
4597 case Iop_Min32Ux4: size = 2; break;
4598 default: vpanic("Illegal element size in VMAXU");
4599 }
4600 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4601 res, argL, argR, size, True));
4602 return res;
4603 }
4604 case Iop_Min8Sx16:
4605 case Iop_Min16Sx8:
4606 case Iop_Min32Sx4: {
4607 HReg res = newVRegV(env);
4608 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4609 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4610 UInt size;
4611 switch (e->Iex.Binop.op) {
4612 case Iop_Min8Sx16: size = 0; break;
4613 case Iop_Min16Sx8: size = 1; break;
4614 case Iop_Min32Sx4: size = 2; break;
4615 default: vpanic("Illegal element size in VMAXU");
4616 }
4617 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4618 res, argL, argR, size, True));
4619 return res;
4620 }
4621 case Iop_Sar8x16:
4622 case Iop_Sar16x8:
4623 case Iop_Sar32x4:
4624 case Iop_Sar64x2: {
4625 HReg res = newVRegV(env);
4626 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628 HReg argR2 = newVRegV(env);
4629 HReg zero = newVRegV(env);
4630 UInt size;
4631 switch (e->Iex.Binop.op) {
4632 case Iop_Sar8x16: size = 0; break;
4633 case Iop_Sar16x8: size = 1; break;
4634 case Iop_Sar32x4: size = 2; break;
4635 case Iop_Sar64x2: size = 3; break;
4636 default: vassert(0);
4637 }
4638 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4639 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4640 argR2, zero, argR, size, True));
4641 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4642 res, argL, argR2, size, True));
4643 return res;
4644 }
4645 case Iop_Sal8x16:
4646 case Iop_Sal16x8:
4647 case Iop_Sal32x4:
4648 case Iop_Sal64x2: {
4649 HReg res = newVRegV(env);
4650 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4651 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4652 UInt size;
4653 switch (e->Iex.Binop.op) {
4654 case Iop_Sal8x16: size = 0; break;
4655 case Iop_Sal16x8: size = 1; break;
4656 case Iop_Sal32x4: size = 2; break;
4657 case Iop_Sal64x2: size = 3; break;
4658 default: vassert(0);
4659 }
4660 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4661 res, argL, argR, size, True));
4662 return res;
4663 }
4664 case Iop_Shr8x16:
4665 case Iop_Shr16x8:
4666 case Iop_Shr32x4:
4667 case Iop_Shr64x2: {
4668 HReg res = newVRegV(env);
4669 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4670 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4671 HReg argR2 = newVRegV(env);
4672 HReg zero = newVRegV(env);
4673 UInt size;
4674 switch (e->Iex.Binop.op) {
4675 case Iop_Shr8x16: size = 0; break;
4676 case Iop_Shr16x8: size = 1; break;
4677 case Iop_Shr32x4: size = 2; break;
4678 case Iop_Shr64x2: size = 3; break;
4679 default: vassert(0);
4680 }
4681 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4682 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4683 argR2, zero, argR, size, True));
4684 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4685 res, argL, argR2, size, True));
4686 return res;
4687 }
4688 case Iop_Shl8x16:
4689 case Iop_Shl16x8:
4690 case Iop_Shl32x4:
4691 case Iop_Shl64x2: {
4692 HReg res = newVRegV(env);
4693 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4694 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4695 UInt size;
4696 switch (e->Iex.Binop.op) {
4697 case Iop_Shl8x16: size = 0; break;
4698 case Iop_Shl16x8: size = 1; break;
4699 case Iop_Shl32x4: size = 2; break;
4700 case Iop_Shl64x2: size = 3; break;
4701 default: vassert(0);
4702 }
4703 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4704 res, argL, argR, size, True));
4705 return res;
4706 }
4707 case Iop_QShl8x16:
4708 case Iop_QShl16x8:
4709 case Iop_QShl32x4:
4710 case Iop_QShl64x2: {
4711 HReg res = newVRegV(env);
4712 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4713 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4714 UInt size;
4715 switch (e->Iex.Binop.op) {
4716 case Iop_QShl8x16: size = 0; break;
4717 case Iop_QShl16x8: size = 1; break;
4718 case Iop_QShl32x4: size = 2; break;
4719 case Iop_QShl64x2: size = 3; break;
4720 default: vassert(0);
4721 }
4722 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4723 res, argL, argR, size, True));
4724 return res;
4725 }
4726 case Iop_QSal8x16:
4727 case Iop_QSal16x8:
4728 case Iop_QSal32x4:
4729 case Iop_QSal64x2: {
4730 HReg res = newVRegV(env);
4731 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4732 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4733 UInt size;
4734 switch (e->Iex.Binop.op) {
4735 case Iop_QSal8x16: size = 0; break;
4736 case Iop_QSal16x8: size = 1; break;
4737 case Iop_QSal32x4: size = 2; break;
4738 case Iop_QSal64x2: size = 3; break;
4739 default: vassert(0);
4740 }
4741 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4742 res, argL, argR, size, True));
4743 return res;
4744 }
4745 case Iop_QShlN8x16:
4746 case Iop_QShlN16x8:
4747 case Iop_QShlN32x4:
4748 case Iop_QShlN64x2: {
4749 HReg res = newVRegV(env);
4750 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4751 UInt size, imm;
4752 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4753 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4754 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4755 "second argument only\n");
4756 }
4757 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4758 switch (e->Iex.Binop.op) {
4759 case Iop_QShlN8x16: size = 8 | imm; break;
4760 case Iop_QShlN16x8: size = 16 | imm; break;
4761 case Iop_QShlN32x4: size = 32 | imm; break;
4762 case Iop_QShlN64x2: size = 64 | imm; break;
4763 default: vassert(0);
4764 }
4765 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4766 res, argL, size, True));
4767 return res;
4768 }
4769 case Iop_QShlN8Sx16:
4770 case Iop_QShlN16Sx8:
4771 case Iop_QShlN32Sx4:
4772 case Iop_QShlN64Sx2: {
4773 HReg res = newVRegV(env);
4774 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4775 UInt size, imm;
4776 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4777 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4778 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4779 "second argument only\n");
4780 }
4781 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4782 switch (e->Iex.Binop.op) {
4783 case Iop_QShlN8Sx16: size = 8 | imm; break;
4784 case Iop_QShlN16Sx8: size = 16 | imm; break;
4785 case Iop_QShlN32Sx4: size = 32 | imm; break;
4786 case Iop_QShlN64Sx2: size = 64 | imm; break;
4787 default: vassert(0);
4788 }
4789 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4790 res, argL, size, True));
4791 return res;
4792 }
4793 case Iop_QSalN8x16:
4794 case Iop_QSalN16x8:
4795 case Iop_QSalN32x4:
4796 case Iop_QSalN64x2: {
4797 HReg res = newVRegV(env);
4798 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4799 UInt size, imm;
4800 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4801 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4802 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4803 "second argument only\n");
4804 }
4805 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4806 switch (e->Iex.Binop.op) {
4807 case Iop_QSalN8x16: size = 8 | imm; break;
4808 case Iop_QSalN16x8: size = 16 | imm; break;
4809 case Iop_QSalN32x4: size = 32 | imm; break;
4810 case Iop_QSalN64x2: size = 64 | imm; break;
4811 default: vassert(0);
4812 }
4813 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4814 res, argL, size, True));
4815 return res;
4816 }
4817 case Iop_ShrN8x16:
4818 case Iop_ShrN16x8:
4819 case Iop_ShrN32x4:
4820 case Iop_ShrN64x2: {
4821 HReg res = newVRegV(env);
4822 HReg tmp = newVRegV(env);
4823 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4824 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4825 HReg argR2 = newVRegI(env);
4826 UInt size;
4827 switch (e->Iex.Binop.op) {
4828 case Iop_ShrN8x16: size = 0; break;
4829 case Iop_ShrN16x8: size = 1; break;
4830 case Iop_ShrN32x4: size = 2; break;
4831 case Iop_ShrN64x2: size = 3; break;
4832 default: vassert(0);
4833 }
4834 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4835 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4836 tmp, argR2, 0, True));
4837 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4838 res, argL, tmp, size, True));
4839 return res;
4840 }
4841 case Iop_ShlN8x16:
4842 case Iop_ShlN16x8:
4843 case Iop_ShlN32x4:
4844 case Iop_ShlN64x2: {
4845 HReg res = newVRegV(env);
4846 HReg tmp = newVRegV(env);
4847 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4849 UInt size;
4850 switch (e->Iex.Binop.op) {
4851 case Iop_ShlN8x16: size = 0; break;
4852 case Iop_ShlN16x8: size = 1; break;
4853 case Iop_ShlN32x4: size = 2; break;
4854 case Iop_ShlN64x2: size = 3; break;
4855 default: vassert(0);
4856 }
4857 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4858 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4859 res, argL, tmp, size, True));
4860 return res;
4861 }
4862 case Iop_SarN8x16:
4863 case Iop_SarN16x8:
4864 case Iop_SarN32x4:
4865 case Iop_SarN64x2: {
4866 HReg res = newVRegV(env);
4867 HReg tmp = newVRegV(env);
4868 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4869 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4870 HReg argR2 = newVRegI(env);
4871 UInt size;
4872 switch (e->Iex.Binop.op) {
4873 case Iop_SarN8x16: size = 0; break;
4874 case Iop_SarN16x8: size = 1; break;
4875 case Iop_SarN32x4: size = 2; break;
4876 case Iop_SarN64x2: size = 3; break;
4877 default: vassert(0);
4878 }
4879 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4880 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4881 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4882 res, argL, tmp, size, True));
4883 return res;
4884 }
4885 case Iop_CmpGT8Ux16:
4886 case Iop_CmpGT16Ux8:
4887 case Iop_CmpGT32Ux4: {
4888 HReg res = newVRegV(env);
4889 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4890 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4891 UInt size;
4892 switch (e->Iex.Binop.op) {
4893 case Iop_CmpGT8Ux16: size = 0; break;
4894 case Iop_CmpGT16Ux8: size = 1; break;
4895 case Iop_CmpGT32Ux4: size = 2; break;
4896 default: vassert(0);
4897 }
4898 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4899 res, argL, argR, size, True));
4900 return res;
4901 }
4902 case Iop_CmpGT8Sx16:
4903 case Iop_CmpGT16Sx8:
4904 case Iop_CmpGT32Sx4: {
4905 HReg res = newVRegV(env);
4906 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4907 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4908 UInt size;
4909 switch (e->Iex.Binop.op) {
4910 case Iop_CmpGT8Sx16: size = 0; break;
4911 case Iop_CmpGT16Sx8: size = 1; break;
4912 case Iop_CmpGT32Sx4: size = 2; break;
4913 default: vassert(0);
4914 }
4915 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4916 res, argL, argR, size, True));
4917 return res;
4918 }
4919 case Iop_CmpEQ8x16:
4920 case Iop_CmpEQ16x8:
4921 case Iop_CmpEQ32x4: {
4922 HReg res = newVRegV(env);
4923 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4924 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4925 UInt size;
4926 switch (e->Iex.Binop.op) {
4927 case Iop_CmpEQ8x16: size = 0; break;
4928 case Iop_CmpEQ16x8: size = 1; break;
4929 case Iop_CmpEQ32x4: size = 2; break;
4930 default: vassert(0);
4931 }
4932 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4933 res, argL, argR, size, True));
4934 return res;
4935 }
4936 case Iop_Mul8x16:
4937 case Iop_Mul16x8:
4938 case Iop_Mul32x4: {
4939 HReg res = newVRegV(env);
4940 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4941 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4942 UInt size = 0;
4943 switch(e->Iex.Binop.op) {
4944 case Iop_Mul8x16: size = 0; break;
4945 case Iop_Mul16x8: size = 1; break;
4946 case Iop_Mul32x4: size = 2; break;
4947 default: vassert(0);
4948 }
4949 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4950 res, argL, argR, size, True));
4951 return res;
4952 }
4953 case Iop_Mul32Fx4: {
4954 HReg res = newVRegV(env);
4955 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4956 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4957 UInt size = 0;
4958 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
4959 res, argL, argR, size, True));
4960 return res;
4961 }
4962 case Iop_Mull8Ux8:
4963 case Iop_Mull16Ux4:
4964 case Iop_Mull32Ux2: {
4965 HReg res = newVRegV(env);
4966 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4967 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4968 UInt size = 0;
4969 switch(e->Iex.Binop.op) {
4970 case Iop_Mull8Ux8: size = 0; break;
4971 case Iop_Mull16Ux4: size = 1; break;
4972 case Iop_Mull32Ux2: size = 2; break;
4973 default: vassert(0);
4974 }
4975 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
4976 res, argL, argR, size, True));
4977 return res;
4978 }
4979
4980 case Iop_Mull8Sx8:
4981 case Iop_Mull16Sx4:
4982 case Iop_Mull32Sx2: {
4983 HReg res = newVRegV(env);
4984 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4985 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4986 UInt size = 0;
4987 switch(e->Iex.Binop.op) {
4988 case Iop_Mull8Sx8: size = 0; break;
4989 case Iop_Mull16Sx4: size = 1; break;
4990 case Iop_Mull32Sx2: size = 2; break;
4991 default: vassert(0);
4992 }
4993 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
4994 res, argL, argR, size, True));
4995 return res;
4996 }
4997
4998 case Iop_QDMulHi16Sx8:
4999 case Iop_QDMulHi32Sx4: {
5000 HReg res = newVRegV(env);
5001 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5002 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5003 UInt size = 0;
5004 switch(e->Iex.Binop.op) {
5005 case Iop_QDMulHi16Sx8: size = 1; break;
5006 case Iop_QDMulHi32Sx4: size = 2; break;
5007 default: vassert(0);
5008 }
5009 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5010 res, argL, argR, size, True));
5011 return res;
5012 }
5013
5014 case Iop_QRDMulHi16Sx8:
5015 case Iop_QRDMulHi32Sx4: {
5016 HReg res = newVRegV(env);
5017 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5018 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5019 UInt size = 0;
5020 switch(e->Iex.Binop.op) {
5021 case Iop_QRDMulHi16Sx8: size = 1; break;
5022 case Iop_QRDMulHi32Sx4: size = 2; break;
5023 default: vassert(0);
5024 }
5025 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5026 res, argL, argR, size, True));
5027 return res;
5028 }
5029
5030 case Iop_QDMulLong16Sx4:
5031 case Iop_QDMulLong32Sx2: {
5032 HReg res = newVRegV(env);
5033 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5034 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5035 UInt size = 0;
5036 switch(e->Iex.Binop.op) {
5037 case Iop_QDMulLong16Sx4: size = 1; break;
5038 case Iop_QDMulLong32Sx2: size = 2; break;
5039 default: vassert(0);
5040 }
5041 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5042 res, argL, argR, size, True));
5043 return res;
5044 }
5045 case Iop_PolynomialMul8x16: {
5046 HReg res = newVRegV(env);
5047 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5048 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5049 UInt size = 0;
5050 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5051 res, argL, argR, size, True));
5052 return res;
5053 }
5054 case Iop_Max32Fx4: {
5055 HReg res = newVRegV(env);
5056 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5059 res, argL, argR, 2, True));
5060 return res;
5061 }
5062 case Iop_Min32Fx4: {
5063 HReg res = newVRegV(env);
5064 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5065 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5066 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5067 res, argL, argR, 2, True));
5068 return res;
5069 }
5070 case Iop_PwMax32Fx4: {
5071 HReg res = newVRegV(env);
5072 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5073 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5074 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5075 res, argL, argR, 2, True));
5076 return res;
5077 }
5078 case Iop_PwMin32Fx4: {
5079 HReg res = newVRegV(env);
5080 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5081 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5082 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5083 res, argL, argR, 2, True));
5084 return res;
5085 }
5086 case Iop_CmpGT32Fx4: {
5087 HReg res = newVRegV(env);
5088 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5089 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5090 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5091 res, argL, argR, 2, True));
5092 return res;
5093 }
5094 case Iop_CmpGE32Fx4: {
5095 HReg res = newVRegV(env);
5096 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5097 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5098 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5099 res, argL, argR, 2, True));
5100 return res;
5101 }
5102 case Iop_CmpEQ32Fx4: {
5103 HReg res = newVRegV(env);
5104 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5105 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5106 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5107 res, argL, argR, 2, True));
5108 return res;
5109 }
5110
5111 case Iop_PolynomialMull8x8: {
5112 HReg res = newVRegV(env);
5113 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5114 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5115 UInt size = 0;
5116 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5117 res, argL, argR, size, True));
5118 return res;
5119 }
5120 case Iop_F32ToFixed32Ux4_RZ:
5121 case Iop_F32ToFixed32Sx4_RZ:
5122 case Iop_Fixed32UToF32x4_RN:
5123 case Iop_Fixed32SToF32x4_RN: {
5124 HReg res = newVRegV(env);
5125 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5126 ARMNeonUnOp op;
5127 UInt imm6;
5128 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5129 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5130 vpanic("ARM supports FP <-> Fixed conversion with constant "
5131 "second argument less than 33 only\n");
5132 }
5133 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5134 vassert(imm6 <= 32 && imm6 > 0);
5135 imm6 = 64 - imm6;
5136 switch(e->Iex.Binop.op) {
5137 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5138 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5139 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5140 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5141 default: vassert(0);
5142 }
5143 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5144 return res;
5145 }
5146 /*
5147 FIXME remove if not used
5148 case Iop_VDup8x16:
5149 case Iop_VDup16x8:
5150 case Iop_VDup32x4: {
5151 HReg res = newVRegV(env);
5152 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5153 UInt imm4;
5154 UInt index;
5155 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5156 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5157 vpanic("ARM supports Iop_VDup with constant "
5158 "second argument less than 16 only\n");
5159 }
5160 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5161 switch(e->Iex.Binop.op) {
5162 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5163 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5164 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5165 default: vassert(0);
5166 }
5167 if (imm4 >= 16) {
5168 vpanic("ARM supports Iop_VDup with constant "
5169 "second argument less than 16 only\n");
5170 }
5171 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5172 res, argL, imm4, True));
5173 return res;
5174 }
5175 */
5176 case Iop_PwAdd8x16:
5177 case Iop_PwAdd16x8:
5178 case Iop_PwAdd32x4: {
5179 HReg res = newVRegV(env);
5180 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5181 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5182 UInt size = 0;
5183 switch(e->Iex.Binop.op) {
5184 case Iop_PwAdd8x16: size = 0; break;
5185 case Iop_PwAdd16x8: size = 1; break;
5186 case Iop_PwAdd32x4: size = 2; break;
5187 default: vassert(0);
5188 }
5189 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5190 res, argL, argR, size, True));
5191 return res;
5192 }
5193 /* ... */
5194 default:
5195 break;
5196 }
5197 }
5198
5199 if (e->tag == Iex_Triop) {
5200 switch (e->Iex.Triop.op) {
5201 case Iop_ExtractV128: {
5202 HReg res = newVRegV(env);
5203 HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5204 HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5205 UInt imm4;
5206 if (e->Iex.Triop.arg3->tag != Iex_Const ||
5207 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5208 vpanic("ARM target supports Iop_ExtractV128 with constant "
5209 "third argument less than 16 only\n");
5210 }
5211 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5212 if (imm4 >= 16) {
5213 vpanic("ARM target supports Iop_ExtractV128 with constant "
5214 "third argument less than 16 only\n");
5215 }
5216 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5217 res, argL, argR, imm4, True));
5218 return res;
5219 }
5220 default:
5221 break;
5222 }
5223 }
5224
5225 if (e->tag == Iex_Mux0X) {
5226 HReg r8;
5227 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5228 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5229 HReg dst = newVRegV(env);
5230 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5231 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5232 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5233 ARMRI84_I84(0xFF,0)));
5234 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5235 return dst;
5236 }
5237
5238 neon_expr_bad:
5239 ppIRExpr(e);
5240 vpanic("iselNeonExpr_wrk");
5241 }
5242
5243 /*---------------------------------------------------------*/
5244 /*--- ISEL: Floating point expressions (64 bit) ---*/
5245 /*---------------------------------------------------------*/
5246
5247 /* Compute a 64-bit floating point value into a register, the identity
5248 of which is returned. As with iselIntExpr_R, the reg may be either
5249 real or virtual; in any case it must not be changed by subsequent
5250 code emitted by the caller. */
5251
iselDblExpr(ISelEnv * env,IRExpr * e)5252 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5253 {
5254 HReg r = iselDblExpr_wrk( env, e );
5255 # if 0
5256 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5257 # endif
5258 vassert(hregClass(r) == HRcFlt64);
5259 vassert(hregIsVirtual(r));
5260 return r;
5261 }
5262
5263 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,IRExpr * e)5264 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5265 {
5266 IRType ty = typeOfIRExpr(env->type_env,e);
5267 vassert(e);
5268 vassert(ty == Ity_F64);
5269
5270 if (e->tag == Iex_RdTmp) {
5271 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5272 }
5273
5274 if (e->tag == Iex_Const) {
5275 /* Just handle the zero case. */
5276 IRConst* con = e->Iex.Const.con;
5277 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5278 HReg z32 = newVRegI(env);
5279 HReg dst = newVRegD(env);
5280 addInstr(env, ARMInstr_Imm32(z32, 0));
5281 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5282 return dst;
5283 }
5284 }
5285
5286 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5287 ARMAModeV* am;
5288 HReg res = newVRegD(env);
5289 vassert(e->Iex.Load.ty == Ity_F64);
5290 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5291 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5292 return res;
5293 }
5294
5295 if (e->tag == Iex_Get) {
5296 // XXX This won't work if offset > 1020 or is not 0 % 4.
5297 // In which case we'll have to generate more longwinded code.
5298 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5299 HReg res = newVRegD(env);
5300 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5301 return res;
5302 }
5303
5304 if (e->tag == Iex_Unop) {
5305 switch (e->Iex.Unop.op) {
5306 case Iop_ReinterpI64asF64: {
5307 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5308 return iselNeon64Expr(env, e->Iex.Unop.arg);
5309 } else {
5310 HReg srcHi, srcLo;
5311 HReg dst = newVRegD(env);
5312 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5313 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5314 return dst;
5315 }
5316 }
5317 case Iop_NegF64: {
5318 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5319 HReg dst = newVRegD(env);
5320 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5321 return dst;
5322 }
5323 case Iop_AbsF64: {
5324 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5325 HReg dst = newVRegD(env);
5326 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5327 return dst;
5328 }
5329 case Iop_F32toF64: {
5330 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5331 HReg dst = newVRegD(env);
5332 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5333 return dst;
5334 }
5335 case Iop_I32UtoF64:
5336 case Iop_I32StoF64: {
5337 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5338 HReg f32 = newVRegF(env);
5339 HReg dst = newVRegD(env);
5340 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5341 /* VMOV f32, src */
5342 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5343 /* FSITOD dst, f32 */
5344 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5345 dst, f32));
5346 return dst;
5347 }
5348 default:
5349 break;
5350 }
5351 }
5352
5353 if (e->tag == Iex_Binop) {
5354 switch (e->Iex.Binop.op) {
5355 case Iop_SqrtF64: {
5356 /* first arg is rounding mode; we ignore it. */
5357 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5358 HReg dst = newVRegD(env);
5359 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5360 return dst;
5361 }
5362 default:
5363 break;
5364 }
5365 }
5366
5367 if (e->tag == Iex_Triop) {
5368 switch (e->Iex.Triop.op) {
5369 case Iop_DivF64:
5370 case Iop_MulF64:
5371 case Iop_AddF64:
5372 case Iop_SubF64: {
5373 ARMVfpOp op = 0; /*INVALID*/
5374 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5375 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5376 HReg dst = newVRegD(env);
5377 switch (e->Iex.Triop.op) {
5378 case Iop_DivF64: op = ARMvfp_DIV; break;
5379 case Iop_MulF64: op = ARMvfp_MUL; break;
5380 case Iop_AddF64: op = ARMvfp_ADD; break;
5381 case Iop_SubF64: op = ARMvfp_SUB; break;
5382 default: vassert(0);
5383 }
5384 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5385 return dst;
5386 }
5387 default:
5388 break;
5389 }
5390 }
5391
5392 if (e->tag == Iex_Mux0X) {
5393 if (ty == Ity_F64
5394 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5395 HReg r8;
5396 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5397 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5398 HReg dst = newVRegD(env);
5399 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5400 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5401 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5402 ARMRI84_I84(0xFF,0)));
5403 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5404 return dst;
5405 }
5406 }
5407
5408 ppIRExpr(e);
5409 vpanic("iselDblExpr_wrk");
5410 }
5411
5412
5413 /*---------------------------------------------------------*/
5414 /*--- ISEL: Floating point expressions (32 bit) ---*/
5415 /*---------------------------------------------------------*/
5416
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  The register is always virtual (this is
   asserted below); it must not be changed by subsequent code emitted
   by the caller. */
5421
iselFltExpr(ISelEnv * env,IRExpr * e)5422 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5423 {
5424 HReg r = iselFltExpr_wrk( env, e );
5425 # if 0
5426 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5427 # endif
5428 vassert(hregClass(r) == HRcFlt32);
5429 vassert(hregIsVirtual(r));
5430 return r;
5431 }
5432
5433 /* DO NOT CALL THIS DIRECTLY */
iselFltExpr_wrk(ISelEnv * env,IRExpr * e)5434 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5435 {
5436 IRType ty = typeOfIRExpr(env->type_env,e);
5437 vassert(e);
5438 vassert(ty == Ity_F32);
5439
5440 if (e->tag == Iex_RdTmp) {
5441 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5442 }
5443
5444 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5445 ARMAModeV* am;
5446 HReg res = newVRegF(env);
5447 vassert(e->Iex.Load.ty == Ity_F32);
5448 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5449 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5450 return res;
5451 }
5452
5453 if (e->tag == Iex_Get) {
5454 // XXX This won't work if offset > 1020 or is not 0 % 4.
5455 // In which case we'll have to generate more longwinded code.
5456 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5457 HReg res = newVRegF(env);
5458 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5459 return res;
5460 }
5461
5462 if (e->tag == Iex_Unop) {
5463 switch (e->Iex.Unop.op) {
5464 case Iop_ReinterpI32asF32: {
5465 HReg dst = newVRegF(env);
5466 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5467 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5468 return dst;
5469 }
5470 case Iop_NegF32: {
5471 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5472 HReg dst = newVRegF(env);
5473 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5474 return dst;
5475 }
5476 case Iop_AbsF32: {
5477 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5478 HReg dst = newVRegF(env);
5479 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5480 return dst;
5481 }
5482 default:
5483 break;
5484 }
5485 }
5486
5487 if (e->tag == Iex_Binop) {
5488 switch (e->Iex.Binop.op) {
5489 case Iop_SqrtF32: {
5490 /* first arg is rounding mode; we ignore it. */
5491 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5492 HReg dst = newVRegF(env);
5493 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5494 return dst;
5495 }
5496 case Iop_F64toF32: {
5497 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5498 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5499 HReg valS = newVRegF(env);
5500 /* FCVTSD valS, valD */
5501 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5502 set_VFP_rounding_default(env);
5503 return valS;
5504 }
5505 default:
5506 break;
5507 }
5508 }
5509
5510 if (e->tag == Iex_Triop) {
5511 switch (e->Iex.Triop.op) {
5512 case Iop_DivF32:
5513 case Iop_MulF32:
5514 case Iop_AddF32:
5515 case Iop_SubF32: {
5516 ARMVfpOp op = 0; /*INVALID*/
5517 HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5518 HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5519 HReg dst = newVRegF(env);
5520 switch (e->Iex.Triop.op) {
5521 case Iop_DivF32: op = ARMvfp_DIV; break;
5522 case Iop_MulF32: op = ARMvfp_MUL; break;
5523 case Iop_AddF32: op = ARMvfp_ADD; break;
5524 case Iop_SubF32: op = ARMvfp_SUB; break;
5525 default: vassert(0);
5526 }
5527 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5528 return dst;
5529 }
5530 default:
5531 break;
5532 }
5533 }
5534
5535 if (e->tag == Iex_Mux0X) {
5536 if (ty == Ity_F32
5537 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5538 HReg r8;
5539 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5540 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5541 HReg dst = newVRegF(env);
5542 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5543 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5544 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5545 ARMRI84_I84(0xFF,0)));
5546 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5547 return dst;
5548 }
5549 }
5550
5551 ppIRExpr(e);
5552 vpanic("iselFltExpr_wrk");
5553 }
5554
5555
5556 /*---------------------------------------------------------*/
5557 /*--- ISEL: Statements ---*/
5558 /*---------------------------------------------------------*/
5559
iselStmt(ISelEnv * env,IRStmt * stmt)5560 static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5561 {
5562 if (vex_traceflags & VEX_TRACE_VCODE) {
5563 vex_printf("\n-- ");
5564 ppIRStmt(stmt);
5565 vex_printf("\n");
5566 }
5567 switch (stmt->tag) {
5568
5569 /* --------- STORE --------- */
5570 /* little-endian write to memory */
5571 case Ist_Store: {
5572 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5573 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5574 IREndness end = stmt->Ist.Store.end;
5575
5576 if (tya != Ity_I32 || end != Iend_LE)
5577 goto stmt_fail;
5578
5579 if (tyd == Ity_I32) {
5580 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5581 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5582 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5583 return;
5584 }
5585 if (tyd == Ity_I16) {
5586 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5587 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5588 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5589 False/*!isSignedLoad*/, rD, am));
5590 return;
5591 }
5592 if (tyd == Ity_I8) {
5593 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5594 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5595 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5596 return;
5597 }
5598 if (tyd == Ity_I64) {
5599 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5600 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5601 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5602 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5603 } else {
5604 HReg rDhi, rDlo, rA;
5605 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5606 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5607 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5608 ARMAMode1_RI(rA,4)));
5609 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5610 ARMAMode1_RI(rA,0)));
5611 }
5612 return;
5613 }
5614 if (tyd == Ity_F64) {
5615 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5616 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5617 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5618 return;
5619 }
5620 if (tyd == Ity_F32) {
5621 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5622 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5623 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5624 return;
5625 }
5626 if (tyd == Ity_V128) {
5627 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5628 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5629 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5630 return;
5631 }
5632
5633 break;
5634 }
5635
5636 /* --------- PUT --------- */
5637 /* write guest state, fixed offset */
5638 case Ist_Put: {
5639 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5640
5641 if (tyd == Ity_I32) {
5642 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5643 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5644 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5645 return;
5646 }
5647 if (tyd == Ity_I64) {
5648 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5649 HReg addr = newVRegI(env);
5650 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5651 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5652 stmt->Ist.Put.offset));
5653 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5654 } else {
5655 HReg rDhi, rDlo;
5656 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5657 stmt->Ist.Put.offset + 0);
5658 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5659 stmt->Ist.Put.offset + 4);
5660 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5661 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5662 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5663 }
5664 return;
5665 }
5666 if (tyd == Ity_F64) {
5667 // XXX This won't work if offset > 1020 or is not 0 % 4.
5668 // In which case we'll have to generate more longwinded code.
5669 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5670 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5671 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5672 return;
5673 }
5674 if (tyd == Ity_F32) {
5675 // XXX This won't work if offset > 1020 or is not 0 % 4.
5676 // In which case we'll have to generate more longwinded code.
5677 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5678 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5679 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5680 return;
5681 }
5682 if (tyd == Ity_V128) {
5683 HReg addr = newVRegI(env);
5684 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5685 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5686 stmt->Ist.Put.offset));
5687 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5688 return;
5689 }
5690 break;
5691 }
5692
5693 //zz /* --------- Indexed PUT --------- */
5694 //zz /* write guest state, run-time offset */
5695 //zz case Ist_PutI: {
5696 //zz ARMAMode2* am2
5697 //zz = genGuestArrayOffset(
5698 //zz env, stmt->Ist.PutI.descr,
5699 //zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5700 //zz
5701 //zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5702 //zz
5703 //zz if (tyd == Ity_I8) {
5704 //zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5705 //zz addInstr(env, ARMInstr_StoreB(reg, am2));
5706 //zz return;
5707 //zz }
5708 //zz// CAB: Ity_I32, Ity_I16 ?
5709 //zz break;
5710 //zz }
5711
5712 /* --------- TMP --------- */
5713 /* assign value to temporary */
5714 case Ist_WrTmp: {
5715 IRTemp tmp = stmt->Ist.WrTmp.tmp;
5716 IRType ty = typeOfIRTemp(env->type_env, tmp);
5717
5718 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5719 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5720 env, stmt->Ist.WrTmp.data);
5721 HReg dst = lookupIRTemp(env, tmp);
5722 addInstr(env, ARMInstr_Mov(dst,ri84));
5723 return;
5724 }
5725 if (ty == Ity_I1) {
5726 HReg dst = lookupIRTemp(env, tmp);
5727 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5728 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5729 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5730 return;
5731 }
5732 if (ty == Ity_I64) {
5733 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5734 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5735 HReg dst = lookupIRTemp(env, tmp);
5736 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5737 } else {
5738 HReg rHi, rLo, dstHi, dstLo;
5739 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5740 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5741 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5742 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5743 }
5744 return;
5745 }
5746 if (ty == Ity_F64) {
5747 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5748 HReg dst = lookupIRTemp(env, tmp);
5749 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5750 return;
5751 }
5752 if (ty == Ity_F32) {
5753 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5754 HReg dst = lookupIRTemp(env, tmp);
5755 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5756 return;
5757 }
5758 if (ty == Ity_V128) {
5759 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5760 HReg dst = lookupIRTemp(env, tmp);
5761 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5762 return;
5763 }
5764 break;
5765 }
5766
5767 /* --------- Call to DIRTY helper --------- */
5768 /* call complex ("dirty") helper function */
5769 case Ist_Dirty: {
5770 IRType retty;
5771 IRDirty* d = stmt->Ist.Dirty.details;
5772 Bool passBBP = False;
5773
5774 if (d->nFxState == 0)
5775 vassert(!d->needsBBP);
5776
5777 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
5778
5779 /* Marshal args, do the call, clear stack. */
5780 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5781 if (!ok)
5782 break; /* will go to stmt_fail: */
5783
5784 /* Now figure out what to do with the returned value, if any. */
5785 if (d->tmp == IRTemp_INVALID)
5786 /* No return value. Nothing to do. */
5787 return;
5788
5789 retty = typeOfIRTemp(env->type_env, d->tmp);
5790
5791 if (retty == Ity_I64) {
5792 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5793 HReg tmp = lookupIRTemp(env, d->tmp);
5794 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5795 hregARM_R0()));
5796 } else {
5797 HReg dstHi, dstLo;
5798 /* The returned value is in r1:r0. Park it in the
5799 register-pair associated with tmp. */
5800 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5801 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5802 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5803 }
5804 return;
5805 }
5806 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5807 /* The returned value is in r0. Park it in the register
5808 associated with tmp. */
5809 HReg dst = lookupIRTemp(env, d->tmp);
5810 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5811 return;
5812 }
5813
5814 break;
5815 }
5816
5817 /* --------- Load Linked and Store Conditional --------- */
5818 case Ist_LLSC: {
5819 if (stmt->Ist.LLSC.storedata == NULL) {
5820 /* LL */
5821 IRTemp res = stmt->Ist.LLSC.result;
5822 IRType ty = typeOfIRTemp(env->type_env, res);
5823 if (ty == Ity_I32 || ty == Ity_I8) {
5824 Int szB = 0;
5825 HReg r_dst = lookupIRTemp(env, res);
5826 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5827 switch (ty) {
5828 case Ity_I8: szB = 1; break;
5829 case Ity_I32: szB = 4; break;
5830 default: vassert(0);
5831 }
5832 addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
5833 addInstr(env, ARMInstr_LdrEX(szB));
5834 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
5835 return;
5836 } else if (ty == Ity_I64) {
5837 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5838 HReg dstHi, dstLo;
5839 addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
5840 addInstr(env, ARMInstr_LdrEX(8 /* 64-bit */));
5841 lookupIRTemp64(&dstHi, &dstLo, env, res);
5842 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R2()) );
5843 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R3()) );
5844 return;
5845 }
5846 /* else fall thru; is unhandled */
5847 } else {
5848 /* SC */
5849 IRTemp res = stmt->Ist.LLSC.result;
5850 IRType ty = typeOfIRTemp(env->type_env, res);
5851 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
5852 vassert(ty == Ity_I1);
5853 if (tyd == Ity_I32 || tyd == Ity_I8) {
5854 Int szB = 0;
5855 HReg r_res = lookupIRTemp(env, res);
5856 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5857 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5858 ARMRI84* one = ARMRI84_I84(1,0);
5859 switch (tyd) {
5860 case Ity_I8: szB = 1; break;
5861 case Ity_I32: szB = 4; break;
5862 default: vassert(0);
5863 }
5864 addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
5865 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5866 addInstr(env, ARMInstr_StrEX(szB));
5867 /* now r1 is 1 if failed, 0 if success. Change to IR
5868 conventions (0 is fail, 1 is success). Also transfer
5869 result to r_res. */
5870 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
5871 /* And be conservative -- mask off all but the lowest bit */
5872 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5873 return;
5874 } else if (tyd == Ity_I64) {
5875 HReg r_res = lookupIRTemp(env, res);
5876 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5877 ARMRI84* one = ARMRI84_I84(1,0);
5878 HReg rDHi, rDLo;
5879 iselInt64Expr(&rDHi, &rDLo, env, stmt->Ist.LLSC.storedata);
5880 addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
5881 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDHi));
5882 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDLo));
5883 addInstr(env, ARMInstr_StrEX(8 /* 64-bit */));
5884 /* now r1 is 1 if failed, 0 if success. Change to IR
5885 conventions (0 is fail, 1 is success). Also transfer
5886 result to r_res. */
5887 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
5888 /* And be conservative -- mask off all but the lowest bit */
5889 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5890 return;
5891 }
5892 /* else fall thru; is unhandled */
5893 }
5894 break;
5895 }
5896
5897 /* --------- MEM FENCE --------- */
5898 case Ist_MBE:
5899 switch (stmt->Ist.MBE.event) {
5900 case Imbe_Fence:
5901 addInstr(env,ARMInstr_MFence());
5902 return;
5903 default:
5904 break;
5905 }
5906 break;
5907
5908 /* --------- INSTR MARK --------- */
5909 /* Doesn't generate any executable code ... */
5910 case Ist_IMark:
5911 return;
5912
5913 /* --------- NO-OP --------- */
5914 case Ist_NoOp:
5915 return;
5916
5917 /* --------- EXIT --------- */
5918 case Ist_Exit: {
5919 HReg gnext;
5920 ARMCondCode cc;
5921 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5922 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
5923 gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5924 cc = iselCondCode(env, stmt->Ist.Exit.guard);
5925 addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5926 addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
5927 return;
5928 }
5929
5930 default: break;
5931 }
5932 stmt_fail:
5933 ppIRStmt(stmt);
5934 vpanic("iselStmt");
5935 }
5936
5937
5938 /*---------------------------------------------------------*/
5939 /*--- ISEL: Basic block terminators (Nexts) ---*/
5940 /*---------------------------------------------------------*/
5941
/* Select code for the terminator of a superblock: evaluate the
   'next' expression into a register, put the saved link-register
   value back into r14, and emit a Goto of kind 'jk' under condition
   ARMcc_AL.  When VEX_TRACE_VCODE is set in vex_traceflags, the jump
   kind and the 'next' expression are printed first. */
static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
{
   if ((vex_traceflags & VEX_TRACE_VCODE) != 0) {
      vex_printf("\n-- goto {");
      ppIRJumpKind(jk);
      vex_printf("} ");
      ppIRExpr(next);
      vex_printf("\n");
   }

   /* Select the destination expression before anything else, so that
      any instructions it generates come ahead of the r14 restore. */
   HReg target = iselIntExpr_R(env, next);

   /* r14 <- saved copy of the link register, then jump. */
   addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
   addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, target));
}
5956
5957
5958 /*---------------------------------------------------------*/
5959 /*--- Insn selector top-level ---*/
5960 /*---------------------------------------------------------*/
5961
5962 /* Translate an entire SB to arm code. */
5963
iselSB_ARM(IRSB * bb,VexArch arch_host,VexArchInfo * archinfo_host,VexAbiInfo * vbi)5964 HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
5965 VexArchInfo* archinfo_host,
5966 VexAbiInfo* vbi/*UNUSED*/ )
5967 {
5968 Int i, j;
5969 HReg hreg, hregHI;
5970 ISelEnv* env;
5971 UInt hwcaps_host = archinfo_host->hwcaps;
5972 Bool neon = False;
5973 static UInt counter = 0;
5974
5975 /* sanity ... */
5976 vassert(arch_host == VexArchARM);
5977
5978 /* hwcaps should not change from one ISEL call to another. */
5979 arm_hwcaps = hwcaps_host;
5980
5981 /* Make up an initial environment to use. */
5982 env = LibVEX_Alloc(sizeof(ISelEnv));
5983 env->vreg_ctr = 0;
5984
5985 /* Set up output code array. */
5986 env->code = newHInstrArray();
5987
5988 /* Copy BB's type env. */
5989 env->type_env = bb->tyenv;
5990
5991 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
5992 change as we go along. */
5993 env->n_vregmap = bb->tyenv->types_used;
5994 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5995 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
5996
5997 /* For each IR temporary, allocate a suitably-kinded virtual
5998 register. */
5999 j = 0;
6000 for (i = 0; i < env->n_vregmap; i++) {
6001 hregHI = hreg = INVALID_HREG;
6002 switch (bb->tyenv->types[i]) {
6003 case Ity_I1:
6004 case Ity_I8:
6005 case Ity_I16:
6006 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
6007 case Ity_I64:
6008 if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
6009 hreg = mkHReg(j++, HRcFlt64, True);
6010 neon = True;
6011 } else {
6012 hregHI = mkHReg(j++, HRcInt32, True);
6013 hreg = mkHReg(j++, HRcInt32, True);
6014 }
6015 break;
6016 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6017 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
6018 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True);
6019 neon = True; break;
6020 default: ppIRType(bb->tyenv->types[i]);
6021 vpanic("iselBB: IRTemp type");
6022 }
6023 env->vregmap[i] = hreg;
6024 env->vregmapHI[i] = hregHI;
6025 }
6026 env->vreg_ctr = j;
6027
6028 /* Keep a copy of the link reg, since any call to a helper function
6029 will trash it, and we can't get back to the dispatcher once that
6030 happens. */
6031 env->savedLR = newVRegI(env);
6032 addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
6033
6034 /* Ok, finally we can iterate over the statements. */
6035 for (i = 0; i < bb->stmts_used; i++)
6036 iselStmt(env,bb->stmts[i]);
6037
6038 iselNext(env,bb->next,bb->jumpkind);
6039
6040 /* record the number of vregs we used. */
6041 env->code->n_vregs = env->vreg_ctr;
6042 counter++;
6043 return env->code;
6044 }
6045
6046
6047 /*---------------------------------------------------------------*/
6048 /*--- end host_arm_isel.c ---*/
6049 /*---------------------------------------------------------------*/
6050