1
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_isel.c ---*/
4 /*---------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_basictypes.h"
32 #include "libvex_ir.h"
33 #include "libvex.h"
34 #include "ir_match.h"
35
36 #include "main_util.h"
37 #include "main_globals.h"
38 #include "host_generic_regs.h"
39 #include "host_generic_simd64.h" // for 32-bit SIMD helpers
40 #include "host_arm64_defs.h"
41
42
43 //ZZ /*---------------------------------------------------------*/
44 //ZZ /*--- ARMvfp control word stuff ---*/
45 //ZZ /*---------------------------------------------------------*/
46 //ZZ
47 //ZZ /* Vex-generated code expects to run with the FPU set as follows: all
48 //ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
49 //ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
50 //ZZ this corresponds to a FPSCR value of zero.
51 //ZZ
52 //ZZ fpscr should therefore be zero on entry to Vex-generated code, and
53 //ZZ should be unchanged at exit. (Or at least the bottom 28 bits
54 //ZZ should be zero).
55 //ZZ */
56 //ZZ
57 //ZZ #define DEFAULT_FPSCR 0
58
59
60 /*---------------------------------------------------------*/
61 /*--- ISelEnv ---*/
62 /*---------------------------------------------------------*/
63
64 /* This carries around:
65
66 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
67 might encounter. This is computed before insn selection starts,
68 and does not change.
69
70 - A mapping from IRTemp to HReg. This tells the insn selector
71 which virtual register is associated with each IRTemp temporary.
72 This is computed before insn selection starts, and does not
73 change. We expect this mapping to map precisely the same set of
74 IRTemps as the type mapping does.
75
76 |vregmap| holds the primary register for the IRTemp.
77 |vregmapHI| is only used for 128-bit integer-typed
78 IRTemps. It holds the identity of a second
79 64-bit virtual HReg, which holds the high half
80 of the value.
81
82 - The code array, that is, the insns selected so far.
83
84 - A counter, for generating new virtual registers.
85
86 - The host hardware capabilities word. This is set at the start
87 and does not change.
88
89 - A Bool for indicating whether we may generate chain-me
90 instructions for control flow transfers, or whether we must use
91 XAssisted.
92
93 - The maximum guest address of any guest insn in this block.
94 Actually, the address of the highest-addressed byte from any insn
95 in this block. Is set at the start and does not change. This is
96 used for detecting jumps which are definitely forward-edges from
97 this block, and therefore can be made (chained) to the fast entry
98 point of the destination, thereby avoiding the destination's
99 event check.
100
101 - An IRExpr*, which may be NULL, holding the IR expression (an
102 IRRoundingMode-encoded value) to which the FPU's rounding mode
103 was most recently set. Setting to NULL is always safe. Used to
104 avoid redundant settings of the FPU's rounding mode, as
105 described in set_FPCR_rounding_mode below.
106
107 Note, this is all (well, mostly) host-independent.
108 */
109
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;        /* IRTemp -> IRType map for this block */

      HReg*        vregmap;         /* IRTemp -> primary virtual register */
      HReg*        vregmapHI;       /* second (high-half) vreg; used only for
                                       128-bit integer-typed IRTemps */
      Int          n_vregmap;       /* number of entries in the maps above */

      UInt         hwcaps;          /* host hardware capabilities word */

      Bool         chainingAllowed; /* may we emit chain-me transfers, or
                                       must we use XAssisted? */
      Addr64       max_ga;          /* highest guest address in this block */

      /* These are modified as we go along. */
      HInstrArray* code;            /* the instructions selected so far */
      Int          vreg_ctr;        /* counter for minting new vregs */

      IRExpr*      previous_rm;     /* IR expr the FP rounding mode was most
                                       recently set from, or NULL (safe);
                                       see set_FPCR_rounding_mode */
   }
   ISelEnv;
131
lookupIRTemp(ISelEnv * env,IRTemp tmp)132 static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
133 {
134 vassert(tmp >= 0);
135 vassert(tmp < env->n_vregmap);
136 return env->vregmap[tmp];
137 }
138
/* Append |instr| to the code being generated, printing it too if
   vcode tracing is enabled. */
static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if ((vex_traceflags & VEX_TRACE_VCODE) != 0) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}
147
newVRegI(ISelEnv * env)148 static HReg newVRegI ( ISelEnv* env )
149 {
150 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
151 env->vreg_ctr++;
152 return reg;
153 }
154
newVRegD(ISelEnv * env)155 static HReg newVRegD ( ISelEnv* env )
156 {
157 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
158 env->vreg_ctr++;
159 return reg;
160 }
161
162 //ZZ static HReg newVRegF ( ISelEnv* env )
163 //ZZ {
164 //ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
165 //ZZ env->vreg_ctr++;
166 //ZZ return reg;
167 //ZZ }
168
newVRegV(ISelEnv * env)169 static HReg newVRegV ( ISelEnv* env )
170 {
171 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174 }
175
176 //ZZ /* These are duplicated in guest_arm_toIR.c */
177 //ZZ static IRExpr* unop ( IROp op, IRExpr* a )
178 //ZZ {
179 //ZZ return IRExpr_Unop(op, a);
180 //ZZ }
181 //ZZ
182 //ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
183 //ZZ {
184 //ZZ return IRExpr_Binop(op, a1, a2);
185 //ZZ }
186 //ZZ
187 //ZZ static IRExpr* bind ( Int binder )
188 //ZZ {
189 //ZZ return IRExpr_Binder(binder);
190 //ZZ }
191
192
193 /*---------------------------------------------------------*/
194 /*--- ISEL: Forward declarations ---*/
195 /*---------------------------------------------------------*/
196
197 /* These are organised as iselXXX and iselXXX_wrk pairs. The
198 iselXXX_wrk do the real work, but are not to be called directly.
199 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
200 checks that all returned registers are virtual. You should not
201 call the _wrk version directly.
202
203 Because some forms of ARM64 memory amodes are implicitly scaled by
204 the access size, iselIntExpr_AMode takes an IRType which tells it
205 the type of the access for which the amode is to be used. This
206 type needs to be correct, else you'll get incorrect code.
207 */
208 static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
209 IRExpr* e, IRType dty );
210 static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
211 IRExpr* e, IRType dty );
212
213 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
214 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
215
216 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
217 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
218
219 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
220 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
221
222 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
223 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
224
225 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
226 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
227
228 static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
229 ISelEnv* env, IRExpr* e );
230 static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
231 ISelEnv* env, IRExpr* e );
232
233
234 //ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
235 //ZZ ISelEnv* env, IRExpr* e );
236 //ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo,
237 //ZZ ISelEnv* env, IRExpr* e );
238
239 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
240 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
241
242 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
243 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
244
245 //ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
246 //ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
247
248 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
249 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
250
251 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
252
253
254 /*---------------------------------------------------------*/
255 /*--- ISEL: Misc helpers ---*/
256 /*---------------------------------------------------------*/
257
258 /* Generate an amode suitable for a 64-bit sized access relative to
259 the baseblock register (X21). This generates an RI12 amode, which
260 means its scaled by the access size, which is why the access size
261 -- 64 bit -- is stated explicitly here. Consequently |off| needs
262 to be divisible by 8. */
mk_baseblock_64bit_access_amode(UInt off)263 static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
264 {
265 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
266 vassert((off & 7) == 0); /* ditto */
267 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
268 }
269
270 /* Ditto, for 32 bit accesses. */
mk_baseblock_32bit_access_amode(UInt off)271 static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
272 {
273 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
274 vassert((off & 3) == 0); /* ditto */
275 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
276 }
277
278 /* Ditto, for 16 bit accesses. */
mk_baseblock_16bit_access_amode(UInt off)279 static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
280 {
281 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
282 vassert((off & 1) == 0); /* ditto */
283 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
284 }
285
286 /* Ditto, for 8 bit accesses. */
mk_baseblock_8bit_access_amode(UInt off)287 static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
288 {
289 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
290 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
291 }
292
mk_baseblock_128bit_access_addr(ISelEnv * env,UInt off)293 static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
294 {
295 vassert(off < (1<<12));
296 HReg r = newVRegI(env);
297 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
298 ARM64RIA_I12(off,0), True/*isAdd*/));
299 return r;
300 }
301
get_baseblock_register(void)302 static HReg get_baseblock_register ( void )
303 {
304 return hregARM64_X21();
305 }
306
307 /* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
308 a new register, and return the new register. */
static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
{
   /* Zero extension is just an AND with 0xFFFFFFFF. */
   HReg      dst    = newVRegI(env);
   ARM64RIL* mask32 = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask32, ARM64lo_AND));
   return dst;
}
316
317 /* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
/* Sign extend the 16-bit value in |src| to 64 bits in a new register:
   shift left 48, then arithmetic-shift right 48. */
static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst   = newVRegI(env);
   ARM64RI6* amt48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, amt48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, amt48, ARM64sh_SAR));
   return dst;
}
327
328 /* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
329 a new register, and return the new register. */
/* Zero extend the 16-bit value in |src| to 64 bits in a new register:
   shift left 48, then logical-shift right 48. */
static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst   = newVRegI(env);
   ARM64RI6* amt48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, amt48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, amt48, ARM64sh_SHR));
   return dst;
}
338
339 /* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
340 a new register, and return the new register. */
/* Sign extend the 32-bit value in |src| to 64 bits in a new register:
   shift left 32, then arithmetic-shift right 32. */
static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst   = newVRegI(env);
   ARM64RI6* amt32 = ARM64RI6_I6(32);
   addInstr(env, ARM64Instr_Shift(dst, src, amt32, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, amt32, ARM64sh_SAR));
   return dst;
}
349
350 /* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
351 a new register, and return the new register. */
/* Sign extend the 8-bit value in |src| to 64 bits in a new register:
   shift left 56, then arithmetic-shift right 56. */
static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst   = newVRegI(env);
   ARM64RI6* amt56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, amt56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, amt56, ARM64sh_SAR));
   return dst;
}
360
/* Zero extend the 8-bit value in |src| to 64 bits in a new register:
   shift left 56, then logical-shift right 56. */
static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg      dst   = newVRegI(env);
   ARM64RI6* amt56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, amt56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, amt56, ARM64sh_SHR));
   return dst;
}
369
370 /* Is this IRExpr_Const(IRConst_U64(0)) ? */
isZeroU64(IRExpr * e)371 static Bool isZeroU64 ( IRExpr* e ) {
372 if (e->tag != Iex_Const) return False;
373 IRConst* con = e->Iex.Const.con;
374 vassert(con->tag == Ico_U64);
375 return con->Ico.U64 == 0;
376 }
377
378
379 /*---------------------------------------------------------*/
380 /*--- ISEL: FP rounding mode helpers ---*/
381 /*---------------------------------------------------------*/
382
/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the
   ARM64 FPCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.
390
391 Setting the rounding mode is expensive. So this function tries to
392 avoid repeatedly setting the rounding mode to the same thing by
393 first comparing 'mode' to the 'mode' tree supplied in the previous
394 call to this function, if any. (The previous value is stored in
395 env->previous_rm.) If 'mode' is a single IR temporary 't' and
396 env->previous_rm is also just 't', then the setting is skipped.
397
398 This is safe because of the SSA property of IR: an IR temporary can
399 only be defined once and so will have the same value regardless of
400 where it appears in the block. Cool stuff, SSA.
401
402 A safety condition: all attempts to set the RM must be aware of
403 this mechanism - by being routed through the functions here.
404
405 Of course this only helps if blocks where the RM is set more than
406 once and it is set to the same value each time, *and* that value is
407 held in the same IR temporary each time. In order to assure the
408 latter as much as possible, the IR optimiser takes care to do CSE
409 on any block with any sign of floating point activity.
410 */
411 static
set_FPCR_rounding_mode(ISelEnv * env,IRExpr * mode)412 void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
413 {
414 vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
415
416 /* Do we need to do anything? */
417 if (env->previous_rm
418 && env->previous_rm->tag == Iex_RdTmp
419 && mode->tag == Iex_RdTmp
420 && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
421 /* no - setting it to what it was before. */
422 vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
423 return;
424 }
425
426 /* No luck - we better set it, and remember what we set it to. */
427 env->previous_rm = mode;
428
429 /* Only supporting the rounding-mode bits - the rest of FPCR is set
430 to zero - so we can set the whole register at once (faster). */
431
432 /* This isn't simple, because 'mode' carries an IR rounding
433 encoding, and we need to translate that to an ARM64 FP one:
434 The IR encoding:
435 00 to nearest (the default)
436 10 to +infinity
437 01 to -infinity
438 11 to zero
439 The ARM64 FP encoding:
440 00 to nearest
441 01 to +infinity
442 10 to -infinity
443 11 to zero
444 Easy enough to do; just swap the two bits.
445 */
446 HReg irrm = iselIntExpr_R(env, mode);
447 HReg tL = newVRegI(env);
448 HReg tR = newVRegI(env);
449 HReg t3 = newVRegI(env);
450 /* tL = irrm << 1;
451 tR = irrm >> 1; if we're lucky, these will issue together
452 tL &= 2;
453 tR &= 1; ditto
454 t3 = tL | tR;
455 t3 <<= 22;
456 fmxr fpscr, t3
457 */
458 ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
459 ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
460 vassert(ril_one && ril_two);
461 addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
462 addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
463 addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
464 addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
465 addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
466 addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
467 addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
468 }
469
470
471 /*---------------------------------------------------------*/
472 /*--- ISEL: Function call helpers ---*/
473 /*---------------------------------------------------------*/
474
475 /* Used only in doHelperCall. See big comment in doHelperCall re
476 handling of register-parameter args. This function figures out
477 whether evaluation of an expression might require use of a fixed
478 register. If in doubt return True (safe but suboptimal).
479 */
480 static
mightRequireFixedRegs(IRExpr * e)481 Bool mightRequireFixedRegs ( IRExpr* e )
482 {
483 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
484 // These are always "safe" -- either a copy of SP in some
485 // arbitrary vreg, or a copy of x21, respectively.
486 return False;
487 }
488 /* Else it's a "normal" expression. */
489 switch (e->tag) {
490 case Iex_RdTmp: case Iex_Const: case Iex_Get:
491 return False;
492 default:
493 return True;
494 }
495 }
496
497
498 /* Do a complete function call. |guard| is a Ity_Bit expression
499 indicating whether or not the call happens. If guard==NULL, the
500 call is unconditional. |retloc| is set to indicate where the
501 return value is after the call. The caller (of this fn) must
502 generate code to add |stackAdjustAfterCall| to the stack pointer
503 after the call is done. Returns True iff it managed to handle this
504 combination of arg/return types, else returns False. */
505
506 static
doHelperCall(UInt * stackAdjustAfterCall,RetLoc * retloc,ISelEnv * env,IRExpr * guard,IRCallee * cee,IRType retTy,IRExpr ** args)507 Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
508 /*OUT*/RetLoc* retloc,
509 ISelEnv* env,
510 IRExpr* guard,
511 IRCallee* cee, IRType retTy, IRExpr** args )
512 {
513 ARM64CondCode cc;
514 HReg argregs[ARM64_N_ARGREGS];
515 HReg tmpregs[ARM64_N_ARGREGS];
516 Bool go_fast;
517 Int n_args, i, nextArgReg;
518 ULong target;
519
520 vassert(ARM64_N_ARGREGS == 8);
521
522 /* Set default returns. We'll update them later if needed. */
523 *stackAdjustAfterCall = 0;
524 *retloc = mk_RetLoc_INVALID();
525
526 /* These are used for cross-checking that IR-level constraints on
527 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
528 UInt nVECRETs = 0;
529 UInt nBBPTRs = 0;
530
531 /* Marshal args for a call and do the call.
532
533 This function only deals with a tiny set of possibilities, which
534 cover all helpers in practice. The restrictions are that only
535 arguments in registers are supported, hence only
536 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
537 fact the only supported arg type is I64.
538
539 The return type can be I{64,32} or V128. In the V128 case, it
540 is expected that |args| will contain the special node
541 IRExpr_VECRET(), in which case this routine generates code to
542 allocate space on the stack for the vector return value. Since
543 we are not passing any scalars on the stack, it is enough to
544 preallocate the return space before marshalling any arguments,
545 in this case.
546
547 |args| may also contain IRExpr_BBPTR(), in which case the
548 value in x21 is passed as the corresponding argument.
549
550 Generating code which is both efficient and correct when
551 parameters are to be passed in registers is difficult, for the
552 reasons elaborated in detail in comments attached to
553 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
554 of the method described in those comments.
555
556 The problem is split into two cases: the fast scheme and the
557 slow scheme. In the fast scheme, arguments are computed
558 directly into the target (real) registers. This is only safe
559 when we can be sure that computation of each argument will not
560 trash any real registers set by computation of any other
561 argument.
562
563 In the slow scheme, all args are first computed into vregs, and
564 once they are all done, they are moved to the relevant real
565 regs. This always gives correct code, but it also gives a bunch
566 of vreg-to-rreg moves which are usually redundant but are hard
567 for the register allocator to get rid of.
568
569 To decide which scheme to use, all argument expressions are
570 first examined. If they are all so simple that it is clear they
571 will be evaluated without use of any fixed registers, use the
572 fast scheme, else use the slow scheme. Note also that only
573 unconditional calls may use the fast scheme, since having to
574 compute a condition expression could itself trash real
575 registers.
576
577 Note this requires being able to examine an expression and
578 determine whether or not evaluation of it might use a fixed
579 register. That requires knowledge of how the rest of this insn
580 selector works. Currently just the following 3 are regarded as
581 safe -- hopefully they cover the majority of arguments in
582 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
583 */
584
585 /* Note that the cee->regparms field is meaningless on ARM64 hosts
586 (since there is only one calling convention) and so we always
587 ignore it. */
588
589 n_args = 0;
590 for (i = 0; args[i]; i++) {
591 IRExpr* arg = args[i];
592 if (UNLIKELY(arg->tag == Iex_VECRET)) {
593 nVECRETs++;
594 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
595 nBBPTRs++;
596 }
597 n_args++;
598 }
599
600 /* If this fails, the IR is ill-formed */
601 vassert(nBBPTRs == 0 || nBBPTRs == 1);
602
603 /* If we have a VECRET, allocate space on the stack for the return
604 value, and record the stack pointer after that. */
605 HReg r_vecRetAddr = INVALID_HREG;
606 if (nVECRETs == 1) {
607 vassert(retTy == Ity_V128 || retTy == Ity_V256);
608 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
609 r_vecRetAddr = newVRegI(env);
610 addInstr(env, ARM64Instr_AddToSP(-16));
611 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
612 } else {
613 // If either of these fail, the IR is ill-formed
614 vassert(retTy != Ity_V128 && retTy != Ity_V256);
615 vassert(nVECRETs == 0);
616 }
617
618 argregs[0] = hregARM64_X0();
619 argregs[1] = hregARM64_X1();
620 argregs[2] = hregARM64_X2();
621 argregs[3] = hregARM64_X3();
622 argregs[4] = hregARM64_X4();
623 argregs[5] = hregARM64_X5();
624 argregs[6] = hregARM64_X6();
625 argregs[7] = hregARM64_X7();
626
627 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
628 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
629
630 /* First decide which scheme (slow or fast) is to be used. First
631 assume the fast scheme, and select slow if any contraindications
632 (wow) appear. */
633
634 go_fast = True;
635
636 if (guard) {
637 if (guard->tag == Iex_Const
638 && guard->Iex.Const.con->tag == Ico_U1
639 && guard->Iex.Const.con->Ico.U1 == True) {
640 /* unconditional */
641 } else {
642 /* Not manifestly unconditional -- be conservative. */
643 go_fast = False;
644 }
645 }
646
647 if (go_fast) {
648 for (i = 0; i < n_args; i++) {
649 if (mightRequireFixedRegs(args[i])) {
650 go_fast = False;
651 break;
652 }
653 }
654 }
655
656 if (go_fast) {
657 if (retTy == Ity_V128 || retTy == Ity_V256)
658 go_fast = False;
659 }
660
661 /* At this point the scheme to use has been established. Generate
662 code to get the arg values into the argument rregs. If we run
663 out of arg regs, give up. */
664
665 if (go_fast) {
666
667 /* FAST SCHEME */
668 nextArgReg = 0;
669
670 for (i = 0; i < n_args; i++) {
671 IRExpr* arg = args[i];
672
673 IRType aTy = Ity_INVALID;
674 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
675 aTy = typeOfIRExpr(env->type_env, args[i]);
676
677 if (nextArgReg >= ARM64_N_ARGREGS)
678 return False; /* out of argregs */
679
680 if (aTy == Ity_I64) {
681 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
682 iselIntExpr_R(env, args[i]) ));
683 nextArgReg++;
684 }
685 else if (arg->tag == Iex_BBPTR) {
686 vassert(0); //ATC
687 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
688 hregARM64_X21() ));
689 nextArgReg++;
690 }
691 else if (arg->tag == Iex_VECRET) {
692 // because of the go_fast logic above, we can't get here,
693 // since vector return values makes us use the slow path
694 // instead.
695 vassert(0);
696 }
697 else
698 return False; /* unhandled arg type */
699 }
700
701 /* Fast scheme only applies for unconditional calls. Hence: */
702 cc = ARM64cc_AL;
703
704 } else {
705
706 /* SLOW SCHEME; move via temporaries */
707 nextArgReg = 0;
708
709 for (i = 0; i < n_args; i++) {
710 IRExpr* arg = args[i];
711
712 IRType aTy = Ity_INVALID;
713 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
714 aTy = typeOfIRExpr(env->type_env, args[i]);
715
716 if (nextArgReg >= ARM64_N_ARGREGS)
717 return False; /* out of argregs */
718
719 if (aTy == Ity_I64) {
720 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
721 nextArgReg++;
722 }
723 else if (arg->tag == Iex_BBPTR) {
724 vassert(0); //ATC
725 tmpregs[nextArgReg] = hregARM64_X21();
726 nextArgReg++;
727 }
728 else if (arg->tag == Iex_VECRET) {
729 vassert(!hregIsInvalid(r_vecRetAddr));
730 tmpregs[nextArgReg] = r_vecRetAddr;
731 nextArgReg++;
732 }
733 else
734 return False; /* unhandled arg type */
735 }
736
737 /* Now we can compute the condition. We can't do it earlier
738 because the argument computations could trash the condition
739 codes. Be a bit clever to handle the common case where the
740 guard is 1:Bit. */
741 cc = ARM64cc_AL;
742 if (guard) {
743 if (guard->tag == Iex_Const
744 && guard->Iex.Const.con->tag == Ico_U1
745 && guard->Iex.Const.con->Ico.U1 == True) {
746 /* unconditional -- do nothing */
747 } else {
748 cc = iselCondCode( env, guard );
749 }
750 }
751
752 /* Move the args to their final destinations. */
753 for (i = 0; i < nextArgReg; i++) {
754 vassert(!(hregIsInvalid(tmpregs[i])));
755 /* None of these insns, including any spill code that might
756 be generated, may alter the condition codes. */
757 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
758 }
759
760 }
761
762 /* Should be assured by checks above */
763 vassert(nextArgReg <= ARM64_N_ARGREGS);
764
765 /* Do final checks, set the return values, and generate the call
766 instruction proper. */
767 vassert(nBBPTRs == 0 || nBBPTRs == 1);
768 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
769 vassert(*stackAdjustAfterCall == 0);
770 vassert(is_RetLoc_INVALID(*retloc));
771 switch (retTy) {
772 case Ity_INVALID:
773 /* Function doesn't return a value. */
774 *retloc = mk_RetLoc_simple(RLPri_None);
775 break;
776 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
777 *retloc = mk_RetLoc_simple(RLPri_Int);
778 break;
779 case Ity_V128:
780 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
781 *stackAdjustAfterCall = 16;
782 break;
783 case Ity_V256:
784 vassert(0); // ATC
785 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
786 *stackAdjustAfterCall = 32;
787 break;
788 default:
789 /* IR can denote other possible return types, but we don't
790 handle those here. */
791 vassert(0);
792 }
793
794 /* Finally, generate the call itself. This needs the *retloc value
795 set in the switch above, which is why it's at the end. */
796
797 /* nextArgReg doles out argument registers. Since these are
798 assigned in the order x0 .. x7, its numeric value at this point,
799 which must be between 0 and 8 inclusive, is going to be equal to
800 the number of arg regs in use for the call. Hence bake that
801 number into the call (we'll need to know it when doing register
802 allocation, to know what regs the call reads.) */
803
804 target = (HWord)Ptr_to_ULong(cee->addr);
805 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
806
807 return True; /* success */
808 }
809
810
811 /*---------------------------------------------------------*/
812 /*--- ISEL: Integer expressions (64/32 bit) ---*/
813 /*---------------------------------------------------------*/
814
815 /* Select insns for an integer-typed expression, and add them to the
816 code list. Return a reg holding the result. This reg will be a
817 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
818 want to modify it, ask for a new vreg, copy it in there, and modify
819 the copy. The register allocator will do its best to map both
820 vregs to the same real register, so the copies will often disappear
821 later in the game.
822
823 This should handle expressions of 64- and 32-bit type. All results
824 are returned in a 64-bit register. For 32-bit expressions, the
825 upper 32 bits are arbitrary, so you should mask or sign extend
826 partial values if necessary.
827 */
828
829 /* --------------------- AMode --------------------- */
830
831 /* Return an AMode which computes the value of the specified
832 expression, possibly also adding insns to the code list as a
833 result. The expression may only be a 64-bit one.
834 */
835
isValidScale(UChar scale)836 static Bool isValidScale ( UChar scale )
837 {
838 switch (scale) {
839 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
840 default: return False;
841 }
842 }
843
sane_AMode(ARM64AMode * am)844 static Bool sane_AMode ( ARM64AMode* am )
845 {
846 switch (am->tag) {
847 case ARM64am_RI9:
848 return
849 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
850 && (hregIsVirtual(am->ARM64am.RI9.reg)
851 /* || sameHReg(am->ARM64am.RI9.reg,
852 hregARM64_X21()) */ )
853 && am->ARM64am.RI9.simm9 >= -256
854 && am->ARM64am.RI9.simm9 <= 255 );
855 case ARM64am_RI12:
856 return
857 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
858 && (hregIsVirtual(am->ARM64am.RI12.reg)
859 /* || sameHReg(am->ARM64am.RI12.reg,
860 hregARM64_X21()) */ )
861 && am->ARM64am.RI12.uimm12 < 4096
862 && isValidScale(am->ARM64am.RI12.szB) );
863 case ARM64am_RR:
864 return
865 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
866 && hregIsVirtual(am->ARM64am.RR.base)
867 && hregClass(am->ARM64am.RR.index) == HRcInt64
868 && hregIsVirtual(am->ARM64am.RR.index) );
869 default:
870 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
871 }
872 }
873
874 static
iselIntExpr_AMode(ISelEnv * env,IRExpr * e,IRType dty)875 ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
876 {
877 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
878 vassert(sane_AMode(am));
879 return am;
880 }
881
882 static
iselIntExpr_AMode_wrk(ISelEnv * env,IRExpr * e,IRType dty)883 ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
884 {
885 IRType ty = typeOfIRExpr(env->type_env,e);
886 vassert(ty == Ity_I64);
887
888 ULong szBbits = 0;
889 switch (dty) {
890 case Ity_I64: szBbits = 3; break;
891 case Ity_I32: szBbits = 2; break;
892 case Ity_I16: szBbits = 1; break;
893 case Ity_I8: szBbits = 0; break;
894 default: vassert(0);
895 }
896
897 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
898 we're going to create an amode suitable for LDU* or STU*
899 instructions, which use unscaled immediate offsets. */
900 if (e->tag == Iex_Binop
901 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
902 && e->Iex.Binop.arg2->tag == Iex_Const
903 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
904 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
905 if (simm >= -255 && simm <= 255) {
906 /* Although the gating condition might seem to be
907 simm >= -256 && simm <= 255
908 we will need to negate simm in the case where the op is Sub64.
909 Hence limit the lower value to -255 in order that its negation
910 is representable. */
911 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
912 if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
913 return ARM64AMode_RI9(reg, (Int)simm);
914 }
915 }
916
917 /* Add64(expr, uimm12 * transfer-size) */
918 if (e->tag == Iex_Binop
919 && e->Iex.Binop.op == Iop_Add64
920 && e->Iex.Binop.arg2->tag == Iex_Const
921 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
922 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
923 ULong szB = 1 << szBbits;
924 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
925 && (uimm >> szBbits) < 4096) {
926 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
927 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
928 }
929 }
930
931 /* Add64(expr1, expr2) */
932 if (e->tag == Iex_Binop
933 && e->Iex.Binop.op == Iop_Add64) {
934 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
935 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
936 return ARM64AMode_RR(reg1, reg2);
937 }
938
939 /* Doesn't match anything in particular. Generate it into
940 a register and use that. */
941 HReg reg = iselIntExpr_R(env, e);
942 return ARM64AMode_RI9(reg, 0);
943 }
944
945 //ZZ /* --------------------- AModeV --------------------- */
946 //ZZ
947 //ZZ /* Return an AModeV which computes the value of the specified
948 //ZZ expression, possibly also adding insns to the code list as a
949 //ZZ result. The expression may only be a 32-bit one.
950 //ZZ */
951 //ZZ
952 //ZZ static Bool sane_AModeV ( ARMAModeV* am )
953 //ZZ {
954 //ZZ return toBool( hregClass(am->reg) == HRcInt32
955 //ZZ && hregIsVirtual(am->reg)
956 //ZZ && am->simm11 >= -1020 && am->simm11 <= 1020
957 //ZZ && 0 == (am->simm11 & 3) );
958 //ZZ }
959 //ZZ
960 //ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
961 //ZZ {
962 //ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
963 //ZZ vassert(sane_AModeV(am));
964 //ZZ return am;
965 //ZZ }
966 //ZZ
967 //ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
968 //ZZ {
969 //ZZ IRType ty = typeOfIRExpr(env->type_env,e);
970 //ZZ vassert(ty == Ity_I32);
971 //ZZ
972 //ZZ /* {Add32,Sub32}(expr, simm8 << 2) */
973 //ZZ if (e->tag == Iex_Binop
974 //ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
975 //ZZ && e->Iex.Binop.arg2->tag == Iex_Const
976 //ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
977 //ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
978 //ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
979 //ZZ HReg reg;
980 //ZZ if (e->Iex.Binop.op == Iop_Sub32)
981 //ZZ simm = -simm;
982 //ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
983 //ZZ return mkARMAModeV(reg, simm);
984 //ZZ }
985 //ZZ }
986 //ZZ
987 //ZZ /* Doesn't match anything in particular. Generate it into
988 //ZZ a register and use that. */
989 //ZZ {
990 //ZZ HReg reg = iselIntExpr_R(env, e);
991 //ZZ return mkARMAModeV(reg, 0);
992 //ZZ }
993 //ZZ
994 //ZZ }
995 //ZZ
996 //ZZ /* -------------------- AModeN -------------------- */
997 //ZZ
998 //ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
999 //ZZ {
1000 //ZZ return iselIntExpr_AModeN_wrk(env, e);
1001 //ZZ }
1002 //ZZ
1003 //ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1004 //ZZ {
1005 //ZZ HReg reg = iselIntExpr_R(env, e);
1006 //ZZ return mkARMAModeN_R(reg);
1007 //ZZ }
1008 //ZZ
1009 //ZZ
1010 //ZZ /* --------------------- RI84 --------------------- */
1011 //ZZ
1012 //ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is
1013 //ZZ true, then the caller will also accept an I84 form that denotes
1014 //ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set
1015 //ZZ to True. This complication is so as to allow generation of an RI84
1016 //ZZ which is suitable for use in either an AND or BIC instruction,
1017 //ZZ without knowing (before this call) which one.
1018 //ZZ */
1019 //ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1020 //ZZ ISelEnv* env, IRExpr* e )
1021 //ZZ {
1022 //ZZ ARMRI84* ri;
1023 //ZZ if (mayInv)
1024 //ZZ vassert(didInv != NULL);
1025 //ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1026 //ZZ /* sanity checks ... */
1027 //ZZ switch (ri->tag) {
1028 //ZZ case ARMri84_I84:
1029 //ZZ return ri;
1030 //ZZ case ARMri84_R:
1031 //ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1032 //ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg));
1033 //ZZ return ri;
1034 //ZZ default:
1035 //ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1036 //ZZ }
1037 //ZZ }
1038 //ZZ
1039 //ZZ /* DO NOT CALL THIS DIRECTLY ! */
1040 //ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1041 //ZZ ISelEnv* env, IRExpr* e )
1042 //ZZ {
1043 //ZZ IRType ty = typeOfIRExpr(env->type_env,e);
1044 //ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1045 //ZZ
1046 //ZZ if (didInv) *didInv = False;
1047 //ZZ
1048 //ZZ /* special case: immediate */
1049 //ZZ if (e->tag == Iex_Const) {
1050 //ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1051 //ZZ switch (e->Iex.Const.con->tag) {
1052 //ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1053 //ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1054 //ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1055 //ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1056 //ZZ }
1057 //ZZ if (fitsIn8x4(&u8, &u4, u)) {
1058 //ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1059 //ZZ }
1060 //ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1061 //ZZ vassert(didInv);
1062 //ZZ *didInv = True;
1063 //ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1064 //ZZ }
1065 //ZZ /* else fail, fall through to default case */
1066 //ZZ }
1067 //ZZ
1068 //ZZ /* default case: calculate into a register and return that */
1069 //ZZ {
1070 //ZZ HReg r = iselIntExpr_R ( env, e );
1071 //ZZ return ARMRI84_R(r);
1072 //ZZ }
1073 //ZZ }
1074
1075
1076 /* --------------------- RIA --------------------- */
1077
1078 /* Select instructions to generate 'e' into a RIA. */
1079
iselIntExpr_RIA(ISelEnv * env,IRExpr * e)1080 static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1081 {
1082 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1083 /* sanity checks ... */
1084 switch (ri->tag) {
1085 case ARM64riA_I12:
1086 vassert(ri->ARM64riA.I12.imm12 < 4096);
1087 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1088 return ri;
1089 case ARM64riA_R:
1090 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1091 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1092 return ri;
1093 default:
1094 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1095 }
1096 }
1097
1098 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RIA_wrk(ISelEnv * env,IRExpr * e)1099 static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1100 {
1101 IRType ty = typeOfIRExpr(env->type_env,e);
1102 vassert(ty == Ity_I64 || ty == Ity_I32);
1103
1104 /* special case: immediate */
1105 if (e->tag == Iex_Const) {
1106 ULong u = 0xF000000ULL; /* invalid */
1107 switch (e->Iex.Const.con->tag) {
1108 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1109 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1110 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1111 }
1112 if (0 == (u & ~(0xFFFULL << 0)))
1113 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1114 if (0 == (u & ~(0xFFFULL << 12)))
1115 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1116 /* else fail, fall through to default case */
1117 }
1118
1119 /* default case: calculate into a register and return that */
1120 {
1121 HReg r = iselIntExpr_R ( env, e );
1122 return ARM64RIA_R(r);
1123 }
1124 }
1125
1126
1127 /* --------------------- RIL --------------------- */
1128
1129 /* Select instructions to generate 'e' into a RIL. At this point we
1130 have to deal with the strange bitfield-immediate encoding for logic
1131 instructions. */
1132
1133
1134 // The following four functions
1135 // CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1136 // are copied, with modifications, from
1137 // https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1138 // which has the following copyright notice:
1139 /*
1140 Copyright 2013, ARM Limited
1141 All rights reserved.
1142
1143 Redistribution and use in source and binary forms, with or without
1144 modification, are permitted provided that the following conditions are met:
1145
1146 * Redistributions of source code must retain the above copyright notice,
1147 this list of conditions and the following disclaimer.
1148 * Redistributions in binary form must reproduce the above copyright notice,
1149 this list of conditions and the following disclaimer in the documentation
1150 and/or other materials provided with the distribution.
1151 * Neither the name of ARM Limited nor the names of its contributors may be
1152 used to endorse or promote products derived from this software without
1153 specific prior written permission.
1154
1155 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1156 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1157 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1158 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1159 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1160 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1161 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1162 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1163 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1164 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1165 */
1166
CountLeadingZeros(ULong value,Int width)1167 static Int CountLeadingZeros(ULong value, Int width)
1168 {
1169 vassert(width == 32 || width == 64);
1170 Int count = 0;
1171 ULong bit_test = 1ULL << (width - 1);
1172 while ((count < width) && ((bit_test & value) == 0)) {
1173 count++;
1174 bit_test >>= 1;
1175 }
1176 return count;
1177 }
1178
CountTrailingZeros(ULong value,Int width)1179 static Int CountTrailingZeros(ULong value, Int width)
1180 {
1181 vassert(width == 32 || width == 64);
1182 Int count = 0;
1183 while ((count < width) && (((value >> count) & 1) == 0)) {
1184 count++;
1185 }
1186 return count;
1187 }
1188
CountSetBits(ULong value,Int width)1189 static Int CountSetBits(ULong value, Int width)
1190 {
1191 // TODO: Other widths could be added here, as the implementation already
1192 // supports them.
1193 vassert(width == 32 || width == 64);
1194
1195 // Mask out unused bits to ensure that they are not counted.
1196 value &= (0xffffffffffffffffULL >> (64-width));
1197
1198 // Add up the set bits.
1199 // The algorithm works by adding pairs of bit fields together iteratively,
1200 // where the size of each bit field doubles each time.
1201 // An example for an 8-bit value:
1202 // Bits: h g f e d c b a
1203 // \ | \ | \ | \ |
1204 // value = h+g f+e d+c b+a
1205 // \ | \ |
1206 // value = h+g+f+e d+c+b+a
1207 // \ |
1208 // value = h+g+f+e+d+c+b+a
1209 value = ((value >> 1) & 0x5555555555555555ULL)
1210 + (value & 0x5555555555555555ULL);
1211 value = ((value >> 2) & 0x3333333333333333ULL)
1212 + (value & 0x3333333333333333ULL);
1213 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1214 + (value & 0x0f0f0f0f0f0f0f0fULL);
1215 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1216 + (value & 0x00ff00ff00ff00ffULL);
1217 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1218 + (value & 0x0000ffff0000ffffULL);
1219 value = ((value >> 32) & 0x00000000ffffffffULL)
1220 + (value & 0x00000000ffffffffULL);
1221
1222 return value;
1223 }
1224
isImmLogical(UInt * n,UInt * imm_s,UInt * imm_r,ULong value,UInt width)1225 static Bool isImmLogical ( /*OUT*/UInt* n,
1226 /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1227 ULong value, UInt width )
1228 {
1229 // Test if a given value can be encoded in the immediate field of a
1230 // logical instruction.
1231
1232 // If it can be encoded, the function returns true, and values
1233 // pointed to by n, imm_s and imm_r are updated with immediates
1234 // encoded in the format required by the corresponding fields in the
1235 // logical instruction. If it can not be encoded, the function
1236 // returns false, and the values pointed to by n, imm_s and imm_r
1237 // are undefined.
1238 vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1239 vassert(width == 32 || width == 64);
1240
1241 // Logical immediates are encoded using parameters n, imm_s and imm_r using
1242 // the following table:
1243 //
1244 // N imms immr size S R
1245 // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1246 // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1247 // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1248 // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1249 // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1250 // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1251 // (s bits must not be all set)
1252 //
1253 // A pattern is constructed of size bits, where the least significant S+1
1254 // bits are set. The pattern is rotated right by R, and repeated across a
1255 // 32 or 64-bit value, depending on destination register width.
1256 //
1257 // To test if an arbitrary immediate can be encoded using this scheme, an
1258 // iterative algorithm is used.
1259 //
1260 // TODO: This code does not consider using X/W register overlap to support
1261 // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1262 // are an encodable logical immediate.
1263
1264 // 1. If the value has all set or all clear bits, it can't be encoded.
1265 if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1266 ((width == 32) && (value == 0xffffffff))) {
1267 return False;
1268 }
1269
1270 UInt lead_zero = CountLeadingZeros(value, width);
1271 UInt lead_one = CountLeadingZeros(~value, width);
1272 UInt trail_zero = CountTrailingZeros(value, width);
1273 UInt trail_one = CountTrailingZeros(~value, width);
1274 UInt set_bits = CountSetBits(value, width);
1275
1276 // The fixed bits in the immediate s field.
1277 // If width == 64 (X reg), start at 0xFFFFFF80.
1278 // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1279 // widths won't be executed.
1280 Int imm_s_fixed = (width == 64) ? -128 : -64;
1281 Int imm_s_mask = 0x3F;
1282
1283 for (;;) {
1284 // 2. If the value is two bits wide, it can be encoded.
1285 if (width == 2) {
1286 *n = 0;
1287 *imm_s = 0x3C;
1288 *imm_r = (value & 3) - 1;
1289 return True;
1290 }
1291
1292 *n = (width == 64) ? 1 : 0;
1293 *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1294 if ((lead_zero + set_bits) == width) {
1295 *imm_r = 0;
1296 } else {
1297 *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1298 }
1299
1300 // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1301 // the bit width of the value, it can be encoded.
1302 if (lead_zero + trail_zero + set_bits == width) {
1303 return True;
1304 }
1305
1306 // 4. If the sum of leading ones, trailing ones and unset bits in the
1307 // value is equal to the bit width of the value, it can be encoded.
1308 if (lead_one + trail_one + (width - set_bits) == width) {
1309 return True;
1310 }
1311
1312 // 5. If the most-significant half of the bitwise value is equal to the
1313 // least-significant half, return to step 2 using the least-significant
1314 // half of the value.
1315 ULong mask = (1ULL << (width >> 1)) - 1;
1316 if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1317 width >>= 1;
1318 set_bits >>= 1;
1319 imm_s_fixed >>= 1;
1320 continue;
1321 }
1322
1323 // 6. Otherwise, the value can't be encoded.
1324 return False;
1325 }
1326 }
1327
1328
1329 /* Create a RIL for the given immediate, if it is representable, or
1330 return NULL if not. */
1331
mb_mkARM64RIL_I(ULong imm64)1332 static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1333 {
1334 UInt n = 0, imm_s = 0, imm_r = 0;
1335 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1336 if (!ok) return NULL;
1337 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1338 return ARM64RIL_I13(n, imm_r, imm_s);
1339 }
1340
1341 /* So, finally .. */
1342
iselIntExpr_RIL(ISelEnv * env,IRExpr * e)1343 static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1344 {
1345 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1346 /* sanity checks ... */
1347 switch (ri->tag) {
1348 case ARM64riL_I13:
1349 vassert(ri->ARM64riL.I13.bitN < 2);
1350 vassert(ri->ARM64riL.I13.immR < 64);
1351 vassert(ri->ARM64riL.I13.immS < 64);
1352 return ri;
1353 case ARM64riL_R:
1354 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1355 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1356 return ri;
1357 default:
1358 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1359 }
1360 }
1361
1362 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RIL_wrk(ISelEnv * env,IRExpr * e)1363 static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1364 {
1365 IRType ty = typeOfIRExpr(env->type_env,e);
1366 vassert(ty == Ity_I64 || ty == Ity_I32);
1367
1368 /* special case: immediate */
1369 if (e->tag == Iex_Const) {
1370 ARM64RIL* maybe = NULL;
1371 if (ty == Ity_I64) {
1372 vassert(e->Iex.Const.con->tag == Ico_U64);
1373 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1374 } else {
1375 vassert(ty == Ity_I32);
1376 vassert(e->Iex.Const.con->tag == Ico_U32);
1377 UInt u32 = e->Iex.Const.con->Ico.U32;
1378 ULong u64 = (ULong)u32;
1379 /* First try with 32 leading zeroes. */
1380 maybe = mb_mkARM64RIL_I(u64);
1381 /* If that doesn't work, try with 2 copies, since it doesn't
1382 matter what winds up in the upper 32 bits. */
1383 if (!maybe) {
1384 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1385 }
1386 }
1387 if (maybe) return maybe;
1388 /* else fail, fall through to default case */
1389 }
1390
1391 /* default case: calculate into a register and return that */
1392 {
1393 HReg r = iselIntExpr_R ( env, e );
1394 return ARM64RIL_R(r);
1395 }
1396 }
1397
1398
1399 /* --------------------- RI6 --------------------- */
1400
1401 /* Select instructions to generate 'e' into a RI6. */
1402
iselIntExpr_RI6(ISelEnv * env,IRExpr * e)1403 static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1404 {
1405 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1406 /* sanity checks ... */
1407 switch (ri->tag) {
1408 case ARM64ri6_I6:
1409 vassert(ri->ARM64ri6.I6.imm6 < 64);
1410 vassert(ri->ARM64ri6.I6.imm6 > 0);
1411 return ri;
1412 case ARM64ri6_R:
1413 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1414 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1415 return ri;
1416 default:
1417 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1418 }
1419 }
1420
1421 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_RI6_wrk(ISelEnv * env,IRExpr * e)1422 static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1423 {
1424 IRType ty = typeOfIRExpr(env->type_env,e);
1425 vassert(ty == Ity_I64 || ty == Ity_I8);
1426
1427 /* special case: immediate */
1428 if (e->tag == Iex_Const) {
1429 switch (e->Iex.Const.con->tag) {
1430 case Ico_U8: {
1431 UInt u = e->Iex.Const.con->Ico.U8;
1432 if (u > 0 && u < 64)
1433 return ARM64RI6_I6(u);
1434 break;
1435 default:
1436 break;
1437 }
1438 }
1439 /* else fail, fall through to default case */
1440 }
1441
1442 /* default case: calculate into a register and return that */
1443 {
1444 HReg r = iselIntExpr_R ( env, e );
1445 return ARM64RI6_R(r);
1446 }
1447 }
1448
1449
1450 /* ------------------- CondCode ------------------- */
1451
1452 /* Generate code to evaluated a bit-typed expression, returning the
1453 condition code which would correspond when the expression would
1454 notionally have returned 1. */
1455
iselCondCode(ISelEnv * env,IRExpr * e)1456 static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1457 {
1458 ARM64CondCode cc = iselCondCode_wrk(env,e);
1459 vassert(cc != ARM64cc_NV);
1460 return cc;
1461 }
1462
iselCondCode_wrk(ISelEnv * env,IRExpr * e)1463 static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1464 {
1465 vassert(e);
1466 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1467
1468 /* var */
1469 if (e->tag == Iex_RdTmp) {
1470 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1471 /* Cmp doesn't modify rTmp; so this is OK. */
1472 ARM64RIL* one = mb_mkARM64RIL_I(1);
1473 vassert(one);
1474 addInstr(env, ARM64Instr_Test(rTmp, one));
1475 return ARM64cc_NE;
1476 }
1477
1478 /* Not1(e) */
1479 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1480 /* Generate code for the arg, and negate the test condition */
1481 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1482 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1483 return ARM64cc_AL;
1484 } else {
1485 return 1 ^ cc;
1486 }
1487 }
1488
1489 /* --- patterns rooted at: 64to1 --- */
1490
1491 if (e->tag == Iex_Unop
1492 && e->Iex.Unop.op == Iop_64to1) {
1493 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1494 ARM64RIL* one = mb_mkARM64RIL_I(1);
1495 vassert(one); /* '1' must be representable */
1496 addInstr(env, ARM64Instr_Test(rTmp, one));
1497 return ARM64cc_NE;
1498 }
1499
1500 /* --- patterns rooted at: CmpNEZ8 --- */
1501
1502 if (e->tag == Iex_Unop
1503 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1504 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1505 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1506 addInstr(env, ARM64Instr_Test(r1, xFF));
1507 return ARM64cc_NE;
1508 }
1509
1510 /* --- patterns rooted at: CmpNEZ64 --- */
1511
1512 if (e->tag == Iex_Unop
1513 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1514 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1515 ARM64RIA* zero = ARM64RIA_I12(0,0);
1516 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1517 return ARM64cc_NE;
1518 }
1519
1520 /* --- patterns rooted at: CmpNEZ32 --- */
1521
1522 if (e->tag == Iex_Unop
1523 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1524 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1525 ARM64RIA* zero = ARM64RIA_I12(0,0);
1526 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1527 return ARM64cc_NE;
1528 }
1529
1530 /* --- Cmp*64*(x,y) --- */
1531 if (e->tag == Iex_Binop
1532 && (e->Iex.Binop.op == Iop_CmpEQ64
1533 || e->Iex.Binop.op == Iop_CmpNE64
1534 || e->Iex.Binop.op == Iop_CmpLT64S
1535 || e->Iex.Binop.op == Iop_CmpLT64U
1536 || e->Iex.Binop.op == Iop_CmpLE64S
1537 || e->Iex.Binop.op == Iop_CmpLE64U)) {
1538 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1539 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1540 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1541 switch (e->Iex.Binop.op) {
1542 case Iop_CmpEQ64: return ARM64cc_EQ;
1543 case Iop_CmpNE64: return ARM64cc_NE;
1544 case Iop_CmpLT64S: return ARM64cc_LT;
1545 case Iop_CmpLT64U: return ARM64cc_CC;
1546 case Iop_CmpLE64S: return ARM64cc_LE;
1547 case Iop_CmpLE64U: return ARM64cc_LS;
1548 default: vpanic("iselCondCode(arm64): CmpXX64");
1549 }
1550 }
1551
1552 /* --- Cmp*32*(x,y) --- */
1553 if (e->tag == Iex_Binop
1554 && (e->Iex.Binop.op == Iop_CmpEQ32
1555 || e->Iex.Binop.op == Iop_CmpNE32
1556 || e->Iex.Binop.op == Iop_CmpLT32S
1557 || e->Iex.Binop.op == Iop_CmpLT32U
1558 || e->Iex.Binop.op == Iop_CmpLE32S
1559 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1560 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1561 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1562 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1563 switch (e->Iex.Binop.op) {
1564 case Iop_CmpEQ32: return ARM64cc_EQ;
1565 case Iop_CmpNE32: return ARM64cc_NE;
1566 case Iop_CmpLT32S: return ARM64cc_LT;
1567 case Iop_CmpLT32U: return ARM64cc_CC;
1568 case Iop_CmpLE32S: return ARM64cc_LE;
1569 case Iop_CmpLE32U: return ARM64cc_LS;
1570 default: vpanic("iselCondCode(arm64): CmpXX32");
1571 }
1572 }
1573
1574 //ZZ /* const */
1575 //ZZ /* Constant 1:Bit */
1576 //ZZ if (e->tag == Iex_Const) {
1577 //ZZ HReg r;
1578 //ZZ vassert(e->Iex.Const.con->tag == Ico_U1);
1579 //ZZ vassert(e->Iex.Const.con->Ico.U1 == True
1580 //ZZ || e->Iex.Const.con->Ico.U1 == False);
1581 //ZZ r = newVRegI(env);
1582 //ZZ addInstr(env, ARMInstr_Imm32(r, 0));
1583 //ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1584 //ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1585 //ZZ }
1586 //ZZ
1587 //ZZ // JRS 2013-Jan-03: this seems completely nonsensical
1588 //ZZ /* --- CasCmpEQ* --- */
1589 //ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is
1590 //ZZ always true. */
1591 //ZZ //if (e->tag == Iex_Binop
1592 //ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1593 //ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16
1594 //ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1595 //ZZ // return ARMcc_AL;
1596 //ZZ //}
1597
1598 ppIRExpr(e);
1599 vpanic("iselCondCode");
1600 }
1601
1602
1603 /* --------------------- Reg --------------------- */
1604
iselIntExpr_R(ISelEnv * env,IRExpr * e)1605 static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1606 {
1607 HReg r = iselIntExpr_R_wrk(env, e);
1608 /* sanity checks ... */
1609 # if 0
1610 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1611 # endif
1612 vassert(hregClass(r) == HRcInt64);
1613 vassert(hregIsVirtual(r));
1614 return r;
1615 }
1616
1617 /* DO NOT CALL THIS DIRECTLY ! */
iselIntExpr_R_wrk(ISelEnv * env,IRExpr * e)1618 static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1619 {
1620 IRType ty = typeOfIRExpr(env->type_env,e);
1621 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1622
1623 switch (e->tag) {
1624
1625 /* --------- TEMP --------- */
1626 case Iex_RdTmp: {
1627 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1628 }
1629
1630 /* --------- LOAD --------- */
1631 case Iex_Load: {
1632 HReg dst = newVRegI(env);
1633
1634 if (e->Iex.Load.end != Iend_LE)
1635 goto irreducible;
1636
1637 if (ty == Ity_I64) {
1638 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1639 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1640 return dst;
1641 }
1642 if (ty == Ity_I32) {
1643 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1644 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1645 return dst;
1646 }
1647 if (ty == Ity_I16) {
1648 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1649 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1650 return dst;
1651 }
1652 if (ty == Ity_I8) {
1653 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1654 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1655 return dst;
1656 }
1657 break;
1658 }
1659
1660 /* --------- BINARY OP --------- */
1661 case Iex_Binop: {
1662
1663 ARM64LogicOp lop = 0; /* invalid */
1664 ARM64ShiftOp sop = 0; /* invalid */
1665
1666 /* Special-case 0-x into a Neg instruction. Not because it's
1667 particularly useful but more so as to give value flow using
1668 this instruction, so as to check its assembly correctness for
1669 implementation of Left32/Left64. */
1670 switch (e->Iex.Binop.op) {
1671 case Iop_Sub64:
1672 if (isZeroU64(e->Iex.Binop.arg1)) {
1673 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1674 HReg dst = newVRegI(env);
1675 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1676 return dst;
1677 }
1678 break;
1679 default:
1680 break;
1681 }
1682
1683 /* ADD/SUB */
1684 switch (e->Iex.Binop.op) {
1685 case Iop_Add64: case Iop_Add32:
1686 case Iop_Sub64: case Iop_Sub32: {
1687 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1688 || e->Iex.Binop.op == Iop_Add32;
1689 HReg dst = newVRegI(env);
1690 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1691 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1692 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1693 return dst;
1694 }
1695 default:
1696 break;
1697 }
1698
1699 /* AND/OR/XOR */
1700 switch (e->Iex.Binop.op) {
1701 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1702 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1703 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1704 log_binop: {
1705 HReg dst = newVRegI(env);
1706 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1707 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1708 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1709 return dst;
1710 }
1711 default:
1712 break;
1713 }
1714
1715 /* SHL/SHR/SAR */
1716 switch (e->Iex.Binop.op) {
1717 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1718 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1719 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1720 sh_binop: {
1721 HReg dst = newVRegI(env);
1722 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1723 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1724 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1725 return dst;
1726 }
1727 case Iop_Shr32:
1728 case Iop_Sar32: {
1729 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1730 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1731 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1732 HReg dst = zx ? widen_z_32_to_64(env, argL)
1733 : widen_s_32_to_64(env, argL);
1734 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1735 return dst;
1736 }
1737 default: break;
1738 }
1739
1740 /* MUL */
1741 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1742 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1743 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1744 HReg dst = newVRegI(env);
1745 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1746 return dst;
1747 }
1748
1749 /* MULL */
1750 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1751 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1752 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1753 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1754 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1755 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1756 HReg dst = newVRegI(env);
1757 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1758 return dst;
1759 }
1760
1761 /* Handle misc other ops. */
1762
1763 if (e->Iex.Binop.op == Iop_Max32U) {
1764 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1765 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1766 HReg dst = newVRegI(env);
1767 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1768 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1769 return dst;
1770 }
1771
1772 if (e->Iex.Binop.op == Iop_32HLto64) {
1773 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1774 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1775 HReg lo32 = widen_z_32_to_64(env, lo32s);
1776 HReg hi32 = newVRegI(env);
1777 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1778 ARM64sh_SHL));
1779 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1780 ARM64lo_OR));
1781 return hi32;
1782 }
1783
1784 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1785 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1786 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1787 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1788 HReg dst = newVRegI(env);
1789 HReg imm = newVRegI(env);
1790 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1791 create in dst, the IRCmpF64Result encoded result. */
1792 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1793 addInstr(env, ARM64Instr_Imm64(dst, 0));
1794 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1795 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1796 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1797 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1798 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1799 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1800 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1801 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1802 return dst;
1803 }
1804
1805 { /* local scope */
1806 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1807 Bool srcIsD = False;
1808 switch (e->Iex.Binop.op) {
1809 case Iop_F64toI64S:
1810 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1811 case Iop_F64toI64U:
1812 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1813 case Iop_F64toI32S:
1814 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1815 case Iop_F64toI32U:
1816 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1817 case Iop_F32toI32S:
1818 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1819 case Iop_F32toI32U:
1820 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1821 case Iop_F32toI64S:
1822 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1823 case Iop_F32toI64U:
1824 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1825 default:
1826 break;
1827 }
1828 if (cvt_op != ARM64cvt_INVALID) {
1829 /* This is all a bit dodgy, because we can't handle a
1830 non-constant (not-known-at-JIT-time) rounding mode
1831 indication. That's because there's no instruction
1832 AFAICS that does this conversion but rounds according to
1833 FPCR.RM, so we have to bake the rounding mode into the
1834 instruction right now. But that should be OK because
1835 (1) the front end attaches a literal Irrm_ value to the
1836 conversion binop, and (2) iropt will never float that
1837 off via CSE, into a literal. Hence we should always
1838 have an Irrm_ value as the first arg. */
1839 IRExpr* arg1 = e->Iex.Binop.arg1;
1840 if (arg1->tag != Iex_Const) goto irreducible;
1841 IRConst* arg1con = arg1->Iex.Const.con;
1842 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1843 UInt irrm = arg1con->Ico.U32;
1844 /* Find the ARM-encoded equivalent for |irrm|. */
1845 UInt armrm = 4; /* impossible */
1846 switch (irrm) {
1847 case Irrm_NEAREST: armrm = 0; break;
1848 case Irrm_NegINF: armrm = 2; break;
1849 case Irrm_PosINF: armrm = 1; break;
1850 case Irrm_ZERO: armrm = 3; break;
1851 default: goto irreducible;
1852 }
1853 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1854 (env, e->Iex.Binop.arg2);
1855 HReg dst = newVRegI(env);
1856 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1857 return dst;
1858 }
1859 } /* local scope */
1860
1861 //ZZ if (e->Iex.Binop.op == Iop_GetElem8x8
1862 //ZZ || e->Iex.Binop.op == Iop_GetElem16x4
1863 //ZZ || e->Iex.Binop.op == Iop_GetElem32x2) {
1864 //ZZ HReg res = newVRegI(env);
1865 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1866 //ZZ UInt index, size;
1867 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1868 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1869 //ZZ vpanic("ARM target supports GetElem with constant "
1870 //ZZ "second argument only\n");
1871 //ZZ }
1872 //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1873 //ZZ switch (e->Iex.Binop.op) {
1874 //ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1875 //ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1876 //ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1877 //ZZ default: vassert(0);
1878 //ZZ }
1879 //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1880 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1881 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1882 //ZZ size, False));
1883 //ZZ return res;
1884 //ZZ }
1885 //ZZ
1886 //ZZ if (e->Iex.Binop.op == Iop_GetElem8x16
1887 //ZZ || e->Iex.Binop.op == Iop_GetElem16x8
1888 //ZZ || e->Iex.Binop.op == Iop_GetElem32x4) {
1889 //ZZ HReg res = newVRegI(env);
1890 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1891 //ZZ UInt index, size;
1892 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1893 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1894 //ZZ vpanic("ARM target supports GetElem with constant "
1895 //ZZ "second argument only\n");
1896 //ZZ }
1897 //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1898 //ZZ switch (e->Iex.Binop.op) {
1899 //ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1900 //ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1901 //ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1902 //ZZ default: vassert(0);
1903 //ZZ }
1904 //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1905 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1906 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1907 //ZZ size, True));
1908 //ZZ return res;
1909 //ZZ }
1910
1911 /* All cases involving host-side helper calls. */
1912 void* fn = NULL;
1913 switch (e->Iex.Binop.op) {
1914 //ZZ case Iop_Add16x2:
1915 //ZZ fn = &h_generic_calc_Add16x2; break;
1916 //ZZ case Iop_Sub16x2:
1917 //ZZ fn = &h_generic_calc_Sub16x2; break;
1918 //ZZ case Iop_HAdd16Ux2:
1919 //ZZ fn = &h_generic_calc_HAdd16Ux2; break;
1920 //ZZ case Iop_HAdd16Sx2:
1921 //ZZ fn = &h_generic_calc_HAdd16Sx2; break;
1922 //ZZ case Iop_HSub16Ux2:
1923 //ZZ fn = &h_generic_calc_HSub16Ux2; break;
1924 //ZZ case Iop_HSub16Sx2:
1925 //ZZ fn = &h_generic_calc_HSub16Sx2; break;
1926 //ZZ case Iop_QAdd16Sx2:
1927 //ZZ fn = &h_generic_calc_QAdd16Sx2; break;
1928 //ZZ case Iop_QAdd16Ux2:
1929 //ZZ fn = &h_generic_calc_QAdd16Ux2; break;
1930 //ZZ case Iop_QSub16Sx2:
1931 //ZZ fn = &h_generic_calc_QSub16Sx2; break;
1932 //ZZ case Iop_Add8x4:
1933 //ZZ fn = &h_generic_calc_Add8x4; break;
1934 //ZZ case Iop_Sub8x4:
1935 //ZZ fn = &h_generic_calc_Sub8x4; break;
1936 //ZZ case Iop_HAdd8Ux4:
1937 //ZZ fn = &h_generic_calc_HAdd8Ux4; break;
1938 //ZZ case Iop_HAdd8Sx4:
1939 //ZZ fn = &h_generic_calc_HAdd8Sx4; break;
1940 //ZZ case Iop_HSub8Ux4:
1941 //ZZ fn = &h_generic_calc_HSub8Ux4; break;
1942 //ZZ case Iop_HSub8Sx4:
1943 //ZZ fn = &h_generic_calc_HSub8Sx4; break;
1944 //ZZ case Iop_QAdd8Sx4:
1945 //ZZ fn = &h_generic_calc_QAdd8Sx4; break;
1946 //ZZ case Iop_QAdd8Ux4:
1947 //ZZ fn = &h_generic_calc_QAdd8Ux4; break;
1948 //ZZ case Iop_QSub8Sx4:
1949 //ZZ fn = &h_generic_calc_QSub8Sx4; break;
1950 //ZZ case Iop_QSub8Ux4:
1951 //ZZ fn = &h_generic_calc_QSub8Ux4; break;
1952 //ZZ case Iop_Sad8Ux4:
1953 //ZZ fn = &h_generic_calc_Sad8Ux4; break;
1954 //ZZ case Iop_QAdd32S:
1955 //ZZ fn = &h_generic_calc_QAdd32S; break;
1956 //ZZ case Iop_QSub32S:
1957 //ZZ fn = &h_generic_calc_QSub32S; break;
1958 //ZZ case Iop_QSub16Ux2:
1959 //ZZ fn = &h_generic_calc_QSub16Ux2; break;
1960 case Iop_DivU32:
1961 fn = &h_calc_udiv32_w_arm_semantics; break;
1962 case Iop_DivS32:
1963 fn = &h_calc_sdiv32_w_arm_semantics; break;
1964 case Iop_DivU64:
1965 fn = &h_calc_udiv64_w_arm_semantics; break;
1966 case Iop_DivS64:
1967 fn = &h_calc_sdiv64_w_arm_semantics; break;
1968 default:
1969 break;
1970 }
1971
1972 if (fn) {
1973 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1974 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1975 HReg res = newVRegI(env);
1976 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1977 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1978 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
1979 2, mk_RetLoc_simple(RLPri_Int) ));
1980 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1981 return res;
1982 }
1983
1984 break;
1985 }
1986
1987 /* --------- UNARY OP --------- */
1988 case Iex_Unop: {
1989
1990 switch (e->Iex.Unop.op) {
1991 case Iop_16Uto64: {
1992 /* This probably doesn't occur often enough to be worth
1993 rolling the extension into the load. */
1994 IRExpr* arg = e->Iex.Unop.arg;
1995 HReg src = iselIntExpr_R(env, arg);
1996 HReg dst = widen_z_16_to_64(env, src);
1997 return dst;
1998 }
1999 case Iop_32Uto64: {
2000 IRExpr* arg = e->Iex.Unop.arg;
2001 if (arg->tag == Iex_Load) {
2002 /* This correctly zero extends because _LdSt32 is
2003 defined to do a zero extending load. */
2004 HReg dst = newVRegI(env);
2005 ARM64AMode* am
2006 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
2007 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2008 return dst;
2009 }
2010 /* else be lame and mask it */
2011 HReg src = iselIntExpr_R(env, arg);
2012 HReg dst = widen_z_32_to_64(env, src);
2013 return dst;
2014 }
2015 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2016 case Iop_8Uto64: {
2017 IRExpr* arg = e->Iex.Unop.arg;
2018 if (arg->tag == Iex_Load) {
2019 /* This correctly zero extends because _LdSt8 is
2020 defined to do a zero extending load. */
2021 HReg dst = newVRegI(env);
2022 ARM64AMode* am
2023 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2024 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2025 return dst;
2026 }
2027 /* else be lame and mask it */
2028 HReg src = iselIntExpr_R(env, arg);
2029 HReg dst = widen_z_8_to_64(env, src);
2030 return dst;
2031 }
2032 case Iop_128HIto64: {
2033 HReg rHi, rLo;
2034 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2035 return rHi; /* and abandon rLo */
2036 }
2037 case Iop_8Sto32: case Iop_8Sto64: {
2038 IRExpr* arg = e->Iex.Unop.arg;
2039 HReg src = iselIntExpr_R(env, arg);
2040 HReg dst = widen_s_8_to_64(env, src);
2041 return dst;
2042 }
2043 case Iop_16Sto32: case Iop_16Sto64: {
2044 IRExpr* arg = e->Iex.Unop.arg;
2045 HReg src = iselIntExpr_R(env, arg);
2046 HReg dst = widen_s_16_to_64(env, src);
2047 return dst;
2048 }
2049 case Iop_32Sto64: {
2050 IRExpr* arg = e->Iex.Unop.arg;
2051 HReg src = iselIntExpr_R(env, arg);
2052 HReg dst = widen_s_32_to_64(env, src);
2053 return dst;
2054 }
2055 case Iop_Not32:
2056 case Iop_Not64: {
2057 HReg dst = newVRegI(env);
2058 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2059 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2060 return dst;
2061 }
2062 case Iop_Clz64: {
2063 HReg dst = newVRegI(env);
2064 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2065 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2066 return dst;
2067 }
2068 case Iop_Left32:
2069 case Iop_Left64: {
2070 /* Left64(src) = src | -src. Left32 can use the same
2071 implementation since in that case we don't care what
2072 the upper 32 bits become. */
2073 HReg dst = newVRegI(env);
2074 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2075 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2076 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2077 ARM64lo_OR));
2078 return dst;
2079 }
2080 case Iop_CmpwNEZ64: {
2081 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2082 = Left64(src) >>s 63 */
2083 HReg dst = newVRegI(env);
2084 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2085 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2086 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2087 ARM64lo_OR));
2088 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2089 ARM64sh_SAR));
2090 return dst;
2091 }
2092 case Iop_CmpwNEZ32: {
2093 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2094 = Left64(src & 0xFFFFFFFF) >>s 63 */
2095 HReg dst = newVRegI(env);
2096 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2097 HReg src = widen_z_32_to_64(env, pre);
2098 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2099 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2100 ARM64lo_OR));
2101 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2102 ARM64sh_SAR));
2103 return dst;
2104 }
2105 case Iop_V128to64: case Iop_V128HIto64: {
2106 HReg dst = newVRegI(env);
2107 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2108 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2109 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2110 return dst;
2111 }
2112 case Iop_1Sto32:
2113 case Iop_1Sto64: {
2114 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2115 do a lot better here if it ever became necessary. */
2116 HReg zero = newVRegI(env);
2117 HReg one = newVRegI(env);
2118 HReg dst = newVRegI(env);
2119 addInstr(env, ARM64Instr_Imm64(zero, 0));
2120 addInstr(env, ARM64Instr_Imm64(one, 1));
2121 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2122 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2123 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2124 ARM64sh_SHL));
2125 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2126 ARM64sh_SAR));
2127 return dst;
2128 }
2129 case Iop_NarrowUn16to8x8:
2130 case Iop_NarrowUn32to16x4:
2131 case Iop_NarrowUn64to32x2: {
2132 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2133 HReg tmp = newVRegV(env);
2134 HReg dst = newVRegI(env);
2135 UInt dszBlg2 = 3; /* illegal */
2136 switch (e->Iex.Unop.op) {
2137 case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8
2138 case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
2139 case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
2140 default: vassert(0);
2141 }
2142 addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src));
2143 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2144 return dst;
2145 }
2146 //ZZ case Iop_64HIto32: {
2147 //ZZ HReg rHi, rLo;
2148 //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2149 //ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */
2150 //ZZ }
2151 //ZZ case Iop_64to32: {
2152 //ZZ HReg rHi, rLo;
2153 //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2154 //ZZ return rLo; /* similar stupid comment to the above ... */
2155 //ZZ }
2156 //ZZ case Iop_64to8: {
2157 //ZZ HReg rHi, rLo;
2158 //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2159 //ZZ HReg tHi = newVRegI(env);
2160 //ZZ HReg tLo = newVRegI(env);
2161 //ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
2162 //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2163 //ZZ rHi = tHi;
2164 //ZZ rLo = tLo;
2165 //ZZ } else {
2166 //ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2167 //ZZ }
2168 //ZZ return rLo;
2169 //ZZ }
2170
2171 case Iop_1Uto64: {
2172 /* 1Uto64(tmp). */
2173 HReg dst = newVRegI(env);
2174 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2175 ARM64RIL* one = mb_mkARM64RIL_I(1);
2176 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2177 vassert(one);
2178 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2179 } else {
2180 /* CLONE-01 */
2181 HReg zero = newVRegI(env);
2182 HReg one = newVRegI(env);
2183 addInstr(env, ARM64Instr_Imm64(zero, 0));
2184 addInstr(env, ARM64Instr_Imm64(one, 1));
2185 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2186 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2187 }
2188 return dst;
2189 }
2190 //ZZ case Iop_1Uto8: {
2191 //ZZ HReg dst = newVRegI(env);
2192 //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2193 //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2194 //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2195 //ZZ return dst;
2196 //ZZ }
2197 //ZZ
2198 //ZZ case Iop_1Sto32: {
2199 //ZZ HReg dst = newVRegI(env);
2200 //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2201 //ZZ ARMRI5* amt = ARMRI5_I5(31);
2202 //ZZ /* This is really rough. We could do much better here;
2203 //ZZ perhaps mvn{cond} dst, #0 as the second insn?
2204 //ZZ (same applies to 1Sto64) */
2205 //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2206 //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2207 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2208 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2209 //ZZ return dst;
2210 //ZZ }
2211 //ZZ
2212 //ZZ case Iop_Clz32: {
2213 //ZZ /* Count leading zeroes; easy on ARM. */
2214 //ZZ HReg dst = newVRegI(env);
2215 //ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2216 //ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
2217 //ZZ return dst;
2218 //ZZ }
2219 //ZZ
2220 //ZZ case Iop_CmpwNEZ32: {
2221 //ZZ HReg dst = newVRegI(env);
2222 //ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2223 //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
2224 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
2225 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
2226 //ZZ return dst;
2227 //ZZ }
2228 //ZZ
2229 //ZZ case Iop_ReinterpF32asI32: {
2230 //ZZ HReg dst = newVRegI(env);
2231 //ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2232 //ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
2233 //ZZ return dst;
2234 //ZZ }
2235
2236 case Iop_64to32:
2237 case Iop_64to16:
2238 case Iop_64to8:
2239 /* These are no-ops. */
2240 return iselIntExpr_R(env, e->Iex.Unop.arg);
2241
2242 default:
2243 break;
2244 }
2245
2246 //ZZ /* All Unop cases involving host-side helper calls. */
2247 //ZZ void* fn = NULL;
2248 //ZZ switch (e->Iex.Unop.op) {
2249 //ZZ case Iop_CmpNEZ16x2:
2250 //ZZ fn = &h_generic_calc_CmpNEZ16x2; break;
2251 //ZZ case Iop_CmpNEZ8x4:
2252 //ZZ fn = &h_generic_calc_CmpNEZ8x4; break;
2253 //ZZ default:
2254 //ZZ break;
2255 //ZZ }
2256 //ZZ
2257 //ZZ if (fn) {
2258 //ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2259 //ZZ HReg res = newVRegI(env);
2260 //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
2261 //ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
2262 //ZZ 1, RetLocInt ));
2263 //ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
2264 //ZZ return res;
2265 //ZZ }
2266
2267 break;
2268 }
2269
2270 /* --------- GET --------- */
2271 case Iex_Get: {
2272 if (ty == Ity_I64
2273 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2274 HReg dst = newVRegI(env);
2275 ARM64AMode* am
2276 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2277 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2278 return dst;
2279 }
2280 if (ty == Ity_I32
2281 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2282 HReg dst = newVRegI(env);
2283 ARM64AMode* am
2284 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2285 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2286 return dst;
2287 }
2288 if (ty == Ity_I16
2289 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2290 HReg dst = newVRegI(env);
2291 ARM64AMode* am
2292 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2293 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2294 return dst;
2295 }
2296 if (ty == Ity_I8
2297 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2298 HReg dst = newVRegI(env);
2299 ARM64AMode* am
2300 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2301 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2302 return dst;
2303 }
2304 break;
2305 }
2306
2307 /* --------- CCALL --------- */
2308 case Iex_CCall: {
2309 HReg dst = newVRegI(env);
2310 vassert(ty == e->Iex.CCall.retty);
2311
2312 /* be very restrictive for now. Only 64-bit ints allowed for
2313 args, and 64 bits for return type. Don't forget to change
2314 the RetLoc if more types are allowed in future. */
2315 if (e->Iex.CCall.retty != Ity_I64)
2316 goto irreducible;
2317
2318 /* Marshal args, do the call, clear stack. */
2319 UInt addToSp = 0;
2320 RetLoc rloc = mk_RetLoc_INVALID();
2321 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2322 e->Iex.CCall.cee, e->Iex.CCall.retty,
2323 e->Iex.CCall.args );
2324 /* */
2325 if (ok) {
2326 vassert(is_sane_RetLoc(rloc));
2327 vassert(rloc.pri == RLPri_Int);
2328 vassert(addToSp == 0);
2329 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2330 return dst;
2331 }
2332 /* else fall through; will hit the irreducible: label */
2333 }
2334
2335 /* --------- LITERAL --------- */
2336 /* 64-bit literals */
2337 case Iex_Const: {
2338 ULong u = 0;
2339 HReg dst = newVRegI(env);
2340 switch (e->Iex.Const.con->tag) {
2341 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2342 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2343 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2344 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2345 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2346 }
2347 addInstr(env, ARM64Instr_Imm64(dst, u));
2348 return dst;
2349 }
2350
2351 /* --------- MULTIPLEX --------- */
2352 case Iex_ITE: {
2353 /* ITE(ccexpr, iftrue, iffalse) */
2354 if (ty == Ity_I64 || ty == Ity_I32) {
2355 ARM64CondCode cc;
2356 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2357 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2358 HReg dst = newVRegI(env);
2359 cc = iselCondCode(env, e->Iex.ITE.cond);
2360 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2361 return dst;
2362 }
2363 break;
2364 }
2365
2366 default:
2367 break;
2368 } /* switch (e->tag) */
2369
2370 /* We get here if no pattern matched. */
2371 irreducible:
2372 ppIRExpr(e);
2373 vpanic("iselIntExpr_R: cannot reduce tree");
2374 }
2375
2376
2377 /*---------------------------------------------------------*/
2378 /*--- ISEL: Integer expressions (128 bit) ---*/
2379 /*---------------------------------------------------------*/
2380
2381 /* Compute a 128-bit value into a register pair, which is returned as
2382 the first two parameters. As with iselIntExpr_R, these may be
2383 either real or virtual regs; in any case they must not be changed
2384 by subsequent code emitted by the caller. */
2385
/* Compute a 128-bit value into a register pair, returned via *rHi
   (most significant half) and *rLo (least significant half).  Checked
   wrapper around iselInt128Expr_wrk: both results must be virtual
   64-bit integer registers, and must not be modified by code
   subsequently emitted by the caller. */
static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   HReg hi, lo;
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   hi = *rHi;
   lo = *rLo;
   /* Both halves must live in virtual 64-bit integer registers. */
   vassert(hregClass(hi) == HRcInt64);
   vassert(hregIsVirtual(hi));
   vassert(hregClass(lo) == HRcInt64);
   vassert(hregIsVirtual(lo));
}
2398
2399 /* DO NOT CALL THIS DIRECTLY ! */
/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      IROp op = e->Iex.Binop.op;

      /* 64 x 64 -> 128 multiply */
      if (op == Iop_MullU64 || op == Iop_MullS64) {
         Bool isSigned = toBool(op == Iop_MullS64);
         HReg srcL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg srcR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg resLo = newVRegI(env);
         HReg resHi = newVRegI(env);
         /* Low half is a plain 64x64->64 multiply; high half comes
            from the signed/unsigned widening-high multiply form. */
         addInstr(env, ARM64Instr_Mul(resLo, srcL, srcR,
                                      ARM64mul_PLAIN));
         addInstr(env, ARM64Instr_Mul(resHi, srcL, srcR,
                                      isSigned ? ARM64mul_SX
                                               : ARM64mul_ZX));
         *rHi = resHi;
         *rLo = resLo;
         return;
      }

      /* 64HLto128(e1,e2): the two args are already the two halves. */
      if (op == Iop_64HLto128) {
         *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
         *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
         return;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr(arm64)");
}
2438
2439
2440 //ZZ /* -------------------- 64-bit -------------------- */
2441 //ZZ
2442 //ZZ /* Compute a 64-bit value into a register pair, which is returned as
2443 //ZZ the first two parameters. As with iselIntExpr_R, these may be
2444 //ZZ either real or virtual regs; in any case they must not be changed
2445 //ZZ by subsequent code emitted by the caller. */
2446 //ZZ
2447 //ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2448 //ZZ {
2449 //ZZ iselInt64Expr_wrk(rHi, rLo, env, e);
2450 //ZZ # if 0
2451 //ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2452 //ZZ # endif
2453 //ZZ vassert(hregClass(*rHi) == HRcInt32);
2454 //ZZ vassert(hregIsVirtual(*rHi));
2455 //ZZ vassert(hregClass(*rLo) == HRcInt32);
2456 //ZZ vassert(hregIsVirtual(*rLo));
2457 //ZZ }
2458 //ZZ
2459 //ZZ /* DO NOT CALL THIS DIRECTLY ! */
2460 //ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2461 //ZZ {
2462 //ZZ vassert(e);
2463 //ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2464 //ZZ
2465 //ZZ /* 64-bit literal */
2466 //ZZ if (e->tag == Iex_Const) {
2467 //ZZ ULong w64 = e->Iex.Const.con->Ico.U64;
2468 //ZZ UInt wHi = toUInt(w64 >> 32);
2469 //ZZ UInt wLo = toUInt(w64);
2470 //ZZ HReg tHi = newVRegI(env);
2471 //ZZ HReg tLo = newVRegI(env);
2472 //ZZ vassert(e->Iex.Const.con->tag == Ico_U64);
2473 //ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi));
2474 //ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo));
2475 //ZZ *rHi = tHi;
2476 //ZZ *rLo = tLo;
2477 //ZZ return;
2478 //ZZ }
2479 //ZZ
2480 //ZZ /* read 64-bit IRTemp */
2481 //ZZ if (e->tag == Iex_RdTmp) {
2482 //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2483 //ZZ HReg tHi = newVRegI(env);
2484 //ZZ HReg tLo = newVRegI(env);
2485 //ZZ HReg tmp = iselNeon64Expr(env, e);
2486 //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2487 //ZZ *rHi = tHi;
2488 //ZZ *rLo = tLo;
2489 //ZZ } else {
2490 //ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2491 //ZZ }
2492 //ZZ return;
2493 //ZZ }
2494 //ZZ
2495 //ZZ /* 64-bit load */
2496 //ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2497 //ZZ HReg tLo, tHi, rA;
2498 //ZZ vassert(e->Iex.Load.ty == Ity_I64);
2499 //ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr);
2500 //ZZ tHi = newVRegI(env);
2501 //ZZ tLo = newVRegI(env);
2502 //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2503 //ZZ tHi, ARMAMode1_RI(rA, 4)));
2504 //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2505 //ZZ tLo, ARMAMode1_RI(rA, 0)));
2506 //ZZ *rHi = tHi;
2507 //ZZ *rLo = tLo;
2508 //ZZ return;
2509 //ZZ }
2510 //ZZ
2511 //ZZ /* 64-bit GET */
2512 //ZZ if (e->tag == Iex_Get) {
2513 //ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2514 //ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2515 //ZZ HReg tHi = newVRegI(env);
2516 //ZZ HReg tLo = newVRegI(env);
2517 //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2518 //ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2519 //ZZ *rHi = tHi;
2520 //ZZ *rLo = tLo;
2521 //ZZ return;
2522 //ZZ }
2523 //ZZ
2524 //ZZ /* --------- BINARY ops --------- */
2525 //ZZ if (e->tag == Iex_Binop) {
2526 //ZZ switch (e->Iex.Binop.op) {
2527 //ZZ
2528 //ZZ /* 32 x 32 -> 64 multiply */
2529 //ZZ case Iop_MullS32:
2530 //ZZ case Iop_MullU32: {
2531 //ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2532 //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2533 //ZZ HReg tHi = newVRegI(env);
2534 //ZZ HReg tLo = newVRegI(env);
2535 //ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
2536 //ZZ ? ARMmul_SX : ARMmul_ZX;
2537 //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2538 //ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2539 //ZZ addInstr(env, ARMInstr_Mul(mop));
2540 //ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2541 //ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2542 //ZZ *rHi = tHi;
2543 //ZZ *rLo = tLo;
2544 //ZZ return;
2545 //ZZ }
2546 //ZZ
2547 //ZZ case Iop_Or64: {
2548 //ZZ HReg xLo, xHi, yLo, yHi;
2549 //ZZ HReg tHi = newVRegI(env);
2550 //ZZ HReg tLo = newVRegI(env);
2551 //ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2552 //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2553 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2554 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2555 //ZZ *rHi = tHi;
2556 //ZZ *rLo = tLo;
2557 //ZZ return;
2558 //ZZ }
2559 //ZZ
2560 //ZZ case Iop_Add64: {
2561 //ZZ HReg xLo, xHi, yLo, yHi;
2562 //ZZ HReg tHi = newVRegI(env);
2563 //ZZ HReg tLo = newVRegI(env);
2564 //ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2565 //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2566 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2567 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
2568 //ZZ *rHi = tHi;
2569 //ZZ *rLo = tLo;
2570 //ZZ return;
2571 //ZZ }
2572 //ZZ
2573 //ZZ /* 32HLto64(e1,e2) */
2574 //ZZ case Iop_32HLto64: {
2575 //ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2576 //ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2577 //ZZ return;
2578 //ZZ }
2579 //ZZ
2580 //ZZ default:
2581 //ZZ break;
2582 //ZZ }
2583 //ZZ }
2584 //ZZ
2585 //ZZ /* --------- UNARY ops --------- */
2586 //ZZ if (e->tag == Iex_Unop) {
2587 //ZZ switch (e->Iex.Unop.op) {
2588 //ZZ
2589 //ZZ /* ReinterpF64asI64 */
2590 //ZZ case Iop_ReinterpF64asI64: {
2591 //ZZ HReg dstHi = newVRegI(env);
2592 //ZZ HReg dstLo = newVRegI(env);
2593 //ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2594 //ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2595 //ZZ *rHi = dstHi;
2596 //ZZ *rLo = dstLo;
2597 //ZZ return;
2598 //ZZ }
2599 //ZZ
2600 //ZZ /* Left64(e) */
2601 //ZZ case Iop_Left64: {
2602 //ZZ HReg yLo, yHi;
2603 //ZZ HReg tHi = newVRegI(env);
2604 //ZZ HReg tLo = newVRegI(env);
2605 //ZZ HReg zero = newVRegI(env);
2606 //ZZ /* yHi:yLo = arg */
2607 //ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2608 //ZZ /* zero = 0 */
2609 //ZZ addInstr(env, ARMInstr_Imm32(zero, 0));
2610 //ZZ /* tLo = 0 - yLo, and set carry */
2611 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2612 //ZZ tLo, zero, ARMRI84_R(yLo)));
2613 //ZZ /* tHi = 0 - yHi - carry */
2614 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2615 //ZZ tHi, zero, ARMRI84_R(yHi)));
2616 //ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2617 //ZZ back in, so as to give the final result
2618 //ZZ tHi:tLo = arg | -arg. */
2619 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2620 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2621 //ZZ *rHi = tHi;
2622 //ZZ *rLo = tLo;
2623 //ZZ return;
2624 //ZZ }
2625 //ZZ
2626 //ZZ /* CmpwNEZ64(e) */
2627 //ZZ case Iop_CmpwNEZ64: {
2628 //ZZ HReg srcLo, srcHi;
2629 //ZZ HReg tmp1 = newVRegI(env);
2630 //ZZ HReg tmp2 = newVRegI(env);
2631 //ZZ /* srcHi:srcLo = arg */
2632 //ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2633 //ZZ /* tmp1 = srcHi | srcLo */
2634 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2635 //ZZ tmp1, srcHi, ARMRI84_R(srcLo)));
2636 //ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2637 //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2638 //ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2639 //ZZ tmp2, tmp2, ARMRI84_R(tmp1)));
2640 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2641 //ZZ tmp2, tmp2, ARMRI5_I5(31)));
2642 //ZZ *rHi = tmp2;
2643 //ZZ *rLo = tmp2;
2644 //ZZ return;
2645 //ZZ }
2646 //ZZ
2647 //ZZ case Iop_1Sto64: {
2648 //ZZ HReg dst = newVRegI(env);
2649 //ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2650 //ZZ ARMRI5* amt = ARMRI5_I5(31);
2651 //ZZ /* This is really rough. We could do much better here;
2652 //ZZ perhaps mvn{cond} dst, #0 as the second insn?
2653 //ZZ (same applies to 1Sto32) */
2654 //ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2655 //ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2656 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2657 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2658 //ZZ *rHi = dst;
2659 //ZZ *rLo = dst;
2660 //ZZ return;
2661 //ZZ }
2662 //ZZ
2663 //ZZ default:
2664 //ZZ break;
2665 //ZZ }
2666 //ZZ } /* if (e->tag == Iex_Unop) */
2667 //ZZ
2668 //ZZ /* --------- MULTIPLEX --------- */
2669 //ZZ if (e->tag == Iex_ITE) { // VFD
2670 //ZZ IRType tyC;
2671 //ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2672 //ZZ ARMCondCode cc;
2673 //ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2674 //ZZ vassert(tyC == Ity_I1);
2675 //ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2676 //ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2677 //ZZ dstHi = newVRegI(env);
2678 //ZZ dstLo = newVRegI(env);
2679 //ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2680 //ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2681 //ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
2682 //ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2683 //ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2684 //ZZ *rHi = dstHi;
2685 //ZZ *rLo = dstLo;
2686 //ZZ return;
2687 //ZZ }
2688 //ZZ
2689 //ZZ /* It is convenient sometimes to call iselInt64Expr even when we
2690 //ZZ have NEON support (e.g. in do_helper_call we need 64-bit
2691 //ZZ arguments as 2 x 32 regs). */
2692 //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2693 //ZZ HReg tHi = newVRegI(env);
2694 //ZZ HReg tLo = newVRegI(env);
2695 //ZZ HReg tmp = iselNeon64Expr(env, e);
2696 //ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2697 //ZZ *rHi = tHi;
2698 //ZZ *rLo = tLo;
2699 //ZZ return ;
2700 //ZZ }
2701 //ZZ
2702 //ZZ ppIRExpr(e);
2703 //ZZ vpanic("iselInt64Expr");
2704 //ZZ }
2705 //ZZ
2706 //ZZ
2707 //ZZ /*---------------------------------------------------------*/
2708 //ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/
2709 //ZZ /*---------------------------------------------------------*/
2710 //ZZ
2711 //ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2712 //ZZ {
2713 //ZZ HReg r = iselNeon64Expr_wrk( env, e );
2714 //ZZ vassert(hregClass(r) == HRcFlt64);
2715 //ZZ vassert(hregIsVirtual(r));
2716 //ZZ return r;
2717 //ZZ }
2718 //ZZ
2719 //ZZ /* DO NOT CALL THIS DIRECTLY */
2720 //ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2721 //ZZ {
2722 //ZZ IRType ty = typeOfIRExpr(env->type_env, e);
2723 //ZZ MatchInfo mi;
2724 //ZZ vassert(e);
2725 //ZZ vassert(ty == Ity_I64);
2726 //ZZ
2727 //ZZ if (e->tag == Iex_RdTmp) {
2728 //ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2729 //ZZ }
2730 //ZZ
2731 //ZZ if (e->tag == Iex_Const) {
2732 //ZZ HReg rLo, rHi;
2733 //ZZ HReg res = newVRegD(env);
2734 //ZZ iselInt64Expr(&rHi, &rLo, env, e);
2735 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2736 //ZZ return res;
2737 //ZZ }
2738 //ZZ
2739 //ZZ /* 64-bit load */
2740 //ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2741 //ZZ HReg res = newVRegD(env);
2742 //ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2743 //ZZ vassert(ty == Ity_I64);
2744 //ZZ addInstr(env, ARMInstr_NLdStD(True, res, am));
2745 //ZZ return res;
2746 //ZZ }
2747 //ZZ
2748 //ZZ /* 64-bit GET */
2749 //ZZ if (e->tag == Iex_Get) {
2750 //ZZ HReg addr = newVRegI(env);
2751 //ZZ HReg res = newVRegD(env);
2752 //ZZ vassert(ty == Ity_I64);
2753 //ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2754 //ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2755 //ZZ return res;
2756 //ZZ }
2757 //ZZ
2758 //ZZ /* --------- BINARY ops --------- */
2759 //ZZ if (e->tag == Iex_Binop) {
2760 //ZZ switch (e->Iex.Binop.op) {
2761 //ZZ
2762 //ZZ /* 32 x 32 -> 64 multiply */
2763 //ZZ case Iop_MullS32:
2764 //ZZ case Iop_MullU32: {
2765 //ZZ HReg rLo, rHi;
2766 //ZZ HReg res = newVRegD(env);
2767 //ZZ iselInt64Expr(&rHi, &rLo, env, e);
2768 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2769 //ZZ return res;
2770 //ZZ }
2771 //ZZ
2772 //ZZ case Iop_And64: {
2773 //ZZ HReg res = newVRegD(env);
2774 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2775 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2776 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2777 //ZZ res, argL, argR, 4, False));
2778 //ZZ return res;
2779 //ZZ }
2780 //ZZ case Iop_Or64: {
2781 //ZZ HReg res = newVRegD(env);
2782 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2785 //ZZ res, argL, argR, 4, False));
2786 //ZZ return res;
2787 //ZZ }
2788 //ZZ case Iop_Xor64: {
2789 //ZZ HReg res = newVRegD(env);
2790 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2791 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2792 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2793 //ZZ res, argL, argR, 4, False));
2794 //ZZ return res;
2795 //ZZ }
2796 //ZZ
2797 //ZZ /* 32HLto64(e1,e2) */
2798 //ZZ case Iop_32HLto64: {
2799 //ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2800 //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2801 //ZZ HReg res = newVRegD(env);
2802 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2803 //ZZ return res;
2804 //ZZ }
2805 //ZZ
2806 //ZZ case Iop_Add8x8:
2807 //ZZ case Iop_Add16x4:
2808 //ZZ case Iop_Add32x2:
2809 //ZZ case Iop_Add64: {
2810 //ZZ HReg res = newVRegD(env);
2811 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2812 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2813 //ZZ UInt size;
2814 //ZZ switch (e->Iex.Binop.op) {
2815 //ZZ case Iop_Add8x8: size = 0; break;
2816 //ZZ case Iop_Add16x4: size = 1; break;
2817 //ZZ case Iop_Add32x2: size = 2; break;
2818 //ZZ case Iop_Add64: size = 3; break;
2819 //ZZ default: vassert(0);
2820 //ZZ }
2821 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2822 //ZZ res, argL, argR, size, False));
2823 //ZZ return res;
2824 //ZZ }
2825 //ZZ case Iop_Add32Fx2: {
2826 //ZZ HReg res = newVRegD(env);
2827 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2828 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2829 //ZZ UInt size = 0;
2830 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2831 //ZZ res, argL, argR, size, False));
2832 //ZZ return res;
2833 //ZZ }
2834 //ZZ case Iop_Recps32Fx2: {
2835 //ZZ HReg res = newVRegD(env);
2836 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2837 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2838 //ZZ UInt size = 0;
2839 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2840 //ZZ res, argL, argR, size, False));
2841 //ZZ return res;
2842 //ZZ }
2843 //ZZ case Iop_Rsqrts32Fx2: {
2844 //ZZ HReg res = newVRegD(env);
2845 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2846 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2847 //ZZ UInt size = 0;
2848 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2849 //ZZ res, argL, argR, size, False));
2850 //ZZ return res;
2851 //ZZ }
2852 //ZZ
2853 //ZZ // These 6 verified 18 Apr 2013
2854 //ZZ case Iop_InterleaveHI32x2:
2855 //ZZ case Iop_InterleaveLO32x2:
2856 //ZZ case Iop_InterleaveOddLanes8x8:
2857 //ZZ case Iop_InterleaveEvenLanes8x8:
2858 //ZZ case Iop_InterleaveOddLanes16x4:
2859 //ZZ case Iop_InterleaveEvenLanes16x4: {
2860 //ZZ HReg rD = newVRegD(env);
2861 //ZZ HReg rM = newVRegD(env);
2862 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2863 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2864 //ZZ UInt size;
2865 //ZZ Bool resRd; // is the result in rD or rM ?
2866 //ZZ switch (e->Iex.Binop.op) {
2867 //ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2868 //ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2869 //ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2870 //ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2871 //ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2872 //ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2873 //ZZ default: vassert(0);
2874 //ZZ }
2875 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2876 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2877 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2878 //ZZ return resRd ? rD : rM;
2879 //ZZ }
2880 //ZZ
2881 //ZZ // These 4 verified 18 Apr 2013
2882 //ZZ case Iop_InterleaveHI8x8:
2883 //ZZ case Iop_InterleaveLO8x8:
2884 //ZZ case Iop_InterleaveHI16x4:
2885 //ZZ case Iop_InterleaveLO16x4: {
2886 //ZZ HReg rD = newVRegD(env);
2887 //ZZ HReg rM = newVRegD(env);
2888 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2889 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2890 //ZZ UInt size;
2891 //ZZ Bool resRd; // is the result in rD or rM ?
2892 //ZZ switch (e->Iex.Binop.op) {
2893 //ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2894 //ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2895 //ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2896 //ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2897 //ZZ default: vassert(0);
2898 //ZZ }
2899 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2900 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2901 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2902 //ZZ return resRd ? rD : rM;
2903 //ZZ }
2904 //ZZ
2905 //ZZ // These 4 verified 18 Apr 2013
2906 //ZZ case Iop_CatOddLanes8x8:
2907 //ZZ case Iop_CatEvenLanes8x8:
2908 //ZZ case Iop_CatOddLanes16x4:
2909 //ZZ case Iop_CatEvenLanes16x4: {
2910 //ZZ HReg rD = newVRegD(env);
2911 //ZZ HReg rM = newVRegD(env);
2912 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2913 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2914 //ZZ UInt size;
2915 //ZZ Bool resRd; // is the result in rD or rM ?
2916 //ZZ switch (e->Iex.Binop.op) {
2917 //ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2918 //ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2919 //ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2920 //ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2921 //ZZ default: vassert(0);
2922 //ZZ }
2923 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2924 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2925 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2926 //ZZ return resRd ? rD : rM;
2927 //ZZ }
2928 //ZZ
2929 //ZZ case Iop_QAdd8Ux8:
2930 //ZZ case Iop_QAdd16Ux4:
2931 //ZZ case Iop_QAdd32Ux2:
2932 //ZZ case Iop_QAdd64Ux1: {
2933 //ZZ HReg res = newVRegD(env);
2934 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2936 //ZZ UInt size;
2937 //ZZ switch (e->Iex.Binop.op) {
2938 //ZZ case Iop_QAdd8Ux8: size = 0; break;
2939 //ZZ case Iop_QAdd16Ux4: size = 1; break;
2940 //ZZ case Iop_QAdd32Ux2: size = 2; break;
2941 //ZZ case Iop_QAdd64Ux1: size = 3; break;
2942 //ZZ default: vassert(0);
2943 //ZZ }
2944 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2945 //ZZ res, argL, argR, size, False));
2946 //ZZ return res;
2947 //ZZ }
2948 //ZZ case Iop_QAdd8Sx8:
2949 //ZZ case Iop_QAdd16Sx4:
2950 //ZZ case Iop_QAdd32Sx2:
2951 //ZZ case Iop_QAdd64Sx1: {
2952 //ZZ HReg res = newVRegD(env);
2953 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955 //ZZ UInt size;
2956 //ZZ switch (e->Iex.Binop.op) {
2957 //ZZ case Iop_QAdd8Sx8: size = 0; break;
2958 //ZZ case Iop_QAdd16Sx4: size = 1; break;
2959 //ZZ case Iop_QAdd32Sx2: size = 2; break;
2960 //ZZ case Iop_QAdd64Sx1: size = 3; break;
2961 //ZZ default: vassert(0);
2962 //ZZ }
2963 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2964 //ZZ res, argL, argR, size, False));
2965 //ZZ return res;
2966 //ZZ }
2967 //ZZ case Iop_Sub8x8:
2968 //ZZ case Iop_Sub16x4:
2969 //ZZ case Iop_Sub32x2:
2970 //ZZ case Iop_Sub64: {
2971 //ZZ HReg res = newVRegD(env);
2972 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2973 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2974 //ZZ UInt size;
2975 //ZZ switch (e->Iex.Binop.op) {
2976 //ZZ case Iop_Sub8x8: size = 0; break;
2977 //ZZ case Iop_Sub16x4: size = 1; break;
2978 //ZZ case Iop_Sub32x2: size = 2; break;
2979 //ZZ case Iop_Sub64: size = 3; break;
2980 //ZZ default: vassert(0);
2981 //ZZ }
2982 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2983 //ZZ res, argL, argR, size, False));
2984 //ZZ return res;
2985 //ZZ }
2986 //ZZ case Iop_Sub32Fx2: {
2987 //ZZ HReg res = newVRegD(env);
2988 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2989 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2990 //ZZ UInt size = 0;
2991 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2992 //ZZ res, argL, argR, size, False));
2993 //ZZ return res;
2994 //ZZ }
2995 //ZZ case Iop_QSub8Ux8:
2996 //ZZ case Iop_QSub16Ux4:
2997 //ZZ case Iop_QSub32Ux2:
2998 //ZZ case Iop_QSub64Ux1: {
2999 //ZZ HReg res = newVRegD(env);
3000 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3001 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3002 //ZZ UInt size;
3003 //ZZ switch (e->Iex.Binop.op) {
3004 //ZZ case Iop_QSub8Ux8: size = 0; break;
3005 //ZZ case Iop_QSub16Ux4: size = 1; break;
3006 //ZZ case Iop_QSub32Ux2: size = 2; break;
3007 //ZZ case Iop_QSub64Ux1: size = 3; break;
3008 //ZZ default: vassert(0);
3009 //ZZ }
3010 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
3011 //ZZ res, argL, argR, size, False));
3012 //ZZ return res;
3013 //ZZ }
3014 //ZZ case Iop_QSub8Sx8:
3015 //ZZ case Iop_QSub16Sx4:
3016 //ZZ case Iop_QSub32Sx2:
3017 //ZZ case Iop_QSub64Sx1: {
3018 //ZZ HReg res = newVRegD(env);
3019 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3020 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3021 //ZZ UInt size;
3022 //ZZ switch (e->Iex.Binop.op) {
3023 //ZZ case Iop_QSub8Sx8: size = 0; break;
3024 //ZZ case Iop_QSub16Sx4: size = 1; break;
3025 //ZZ case Iop_QSub32Sx2: size = 2; break;
3026 //ZZ case Iop_QSub64Sx1: size = 3; break;
3027 //ZZ default: vassert(0);
3028 //ZZ }
3029 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
3030 //ZZ res, argL, argR, size, False));
3031 //ZZ return res;
3032 //ZZ }
3033 //ZZ case Iop_Max8Ux8:
3034 //ZZ case Iop_Max16Ux4:
3035 //ZZ case Iop_Max32Ux2: {
3036 //ZZ HReg res = newVRegD(env);
3037 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3038 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3039 //ZZ UInt size;
3040 //ZZ switch (e->Iex.Binop.op) {
3041 //ZZ case Iop_Max8Ux8: size = 0; break;
3042 //ZZ case Iop_Max16Ux4: size = 1; break;
3043 //ZZ case Iop_Max32Ux2: size = 2; break;
3044 //ZZ default: vassert(0);
3045 //ZZ }
3046 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
3047 //ZZ res, argL, argR, size, False));
3048 //ZZ return res;
3049 //ZZ }
3050 //ZZ case Iop_Max8Sx8:
3051 //ZZ case Iop_Max16Sx4:
3052 //ZZ case Iop_Max32Sx2: {
3053 //ZZ HReg res = newVRegD(env);
3054 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3055 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3056 //ZZ UInt size;
3057 //ZZ switch (e->Iex.Binop.op) {
3058 //ZZ case Iop_Max8Sx8: size = 0; break;
3059 //ZZ case Iop_Max16Sx4: size = 1; break;
3060 //ZZ case Iop_Max32Sx2: size = 2; break;
3061 //ZZ default: vassert(0);
3062 //ZZ }
3063 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
3064 //ZZ res, argL, argR, size, False));
3065 //ZZ return res;
3066 //ZZ }
3067 //ZZ case Iop_Min8Ux8:
3068 //ZZ case Iop_Min16Ux4:
3069 //ZZ case Iop_Min32Ux2: {
3070 //ZZ HReg res = newVRegD(env);
3071 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3072 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3073 //ZZ UInt size;
3074 //ZZ switch (e->Iex.Binop.op) {
3075 //ZZ case Iop_Min8Ux8: size = 0; break;
3076 //ZZ case Iop_Min16Ux4: size = 1; break;
3077 //ZZ case Iop_Min32Ux2: size = 2; break;
3078 //ZZ default: vassert(0);
3079 //ZZ }
3080 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
3081 //ZZ res, argL, argR, size, False));
3082 //ZZ return res;
3083 //ZZ }
3084 //ZZ case Iop_Min8Sx8:
3085 //ZZ case Iop_Min16Sx4:
3086 //ZZ case Iop_Min32Sx2: {
3087 //ZZ HReg res = newVRegD(env);
3088 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3089 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3090 //ZZ UInt size;
3091 //ZZ switch (e->Iex.Binop.op) {
3092 //ZZ case Iop_Min8Sx8: size = 0; break;
3093 //ZZ case Iop_Min16Sx4: size = 1; break;
3094 //ZZ case Iop_Min32Sx2: size = 2; break;
3095 //ZZ default: vassert(0);
3096 //ZZ }
3097 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
3098 //ZZ res, argL, argR, size, False));
3099 //ZZ return res;
3100 //ZZ }
3101 //ZZ case Iop_Sar8x8:
3102 //ZZ case Iop_Sar16x4:
3103 //ZZ case Iop_Sar32x2: {
3104 //ZZ HReg res = newVRegD(env);
3105 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3106 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3107 //ZZ HReg argR2 = newVRegD(env);
3108 //ZZ HReg zero = newVRegD(env);
3109 //ZZ UInt size;
3110 //ZZ switch (e->Iex.Binop.op) {
3111 //ZZ case Iop_Sar8x8: size = 0; break;
3112 //ZZ case Iop_Sar16x4: size = 1; break;
3113 //ZZ case Iop_Sar32x2: size = 2; break;
3114 //ZZ case Iop_Sar64: size = 3; break;
3115 //ZZ default: vassert(0);
3116 //ZZ }
3117 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3118 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3119 //ZZ argR2, zero, argR, size, False));
3120 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3121 //ZZ res, argL, argR2, size, False));
3122 //ZZ return res;
3123 //ZZ }
3124 //ZZ case Iop_Sal8x8:
3125 //ZZ case Iop_Sal16x4:
3126 //ZZ case Iop_Sal32x2:
3127 //ZZ case Iop_Sal64x1: {
3128 //ZZ HReg res = newVRegD(env);
3129 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3130 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3131 //ZZ UInt size;
3132 //ZZ switch (e->Iex.Binop.op) {
3133 //ZZ case Iop_Sal8x8: size = 0; break;
3134 //ZZ case Iop_Sal16x4: size = 1; break;
3135 //ZZ case Iop_Sal32x2: size = 2; break;
3136 //ZZ case Iop_Sal64x1: size = 3; break;
3137 //ZZ default: vassert(0);
3138 //ZZ }
3139 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3140 //ZZ res, argL, argR, size, False));
3141 //ZZ return res;
3142 //ZZ }
3143 //ZZ case Iop_Shr8x8:
3144 //ZZ case Iop_Shr16x4:
3145 //ZZ case Iop_Shr32x2: {
3146 //ZZ HReg res = newVRegD(env);
3147 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3148 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3149 //ZZ HReg argR2 = newVRegD(env);
3150 //ZZ HReg zero = newVRegD(env);
3151 //ZZ UInt size;
3152 //ZZ switch (e->Iex.Binop.op) {
3153 //ZZ case Iop_Shr8x8: size = 0; break;
3154 //ZZ case Iop_Shr16x4: size = 1; break;
3155 //ZZ case Iop_Shr32x2: size = 2; break;
3156 //ZZ default: vassert(0);
3157 //ZZ }
3158 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3159 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3160 //ZZ argR2, zero, argR, size, False));
3161 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3162 //ZZ res, argL, argR2, size, False));
3163 //ZZ return res;
3164 //ZZ }
3165 //ZZ case Iop_Shl8x8:
3166 //ZZ case Iop_Shl16x4:
3167 //ZZ case Iop_Shl32x2: {
3168 //ZZ HReg res = newVRegD(env);
3169 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3170 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3171 //ZZ UInt size;
3172 //ZZ switch (e->Iex.Binop.op) {
3173 //ZZ case Iop_Shl8x8: size = 0; break;
3174 //ZZ case Iop_Shl16x4: size = 1; break;
3175 //ZZ case Iop_Shl32x2: size = 2; break;
3176 //ZZ default: vassert(0);
3177 //ZZ }
3178 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3179 //ZZ res, argL, argR, size, False));
3180 //ZZ return res;
3181 //ZZ }
3182 //ZZ case Iop_QShl8x8:
3183 //ZZ case Iop_QShl16x4:
3184 //ZZ case Iop_QShl32x2:
3185 //ZZ case Iop_QShl64x1: {
3186 //ZZ HReg res = newVRegD(env);
3187 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3188 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3189 //ZZ UInt size;
3190 //ZZ switch (e->Iex.Binop.op) {
3191 //ZZ case Iop_QShl8x8: size = 0; break;
3192 //ZZ case Iop_QShl16x4: size = 1; break;
3193 //ZZ case Iop_QShl32x2: size = 2; break;
3194 //ZZ case Iop_QShl64x1: size = 3; break;
3195 //ZZ default: vassert(0);
3196 //ZZ }
3197 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
3198 //ZZ res, argL, argR, size, False));
3199 //ZZ return res;
3200 //ZZ }
3201 //ZZ case Iop_QSal8x8:
3202 //ZZ case Iop_QSal16x4:
3203 //ZZ case Iop_QSal32x2:
3204 //ZZ case Iop_QSal64x1: {
3205 //ZZ HReg res = newVRegD(env);
3206 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3207 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3208 //ZZ UInt size;
3209 //ZZ switch (e->Iex.Binop.op) {
3210 //ZZ case Iop_QSal8x8: size = 0; break;
3211 //ZZ case Iop_QSal16x4: size = 1; break;
3212 //ZZ case Iop_QSal32x2: size = 2; break;
3213 //ZZ case Iop_QSal64x1: size = 3; break;
3214 //ZZ default: vassert(0);
3215 //ZZ }
3216 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
3217 //ZZ res, argL, argR, size, False));
3218 //ZZ return res;
3219 //ZZ }
3220 //ZZ case Iop_QShlN8x8:
3221 //ZZ case Iop_QShlN16x4:
3222 //ZZ case Iop_QShlN32x2:
3223 //ZZ case Iop_QShlN64x1: {
3224 //ZZ HReg res = newVRegD(env);
3225 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3226 //ZZ UInt size, imm;
3227 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3228 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3229 //ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3230 //ZZ "second argument only\n");
3231 //ZZ }
3232 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3233 //ZZ switch (e->Iex.Binop.op) {
3234 //ZZ case Iop_QShlN8x8: size = 8 | imm; break;
3235 //ZZ case Iop_QShlN16x4: size = 16 | imm; break;
3236 //ZZ case Iop_QShlN32x2: size = 32 | imm; break;
3237 //ZZ case Iop_QShlN64x1: size = 64 | imm; break;
3238 //ZZ default: vassert(0);
3239 //ZZ }
3240 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
3241 //ZZ res, argL, size, False));
3242 //ZZ return res;
3243 //ZZ }
3244 //ZZ case Iop_QShlN8Sx8:
3245 //ZZ case Iop_QShlN16Sx4:
3246 //ZZ case Iop_QShlN32Sx2:
3247 //ZZ case Iop_QShlN64Sx1: {
3248 //ZZ HReg res = newVRegD(env);
3249 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3250 //ZZ UInt size, imm;
3251 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3252 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3253 //ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3254 //ZZ "second argument only\n");
3255 //ZZ }
3256 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3257 //ZZ switch (e->Iex.Binop.op) {
3258 //ZZ case Iop_QShlN8Sx8: size = 8 | imm; break;
3259 //ZZ case Iop_QShlN16Sx4: size = 16 | imm; break;
3260 //ZZ case Iop_QShlN32Sx2: size = 32 | imm; break;
3261 //ZZ case Iop_QShlN64Sx1: size = 64 | imm; break;
3262 //ZZ default: vassert(0);
3263 //ZZ }
3264 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
3265 //ZZ res, argL, size, False));
3266 //ZZ return res;
3267 //ZZ }
3268 //ZZ case Iop_QSalN8x8:
3269 //ZZ case Iop_QSalN16x4:
3270 //ZZ case Iop_QSalN32x2:
3271 //ZZ case Iop_QSalN64x1: {
3272 //ZZ HReg res = newVRegD(env);
3273 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3274 //ZZ UInt size, imm;
3275 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3276 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3277 //ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3278 //ZZ "second argument only\n");
3279 //ZZ }
3280 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3281 //ZZ switch (e->Iex.Binop.op) {
3282 //ZZ case Iop_QSalN8x8: size = 8 | imm; break;
3283 //ZZ case Iop_QSalN16x4: size = 16 | imm; break;
3284 //ZZ case Iop_QSalN32x2: size = 32 | imm; break;
3285 //ZZ case Iop_QSalN64x1: size = 64 | imm; break;
3286 //ZZ default: vassert(0);
3287 //ZZ }
3288 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
3289 //ZZ res, argL, size, False));
3290 //ZZ return res;
3291 //ZZ }
3292 //ZZ case Iop_ShrN8x8:
3293 //ZZ case Iop_ShrN16x4:
3294 //ZZ case Iop_ShrN32x2:
3295 //ZZ case Iop_Shr64: {
3296 //ZZ HReg res = newVRegD(env);
3297 //ZZ HReg tmp = newVRegD(env);
3298 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3299 //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3300 //ZZ HReg argR2 = newVRegI(env);
3301 //ZZ UInt size;
3302 //ZZ switch (e->Iex.Binop.op) {
3303 //ZZ case Iop_ShrN8x8: size = 0; break;
3304 //ZZ case Iop_ShrN16x4: size = 1; break;
3305 //ZZ case Iop_ShrN32x2: size = 2; break;
3306 //ZZ case Iop_Shr64: size = 3; break;
3307 //ZZ default: vassert(0);
3308 //ZZ }
3309 //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3310 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3311 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3312 //ZZ res, argL, tmp, size, False));
3313 //ZZ return res;
3314 //ZZ }
3315 //ZZ case Iop_ShlN8x8:
3316 //ZZ case Iop_ShlN16x4:
3317 //ZZ case Iop_ShlN32x2:
3318 //ZZ case Iop_Shl64: {
3319 //ZZ HReg res = newVRegD(env);
3320 //ZZ HReg tmp = newVRegD(env);
3321 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3322 //ZZ /* special-case Shl64(x, imm8) since the Neon front
3323 //ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */
3324 //ZZ if (e->Iex.Binop.op == Iop_Shl64
3325 //ZZ && e->Iex.Binop.arg2->tag == Iex_Const) {
3326 //ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
3327 //ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3328 //ZZ if (nshift >= 1 && nshift <= 63) {
3329 //ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift));
3330 //ZZ return res;
3331 //ZZ }
3332 //ZZ /* else fall through to general case */
3333 //ZZ }
3334 //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3335 //ZZ UInt size;
3336 //ZZ switch (e->Iex.Binop.op) {
3337 //ZZ case Iop_ShlN8x8: size = 0; break;
3338 //ZZ case Iop_ShlN16x4: size = 1; break;
3339 //ZZ case Iop_ShlN32x2: size = 2; break;
3340 //ZZ case Iop_Shl64: size = 3; break;
3341 //ZZ default: vassert(0);
3342 //ZZ }
3343 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
3344 //ZZ tmp, argR, 0, False));
3345 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3346 //ZZ res, argL, tmp, size, False));
3347 //ZZ return res;
3348 //ZZ }
3349 //ZZ case Iop_SarN8x8:
3350 //ZZ case Iop_SarN16x4:
3351 //ZZ case Iop_SarN32x2:
3352 //ZZ case Iop_Sar64: {
3353 //ZZ HReg res = newVRegD(env);
3354 //ZZ HReg tmp = newVRegD(env);
3355 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3356 //ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3357 //ZZ HReg argR2 = newVRegI(env);
3358 //ZZ UInt size;
3359 //ZZ switch (e->Iex.Binop.op) {
3360 //ZZ case Iop_SarN8x8: size = 0; break;
3361 //ZZ case Iop_SarN16x4: size = 1; break;
3362 //ZZ case Iop_SarN32x2: size = 2; break;
3363 //ZZ case Iop_Sar64: size = 3; break;
3364 //ZZ default: vassert(0);
3365 //ZZ }
3366 //ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3367 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3368 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3369 //ZZ res, argL, tmp, size, False));
3370 //ZZ return res;
3371 //ZZ }
3372 //ZZ case Iop_CmpGT8Ux8:
3373 //ZZ case Iop_CmpGT16Ux4:
3374 //ZZ case Iop_CmpGT32Ux2: {
3375 //ZZ HReg res = newVRegD(env);
3376 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3377 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3378 //ZZ UInt size;
3379 //ZZ switch (e->Iex.Binop.op) {
3380 //ZZ case Iop_CmpGT8Ux8: size = 0; break;
3381 //ZZ case Iop_CmpGT16Ux4: size = 1; break;
3382 //ZZ case Iop_CmpGT32Ux2: size = 2; break;
3383 //ZZ default: vassert(0);
3384 //ZZ }
3385 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3386 //ZZ res, argL, argR, size, False));
3387 //ZZ return res;
3388 //ZZ }
3389 //ZZ case Iop_CmpGT8Sx8:
3390 //ZZ case Iop_CmpGT16Sx4:
3391 //ZZ case Iop_CmpGT32Sx2: {
3392 //ZZ HReg res = newVRegD(env);
3393 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3394 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3395 //ZZ UInt size;
3396 //ZZ switch (e->Iex.Binop.op) {
3397 //ZZ case Iop_CmpGT8Sx8: size = 0; break;
3398 //ZZ case Iop_CmpGT16Sx4: size = 1; break;
3399 //ZZ case Iop_CmpGT32Sx2: size = 2; break;
3400 //ZZ default: vassert(0);
3401 //ZZ }
3402 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3403 //ZZ res, argL, argR, size, False));
3404 //ZZ return res;
3405 //ZZ }
3406 //ZZ case Iop_CmpEQ8x8:
3407 //ZZ case Iop_CmpEQ16x4:
3408 //ZZ case Iop_CmpEQ32x2: {
3409 //ZZ HReg res = newVRegD(env);
3410 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3411 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3412 //ZZ UInt size;
3413 //ZZ switch (e->Iex.Binop.op) {
3414 //ZZ case Iop_CmpEQ8x8: size = 0; break;
3415 //ZZ case Iop_CmpEQ16x4: size = 1; break;
3416 //ZZ case Iop_CmpEQ32x2: size = 2; break;
3417 //ZZ default: vassert(0);
3418 //ZZ }
3419 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3420 //ZZ res, argL, argR, size, False));
3421 //ZZ return res;
3422 //ZZ }
3423 //ZZ case Iop_Mul8x8:
3424 //ZZ case Iop_Mul16x4:
3425 //ZZ case Iop_Mul32x2: {
3426 //ZZ HReg res = newVRegD(env);
3427 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3428 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3429 //ZZ UInt size = 0;
3430 //ZZ switch(e->Iex.Binop.op) {
3431 //ZZ case Iop_Mul8x8: size = 0; break;
3432 //ZZ case Iop_Mul16x4: size = 1; break;
3433 //ZZ case Iop_Mul32x2: size = 2; break;
3434 //ZZ default: vassert(0);
3435 //ZZ }
3436 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3437 //ZZ res, argL, argR, size, False));
3438 //ZZ return res;
3439 //ZZ }
3440 //ZZ case Iop_Mul32Fx2: {
3441 //ZZ HReg res = newVRegD(env);
3442 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3443 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3444 //ZZ UInt size = 0;
3445 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3446 //ZZ res, argL, argR, size, False));
3447 //ZZ return res;
3448 //ZZ }
3449 //ZZ case Iop_QDMulHi16Sx4:
3450 //ZZ case Iop_QDMulHi32Sx2: {
3451 //ZZ HReg res = newVRegD(env);
3452 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3453 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3454 //ZZ UInt size = 0;
3455 //ZZ switch(e->Iex.Binop.op) {
3456 //ZZ case Iop_QDMulHi16Sx4: size = 1; break;
3457 //ZZ case Iop_QDMulHi32Sx2: size = 2; break;
3458 //ZZ default: vassert(0);
3459 //ZZ }
3460 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3461 //ZZ res, argL, argR, size, False));
3462 //ZZ return res;
3463 //ZZ }
3464 //ZZ
3465 //ZZ case Iop_QRDMulHi16Sx4:
3466 //ZZ case Iop_QRDMulHi32Sx2: {
3467 //ZZ HReg res = newVRegD(env);
3468 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3469 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3470 //ZZ UInt size = 0;
3471 //ZZ switch(e->Iex.Binop.op) {
3472 //ZZ case Iop_QRDMulHi16Sx4: size = 1; break;
3473 //ZZ case Iop_QRDMulHi32Sx2: size = 2; break;
3474 //ZZ default: vassert(0);
3475 //ZZ }
3476 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3477 //ZZ res, argL, argR, size, False));
3478 //ZZ return res;
3479 //ZZ }
3480 //ZZ
3481 //ZZ case Iop_PwAdd8x8:
3482 //ZZ case Iop_PwAdd16x4:
3483 //ZZ case Iop_PwAdd32x2: {
3484 //ZZ HReg res = newVRegD(env);
3485 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3486 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3487 //ZZ UInt size = 0;
3488 //ZZ switch(e->Iex.Binop.op) {
3489 //ZZ case Iop_PwAdd8x8: size = 0; break;
3490 //ZZ case Iop_PwAdd16x4: size = 1; break;
3491 //ZZ case Iop_PwAdd32x2: size = 2; break;
3492 //ZZ default: vassert(0);
3493 //ZZ }
3494 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3495 //ZZ res, argL, argR, size, False));
3496 //ZZ return res;
3497 //ZZ }
3498 //ZZ case Iop_PwAdd32Fx2: {
3499 //ZZ HReg res = newVRegD(env);
3500 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3501 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3502 //ZZ UInt size = 0;
3503 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3504 //ZZ res, argL, argR, size, False));
3505 //ZZ return res;
3506 //ZZ }
3507 //ZZ case Iop_PwMin8Ux8:
3508 //ZZ case Iop_PwMin16Ux4:
3509 //ZZ case Iop_PwMin32Ux2: {
3510 //ZZ HReg res = newVRegD(env);
3511 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3512 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3513 //ZZ UInt size = 0;
3514 //ZZ switch(e->Iex.Binop.op) {
3515 //ZZ case Iop_PwMin8Ux8: size = 0; break;
3516 //ZZ case Iop_PwMin16Ux4: size = 1; break;
3517 //ZZ case Iop_PwMin32Ux2: size = 2; break;
3518 //ZZ default: vassert(0);
3519 //ZZ }
3520 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3521 //ZZ res, argL, argR, size, False));
3522 //ZZ return res;
3523 //ZZ }
3524 //ZZ case Iop_PwMin8Sx8:
3525 //ZZ case Iop_PwMin16Sx4:
3526 //ZZ case Iop_PwMin32Sx2: {
3527 //ZZ HReg res = newVRegD(env);
3528 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3529 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3530 //ZZ UInt size = 0;
3531 //ZZ switch(e->Iex.Binop.op) {
3532 //ZZ case Iop_PwMin8Sx8: size = 0; break;
3533 //ZZ case Iop_PwMin16Sx4: size = 1; break;
3534 //ZZ case Iop_PwMin32Sx2: size = 2; break;
3535 //ZZ default: vassert(0);
3536 //ZZ }
3537 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3538 //ZZ res, argL, argR, size, False));
3539 //ZZ return res;
3540 //ZZ }
3541 //ZZ case Iop_PwMax8Ux8:
3542 //ZZ case Iop_PwMax16Ux4:
3543 //ZZ case Iop_PwMax32Ux2: {
3544 //ZZ HReg res = newVRegD(env);
3545 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3546 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3547 //ZZ UInt size = 0;
3548 //ZZ switch(e->Iex.Binop.op) {
3549 //ZZ case Iop_PwMax8Ux8: size = 0; break;
3550 //ZZ case Iop_PwMax16Ux4: size = 1; break;
3551 //ZZ case Iop_PwMax32Ux2: size = 2; break;
3552 //ZZ default: vassert(0);
3553 //ZZ }
3554 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3555 //ZZ res, argL, argR, size, False));
3556 //ZZ return res;
3557 //ZZ }
3558 //ZZ case Iop_PwMax8Sx8:
3559 //ZZ case Iop_PwMax16Sx4:
3560 //ZZ case Iop_PwMax32Sx2: {
3561 //ZZ HReg res = newVRegD(env);
3562 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3563 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3564 //ZZ UInt size = 0;
3565 //ZZ switch(e->Iex.Binop.op) {
3566 //ZZ case Iop_PwMax8Sx8: size = 0; break;
3567 //ZZ case Iop_PwMax16Sx4: size = 1; break;
3568 //ZZ case Iop_PwMax32Sx2: size = 2; break;
3569 //ZZ default: vassert(0);
3570 //ZZ }
3571 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3572 //ZZ res, argL, argR, size, False));
3573 //ZZ return res;
3574 //ZZ }
3575 //ZZ case Iop_Perm8x8: {
3576 //ZZ HReg res = newVRegD(env);
3577 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3578 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3579 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3580 //ZZ res, argL, argR, 0, False));
3581 //ZZ return res;
3582 //ZZ }
3583 //ZZ case Iop_PolynomialMul8x8: {
3584 //ZZ HReg res = newVRegD(env);
3585 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3586 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3587 //ZZ UInt size = 0;
3588 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3589 //ZZ res, argL, argR, size, False));
3590 //ZZ return res;
3591 //ZZ }
3592 //ZZ case Iop_Max32Fx2: {
3593 //ZZ HReg res = newVRegD(env);
3594 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3595 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3596 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3597 //ZZ res, argL, argR, 2, False));
3598 //ZZ return res;
3599 //ZZ }
3600 //ZZ case Iop_Min32Fx2: {
3601 //ZZ HReg res = newVRegD(env);
3602 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3603 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3604 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3605 //ZZ res, argL, argR, 2, False));
3606 //ZZ return res;
3607 //ZZ }
3608 //ZZ case Iop_PwMax32Fx2: {
3609 //ZZ HReg res = newVRegD(env);
3610 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3611 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3612 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3613 //ZZ res, argL, argR, 2, False));
3614 //ZZ return res;
3615 //ZZ }
3616 //ZZ case Iop_PwMin32Fx2: {
3617 //ZZ HReg res = newVRegD(env);
3618 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3619 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3620 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3621 //ZZ res, argL, argR, 2, False));
3622 //ZZ return res;
3623 //ZZ }
3624 //ZZ case Iop_CmpGT32Fx2: {
3625 //ZZ HReg res = newVRegD(env);
3626 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3627 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3628 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3629 //ZZ res, argL, argR, 2, False));
3630 //ZZ return res;
3631 //ZZ }
3632 //ZZ case Iop_CmpGE32Fx2: {
3633 //ZZ HReg res = newVRegD(env);
3634 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3635 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3636 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3637 //ZZ res, argL, argR, 2, False));
3638 //ZZ return res;
3639 //ZZ }
3640 //ZZ case Iop_CmpEQ32Fx2: {
3641 //ZZ HReg res = newVRegD(env);
3642 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3643 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3644 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3645 //ZZ res, argL, argR, 2, False));
3646 //ZZ return res;
3647 //ZZ }
3648 //ZZ case Iop_F32ToFixed32Ux2_RZ:
3649 //ZZ case Iop_F32ToFixed32Sx2_RZ:
3650 //ZZ case Iop_Fixed32UToF32x2_RN:
3651 //ZZ case Iop_Fixed32SToF32x2_RN: {
3652 //ZZ HReg res = newVRegD(env);
3653 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3654 //ZZ ARMNeonUnOp op;
3655 //ZZ UInt imm6;
3656 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3657 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3658 //ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
3659 //ZZ "second argument less than 33 only\n");
3660 //ZZ }
3661 //ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3662 //ZZ vassert(imm6 <= 32 && imm6 > 0);
3663 //ZZ imm6 = 64 - imm6;
3664 //ZZ switch(e->Iex.Binop.op) {
3665 //ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3666 //ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3667 //ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3668 //ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3669 //ZZ default: vassert(0);
3670 //ZZ }
3671 //ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3672 //ZZ return res;
3673 //ZZ }
3674 //ZZ /*
3675 //ZZ FIXME: is this here or not?
3676 //ZZ case Iop_VDup8x8:
3677 //ZZ case Iop_VDup16x4:
3678 //ZZ case Iop_VDup32x2: {
3679 //ZZ HReg res = newVRegD(env);
3680 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3681 //ZZ UInt index;
3682 //ZZ UInt imm4;
3683 //ZZ UInt size = 0;
3684 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3685 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3686 //ZZ vpanic("ARM supports Iop_VDup with constant "
3687 //ZZ "second argument less than 16 only\n");
3688 //ZZ }
3689 //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3690 //ZZ switch(e->Iex.Binop.op) {
3691 //ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3692 //ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3693 //ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3694 //ZZ default: vassert(0);
3695 //ZZ }
3696 //ZZ if (imm4 >= 16) {
3697 //ZZ vpanic("ARM supports Iop_VDup with constant "
3698 //ZZ "second argument less than 16 only\n");
3699 //ZZ }
3700 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3701 //ZZ res, argL, imm4, False));
3702 //ZZ return res;
3703 //ZZ }
3704 //ZZ */
3705 //ZZ default:
3706 //ZZ break;
3707 //ZZ }
3708 //ZZ }
3709 //ZZ
3710 //ZZ /* --------- UNARY ops --------- */
3711 //ZZ if (e->tag == Iex_Unop) {
3712 //ZZ switch (e->Iex.Unop.op) {
3713 //ZZ
3714 //ZZ /* 32Uto64 */
3715 //ZZ case Iop_32Uto64: {
3716 //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3717 //ZZ HReg rHi = newVRegI(env);
3718 //ZZ HReg res = newVRegD(env);
3719 //ZZ addInstr(env, ARMInstr_Imm32(rHi, 0));
3720 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3721 //ZZ return res;
3722 //ZZ }
3723 //ZZ
3724 //ZZ /* 32Sto64 */
3725 //ZZ case Iop_32Sto64: {
3726 //ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3727 //ZZ HReg rHi = newVRegI(env);
3728 //ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo));
3729 //ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3730 //ZZ HReg res = newVRegD(env);
3731 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3732 //ZZ return res;
3733 //ZZ }
3734 //ZZ
3735 //ZZ /* The next 3 are pass-throughs */
3736 //ZZ /* ReinterpF64asI64 */
3737 //ZZ case Iop_ReinterpF64asI64:
3738 //ZZ /* Left64(e) */
3739 //ZZ case Iop_Left64:
3740 //ZZ /* CmpwNEZ64(e) */
3741 //ZZ case Iop_1Sto64: {
3742 //ZZ HReg rLo, rHi;
3743 //ZZ HReg res = newVRegD(env);
3744 //ZZ iselInt64Expr(&rHi, &rLo, env, e);
3745 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3746 //ZZ return res;
3747 //ZZ }
3748 //ZZ
3749 //ZZ case Iop_Not64: {
3750 //ZZ DECLARE_PATTERN(p_veqz_8x8);
3751 //ZZ DECLARE_PATTERN(p_veqz_16x4);
3752 //ZZ DECLARE_PATTERN(p_veqz_32x2);
3753 //ZZ DECLARE_PATTERN(p_vcge_8sx8);
3754 //ZZ DECLARE_PATTERN(p_vcge_16sx4);
3755 //ZZ DECLARE_PATTERN(p_vcge_32sx2);
3756 //ZZ DECLARE_PATTERN(p_vcge_8ux8);
3757 //ZZ DECLARE_PATTERN(p_vcge_16ux4);
3758 //ZZ DECLARE_PATTERN(p_vcge_32ux2);
3759 //ZZ DEFINE_PATTERN(p_veqz_8x8,
3760 //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3761 //ZZ DEFINE_PATTERN(p_veqz_16x4,
3762 //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3763 //ZZ DEFINE_PATTERN(p_veqz_32x2,
3764 //ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3765 //ZZ DEFINE_PATTERN(p_vcge_8sx8,
3766 //ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3767 //ZZ DEFINE_PATTERN(p_vcge_16sx4,
3768 //ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3769 //ZZ DEFINE_PATTERN(p_vcge_32sx2,
3770 //ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3771 //ZZ DEFINE_PATTERN(p_vcge_8ux8,
3772 //ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3773 //ZZ DEFINE_PATTERN(p_vcge_16ux4,
3774 //ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3775 //ZZ DEFINE_PATTERN(p_vcge_32ux2,
3776 //ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3777 //ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3778 //ZZ HReg res = newVRegD(env);
3779 //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3780 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3781 //ZZ return res;
3782 //ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3783 //ZZ HReg res = newVRegD(env);
3784 //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3785 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3786 //ZZ return res;
3787 //ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3788 //ZZ HReg res = newVRegD(env);
3789 //ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3790 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3791 //ZZ return res;
3792 //ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3793 //ZZ HReg res = newVRegD(env);
3794 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3795 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3796 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3797 //ZZ res, argL, argR, 0, False));
3798 //ZZ return res;
3799 //ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3800 //ZZ HReg res = newVRegD(env);
3801 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3802 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3803 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3804 //ZZ res, argL, argR, 1, False));
3805 //ZZ return res;
3806 //ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3807 //ZZ HReg res = newVRegD(env);
3808 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3809 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3810 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3811 //ZZ res, argL, argR, 2, False));
3812 //ZZ return res;
3813 //ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3814 //ZZ HReg res = newVRegD(env);
3815 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3816 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3817 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3818 //ZZ res, argL, argR, 0, False));
3819 //ZZ return res;
3820 //ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3821 //ZZ HReg res = newVRegD(env);
3822 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3823 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3824 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3825 //ZZ res, argL, argR, 1, False));
3826 //ZZ return res;
3827 //ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3828 //ZZ HReg res = newVRegD(env);
3829 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3830 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3831 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3832 //ZZ res, argL, argR, 2, False));
3833 //ZZ return res;
3834 //ZZ } else {
3835 //ZZ HReg res = newVRegD(env);
3836 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3837 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3838 //ZZ return res;
3839 //ZZ }
3840 //ZZ }
3841 //ZZ case Iop_Dup8x8:
3842 //ZZ case Iop_Dup16x4:
3843 //ZZ case Iop_Dup32x2: {
3844 //ZZ HReg res, arg;
3845 //ZZ UInt size;
3846 //ZZ DECLARE_PATTERN(p_vdup_8x8);
3847 //ZZ DECLARE_PATTERN(p_vdup_16x4);
3848 //ZZ DECLARE_PATTERN(p_vdup_32x2);
3849 //ZZ DEFINE_PATTERN(p_vdup_8x8,
3850 //ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3851 //ZZ DEFINE_PATTERN(p_vdup_16x4,
3852 //ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3853 //ZZ DEFINE_PATTERN(p_vdup_32x2,
3854 //ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3855 //ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3856 //ZZ UInt index;
3857 //ZZ UInt imm4;
3858 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
3859 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3860 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3861 //ZZ imm4 = (index << 1) + 1;
3862 //ZZ if (index < 8) {
3863 //ZZ res = newVRegD(env);
3864 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3865 //ZZ addInstr(env, ARMInstr_NUnaryS(
3866 //ZZ ARMneon_VDUP,
3867 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3868 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3869 //ZZ imm4, False
3870 //ZZ ));
3871 //ZZ return res;
3872 //ZZ }
3873 //ZZ }
3874 //ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3875 //ZZ UInt index;
3876 //ZZ UInt imm4;
3877 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
3878 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3879 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3880 //ZZ imm4 = (index << 2) + 2;
3881 //ZZ if (index < 4) {
3882 //ZZ res = newVRegD(env);
3883 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3884 //ZZ addInstr(env, ARMInstr_NUnaryS(
3885 //ZZ ARMneon_VDUP,
3886 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3887 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3888 //ZZ imm4, False
3889 //ZZ ));
3890 //ZZ return res;
3891 //ZZ }
3892 //ZZ }
3893 //ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3894 //ZZ UInt index;
3895 //ZZ UInt imm4;
3896 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
3897 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3898 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3899 //ZZ imm4 = (index << 3) + 4;
3900 //ZZ if (index < 2) {
3901 //ZZ res = newVRegD(env);
3902 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3903 //ZZ addInstr(env, ARMInstr_NUnaryS(
3904 //ZZ ARMneon_VDUP,
3905 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3906 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3907 //ZZ imm4, False
3908 //ZZ ));
3909 //ZZ return res;
3910 //ZZ }
3911 //ZZ }
3912 //ZZ }
3913 //ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3914 //ZZ res = newVRegD(env);
3915 //ZZ switch (e->Iex.Unop.op) {
3916 //ZZ case Iop_Dup8x8: size = 0; break;
3917 //ZZ case Iop_Dup16x4: size = 1; break;
3918 //ZZ case Iop_Dup32x2: size = 2; break;
3919 //ZZ default: vassert(0);
3920 //ZZ }
3921 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3922 //ZZ return res;
3923 //ZZ }
3924 //ZZ case Iop_Abs8x8:
3925 //ZZ case Iop_Abs16x4:
3926 //ZZ case Iop_Abs32x2: {
3927 //ZZ HReg res = newVRegD(env);
3928 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3929 //ZZ UInt size = 0;
3930 //ZZ switch(e->Iex.Binop.op) {
3931 //ZZ case Iop_Abs8x8: size = 0; break;
3932 //ZZ case Iop_Abs16x4: size = 1; break;
3933 //ZZ case Iop_Abs32x2: size = 2; break;
3934 //ZZ default: vassert(0);
3935 //ZZ }
3936 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3937 //ZZ return res;
3938 //ZZ }
3939 //ZZ case Iop_Reverse64_8x8:
3940 //ZZ case Iop_Reverse64_16x4:
3941 //ZZ case Iop_Reverse64_32x2: {
3942 //ZZ HReg res = newVRegD(env);
3943 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3944 //ZZ UInt size = 0;
3945 //ZZ switch(e->Iex.Binop.op) {
3946 //ZZ case Iop_Reverse64_8x8: size = 0; break;
3947 //ZZ case Iop_Reverse64_16x4: size = 1; break;
3948 //ZZ case Iop_Reverse64_32x2: size = 2; break;
3949 //ZZ default: vassert(0);
3950 //ZZ }
3951 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3952 //ZZ res, arg, size, False));
3953 //ZZ return res;
3954 //ZZ }
3955 //ZZ case Iop_Reverse32_8x8:
3956 //ZZ case Iop_Reverse32_16x4: {
3957 //ZZ HReg res = newVRegD(env);
3958 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3959 //ZZ UInt size = 0;
3960 //ZZ switch(e->Iex.Binop.op) {
3961 //ZZ case Iop_Reverse32_8x8: size = 0; break;
3962 //ZZ case Iop_Reverse32_16x4: size = 1; break;
3963 //ZZ default: vassert(0);
3964 //ZZ }
3965 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3966 //ZZ res, arg, size, False));
3967 //ZZ return res;
3968 //ZZ }
3969 //ZZ case Iop_Reverse16_8x8: {
3970 //ZZ HReg res = newVRegD(env);
3971 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3972 //ZZ UInt size = 0;
3973 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3974 //ZZ res, arg, size, False));
3975 //ZZ return res;
3976 //ZZ }
3977 //ZZ case Iop_CmpwNEZ64: {
3978 //ZZ HReg x_lsh = newVRegD(env);
3979 //ZZ HReg x_rsh = newVRegD(env);
3980 //ZZ HReg lsh_amt = newVRegD(env);
3981 //ZZ HReg rsh_amt = newVRegD(env);
3982 //ZZ HReg zero = newVRegD(env);
3983 //ZZ HReg tmp = newVRegD(env);
3984 //ZZ HReg tmp2 = newVRegD(env);
3985 //ZZ HReg res = newVRegD(env);
3986 //ZZ HReg x = newVRegD(env);
3987 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3988 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3989 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3990 //ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3991 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3992 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3993 //ZZ rsh_amt, zero, lsh_amt, 2, False));
3994 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3995 //ZZ x_lsh, x, lsh_amt, 3, False));
3996 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3997 //ZZ x_rsh, x, rsh_amt, 3, False));
3998 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3999 //ZZ tmp, x_lsh, x_rsh, 0, False));
4000 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4001 //ZZ res, tmp, x, 0, False));
4002 //ZZ return res;
4003 //ZZ }
4004 //ZZ case Iop_CmpNEZ8x8:
4005 //ZZ case Iop_CmpNEZ16x4:
4006 //ZZ case Iop_CmpNEZ32x2: {
4007 //ZZ HReg res = newVRegD(env);
4008 //ZZ HReg tmp = newVRegD(env);
4009 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4010 //ZZ UInt size;
4011 //ZZ switch (e->Iex.Unop.op) {
4012 //ZZ case Iop_CmpNEZ8x8: size = 0; break;
4013 //ZZ case Iop_CmpNEZ16x4: size = 1; break;
4014 //ZZ case Iop_CmpNEZ32x2: size = 2; break;
4015 //ZZ default: vassert(0);
4016 //ZZ }
4017 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
4018 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
4019 //ZZ return res;
4020 //ZZ }
4021 //ZZ case Iop_NarrowUn16to8x8:
4022 //ZZ case Iop_NarrowUn32to16x4:
4023 //ZZ case Iop_NarrowUn64to32x2: {
4024 //ZZ HReg res = newVRegD(env);
4025 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4026 //ZZ UInt size = 0;
4027 //ZZ switch(e->Iex.Binop.op) {
4028 //ZZ case Iop_NarrowUn16to8x8: size = 0; break;
4029 //ZZ case Iop_NarrowUn32to16x4: size = 1; break;
4030 //ZZ case Iop_NarrowUn64to32x2: size = 2; break;
4031 //ZZ default: vassert(0);
4032 //ZZ }
4033 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
4034 //ZZ res, arg, size, False));
4035 //ZZ return res;
4036 //ZZ }
4037 //ZZ case Iop_QNarrowUn16Sto8Sx8:
4038 //ZZ case Iop_QNarrowUn32Sto16Sx4:
4039 //ZZ case Iop_QNarrowUn64Sto32Sx2: {
4040 //ZZ HReg res = newVRegD(env);
4041 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4042 //ZZ UInt size = 0;
4043 //ZZ switch(e->Iex.Binop.op) {
4044 //ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
4045 //ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
4046 //ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
4047 //ZZ default: vassert(0);
4048 //ZZ }
4049 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
4050 //ZZ res, arg, size, False));
4051 //ZZ return res;
4052 //ZZ }
4053 //ZZ case Iop_QNarrowUn16Sto8Ux8:
4054 //ZZ case Iop_QNarrowUn32Sto16Ux4:
4055 //ZZ case Iop_QNarrowUn64Sto32Ux2: {
4056 //ZZ HReg res = newVRegD(env);
4057 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4058 //ZZ UInt size = 0;
4059 //ZZ switch(e->Iex.Binop.op) {
4060 //ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
4061 //ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
4062 //ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
4063 //ZZ default: vassert(0);
4064 //ZZ }
4065 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
4066 //ZZ res, arg, size, False));
4067 //ZZ return res;
4068 //ZZ }
4069 //ZZ case Iop_QNarrowUn16Uto8Ux8:
4070 //ZZ case Iop_QNarrowUn32Uto16Ux4:
4071 //ZZ case Iop_QNarrowUn64Uto32Ux2: {
4072 //ZZ HReg res = newVRegD(env);
4073 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4074 //ZZ UInt size = 0;
4075 //ZZ switch(e->Iex.Binop.op) {
4076 //ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
4077 //ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
4078 //ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
4079 //ZZ default: vassert(0);
4080 //ZZ }
4081 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
4082 //ZZ res, arg, size, False));
4083 //ZZ return res;
4084 //ZZ }
4085 //ZZ case Iop_PwAddL8Sx8:
4086 //ZZ case Iop_PwAddL16Sx4:
4087 //ZZ case Iop_PwAddL32Sx2: {
4088 //ZZ HReg res = newVRegD(env);
4089 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4090 //ZZ UInt size = 0;
4091 //ZZ switch(e->Iex.Binop.op) {
4092 //ZZ case Iop_PwAddL8Sx8: size = 0; break;
4093 //ZZ case Iop_PwAddL16Sx4: size = 1; break;
4094 //ZZ case Iop_PwAddL32Sx2: size = 2; break;
4095 //ZZ default: vassert(0);
4096 //ZZ }
4097 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4098 //ZZ res, arg, size, False));
4099 //ZZ return res;
4100 //ZZ }
4101 //ZZ case Iop_PwAddL8Ux8:
4102 //ZZ case Iop_PwAddL16Ux4:
4103 //ZZ case Iop_PwAddL32Ux2: {
4104 //ZZ HReg res = newVRegD(env);
4105 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4106 //ZZ UInt size = 0;
4107 //ZZ switch(e->Iex.Binop.op) {
4108 //ZZ case Iop_PwAddL8Ux8: size = 0; break;
4109 //ZZ case Iop_PwAddL16Ux4: size = 1; break;
4110 //ZZ case Iop_PwAddL32Ux2: size = 2; break;
4111 //ZZ default: vassert(0);
4112 //ZZ }
4113 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4114 //ZZ res, arg, size, False));
4115 //ZZ return res;
4116 //ZZ }
4117 //ZZ case Iop_Cnt8x8: {
4118 //ZZ HReg res = newVRegD(env);
4119 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4120 //ZZ UInt size = 0;
4121 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
4122 //ZZ res, arg, size, False));
4123 //ZZ return res;
4124 //ZZ }
4125 //ZZ case Iop_Clz8Sx8:
4126 //ZZ case Iop_Clz16Sx4:
4127 //ZZ case Iop_Clz32Sx2: {
4128 //ZZ HReg res = newVRegD(env);
4129 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4130 //ZZ UInt size = 0;
4131 //ZZ switch(e->Iex.Binop.op) {
4132 //ZZ case Iop_Clz8Sx8: size = 0; break;
4133 //ZZ case Iop_Clz16Sx4: size = 1; break;
4134 //ZZ case Iop_Clz32Sx2: size = 2; break;
4135 //ZZ default: vassert(0);
4136 //ZZ }
4137 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
4138 //ZZ res, arg, size, False));
4139 //ZZ return res;
4140 //ZZ }
4141 //ZZ case Iop_Cls8Sx8:
4142 //ZZ case Iop_Cls16Sx4:
4143 //ZZ case Iop_Cls32Sx2: {
4144 //ZZ HReg res = newVRegD(env);
4145 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4146 //ZZ UInt size = 0;
4147 //ZZ switch(e->Iex.Binop.op) {
4148 //ZZ case Iop_Cls8Sx8: size = 0; break;
4149 //ZZ case Iop_Cls16Sx4: size = 1; break;
4150 //ZZ case Iop_Cls32Sx2: size = 2; break;
4151 //ZZ default: vassert(0);
4152 //ZZ }
4153 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
4154 //ZZ res, arg, size, False));
4155 //ZZ return res;
4156 //ZZ }
4157 //ZZ case Iop_FtoI32Sx2_RZ: {
4158 //ZZ HReg res = newVRegD(env);
4159 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4160 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4161 //ZZ res, arg, 2, False));
4162 //ZZ return res;
4163 //ZZ }
4164 //ZZ case Iop_FtoI32Ux2_RZ: {
4165 //ZZ HReg res = newVRegD(env);
4166 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4167 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4168 //ZZ res, arg, 2, False));
4169 //ZZ return res;
4170 //ZZ }
4171 //ZZ case Iop_I32StoFx2: {
4172 //ZZ HReg res = newVRegD(env);
4173 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4174 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4175 //ZZ res, arg, 2, False));
4176 //ZZ return res;
4177 //ZZ }
4178 //ZZ case Iop_I32UtoFx2: {
4179 //ZZ HReg res = newVRegD(env);
4180 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4181 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4182 //ZZ res, arg, 2, False));
4183 //ZZ return res;
4184 //ZZ }
4185 //ZZ case Iop_F32toF16x4: {
4186 //ZZ HReg res = newVRegD(env);
4187 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4188 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
4189 //ZZ res, arg, 2, False));
4190 //ZZ return res;
4191 //ZZ }
4192 //ZZ case Iop_Recip32Fx2: {
4193 //ZZ HReg res = newVRegD(env);
4194 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4195 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4196 //ZZ res, argL, 0, False));
4197 //ZZ return res;
4198 //ZZ }
4199 //ZZ case Iop_Recip32x2: {
4200 //ZZ HReg res = newVRegD(env);
4201 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4202 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4203 //ZZ res, argL, 0, False));
4204 //ZZ return res;
4205 //ZZ }
4206 //ZZ case Iop_Abs32Fx2: {
4207 //ZZ DECLARE_PATTERN(p_vabd_32fx2);
4208 //ZZ DEFINE_PATTERN(p_vabd_32fx2,
4209 //ZZ unop(Iop_Abs32Fx2,
4210 //ZZ binop(Iop_Sub32Fx2,
4211 //ZZ bind(0),
4212 //ZZ bind(1))));
4213 //ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
4214 //ZZ HReg res = newVRegD(env);
4215 //ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
4216 //ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
4217 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4218 //ZZ res, argL, argR, 0, False));
4219 //ZZ return res;
4220 //ZZ } else {
4221 //ZZ HReg res = newVRegD(env);
4222 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4223 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4224 //ZZ res, arg, 0, False));
4225 //ZZ return res;
4226 //ZZ }
4227 //ZZ }
4228 //ZZ case Iop_Rsqrte32Fx2: {
4229 //ZZ HReg res = newVRegD(env);
4230 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4231 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4232 //ZZ res, arg, 0, False));
4233 //ZZ return res;
4234 //ZZ }
4235 //ZZ case Iop_Rsqrte32x2: {
4236 //ZZ HReg res = newVRegD(env);
4237 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4238 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4239 //ZZ res, arg, 0, False));
4240 //ZZ return res;
4241 //ZZ }
4242 //ZZ case Iop_Neg32Fx2: {
4243 //ZZ HReg res = newVRegD(env);
4244 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4245 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4246 //ZZ res, arg, 0, False));
4247 //ZZ return res;
4248 //ZZ }
4249 //ZZ default:
4250 //ZZ break;
4251 //ZZ }
4252 //ZZ } /* if (e->tag == Iex_Unop) */
4253 //ZZ
4254 //ZZ if (e->tag == Iex_Triop) {
4255 //ZZ IRTriop *triop = e->Iex.Triop.details;
4256 //ZZ
4257 //ZZ switch (triop->op) {
4258 //ZZ case Iop_Extract64: {
4259 //ZZ HReg res = newVRegD(env);
4260 //ZZ HReg argL = iselNeon64Expr(env, triop->arg1);
4261 //ZZ HReg argR = iselNeon64Expr(env, triop->arg2);
4262 //ZZ UInt imm4;
4263 //ZZ if (triop->arg3->tag != Iex_Const ||
4264 //ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
4265 //ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4266 //ZZ "third argument less than 16 only\n");
4267 //ZZ }
4268 //ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
4269 //ZZ if (imm4 >= 8) {
4270 //ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4271 //ZZ "third argument less than 16 only\n");
4272 //ZZ }
4273 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
4274 //ZZ res, argL, argR, imm4, False));
4275 //ZZ return res;
4276 //ZZ }
4277 //ZZ case Iop_SetElem8x8:
4278 //ZZ case Iop_SetElem16x4:
4279 //ZZ case Iop_SetElem32x2: {
4280 //ZZ HReg res = newVRegD(env);
4281 //ZZ HReg dreg = iselNeon64Expr(env, triop->arg1);
4282 //ZZ HReg arg = iselIntExpr_R(env, triop->arg3);
4283 //ZZ UInt index, size;
4284 //ZZ if (triop->arg2->tag != Iex_Const ||
4285 //ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
4286 //ZZ vpanic("ARM target supports SetElem with constant "
4287 //ZZ "second argument only\n");
4288 //ZZ }
4289 //ZZ index = triop->arg2->Iex.Const.con->Ico.U8;
4290 //ZZ switch (triop->op) {
4291 //ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
4292 //ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
4293 //ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
4294 //ZZ default: vassert(0);
4295 //ZZ }
4296 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
4297 //ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
4298 //ZZ mkARMNRS(ARMNRS_Scalar, res, index),
4299 //ZZ mkARMNRS(ARMNRS_Reg, arg, 0),
4300 //ZZ size, False));
4301 //ZZ return res;
4302 //ZZ }
4303 //ZZ default:
4304 //ZZ break;
4305 //ZZ }
4306 //ZZ }
4307 //ZZ
4308 //ZZ /* --------- MULTIPLEX --------- */
4309 //ZZ if (e->tag == Iex_ITE) { // VFD
4310 //ZZ HReg rLo, rHi;
4311 //ZZ HReg res = newVRegD(env);
4312 //ZZ iselInt64Expr(&rHi, &rLo, env, e);
4313 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
4314 //ZZ return res;
4315 //ZZ }
4316 //ZZ
4317 //ZZ ppIRExpr(e);
4318 //ZZ vpanic("iselNeon64Expr");
4319 //ZZ }
4320
4321
4322 /*---------------------------------------------------------*/
4323 /*--- ISEL: Vector (NEON) expressions (128 bit) ---*/
4324 /*---------------------------------------------------------*/
4325
iselV128Expr(ISelEnv * env,IRExpr * e)4326 static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
4327 {
4328 HReg r = iselV128Expr_wrk( env, e );
4329 vassert(hregClass(r) == HRcVec128);
4330 vassert(hregIsVirtual(r));
4331 return r;
4332 }
4333
4334 /* DO NOT CALL THIS DIRECTLY */
iselV128Expr_wrk(ISelEnv * env,IRExpr * e)4335 static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
4336 {
4337 IRType ty = typeOfIRExpr(env->type_env, e);
4338 vassert(e);
4339 vassert(ty == Ity_V128);
4340
4341 if (e->tag == Iex_RdTmp) {
4342 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4343 }
4344
4345 if (e->tag == Iex_Const) {
4346 /* Only a very limited range of constants is handled. */
4347 vassert(e->Iex.Const.con->tag == Ico_V128);
4348 UShort con = e->Iex.Const.con->Ico.V128;
4349 if (con == 0x0000) {
4350 HReg res = newVRegV(env);
4351 addInstr(env, ARM64Instr_VImmQ(res, con));
4352 return res;
4353 }
4354 /* Unhandled */
4355 goto v128_expr_bad;
4356 }
4357
4358 if (e->tag == Iex_Load) {
4359 HReg res = newVRegV(env);
4360 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
4361 vassert(ty == Ity_V128);
4362 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
4363 return res;
4364 }
4365
4366 if (e->tag == Iex_Get) {
4367 UInt offs = (UInt)e->Iex.Get.offset;
4368 if (offs < (1<<12)) {
4369 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4370 HReg res = newVRegV(env);
4371 vassert(ty == Ity_V128);
4372 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
4373 return res;
4374 }
4375 goto v128_expr_bad;
4376 }
4377
4378 if (e->tag == Iex_Unop) {
4379
4380 /* Iop_ZeroHIXXofV128 cases */
4381 UShort imm16 = 0;
4382 switch (e->Iex.Unop.op) {
4383 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
4384 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
4385 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
4386 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
4387 default: break;
4388 }
4389 if (imm16 != 0) {
4390 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
4391 HReg imm = newVRegV(env);
4392 HReg res = newVRegV(env);
4393 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
4394 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
4395 return res;
4396 }
4397
4398 /* Other cases */
4399 switch (e->Iex.Unop.op) {
4400 case Iop_Cnt8x16:
4401 case Iop_NotV128:
4402 case Iop_AddLV8Ux16:
4403 case Iop_AddLV16Ux8:
4404 case Iop_AddLV32Ux4:
4405 case Iop_AddLV8Sx16:
4406 case Iop_AddLV16Sx8:
4407 case Iop_AddLV32Sx4:
4408 case Iop_Abs64Fx2:
4409 case Iop_Abs32Fx4:
4410 case Iop_Neg64Fx2:
4411 case Iop_Neg32Fx4: {
4412 HReg res = newVRegV(env);
4413 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
4414 ARM64VecUnaryOp op = ARM64vecu_INVALID;
4415 switch (e->Iex.Unop.op) {
4416 case Iop_NotV128: op = ARM64vecu_NOT; break;
4417 case Iop_Cnt8x16: op = ARM64vecu_CNT; break;
4418 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
4419 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
4420 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
4421 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
4422 case Iop_AddLV8Ux16: op = ARM64vecu_UADDLV8x16; break;
4423 case Iop_AddLV16Ux8: op = ARM64vecu_UADDLV16x8; break;
4424 case Iop_AddLV32Ux4: op = ARM64vecu_UADDLV32x4; break;
4425 case Iop_AddLV8Sx16: op = ARM64vecu_SADDLV8x16; break;
4426 case Iop_AddLV16Sx8: op = ARM64vecu_SADDLV16x8; break;
4427 case Iop_AddLV32Sx4: op = ARM64vecu_SADDLV32x4; break;
4428 default: vassert(0);
4429 }
4430 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
4431 return res;
4432 }
4433 case Iop_CmpNEZ8x16:
4434 case Iop_CmpNEZ16x8:
4435 case Iop_CmpNEZ32x4:
4436 case Iop_CmpNEZ64x2: {
4437 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
4438 HReg zero = newVRegV(env);
4439 HReg res = newVRegV(env);
4440 ARM64VecBinOp cmp = ARM64vecb_INVALID;
4441 switch (e->Iex.Unop.op) {
4442 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
4443 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
4444 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
4445 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
4446 default: vassert(0);
4447 }
4448 // This is pretty feeble. Better: use CMP against zero
4449 // and avoid the extra instruction and extra register.
4450 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
4451 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
4452 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
4453 return res;
4454 }
4455
4456 case Iop_Widen8Uto16x8:
4457 case Iop_Widen16Uto32x4:
4458 case Iop_Widen32Uto64x2:
4459 case Iop_Widen8Sto16x8:
4460 case Iop_Widen16Sto32x4:
4461 case Iop_Widen32Sto64x2: {
4462 HReg res = newVRegV(env);
4463 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4464 ARM64VecUnaryOp wop = ARM64vecu_INVALID;
4465 switch (e->Iex.Unop.op) {
4466 case Iop_Widen8Uto16x8: wop = ARM64vecu_VMOVL8U; break;
4467 case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break;
4468 case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break;
4469 case Iop_Widen8Sto16x8: wop = ARM64vecu_VMOVL8S; break;
4470 case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break;
4471 case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break;
4472 default: vassert(0);
4473 }
4474 addInstr(env, ARM64Instr_VUnaryV(wop, res, arg));
4475 return res;
4476 }
4477 //ZZ case Iop_NotV128: {
4478 //ZZ DECLARE_PATTERN(p_veqz_8x16);
4479 //ZZ DECLARE_PATTERN(p_veqz_16x8);
4480 //ZZ DECLARE_PATTERN(p_veqz_32x4);
4481 //ZZ DECLARE_PATTERN(p_vcge_8sx16);
4482 //ZZ DECLARE_PATTERN(p_vcge_16sx8);
4483 //ZZ DECLARE_PATTERN(p_vcge_32sx4);
4484 //ZZ DECLARE_PATTERN(p_vcge_8ux16);
4485 //ZZ DECLARE_PATTERN(p_vcge_16ux8);
4486 //ZZ DECLARE_PATTERN(p_vcge_32ux4);
4487 //ZZ DEFINE_PATTERN(p_veqz_8x16,
4488 //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4489 //ZZ DEFINE_PATTERN(p_veqz_16x8,
4490 //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4491 //ZZ DEFINE_PATTERN(p_veqz_32x4,
4492 //ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4493 //ZZ DEFINE_PATTERN(p_vcge_8sx16,
4494 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4495 //ZZ DEFINE_PATTERN(p_vcge_16sx8,
4496 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4497 //ZZ DEFINE_PATTERN(p_vcge_32sx4,
4498 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4499 //ZZ DEFINE_PATTERN(p_vcge_8ux16,
4500 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4501 //ZZ DEFINE_PATTERN(p_vcge_16ux8,
4502 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4503 //ZZ DEFINE_PATTERN(p_vcge_32ux4,
4504 //ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4505 //ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4506 //ZZ HReg res = newVRegV(env);
4507 //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4508 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4509 //ZZ return res;
4510 //ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4511 //ZZ HReg res = newVRegV(env);
4512 //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4513 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4514 //ZZ return res;
4515 //ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4516 //ZZ HReg res = newVRegV(env);
4517 //ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4518 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4519 //ZZ return res;
4520 //ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4521 //ZZ HReg res = newVRegV(env);
4522 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4523 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4524 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4525 //ZZ res, argL, argR, 0, True));
4526 //ZZ return res;
4527 //ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4528 //ZZ HReg res = newVRegV(env);
4529 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4530 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4531 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4532 //ZZ res, argL, argR, 1, True));
4533 //ZZ return res;
4534 //ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4535 //ZZ HReg res = newVRegV(env);
4536 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4537 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4538 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4539 //ZZ res, argL, argR, 2, True));
4540 //ZZ return res;
4541 //ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4542 //ZZ HReg res = newVRegV(env);
4543 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4544 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4545 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4546 //ZZ res, argL, argR, 0, True));
4547 //ZZ return res;
4548 //ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4549 //ZZ HReg res = newVRegV(env);
4550 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4551 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4552 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4553 //ZZ res, argL, argR, 1, True));
4554 //ZZ return res;
4555 //ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4556 //ZZ HReg res = newVRegV(env);
4557 //ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4558 //ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4559 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4560 //ZZ res, argL, argR, 2, True));
4561 //ZZ return res;
4562 //ZZ } else {
4563 //ZZ HReg res = newVRegV(env);
4564 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4565 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4566 //ZZ return res;
4567 //ZZ }
4568 //ZZ }
4569 //ZZ case Iop_Dup8x16:
4570 //ZZ case Iop_Dup16x8:
4571 //ZZ case Iop_Dup32x4: {
4572 //ZZ HReg res, arg;
4573 //ZZ UInt size;
4574 //ZZ DECLARE_PATTERN(p_vdup_8x16);
4575 //ZZ DECLARE_PATTERN(p_vdup_16x8);
4576 //ZZ DECLARE_PATTERN(p_vdup_32x4);
4577 //ZZ DEFINE_PATTERN(p_vdup_8x16,
4578 //ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4579 //ZZ DEFINE_PATTERN(p_vdup_16x8,
4580 //ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4581 //ZZ DEFINE_PATTERN(p_vdup_32x4,
4582 //ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4583 //ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4584 //ZZ UInt index;
4585 //ZZ UInt imm4;
4586 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
4587 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4588 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4589 //ZZ imm4 = (index << 1) + 1;
4590 //ZZ if (index < 8) {
4591 //ZZ res = newVRegV(env);
4592 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4593 //ZZ addInstr(env, ARMInstr_NUnaryS(
4594 //ZZ ARMneon_VDUP,
4595 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4596 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4597 //ZZ imm4, True
4598 //ZZ ));
4599 //ZZ return res;
4600 //ZZ }
4601 //ZZ }
4602 //ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4603 //ZZ UInt index;
4604 //ZZ UInt imm4;
4605 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
4606 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4607 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4608 //ZZ imm4 = (index << 2) + 2;
4609 //ZZ if (index < 4) {
4610 //ZZ res = newVRegV(env);
4611 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4612 //ZZ addInstr(env, ARMInstr_NUnaryS(
4613 //ZZ ARMneon_VDUP,
4614 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4615 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4616 //ZZ imm4, True
4617 //ZZ ));
4618 //ZZ return res;
4619 //ZZ }
4620 //ZZ }
4621 //ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4622 //ZZ UInt index;
4623 //ZZ UInt imm4;
4624 //ZZ if (mi.bindee[1]->tag == Iex_Const &&
4625 //ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4626 //ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4627 //ZZ imm4 = (index << 3) + 4;
4628 //ZZ if (index < 2) {
4629 //ZZ res = newVRegV(env);
4630 //ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4631 //ZZ addInstr(env, ARMInstr_NUnaryS(
4632 //ZZ ARMneon_VDUP,
4633 //ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4634 //ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4635 //ZZ imm4, True
4636 //ZZ ));
4637 //ZZ return res;
4638 //ZZ }
4639 //ZZ }
4640 //ZZ }
4641 //ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4642 //ZZ res = newVRegV(env);
4643 //ZZ switch (e->Iex.Unop.op) {
4644 //ZZ case Iop_Dup8x16: size = 0; break;
4645 //ZZ case Iop_Dup16x8: size = 1; break;
4646 //ZZ case Iop_Dup32x4: size = 2; break;
4647 //ZZ default: vassert(0);
4648 //ZZ }
4649 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4650 //ZZ return res;
4651 //ZZ }
4652 //ZZ case Iop_Abs8x16:
4653 //ZZ case Iop_Abs16x8:
4654 //ZZ case Iop_Abs32x4: {
4655 //ZZ HReg res = newVRegV(env);
4656 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4657 //ZZ UInt size = 0;
4658 //ZZ switch(e->Iex.Binop.op) {
4659 //ZZ case Iop_Abs8x16: size = 0; break;
4660 //ZZ case Iop_Abs16x8: size = 1; break;
4661 //ZZ case Iop_Abs32x4: size = 2; break;
4662 //ZZ default: vassert(0);
4663 //ZZ }
4664 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4665 //ZZ return res;
4666 //ZZ }
4667 //ZZ case Iop_Reverse64_8x16:
4668 //ZZ case Iop_Reverse64_16x8:
4669 //ZZ case Iop_Reverse64_32x4: {
4670 //ZZ HReg res = newVRegV(env);
4671 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4672 //ZZ UInt size = 0;
4673 //ZZ switch(e->Iex.Binop.op) {
4674 //ZZ case Iop_Reverse64_8x16: size = 0; break;
4675 //ZZ case Iop_Reverse64_16x8: size = 1; break;
4676 //ZZ case Iop_Reverse64_32x4: size = 2; break;
4677 //ZZ default: vassert(0);
4678 //ZZ }
4679 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4680 //ZZ res, arg, size, True));
4681 //ZZ return res;
4682 //ZZ }
4683 //ZZ case Iop_Reverse32_8x16:
4684 //ZZ case Iop_Reverse32_16x8: {
4685 //ZZ HReg res = newVRegV(env);
4686 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4687 //ZZ UInt size = 0;
4688 //ZZ switch(e->Iex.Binop.op) {
4689 //ZZ case Iop_Reverse32_8x16: size = 0; break;
4690 //ZZ case Iop_Reverse32_16x8: size = 1; break;
4691 //ZZ default: vassert(0);
4692 //ZZ }
4693 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4694 //ZZ res, arg, size, True));
4695 //ZZ return res;
4696 //ZZ }
4697 //ZZ case Iop_Reverse16_8x16: {
4698 //ZZ HReg res = newVRegV(env);
4699 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4700 //ZZ UInt size = 0;
4701 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4702 //ZZ res, arg, size, True));
4703 //ZZ return res;
4704 //ZZ }
4705 //ZZ case Iop_CmpNEZ64x2: {
4706 //ZZ HReg x_lsh = newVRegV(env);
4707 //ZZ HReg x_rsh = newVRegV(env);
4708 //ZZ HReg lsh_amt = newVRegV(env);
4709 //ZZ HReg rsh_amt = newVRegV(env);
4710 //ZZ HReg zero = newVRegV(env);
4711 //ZZ HReg tmp = newVRegV(env);
4712 //ZZ HReg tmp2 = newVRegV(env);
4713 //ZZ HReg res = newVRegV(env);
4714 //ZZ HReg x = newVRegV(env);
4715 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4716 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4717 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4718 //ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4719 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4720 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4721 //ZZ rsh_amt, zero, lsh_amt, 2, True));
4722 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4723 //ZZ x_lsh, x, lsh_amt, 3, True));
4724 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4725 //ZZ x_rsh, x, rsh_amt, 3, True));
4726 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4727 //ZZ tmp, x_lsh, x_rsh, 0, True));
4728 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4729 //ZZ res, tmp, x, 0, True));
4730 //ZZ return res;
4731 //ZZ }
4732 //ZZ case Iop_Widen8Sto16x8:
4733 //ZZ case Iop_Widen16Sto32x4:
4734 //ZZ case Iop_Widen32Sto64x2: {
4735 //ZZ HReg res = newVRegV(env);
4736 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4737 //ZZ UInt size;
4738 //ZZ switch (e->Iex.Unop.op) {
4739 //ZZ case Iop_Widen8Sto16x8: size = 0; break;
4740 //ZZ case Iop_Widen16Sto32x4: size = 1; break;
4741 //ZZ case Iop_Widen32Sto64x2: size = 2; break;
4742 //ZZ default: vassert(0);
4743 //ZZ }
4744 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4745 //ZZ res, arg, size, True));
4746 //ZZ return res;
4747 //ZZ }
4748 //ZZ case Iop_PwAddL8Sx16:
4749 //ZZ case Iop_PwAddL16Sx8:
4750 //ZZ case Iop_PwAddL32Sx4: {
4751 //ZZ HReg res = newVRegV(env);
4752 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4753 //ZZ UInt size = 0;
4754 //ZZ switch(e->Iex.Binop.op) {
4755 //ZZ case Iop_PwAddL8Sx16: size = 0; break;
4756 //ZZ case Iop_PwAddL16Sx8: size = 1; break;
4757 //ZZ case Iop_PwAddL32Sx4: size = 2; break;
4758 //ZZ default: vassert(0);
4759 //ZZ }
4760 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4761 //ZZ res, arg, size, True));
4762 //ZZ return res;
4763 //ZZ }
4764 //ZZ case Iop_PwAddL8Ux16:
4765 //ZZ case Iop_PwAddL16Ux8:
4766 //ZZ case Iop_PwAddL32Ux4: {
4767 //ZZ HReg res = newVRegV(env);
4768 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4769 //ZZ UInt size = 0;
4770 //ZZ switch(e->Iex.Binop.op) {
4771 //ZZ case Iop_PwAddL8Ux16: size = 0; break;
4772 //ZZ case Iop_PwAddL16Ux8: size = 1; break;
4773 //ZZ case Iop_PwAddL32Ux4: size = 2; break;
4774 //ZZ default: vassert(0);
4775 //ZZ }
4776 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4777 //ZZ res, arg, size, True));
4778 //ZZ return res;
4779 //ZZ }
4780 //ZZ case Iop_Cnt8x16: {
4781 //ZZ HReg res = newVRegV(env);
4782 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4783 //ZZ UInt size = 0;
4784 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4785 //ZZ return res;
4786 //ZZ }
4787 //ZZ case Iop_Clz8Sx16:
4788 //ZZ case Iop_Clz16Sx8:
4789 //ZZ case Iop_Clz32Sx4: {
4790 //ZZ HReg res = newVRegV(env);
4791 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4792 //ZZ UInt size = 0;
4793 //ZZ switch(e->Iex.Binop.op) {
4794 //ZZ case Iop_Clz8Sx16: size = 0; break;
4795 //ZZ case Iop_Clz16Sx8: size = 1; break;
4796 //ZZ case Iop_Clz32Sx4: size = 2; break;
4797 //ZZ default: vassert(0);
4798 //ZZ }
4799 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4800 //ZZ return res;
4801 //ZZ }
4802 //ZZ case Iop_Cls8Sx16:
4803 //ZZ case Iop_Cls16Sx8:
4804 //ZZ case Iop_Cls32Sx4: {
4805 //ZZ HReg res = newVRegV(env);
4806 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4807 //ZZ UInt size = 0;
4808 //ZZ switch(e->Iex.Binop.op) {
4809 //ZZ case Iop_Cls8Sx16: size = 0; break;
4810 //ZZ case Iop_Cls16Sx8: size = 1; break;
4811 //ZZ case Iop_Cls32Sx4: size = 2; break;
4812 //ZZ default: vassert(0);
4813 //ZZ }
4814 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4815 //ZZ return res;
4816 //ZZ }
4817 //ZZ case Iop_FtoI32Sx4_RZ: {
4818 //ZZ HReg res = newVRegV(env);
4819 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4820 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4821 //ZZ res, arg, 2, True));
4822 //ZZ return res;
4823 //ZZ }
4824 //ZZ case Iop_FtoI32Ux4_RZ: {
4825 //ZZ HReg res = newVRegV(env);
4826 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4827 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4828 //ZZ res, arg, 2, True));
4829 //ZZ return res;
4830 //ZZ }
4831 //ZZ case Iop_I32StoFx4: {
4832 //ZZ HReg res = newVRegV(env);
4833 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4834 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4835 //ZZ res, arg, 2, True));
4836 //ZZ return res;
4837 //ZZ }
4838 //ZZ case Iop_I32UtoFx4: {
4839 //ZZ HReg res = newVRegV(env);
4840 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4841 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4842 //ZZ res, arg, 2, True));
4843 //ZZ return res;
4844 //ZZ }
4845 //ZZ case Iop_F16toF32x4: {
4846 //ZZ HReg res = newVRegV(env);
4847 //ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4848 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4849 //ZZ res, arg, 2, True));
4850 //ZZ return res;
4851 //ZZ }
4852 //ZZ case Iop_Recip32Fx4: {
4853 //ZZ HReg res = newVRegV(env);
4854 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4855 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4856 //ZZ res, argL, 0, True));
4857 //ZZ return res;
4858 //ZZ }
4859 //ZZ case Iop_Recip32x4: {
4860 //ZZ HReg res = newVRegV(env);
4861 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4862 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4863 //ZZ res, argL, 0, True));
4864 //ZZ return res;
4865 //ZZ }
4866 //ZZ case Iop_Rsqrte32Fx4: {
4867 //ZZ HReg res = newVRegV(env);
4868 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4869 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4870 //ZZ res, argL, 0, True));
4871 //ZZ return res;
4872 //ZZ }
4873 //ZZ case Iop_Rsqrte32x4: {
4874 //ZZ HReg res = newVRegV(env);
4875 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4876 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4877 //ZZ res, argL, 0, True));
4878 //ZZ return res;
4879 //ZZ }
4880 /* ... */
4881 default:
4882 break;
4883 } /* switch on the unop */
4884 } /* if (e->tag == Iex_Unop) */
4885
4886 if (e->tag == Iex_Binop) {
4887 switch (e->Iex.Binop.op) {
4888 case Iop_64HLtoV128: {
4889 HReg res = newVRegV(env);
4890 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
4891 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4892 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
4893 return res;
4894 }
4895 //ZZ case Iop_AndV128: {
4896 //ZZ HReg res = newVRegV(env);
4897 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4898 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4899 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4900 //ZZ res, argL, argR, 4, True));
4901 //ZZ return res;
4902 //ZZ }
4903 //ZZ case Iop_OrV128: {
4904 //ZZ HReg res = newVRegV(env);
4905 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4906 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4907 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4908 //ZZ res, argL, argR, 4, True));
4909 //ZZ return res;
4910 //ZZ }
4911 //ZZ case Iop_XorV128: {
4912 //ZZ HReg res = newVRegV(env);
4913 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4914 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4915 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4916 //ZZ res, argL, argR, 4, True));
4917 //ZZ return res;
4918 //ZZ }
4919 //ZZ case Iop_Add8x16:
4920 //ZZ case Iop_Add16x8:
4921 //ZZ case Iop_Add32x4:
4922 case Iop_AndV128:
4923 case Iop_OrV128:
4924 case Iop_XorV128:
4925 case Iop_Max32Ux4:
4926 case Iop_Max16Ux8:
4927 case Iop_Max8Ux16:
4928 case Iop_Min32Ux4:
4929 case Iop_Min16Ux8:
4930 case Iop_Min8Ux16:
4931 case Iop_Max32Sx4:
4932 case Iop_Max16Sx8:
4933 case Iop_Max8Sx16:
4934 case Iop_Min32Sx4:
4935 case Iop_Min16Sx8:
4936 case Iop_Min8Sx16:
4937 case Iop_Add64x2:
4938 case Iop_Add32x4:
4939 case Iop_Add16x8:
4940 case Iop_Add8x16:
4941 case Iop_Sub64x2:
4942 case Iop_Sub32x4:
4943 case Iop_Sub16x8:
4944 case Iop_Sub8x16:
4945 case Iop_Mul32x4:
4946 case Iop_Mul16x8:
4947 case Iop_Mul8x16:
4948 case Iop_CmpEQ64x2:
4949 case Iop_CmpEQ32x4:
4950 case Iop_CmpEQ16x8:
4951 case Iop_CmpEQ8x16:
4952 case Iop_CmpGT64Ux2:
4953 case Iop_CmpGT32Ux4:
4954 case Iop_CmpGT16Ux8:
4955 case Iop_CmpGT8Ux16:
4956 case Iop_CmpGT64Sx2:
4957 case Iop_CmpGT32Sx4:
4958 case Iop_CmpGT16Sx8:
4959 case Iop_CmpGT8Sx16:
4960 case Iop_CmpEQ64Fx2:
4961 case Iop_CmpEQ32Fx4:
4962 case Iop_CmpLE64Fx2:
4963 case Iop_CmpLE32Fx4:
4964 case Iop_CmpLT64Fx2:
4965 case Iop_CmpLT32Fx4:
4966 case Iop_Perm8x16:
4967 {
4968 HReg res = newVRegV(env);
4969 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
4970 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
4971 Bool sw = False;
4972 ARM64VecBinOp op = ARM64vecb_INVALID;
4973 switch (e->Iex.Binop.op) {
4974 case Iop_AndV128: op = ARM64vecb_AND; break;
4975 case Iop_OrV128: op = ARM64vecb_ORR; break;
4976 case Iop_XorV128: op = ARM64vecb_XOR; break;
4977 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
4978 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
4979 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
4980 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
4981 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
4982 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
4983 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
4984 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
4985 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
4986 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
4987 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
4988 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
4989 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
4990 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
4991 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
4992 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
4993 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
4994 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
4995 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
4996 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
4997 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
4998 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
4999 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
5000 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
5001 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
5002 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
5003 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
5004 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
5005 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
5006 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
5007 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
5008 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
5009 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
5010 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
5011 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
5012 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
5013 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
5014 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
5015 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
5016 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
5017 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
5018 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
5019 default: vassert(0);
5020 }
5021 if (sw) {
5022 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
5023 } else {
5024 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
5025 }
5026 return res;
5027 }
5028 //ZZ case Iop_Add32Fx4: {
5029 //ZZ HReg res = newVRegV(env);
5030 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5031 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5032 //ZZ UInt size = 0;
5033 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
5034 //ZZ res, argL, argR, size, True));
5035 //ZZ return res;
5036 //ZZ }
5037 //ZZ case Iop_Recps32Fx4: {
5038 //ZZ HReg res = newVRegV(env);
5039 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5040 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5041 //ZZ UInt size = 0;
5042 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
5043 //ZZ res, argL, argR, size, True));
5044 //ZZ return res;
5045 //ZZ }
5046 //ZZ case Iop_Rsqrts32Fx4: {
5047 //ZZ HReg res = newVRegV(env);
5048 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5049 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5050 //ZZ UInt size = 0;
5051 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
5052 //ZZ res, argL, argR, size, True));
5053 //ZZ return res;
5054 //ZZ }
5055 //ZZ
5056 //ZZ // These 6 verified 18 Apr 2013
5057 //ZZ case Iop_InterleaveEvenLanes8x16:
5058 //ZZ case Iop_InterleaveOddLanes8x16:
5059 //ZZ case Iop_InterleaveEvenLanes16x8:
5060 //ZZ case Iop_InterleaveOddLanes16x8:
5061 //ZZ case Iop_InterleaveEvenLanes32x4:
5062 //ZZ case Iop_InterleaveOddLanes32x4: {
5063 //ZZ HReg rD = newVRegV(env);
5064 //ZZ HReg rM = newVRegV(env);
5065 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5066 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5067 //ZZ UInt size;
5068 //ZZ Bool resRd; // is the result in rD or rM ?
5069 //ZZ switch (e->Iex.Binop.op) {
5070 //ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
5071 //ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
5072 //ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
5073 //ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
5074 //ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
5075 //ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
5076 //ZZ default: vassert(0);
5077 //ZZ }
5078 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5079 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5080 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
5081 //ZZ return resRd ? rD : rM;
5082 //ZZ }
5083 //ZZ
5084 //ZZ // These 6 verified 18 Apr 2013
5085 //ZZ case Iop_InterleaveHI8x16:
5086 //ZZ case Iop_InterleaveLO8x16:
5087 //ZZ case Iop_InterleaveHI16x8:
5088 //ZZ case Iop_InterleaveLO16x8:
5089 //ZZ case Iop_InterleaveHI32x4:
5090 //ZZ case Iop_InterleaveLO32x4: {
5091 //ZZ HReg rD = newVRegV(env);
5092 //ZZ HReg rM = newVRegV(env);
5093 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5094 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5095 //ZZ UInt size;
5096 //ZZ Bool resRd; // is the result in rD or rM ?
5097 //ZZ switch (e->Iex.Binop.op) {
5098 //ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
5099 //ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
5100 //ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
5101 //ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
5102 //ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
5103 //ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
5104 //ZZ default: vassert(0);
5105 //ZZ }
5106 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5107 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5108 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
5109 //ZZ return resRd ? rD : rM;
5110 //ZZ }
5111 //ZZ
5112 //ZZ // These 6 verified 18 Apr 2013
5113 //ZZ case Iop_CatOddLanes8x16:
5114 //ZZ case Iop_CatEvenLanes8x16:
5115 //ZZ case Iop_CatOddLanes16x8:
5116 //ZZ case Iop_CatEvenLanes16x8:
5117 //ZZ case Iop_CatOddLanes32x4:
5118 //ZZ case Iop_CatEvenLanes32x4: {
5119 //ZZ HReg rD = newVRegV(env);
5120 //ZZ HReg rM = newVRegV(env);
5121 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5122 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5123 //ZZ UInt size;
5124 //ZZ Bool resRd; // is the result in rD or rM ?
5125 //ZZ switch (e->Iex.Binop.op) {
5126 //ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
5127 //ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
5128 //ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
5129 //ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
5130 //ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
5131 //ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
5132 //ZZ default: vassert(0);
5133 //ZZ }
5134 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5135 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5136 //ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
5137 //ZZ return resRd ? rD : rM;
5138 //ZZ }
5139 //ZZ
5140 //ZZ case Iop_QAdd8Ux16:
5141 //ZZ case Iop_QAdd16Ux8:
5142 //ZZ case Iop_QAdd32Ux4:
5143 //ZZ case Iop_QAdd64Ux2: {
5144 //ZZ HReg res = newVRegV(env);
5145 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5146 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5147 //ZZ UInt size;
5148 //ZZ switch (e->Iex.Binop.op) {
5149 //ZZ case Iop_QAdd8Ux16: size = 0; break;
5150 //ZZ case Iop_QAdd16Ux8: size = 1; break;
5151 //ZZ case Iop_QAdd32Ux4: size = 2; break;
5152 //ZZ case Iop_QAdd64Ux2: size = 3; break;
5153 //ZZ default:
5154 //ZZ ppIROp(e->Iex.Binop.op);
5155 //ZZ vpanic("Illegal element size in VQADDU");
5156 //ZZ }
5157 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
5158 //ZZ res, argL, argR, size, True));
5159 //ZZ return res;
5160 //ZZ }
5161 //ZZ case Iop_QAdd8Sx16:
5162 //ZZ case Iop_QAdd16Sx8:
5163 //ZZ case Iop_QAdd32Sx4:
5164 //ZZ case Iop_QAdd64Sx2: {
5165 //ZZ HReg res = newVRegV(env);
5166 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5167 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5168 //ZZ UInt size;
5169 //ZZ switch (e->Iex.Binop.op) {
5170 //ZZ case Iop_QAdd8Sx16: size = 0; break;
5171 //ZZ case Iop_QAdd16Sx8: size = 1; break;
5172 //ZZ case Iop_QAdd32Sx4: size = 2; break;
5173 //ZZ case Iop_QAdd64Sx2: size = 3; break;
5174 //ZZ default:
5175 //ZZ ppIROp(e->Iex.Binop.op);
5176 //ZZ vpanic("Illegal element size in VQADDS");
5177 //ZZ }
5178 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
5179 //ZZ res, argL, argR, size, True));
5180 //ZZ return res;
5181 //ZZ }
5182 //ZZ case Iop_Sub8x16:
5183 //ZZ case Iop_Sub16x8:
5184 //ZZ case Iop_Sub32x4:
5185 //ZZ case Iop_Sub64x2: {
5186 //ZZ HReg res = newVRegV(env);
5187 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5188 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5189 //ZZ UInt size;
5190 //ZZ switch (e->Iex.Binop.op) {
5191 //ZZ case Iop_Sub8x16: size = 0; break;
5192 //ZZ case Iop_Sub16x8: size = 1; break;
5193 //ZZ case Iop_Sub32x4: size = 2; break;
5194 //ZZ case Iop_Sub64x2: size = 3; break;
5195 //ZZ default:
5196 //ZZ ppIROp(e->Iex.Binop.op);
5197 //ZZ vpanic("Illegal element size in VSUB");
5198 //ZZ }
5199 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5200 //ZZ res, argL, argR, size, True));
5201 //ZZ return res;
5202 //ZZ }
5203 //ZZ case Iop_Sub32Fx4: {
5204 //ZZ HReg res = newVRegV(env);
5205 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5206 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5207 //ZZ UInt size = 0;
5208 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
5209 //ZZ res, argL, argR, size, True));
5210 //ZZ return res;
5211 //ZZ }
5212 //ZZ case Iop_QSub8Ux16:
5213 //ZZ case Iop_QSub16Ux8:
5214 //ZZ case Iop_QSub32Ux4:
5215 //ZZ case Iop_QSub64Ux2: {
5216 //ZZ HReg res = newVRegV(env);
5217 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5218 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5219 //ZZ UInt size;
5220 //ZZ switch (e->Iex.Binop.op) {
5221 //ZZ case Iop_QSub8Ux16: size = 0; break;
5222 //ZZ case Iop_QSub16Ux8: size = 1; break;
5223 //ZZ case Iop_QSub32Ux4: size = 2; break;
5224 //ZZ case Iop_QSub64Ux2: size = 3; break;
5225 //ZZ default:
5226 //ZZ ppIROp(e->Iex.Binop.op);
5227 //ZZ vpanic("Illegal element size in VQSUBU");
5228 //ZZ }
5229 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
5230 //ZZ res, argL, argR, size, True));
5231 //ZZ return res;
5232 //ZZ }
5233 //ZZ case Iop_QSub8Sx16:
5234 //ZZ case Iop_QSub16Sx8:
5235 //ZZ case Iop_QSub32Sx4:
5236 //ZZ case Iop_QSub64Sx2: {
5237 //ZZ HReg res = newVRegV(env);
5238 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5239 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5240 //ZZ UInt size;
5241 //ZZ switch (e->Iex.Binop.op) {
5242 //ZZ case Iop_QSub8Sx16: size = 0; break;
5243 //ZZ case Iop_QSub16Sx8: size = 1; break;
5244 //ZZ case Iop_QSub32Sx4: size = 2; break;
5245 //ZZ case Iop_QSub64Sx2: size = 3; break;
5246 //ZZ default:
5247 //ZZ ppIROp(e->Iex.Binop.op);
5248 //ZZ vpanic("Illegal element size in VQSUBS");
5249 //ZZ }
5250 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
5251 //ZZ res, argL, argR, size, True));
5252 //ZZ return res;
5253 //ZZ }
5254 //ZZ case Iop_Max8Ux16:
5255 //ZZ case Iop_Max16Ux8:
5256 //ZZ case Iop_Max32Ux4: {
5257 //ZZ HReg res = newVRegV(env);
5258 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5259 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5260 //ZZ UInt size;
5261 //ZZ switch (e->Iex.Binop.op) {
5262 //ZZ case Iop_Max8Ux16: size = 0; break;
5263 //ZZ case Iop_Max16Ux8: size = 1; break;
5264 //ZZ case Iop_Max32Ux4: size = 2; break;
5265 //ZZ default: vpanic("Illegal element size in VMAXU");
5266 //ZZ }
5267 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
5268 //ZZ res, argL, argR, size, True));
5269 //ZZ return res;
5270 //ZZ }
5271 //ZZ case Iop_Max8Sx16:
5272 //ZZ case Iop_Max16Sx8:
5273 //ZZ case Iop_Max32Sx4: {
5274 //ZZ HReg res = newVRegV(env);
5275 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5276 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5277 //ZZ UInt size;
5278 //ZZ switch (e->Iex.Binop.op) {
5279 //ZZ case Iop_Max8Sx16: size = 0; break;
5280 //ZZ case Iop_Max16Sx8: size = 1; break;
5281 //ZZ case Iop_Max32Sx4: size = 2; break;
//ZZ             default: vpanic("Illegal element size in VMAXS");
5283 //ZZ }
5284 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
5285 //ZZ res, argL, argR, size, True));
5286 //ZZ return res;
5287 //ZZ }
5288 //ZZ case Iop_Min8Ux16:
5289 //ZZ case Iop_Min16Ux8:
5290 //ZZ case Iop_Min32Ux4: {
5291 //ZZ HReg res = newVRegV(env);
5292 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5293 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5294 //ZZ UInt size;
5295 //ZZ switch (e->Iex.Binop.op) {
5296 //ZZ case Iop_Min8Ux16: size = 0; break;
5297 //ZZ case Iop_Min16Ux8: size = 1; break;
5298 //ZZ case Iop_Min32Ux4: size = 2; break;
//ZZ             default: vpanic("Illegal element size in VMINU");
5300 //ZZ }
5301 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
5302 //ZZ res, argL, argR, size, True));
5303 //ZZ return res;
5304 //ZZ }
5305 //ZZ case Iop_Min8Sx16:
5306 //ZZ case Iop_Min16Sx8:
5307 //ZZ case Iop_Min32Sx4: {
5308 //ZZ HReg res = newVRegV(env);
5309 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5310 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5311 //ZZ UInt size;
5312 //ZZ switch (e->Iex.Binop.op) {
5313 //ZZ case Iop_Min8Sx16: size = 0; break;
5314 //ZZ case Iop_Min16Sx8: size = 1; break;
5315 //ZZ case Iop_Min32Sx4: size = 2; break;
//ZZ             default: vpanic("Illegal element size in VMINS");
5317 //ZZ }
5318 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
5319 //ZZ res, argL, argR, size, True));
5320 //ZZ return res;
5321 //ZZ }
5322 //ZZ case Iop_Sar8x16:
5323 //ZZ case Iop_Sar16x8:
5324 //ZZ case Iop_Sar32x4:
5325 //ZZ case Iop_Sar64x2: {
5326 //ZZ HReg res = newVRegV(env);
5327 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5328 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5329 //ZZ HReg argR2 = newVRegV(env);
5330 //ZZ HReg zero = newVRegV(env);
5331 //ZZ UInt size;
5332 //ZZ switch (e->Iex.Binop.op) {
5333 //ZZ case Iop_Sar8x16: size = 0; break;
5334 //ZZ case Iop_Sar16x8: size = 1; break;
5335 //ZZ case Iop_Sar32x4: size = 2; break;
5336 //ZZ case Iop_Sar64x2: size = 3; break;
5337 //ZZ default: vassert(0);
5338 //ZZ }
5339 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5340 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5341 //ZZ argR2, zero, argR, size, True));
5342 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5343 //ZZ res, argL, argR2, size, True));
5344 //ZZ return res;
5345 //ZZ }
5346 //ZZ case Iop_Sal8x16:
5347 //ZZ case Iop_Sal16x8:
5348 //ZZ case Iop_Sal32x4:
5349 //ZZ case Iop_Sal64x2: {
5350 //ZZ HReg res = newVRegV(env);
5351 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5352 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5353 //ZZ UInt size;
5354 //ZZ switch (e->Iex.Binop.op) {
5355 //ZZ case Iop_Sal8x16: size = 0; break;
5356 //ZZ case Iop_Sal16x8: size = 1; break;
5357 //ZZ case Iop_Sal32x4: size = 2; break;
5358 //ZZ case Iop_Sal64x2: size = 3; break;
5359 //ZZ default: vassert(0);
5360 //ZZ }
5361 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5362 //ZZ res, argL, argR, size, True));
5363 //ZZ return res;
5364 //ZZ }
5365 //ZZ case Iop_Shr8x16:
5366 //ZZ case Iop_Shr16x8:
5367 //ZZ case Iop_Shr32x4:
5368 //ZZ case Iop_Shr64x2: {
5369 //ZZ HReg res = newVRegV(env);
5370 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5371 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5372 //ZZ HReg argR2 = newVRegV(env);
5373 //ZZ HReg zero = newVRegV(env);
5374 //ZZ UInt size;
5375 //ZZ switch (e->Iex.Binop.op) {
5376 //ZZ case Iop_Shr8x16: size = 0; break;
5377 //ZZ case Iop_Shr16x8: size = 1; break;
5378 //ZZ case Iop_Shr32x4: size = 2; break;
5379 //ZZ case Iop_Shr64x2: size = 3; break;
5380 //ZZ default: vassert(0);
5381 //ZZ }
5382 //ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5383 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5384 //ZZ argR2, zero, argR, size, True));
5385 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5386 //ZZ res, argL, argR2, size, True));
5387 //ZZ return res;
5388 //ZZ }
5389 //ZZ case Iop_Shl8x16:
5390 //ZZ case Iop_Shl16x8:
5391 //ZZ case Iop_Shl32x4:
5392 //ZZ case Iop_Shl64x2: {
5393 //ZZ HReg res = newVRegV(env);
5394 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5395 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5396 //ZZ UInt size;
5397 //ZZ switch (e->Iex.Binop.op) {
5398 //ZZ case Iop_Shl8x16: size = 0; break;
5399 //ZZ case Iop_Shl16x8: size = 1; break;
5400 //ZZ case Iop_Shl32x4: size = 2; break;
5401 //ZZ case Iop_Shl64x2: size = 3; break;
5402 //ZZ default: vassert(0);
5403 //ZZ }
5404 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5405 //ZZ res, argL, argR, size, True));
5406 //ZZ return res;
5407 //ZZ }
5408 //ZZ case Iop_QShl8x16:
5409 //ZZ case Iop_QShl16x8:
5410 //ZZ case Iop_QShl32x4:
5411 //ZZ case Iop_QShl64x2: {
5412 //ZZ HReg res = newVRegV(env);
5413 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5414 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5415 //ZZ UInt size;
5416 //ZZ switch (e->Iex.Binop.op) {
5417 //ZZ case Iop_QShl8x16: size = 0; break;
5418 //ZZ case Iop_QShl16x8: size = 1; break;
5419 //ZZ case Iop_QShl32x4: size = 2; break;
5420 //ZZ case Iop_QShl64x2: size = 3; break;
5421 //ZZ default: vassert(0);
5422 //ZZ }
5423 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
5424 //ZZ res, argL, argR, size, True));
5425 //ZZ return res;
5426 //ZZ }
5427 //ZZ case Iop_QSal8x16:
5428 //ZZ case Iop_QSal16x8:
5429 //ZZ case Iop_QSal32x4:
5430 //ZZ case Iop_QSal64x2: {
5431 //ZZ HReg res = newVRegV(env);
5432 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5433 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5434 //ZZ UInt size;
5435 //ZZ switch (e->Iex.Binop.op) {
5436 //ZZ case Iop_QSal8x16: size = 0; break;
5437 //ZZ case Iop_QSal16x8: size = 1; break;
5438 //ZZ case Iop_QSal32x4: size = 2; break;
5439 //ZZ case Iop_QSal64x2: size = 3; break;
5440 //ZZ default: vassert(0);
5441 //ZZ }
5442 //ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5443 //ZZ res, argL, argR, size, True));
5444 //ZZ return res;
5445 //ZZ }
5446 //ZZ case Iop_QShlN8x16:
5447 //ZZ case Iop_QShlN16x8:
5448 //ZZ case Iop_QShlN32x4:
5449 //ZZ case Iop_QShlN64x2: {
5450 //ZZ HReg res = newVRegV(env);
5451 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5452 //ZZ UInt size, imm;
5453 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5454 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
5456 //ZZ "second argument only\n");
5457 //ZZ }
5458 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5459 //ZZ switch (e->Iex.Binop.op) {
5460 //ZZ case Iop_QShlN8x16: size = 8 | imm; break;
5461 //ZZ case Iop_QShlN16x8: size = 16 | imm; break;
5462 //ZZ case Iop_QShlN32x4: size = 32 | imm; break;
5463 //ZZ case Iop_QShlN64x2: size = 64 | imm; break;
5464 //ZZ default: vassert(0);
5465 //ZZ }
5466 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5467 //ZZ res, argL, size, True));
5468 //ZZ return res;
5469 //ZZ }
5470 //ZZ case Iop_QShlN8Sx16:
5471 //ZZ case Iop_QShlN16Sx8:
5472 //ZZ case Iop_QShlN32Sx4:
5473 //ZZ case Iop_QShlN64Sx2: {
5474 //ZZ HReg res = newVRegV(env);
5475 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5476 //ZZ UInt size, imm;
5477 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5478 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
//ZZ             vpanic("ARM target supports Iop_QShlNASxB with constant "
5480 //ZZ "second argument only\n");
5481 //ZZ }
5482 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5483 //ZZ switch (e->Iex.Binop.op) {
5484 //ZZ case Iop_QShlN8Sx16: size = 8 | imm; break;
5485 //ZZ case Iop_QShlN16Sx8: size = 16 | imm; break;
5486 //ZZ case Iop_QShlN32Sx4: size = 32 | imm; break;
5487 //ZZ case Iop_QShlN64Sx2: size = 64 | imm; break;
5488 //ZZ default: vassert(0);
5489 //ZZ }
5490 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5491 //ZZ res, argL, size, True));
5492 //ZZ return res;
5493 //ZZ }
5494 //ZZ case Iop_QSalN8x16:
5495 //ZZ case Iop_QSalN16x8:
5496 //ZZ case Iop_QSalN32x4:
5497 //ZZ case Iop_QSalN64x2: {
5498 //ZZ HReg res = newVRegV(env);
5499 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5500 //ZZ UInt size, imm;
5501 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5502 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
5504 //ZZ "second argument only\n");
5505 //ZZ }
5506 //ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5507 //ZZ switch (e->Iex.Binop.op) {
5508 //ZZ case Iop_QSalN8x16: size = 8 | imm; break;
5509 //ZZ case Iop_QSalN16x8: size = 16 | imm; break;
5510 //ZZ case Iop_QSalN32x4: size = 32 | imm; break;
5511 //ZZ case Iop_QSalN64x2: size = 64 | imm; break;
5512 //ZZ default: vassert(0);
5513 //ZZ }
5514 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5515 //ZZ res, argL, size, True));
5516 //ZZ return res;
5517 //ZZ }
5518 case Iop_ShrN64x2:
5519 case Iop_ShrN32x4:
5520 case Iop_ShrN16x8:
5521 case Iop_ShrN8x16:
5522 case Iop_SarN64x2:
5523 case Iop_SarN32x4:
5524 case Iop_SarN16x8:
5525 case Iop_SarN8x16:
5526 case Iop_ShlN64x2:
5527 case Iop_ShlN32x4:
5528 case Iop_ShlN16x8:
5529 case Iop_ShlN8x16:
5530 {
5531 IRExpr* argL = e->Iex.Binop.arg1;
5532 IRExpr* argR = e->Iex.Binop.arg2;
5533 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
5534 UInt amt = argR->Iex.Const.con->Ico.U8;
5535 UInt limit = 0;
5536 ARM64VecShiftOp op = ARM64vecsh_INVALID;
5537 switch (e->Iex.Binop.op) {
5538 case Iop_ShrN64x2:
5539 op = ARM64vecsh_USHR64x2; limit = 63; break;
5540 case Iop_ShrN32x4:
5541 op = ARM64vecsh_USHR32x4; limit = 31; break;
5542 case Iop_ShrN16x8:
5543 op = ARM64vecsh_USHR16x8; limit = 15; break;
5544 case Iop_ShrN8x16:
5545 op = ARM64vecsh_USHR8x16; limit = 7; break;
5546 case Iop_SarN64x2:
5547 op = ARM64vecsh_SSHR64x2; limit = 63; break;
5548 case Iop_SarN32x4:
5549 op = ARM64vecsh_SSHR32x4; limit = 31; break;
5550 case Iop_SarN16x8:
5551 op = ARM64vecsh_SSHR16x8; limit = 15; break;
5552 case Iop_SarN8x16:
5553 op = ARM64vecsh_SSHR8x16; limit = 7; break;
5554 case Iop_ShlN64x2:
5555 op = ARM64vecsh_SHL64x2; limit = 63; break;
5556 case Iop_ShlN32x4:
5557 op = ARM64vecsh_SHL32x4; limit = 31; break;
5558 case Iop_ShlN16x8:
5559 op = ARM64vecsh_SHL16x8; limit = 15; break;
5560 case Iop_ShlN8x16:
5561 op = ARM64vecsh_SHL8x16; limit = 7; break;
5562 default:
5563 vassert(0);
5564 }
5565 if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) {
5566 HReg src = iselV128Expr(env, argL);
5567 HReg dst = newVRegV(env);
5568 if (amt > 0) {
5569 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
5570 } else {
5571 dst = src;
5572 }
5573 return dst;
5574 }
5575 }
5576 /* else fall out; this is unhandled */
5577 break;
5578 }
5579 //ZZ case Iop_CmpGT8Ux16:
5580 //ZZ case Iop_CmpGT16Ux8:
5581 //ZZ case Iop_CmpGT32Ux4: {
5582 //ZZ HReg res = newVRegV(env);
5583 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5584 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5585 //ZZ UInt size;
5586 //ZZ switch (e->Iex.Binop.op) {
5587 //ZZ case Iop_CmpGT8Ux16: size = 0; break;
5588 //ZZ case Iop_CmpGT16Ux8: size = 1; break;
5589 //ZZ case Iop_CmpGT32Ux4: size = 2; break;
5590 //ZZ default: vassert(0);
5591 //ZZ }
5592 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5593 //ZZ res, argL, argR, size, True));
5594 //ZZ return res;
5595 //ZZ }
5596 //ZZ case Iop_CmpGT8Sx16:
5597 //ZZ case Iop_CmpGT16Sx8:
5598 //ZZ case Iop_CmpGT32Sx4: {
5599 //ZZ HReg res = newVRegV(env);
5600 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5601 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5602 //ZZ UInt size;
5603 //ZZ switch (e->Iex.Binop.op) {
5604 //ZZ case Iop_CmpGT8Sx16: size = 0; break;
5605 //ZZ case Iop_CmpGT16Sx8: size = 1; break;
5606 //ZZ case Iop_CmpGT32Sx4: size = 2; break;
5607 //ZZ default: vassert(0);
5608 //ZZ }
5609 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5610 //ZZ res, argL, argR, size, True));
5611 //ZZ return res;
5612 //ZZ }
5613 //ZZ case Iop_CmpEQ8x16:
5614 //ZZ case Iop_CmpEQ16x8:
5615 //ZZ case Iop_CmpEQ32x4: {
5616 //ZZ HReg res = newVRegV(env);
5617 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5618 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5619 //ZZ UInt size;
5620 //ZZ switch (e->Iex.Binop.op) {
5621 //ZZ case Iop_CmpEQ8x16: size = 0; break;
5622 //ZZ case Iop_CmpEQ16x8: size = 1; break;
5623 //ZZ case Iop_CmpEQ32x4: size = 2; break;
5624 //ZZ default: vassert(0);
5625 //ZZ }
5626 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5627 //ZZ res, argL, argR, size, True));
5628 //ZZ return res;
5629 //ZZ }
5630 //ZZ case Iop_Mul8x16:
5631 //ZZ case Iop_Mul16x8:
5632 //ZZ case Iop_Mul32x4: {
5633 //ZZ HReg res = newVRegV(env);
5634 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5635 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5636 //ZZ UInt size = 0;
5637 //ZZ switch(e->Iex.Binop.op) {
5638 //ZZ case Iop_Mul8x16: size = 0; break;
5639 //ZZ case Iop_Mul16x8: size = 1; break;
5640 //ZZ case Iop_Mul32x4: size = 2; break;
5641 //ZZ default: vassert(0);
5642 //ZZ }
5643 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5644 //ZZ res, argL, argR, size, True));
5645 //ZZ return res;
5646 //ZZ }
5647 //ZZ case Iop_Mul32Fx4: {
5648 //ZZ HReg res = newVRegV(env);
5649 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5650 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5651 //ZZ UInt size = 0;
5652 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5653 //ZZ res, argL, argR, size, True));
5654 //ZZ return res;
5655 //ZZ }
5656 case Iop_Mull8Ux8:
5657 case Iop_Mull16Ux4:
5658 case Iop_Mull32Ux2: {
5659 HReg res = newVRegV(env);
5660 HReg argL = iselDblExpr(env, e->Iex.Binop.arg1);
5661 HReg argR = iselDblExpr(env, e->Iex.Binop.arg2);
5662 UInt size = 0;
5663 ARM64VecBinOp op = ARM64vecb_INVALID;
5664
5665 switch(e->Iex.Binop.op) {
5666 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8x8; break;
5667 case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break;
5668 case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break;
5669 default: vassert(0);
5670 }
5671 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
5672 return res;
5673 }
5674 //ZZ
5675 //ZZ case Iop_Mull8Sx8:
5676 //ZZ case Iop_Mull16Sx4:
5677 //ZZ case Iop_Mull32Sx2: {
5678 //ZZ HReg res = newVRegV(env);
5679 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5680 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5681 //ZZ UInt size = 0;
5682 //ZZ switch(e->Iex.Binop.op) {
5683 //ZZ case Iop_Mull8Sx8: size = 0; break;
5684 //ZZ case Iop_Mull16Sx4: size = 1; break;
5685 //ZZ case Iop_Mull32Sx2: size = 2; break;
5686 //ZZ default: vassert(0);
5687 //ZZ }
5688 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5689 //ZZ res, argL, argR, size, True));
5690 //ZZ return res;
5691 //ZZ }
5692 //ZZ
5693 //ZZ case Iop_QDMulHi16Sx8:
5694 //ZZ case Iop_QDMulHi32Sx4: {
5695 //ZZ HReg res = newVRegV(env);
5696 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5697 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5698 //ZZ UInt size = 0;
5699 //ZZ switch(e->Iex.Binop.op) {
5700 //ZZ case Iop_QDMulHi16Sx8: size = 1; break;
5701 //ZZ case Iop_QDMulHi32Sx4: size = 2; break;
5702 //ZZ default: vassert(0);
5703 //ZZ }
5704 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5705 //ZZ res, argL, argR, size, True));
5706 //ZZ return res;
5707 //ZZ }
5708 //ZZ
5709 //ZZ case Iop_QRDMulHi16Sx8:
5710 //ZZ case Iop_QRDMulHi32Sx4: {
5711 //ZZ HReg res = newVRegV(env);
5712 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5713 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5714 //ZZ UInt size = 0;
5715 //ZZ switch(e->Iex.Binop.op) {
5716 //ZZ case Iop_QRDMulHi16Sx8: size = 1; break;
5717 //ZZ case Iop_QRDMulHi32Sx4: size = 2; break;
5718 //ZZ default: vassert(0);
5719 //ZZ }
5720 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5721 //ZZ res, argL, argR, size, True));
5722 //ZZ return res;
5723 //ZZ }
5724 //ZZ
5725 //ZZ case Iop_QDMulLong16Sx4:
5726 //ZZ case Iop_QDMulLong32Sx2: {
5727 //ZZ HReg res = newVRegV(env);
5728 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5729 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5730 //ZZ UInt size = 0;
5731 //ZZ switch(e->Iex.Binop.op) {
5732 //ZZ case Iop_QDMulLong16Sx4: size = 1; break;
5733 //ZZ case Iop_QDMulLong32Sx2: size = 2; break;
5734 //ZZ default: vassert(0);
5735 //ZZ }
5736 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5737 //ZZ res, argL, argR, size, True));
5738 //ZZ return res;
5739 //ZZ }
5740 //ZZ case Iop_PolynomialMul8x16: {
5741 //ZZ HReg res = newVRegV(env);
5742 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5743 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5744 //ZZ UInt size = 0;
5745 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5746 //ZZ res, argL, argR, size, True));
5747 //ZZ return res;
5748 //ZZ }
5749 //ZZ case Iop_Max32Fx4: {
5750 //ZZ HReg res = newVRegV(env);
5751 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5752 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5753 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5754 //ZZ res, argL, argR, 2, True));
5755 //ZZ return res;
5756 //ZZ }
5757 //ZZ case Iop_Min32Fx4: {
5758 //ZZ HReg res = newVRegV(env);
5759 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5760 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5761 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5762 //ZZ res, argL, argR, 2, True));
5763 //ZZ return res;
5764 //ZZ }
5765 //ZZ case Iop_PwMax32Fx4: {
5766 //ZZ HReg res = newVRegV(env);
5767 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5768 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5769 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5770 //ZZ res, argL, argR, 2, True));
5771 //ZZ return res;
5772 //ZZ }
5773 //ZZ case Iop_PwMin32Fx4: {
5774 //ZZ HReg res = newVRegV(env);
5775 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5776 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5777 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5778 //ZZ res, argL, argR, 2, True));
5779 //ZZ return res;
5780 //ZZ }
5781 //ZZ case Iop_CmpGT32Fx4: {
5782 //ZZ HReg res = newVRegV(env);
5783 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5784 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5785 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5786 //ZZ res, argL, argR, 2, True));
5787 //ZZ return res;
5788 //ZZ }
5789 //ZZ case Iop_CmpGE32Fx4: {
5790 //ZZ HReg res = newVRegV(env);
5791 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5792 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5793 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5794 //ZZ res, argL, argR, 2, True));
5795 //ZZ return res;
5796 //ZZ }
5797 //ZZ case Iop_CmpEQ32Fx4: {
5798 //ZZ HReg res = newVRegV(env);
5799 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5800 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5801 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5802 //ZZ res, argL, argR, 2, True));
5803 //ZZ return res;
5804 //ZZ }
5805 //ZZ
5806 //ZZ case Iop_PolynomialMull8x8: {
5807 //ZZ HReg res = newVRegV(env);
5808 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5809 //ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5810 //ZZ UInt size = 0;
5811 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5812 //ZZ res, argL, argR, size, True));
5813 //ZZ return res;
5814 //ZZ }
5815 //ZZ case Iop_F32ToFixed32Ux4_RZ:
5816 //ZZ case Iop_F32ToFixed32Sx4_RZ:
5817 //ZZ case Iop_Fixed32UToF32x4_RN:
5818 //ZZ case Iop_Fixed32SToF32x4_RN: {
5819 //ZZ HReg res = newVRegV(env);
5820 //ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5821 //ZZ ARMNeonUnOp op;
5822 //ZZ UInt imm6;
5823 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5824 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5825 //ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
5826 //ZZ "second argument less than 33 only\n");
5827 //ZZ }
5828 //ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5829 //ZZ vassert(imm6 <= 32 && imm6 > 0);
5830 //ZZ imm6 = 64 - imm6;
5831 //ZZ switch(e->Iex.Binop.op) {
5832 //ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5833 //ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5834 //ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5835 //ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5836 //ZZ default: vassert(0);
5837 //ZZ }
5838 //ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5839 //ZZ return res;
5840 //ZZ }
5841 //ZZ /*
5842 //ZZ FIXME remove if not used
5843 //ZZ case Iop_VDup8x16:
5844 //ZZ case Iop_VDup16x8:
5845 //ZZ case Iop_VDup32x4: {
5846 //ZZ HReg res = newVRegV(env);
5847 //ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5848 //ZZ UInt imm4;
5849 //ZZ UInt index;
5850 //ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5851 //ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5852 //ZZ vpanic("ARM supports Iop_VDup with constant "
5853 //ZZ "second argument less than 16 only\n");
5854 //ZZ }
5855 //ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5856 //ZZ switch(e->Iex.Binop.op) {
5857 //ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5858 //ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5859 //ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5860 //ZZ default: vassert(0);
5861 //ZZ }
5862 //ZZ if (imm4 >= 16) {
5863 //ZZ vpanic("ARM supports Iop_VDup with constant "
5864 //ZZ "second argument less than 16 only\n");
5865 //ZZ }
5866 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5867 //ZZ res, argL, imm4, True));
5868 //ZZ return res;
5869 //ZZ }
5870 //ZZ */
5871 //ZZ case Iop_PwAdd8x16:
5872 //ZZ case Iop_PwAdd16x8:
5873 //ZZ case Iop_PwAdd32x4: {
5874 //ZZ HReg res = newVRegV(env);
5875 //ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5876 //ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5877 //ZZ UInt size = 0;
5878 //ZZ switch(e->Iex.Binop.op) {
5879 //ZZ case Iop_PwAdd8x16: size = 0; break;
5880 //ZZ case Iop_PwAdd16x8: size = 1; break;
5881 //ZZ case Iop_PwAdd32x4: size = 2; break;
5882 //ZZ default: vassert(0);
5883 //ZZ }
5884 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5885 //ZZ res, argL, argR, size, True));
5886 //ZZ return res;
5887 //ZZ }
5888 /* ... */
5889 default:
5890 break;
5891 } /* switch on the binop */
5892 } /* if (e->tag == Iex_Binop) */
5893
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      /* Vector FP binary ops carry the rounding mode as arg1 and the
         two operands as arg2/arg3; map the IROp to the corresponding
         ARM64 vector instruction, if one exists. */
      ARM64VecBinOp vecbop = ARM64vecb_INVALID;
      switch (triop->op) {
         case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
         case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
         case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
         case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
         case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
         case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
         case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
         case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
         default: break;   /* not a vector FP binop; fall through */
      }
      if (vecbop != ARM64vecb_INVALID) {
         HReg argL = iselV128Expr(env, triop->arg2);
         HReg argR = iselV128Expr(env, triop->arg3);
         HReg dst = newVRegV(env);
         /* Install the requested rounding mode (arg1) in FPCR before
            emitting the arithmetic instruction. */
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
         return dst;
      }
5916
5917 //ZZ switch (triop->op) {
5918 //ZZ case Iop_ExtractV128: {
5919 //ZZ HReg res = newVRegV(env);
5920 //ZZ HReg argL = iselNeonExpr(env, triop->arg1);
5921 //ZZ HReg argR = iselNeonExpr(env, triop->arg2);
5922 //ZZ UInt imm4;
5923 //ZZ if (triop->arg3->tag != Iex_Const ||
5924 //ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5925 //ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5926 //ZZ "third argument less than 16 only\n");
5927 //ZZ }
5928 //ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5929 //ZZ if (imm4 >= 16) {
5930 //ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5931 //ZZ "third argument less than 16 only\n");
5932 //ZZ }
5933 //ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5934 //ZZ res, argL, argR, imm4, True));
5935 //ZZ return res;
5936 //ZZ }
5937 //ZZ default:
5938 //ZZ break;
5939 //ZZ }
5940 }
5941
5942 //ZZ if (e->tag == Iex_ITE) { // VFD
5943 //ZZ ARMCondCode cc;
5944 //ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5945 //ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5946 //ZZ HReg dst = newVRegV(env);
5947 //ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5948 //ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
5949 //ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5950 //ZZ return dst;
5951 //ZZ }
5952
5953 v128_expr_bad:
5954 ppIRExpr(e);
5955 vpanic("iselV128Expr_wrk");
5956 }
5957
5958
5959 /*---------------------------------------------------------*/
5960 /*--- ISEL: Floating point expressions (64 bit) ---*/
5961 /*---------------------------------------------------------*/
5962
5963 /* Compute a 64-bit floating point value into a register, the identity
5964 of which is returned. As with iselIntExpr_R, the reg may be either
5965 real or virtual; in any case it must not be changed by subsequent
5966 code emitted by the caller. */
5967
iselDblExpr(ISelEnv * env,IRExpr * e)5968 static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5969 {
5970 HReg r = iselDblExpr_wrk( env, e );
5971 # if 0
5972 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5973 # endif
5974 vassert(hregClass(r) == HRcFlt64);
5975 vassert(hregIsVirtual(r));
5976 return r;
5977 }
5978
5979 /* DO NOT CALL THIS DIRECTLY */
iselDblExpr_wrk(ISelEnv * env,IRExpr * e)5980 static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5981 {
5982 IRType ty = typeOfIRExpr(env->type_env,e);
5983 vassert(e);
5984 vassert(ty == Ity_F64 || ty == Ity_I64);
5985
5986 if (e->tag == Iex_RdTmp) {
5987 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5988 }
5989
5990 if (e->tag == Iex_Const) {
5991 IRConst* con = e->Iex.Const.con;
5992 if (con->tag == Ico_F64i) {
5993 HReg src = newVRegI(env);
5994 HReg dst = newVRegD(env);
5995 addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
5996 addInstr(env, ARM64Instr_VDfromX(dst, src));
5997 return dst;
5998 }
5999 }
6000
6001 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
6002 vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64);
6003 HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
6004 HReg res = newVRegD(env);
6005 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
6006 return res;
6007 }
6008
6009 if (e->tag == Iex_Get) {
6010 Int offs = e->Iex.Get.offset;
6011 if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
6012 HReg rD = newVRegD(env);
6013 HReg rN = get_baseblock_register();
6014 addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
6015 return rD;
6016 }
6017 }
6018
6019 if (e->tag == Iex_Unop) {
6020 switch (e->Iex.Unop.op) {
6021 //ZZ case Iop_ReinterpI64asF64: {
6022 //ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6023 //ZZ return iselNeon64Expr(env, e->Iex.Unop.arg);
6024 //ZZ } else {
6025 //ZZ HReg srcHi, srcLo;
6026 //ZZ HReg dst = newVRegD(env);
6027 //ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
6028 //ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
6029 //ZZ return dst;
6030 //ZZ }
6031 //ZZ }
6032 case Iop_NegF64: {
6033 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
6034 HReg dst = newVRegD(env);
6035 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
6036 return dst;
6037 }
6038 case Iop_AbsF64: {
6039 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
6040 HReg dst = newVRegD(env);
6041 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
6042 return dst;
6043 }
6044 case Iop_F32toF64: {
6045 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
6046 HReg dst = newVRegD(env);
6047 addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
6048 return dst;
6049 }
6050 case Iop_I32UtoF64:
6051 case Iop_I32StoF64: {
6052 /* Rounding mode is not involved here, since the
6053 conversion can always be done without loss of
6054 precision. */
6055 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
6056 HReg dst = newVRegD(env);
6057 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
6058 ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
6059 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
6060 return dst;
6061 }
6062 default:
6063 break;
6064 }
6065 }
6066
6067 if (e->tag == Iex_Binop) {
6068 switch (e->Iex.Binop.op) {
6069 case Iop_RoundF64toInt: {
6070 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
6071 HReg dst = newVRegD(env);
6072 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6073 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
6074 return dst;
6075 }
6076 case Iop_SqrtF64: {
6077 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
6078 HReg dst = newVRegD(env);
6079 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6080 addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
6081 return dst;
6082 }
6083 case Iop_I64StoF64:
6084 case Iop_I64UtoF64: {
6085 ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
6086 ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
6087 HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
6088 set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6089 HReg dstS = newVRegD(env);
6090 addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
6091 return dstS;
6092 }
6093 default:
6094 break;
6095 }
6096 }
6097
6098 if (e->tag == Iex_Triop) {
6099 IRTriop* triop = e->Iex.Triop.details;
6100 ARM64FpBinOp dblop = ARM64fpb_INVALID;
6101 switch (triop->op) {
6102 case Iop_DivF64: dblop = ARM64fpb_DIV; break;
6103 case Iop_MulF64: dblop = ARM64fpb_MUL; break;
6104 case Iop_SubF64: dblop = ARM64fpb_SUB; break;
6105 case Iop_AddF64: dblop = ARM64fpb_ADD; break;
6106 default: break;
6107 }
6108 if (dblop != ARM64fpb_INVALID) {
6109 HReg argL = iselDblExpr(env, triop->arg2);
6110 HReg argR = iselDblExpr(env, triop->arg3);
6111 HReg dst = newVRegD(env);
6112 set_FPCR_rounding_mode(env, triop->arg1);
6113 addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
6114 return dst;
6115 }
6116 }
6117
6118 //ZZ if (e->tag == Iex_ITE) { // VFD
6119 //ZZ if (ty == Ity_F64
6120 //ZZ && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
6121 //ZZ HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
6122 //ZZ HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
6123 //ZZ HReg dst = newVRegD(env);
6124 //ZZ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
6125 //ZZ ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
6126 //ZZ addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
6127 //ZZ return dst;
6128 //ZZ }
6129 //ZZ }
6130
6131 ppIRExpr(e);
6132 vpanic("iselDblExpr_wrk");
6133 }
6134
6135
6136 /*---------------------------------------------------------*/
6137 /*--- ISEL: Floating point expressions (32 bit) ---*/
6138 /*---------------------------------------------------------*/
6139
6140 /* Compute a 32-bit floating point value into a register, the identity
6141 of which is returned. As with iselIntExpr_R, the reg may be either
6142 real or virtual; in any case it must not be changed by subsequent
6143 code emitted by the caller. Values are generated into HRcFlt64
6144 registers despite the values themselves being Ity_F32s. */
6145
iselFltExpr(ISelEnv * env,IRExpr * e)6146 static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
6147 {
6148 HReg r = iselFltExpr_wrk( env, e );
6149 # if 0
6150 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
6151 # endif
6152 vassert(hregClass(r) == HRcFlt64);
6153 vassert(hregIsVirtual(r));
6154 return r;
6155 }
6156
6157 /* DO NOT CALL THIS DIRECTLY */
/* Worker for iselFltExpr.  Selects code to compute an Ity_F32
   expression into a (virtual) register, which is returned.  Note
   that, per the comment heading this section, F32 values are held in
   HRcFlt64 (D-class) registers.  Panics on expression forms the
   backend does not handle. */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Value already lives in the vreg bound to the temp. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
//ZZ       ARMAModeV* am;
//ZZ       HReg res = newVRegF(env);
//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
//ZZ       return res;
//ZZ    }

   /* Guest-state read: only handled when the offset is 4-aligned and
      within the range representable by the S-register load form;
      otherwise fall through to the panic at the bottom. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI32asF32: {
//ZZ             HReg dst = newVRegF(env);
//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//ZZ             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
//ZZ             return dst;
//ZZ          }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      /* For all binops below, arg1 is the IR rounding mode.  It is
         installed in FPCR (via set_FPCR_rounding_mode) before the
         operation, so the ordering of these statements matters. */
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion: F64 -> F32, rounded per arg1. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* Int -> F32 can lose precision, hence rounding mode. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      /* arg1 = rounding mode, arg2/arg3 = operands. */
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

//ZZ
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F32
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          ARMCondCode cc;
//ZZ          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegF(env);
//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* Unhandled expression form. */
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
6311
6312
6313 /*---------------------------------------------------------*/
6314 /*--- ISEL: Statements ---*/
6315 /*---------------------------------------------------------*/
6316
/* Select instructions for a single IR statement, appending the chosen
   instructions to env->code.  Panics (via stmt_fail) on any statement
   form the arm64 backend does not handle. */
static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

      /* --------- STORE --------- */
      /* little-endian write to memory */
      case Ist_Store: {
         IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         /* Only 64-bit little-endian addresses are meaningful here. */
         if (tya != Ity_I64 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I64) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         /* Vector/FP stores take a plain register address rather than
            an addressing mode. */
         if (tyd == Ity_V128) {
            HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64) {
            HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
            return;
         }
         if (tyd == Ity_F32) {
            HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
            return;
         }

//ZZ          if (tyd == Ity_I16) {
//ZZ             HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ             ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
//ZZ                                           False/*!isLoad*/,
//ZZ                                           False/*!isSignedLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_I8) {
//ZZ             HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ             ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_I64) {
//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ                HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
//ZZ                ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ                addInstr(env, ARMInstr_NLdStD(False, dD, am));
//ZZ             } else {
//ZZ                HReg rDhi, rDlo, rA;
//ZZ                iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
//ZZ                rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
//ZZ                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
//ZZ                                              ARMAMode1_RI(rA,4)));
//ZZ                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
//ZZ                                              ARMAMode1_RI(rA,0)));
//ZZ             }
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_F64) {
//ZZ             HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
//ZZ             ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_F32) {
//ZZ             HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
//ZZ             ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_V128) {
//ZZ             HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
//ZZ             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_NLdStQ(False, qD, am));
//ZZ             return;
//ZZ          }

         break;
      }

//ZZ       /* --------- CONDITIONAL STORE --------- */
//ZZ       /* conditional little-endian write to memory */
//ZZ       case Ist_StoreG: {
//ZZ          IRStoreG* sg   = stmt->Ist.StoreG.details;
//ZZ          IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
//ZZ          IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
//ZZ          IREndness end  = sg->end;
//ZZ
//ZZ          if (tya != Ity_I32 || end != Iend_LE)
//ZZ             goto stmt_fail;
//ZZ
//ZZ          switch (tyd) {
//ZZ             case Ity_I8:
//ZZ             case Ity_I32: {
//ZZ                HReg        rD = iselIntExpr_R(env, sg->data);
//ZZ                ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
//ZZ                ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ                addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
//ZZ                                 (cc, False/*!isLoad*/, rD, am));
//ZZ                return;
//ZZ             }
//ZZ             case Ity_I16: {
//ZZ                HReg        rD = iselIntExpr_R(env, sg->data);
//ZZ                ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
//ZZ                ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ                addInstr(env, ARMInstr_LdSt16(cc,
//ZZ                                              False/*!isLoad*/,
//ZZ                                              False/*!isSignedLoad*/, rD, am));
//ZZ                return;
//ZZ             }
//ZZ             default:
//ZZ                break;
//ZZ          }
//ZZ          break;
//ZZ       }
//ZZ
//ZZ       /* --------- CONDITIONAL LOAD --------- */
//ZZ       /* conditional little-endian load from memory */
//ZZ       case Ist_LoadG: {
//ZZ          IRLoadG*  lg   = stmt->Ist.LoadG.details;
//ZZ          IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
//ZZ          IREndness end  = lg->end;
//ZZ
//ZZ          if (tya != Ity_I32 || end != Iend_LE)
//ZZ             goto stmt_fail;
//ZZ
//ZZ          switch (lg->cvt) {
//ZZ             case ILGop_8Uto32:
//ZZ             case ILGop_Ident32: {
//ZZ                HReg        rAlt = iselIntExpr_R(env, lg->alt);
//ZZ                ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
//ZZ                HReg        rD   = lookupIRTemp(env, lg->dst);
//ZZ                addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ                ARMCondCode cc   = iselCondCode(env, lg->guard);
//ZZ                addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
//ZZ                                                        : ARMInstr_LdSt8U)
//ZZ                                 (cc, True/*isLoad*/, rD, am));
//ZZ                return;
//ZZ             }
//ZZ             case ILGop_16Sto32:
//ZZ             case ILGop_16Uto32:
//ZZ             case ILGop_8Sto32: {
//ZZ                HReg        rAlt = iselIntExpr_R(env, lg->alt);
//ZZ                ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
//ZZ                HReg        rD   = lookupIRTemp(env, lg->dst);
//ZZ                addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ                ARMCondCode cc   = iselCondCode(env, lg->guard);
//ZZ                if (lg->cvt == ILGop_8Sto32) {
//ZZ                   addInstr(env, ARMInstr_Ld8S(cc, rD, am));
//ZZ                } else {
//ZZ                   vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
//ZZ                   Bool sx = lg->cvt == ILGop_16Sto32;
//ZZ                   addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
//ZZ                }
//ZZ                return;
//ZZ             }
//ZZ             default:
//ZZ                break;
//ZZ          }
//ZZ          break;
//ZZ       }

      /* --------- PUT --------- */
      /* write guest state, fixed offset */
      case Ist_Put: {
         IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         UInt   offs = (UInt)stmt->Ist.Put.offset;
         /* Each case requires the offset to be suitably aligned and
            within the scaled-immediate range of the corresponding
            load/store form (scale << 12). */
         if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8 && offs < (1<<12)) {
            HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_V128 && offs < (1<<12)) {
            HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
            HReg addr = mk_baseblock_128bit_access_addr(env, offs);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg dD  = iselDblExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
            return;
         }
         if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg dD  = iselFltExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
            return;
         }

//ZZ          if (tyd == Ity_I64) {
//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ                HReg addr = newVRegI(env);
//ZZ                HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
//ZZ                addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
//ZZ                                                   stmt->Ist.Put.offset));
//ZZ                addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
//ZZ             } else {
//ZZ                HReg rDhi, rDlo;
//ZZ                ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                              stmt->Ist.Put.offset + 0);
//ZZ                ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                              stmt->Ist.Put.offset + 4);
//ZZ                iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
//ZZ                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                              rDhi, am4));
//ZZ                addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                              rDlo, am0));
//ZZ             }
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_F64) {
//ZZ             // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ             // In which case we'll have to generate more longwinded code.
//ZZ             ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ             HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
//ZZ             addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          if (tyd == Ity_F32) {
//ZZ             // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ             // In which case we'll have to generate more longwinded code.
//ZZ             ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ             HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
//ZZ             addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
         break;
      }

      /* --------- TMP --------- */
      /* assign value to temporary */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty  = typeOfIRTemp(env->type_env, tmp);

         if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            /* We could do a lot better here.  But for the time being: */
            HReg dst = lookupIRTemp(env, tmp);
            HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_MovI(dst, rD));
            return;
         }
         if (ty == Ity_I1) {
            /* Here, we are generating a I1 value into a 64 bit register.
               Make sure the value in the register is only zero or one,
               but no other.  This allows optimisation of the
               1Uto64(tmp:I1) case, by making it simply a copy of the
               register holding 'tmp'.  The point being that the value in
               the register holding 'tmp' can only have been created
               here.  LATER: that seems dangerous; safer to do 'tmp & 1'
               in that case.  Also, could do this just with a single CINC
               insn. */
            /* CLONE-01 */
            HReg zero = newVRegI(env);
            HReg one  = newVRegI(env);
            HReg dst  = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_Imm64(zero, 0));
            addInstr(env, ARM64Instr_Imm64(one,  1));
            ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            return;
         }
         if (ty == Ity_F64) {
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8, dst, src));
            return;
         }
         if (ty == Ity_F32) {
            /* F32 values live in D registers, hence the 8-byte move. */
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
            return;
         }
         if (ty == Ity_V128) {
            HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(16, dst, src));
            return;
         }
         break;
      }

      /* --------- Call to DIRTY helper --------- */
      /* call complex ("dirty") helper function */
      case Ist_Dirty: {
         IRDirty* d = stmt->Ist.Dirty.details;

         /* Figure out the return type, if any. */
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break;
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to 0x555..555
            if this is a conditional call that returns a value and the
            call is skipped. */
         UInt   addToSp = 0;
         RetLoc rloc    = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID: {
               /* No return value.  Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            }
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               /* The returned value is in x0.  Park it in the register
                  associated with tmp. */
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and *retloc tells
                  us where.  Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  doHelperCall. */
               vassert(rloc.pri == RLPri_V128SpRel);
               vassert(rloc.spOff < 256); // stay sane
               vassert(addToSp >= 16); // ditto
               vassert(addToSp < 256); // ditto
               HReg dst = lookupIRTemp(env, d->tmp);
               HReg tmp = newVRegI(env); // the address of the returned value
               addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
               addInstr(env, ARM64Instr_Arith(tmp, tmp,
                                              ARM64RIA_I12((UShort)rloc.spOff, 0),
                                              True/*isAdd*/ ));
               addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
               addInstr(env, ARM64Instr_AddToSP(addToSp));
               return;
            }
            default:
               /*NOTREACHED*/
               vassert(0);
         }
         break;
      }

      /* --------- Load Linked and Store Conditional --------- */
      case Ist_LLSC: {
         if (stmt->Ist.LLSC.storedata == NULL) {
            /* LL */
            IRTemp res = stmt->Ist.LLSC.result;
            IRType ty  = typeOfIRTemp(env->type_env, res);
            if (ty == Ity_I64 || ty == Ity_I32
                || ty == Ity_I16 || ty == Ity_I8) {
               Int  szB   = 0;
               HReg r_dst = lookupIRTemp(env, res);
               HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
               switch (ty) {
                  case Ity_I8:  szB = 1; break;
                  case Ity_I16: szB = 2; break;
                  case Ity_I32: szB = 4; break;
                  case Ity_I64: szB = 8; break;
                  default:      vassert(0);
               }
               /* The LdrEX pseudo-insn takes its address in X4 and
                  delivers the loaded value in X2 (fixed registers). */
               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
               addInstr(env, ARM64Instr_LdrEX(szB));
               addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
               return;
            }
            goto stmt_fail;
         } else {
            /* SC */
            IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
            if (tyd == Ity_I64 || tyd == Ity_I32
                || tyd == Ity_I16 || tyd == Ity_I8) {
               Int  szB = 0;
               HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
               HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
               switch (tyd) {
                  case Ity_I8:  szB = 1; break;
                  case Ity_I16: szB = 2; break;
                  case Ity_I32: szB = 4; break;
                  case Ity_I64: szB = 8; break;
                  default:      vassert(0);
               }
               /* StrEX takes data in X2, address in X4, and leaves its
                  success/failure code in X0 (fixed registers). */
               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
               addInstr(env, ARM64Instr_StrEX(szB));
            } else {
               goto stmt_fail;
            }
            /* now X0 is 1 if failed, 0 if success.  Change to IR
               conventions (0 is fail, 1 is success).  Also transfer
               result to r_res. */
            IRTemp    res   = stmt->Ist.LLSC.result;
            IRType    ty    = typeOfIRTemp(env->type_env, res);
            HReg      r_res = lookupIRTemp(env, res);
            ARM64RIL* one   = mb_mkARM64RIL_I(1);
            vassert(ty == Ity_I1);
            vassert(one);
            addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
                                           ARM64lo_XOR));
            /* And be conservative -- mask off all but the lowest bit. */
            addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
                                           ARM64lo_AND));
            return;
         }
         break;
      }

      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, ARM64Instr_MFence());
               return;
//ZZ             case Imbe_CancelReservation:
//ZZ                addInstr(env, ARMInstr_CLREX());
//ZZ                return;
            default:
               break;
         }
         break;

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
          return;

      /* --------- NO-OP --------- */
      case Ist_NoOp:
          return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         if (stmt->Ist.Exit.dst->tag != Ico_U64)
            vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");

         ARM64CondCode cc
            = iselCondCode(env, stmt->Ist.Exit.guard);
         ARM64AMode* amPC
            = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

         /* Case: boring transfer to known address */
         if (stmt->Ist.Exit.jk == Ijk_Boring
             /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
             /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
            if (env->chainingAllowed) {
               /* .. almost always true .. */
               /* Skip the event check at the dst if this is a forwards
                  edge. */
               Bool toFastEP
                  = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
               if (0) vex_printf("%s", toFastEP ? "Y" : ",");
               addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                                amPC, cc, toFastEP));
            } else {
               /* .. very occasionally .. */
               /* We can't use chaining, so ask for an assisted transfer,
                  as that's the only alternative that is allowable. */
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
            }
            return;
         }

//ZZ          /* Case: assisted transfer to arbitrary address */
//ZZ          switch (stmt->Ist.Exit.jk) {
//ZZ             /* Keep this list in sync with that in iselNext below */
//ZZ             case Ijk_ClientReq:
//ZZ             case Ijk_NoDecode:
//ZZ             case Ijk_NoRedir:
//ZZ             case Ijk_Sys_syscall:
//ZZ             case Ijk_InvalICache:
//ZZ             case Ijk_Yield:
//ZZ             {
//ZZ                HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
//ZZ                addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
//ZZ                                                 stmt->Ist.Exit.jk));
//ZZ                return;
//ZZ             }
//ZZ             default:
//ZZ                break;
//ZZ          }

         /* Do we ever expect to see any other kind? */
         goto stmt_fail;
      }

      default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}
6878
6879
6880 /*---------------------------------------------------------*/
6881 /*--- ISEL: Basic block terminators (Nexts) ---*/
6882 /*---------------------------------------------------------*/
6883
/* Generate code for the block-end transfer: write the next guest IP
   to offsIP in the guest state and leave the block via a direct,
   indirect, or assisted exit as appropriate. */
static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Known-constant destination: a direct (chainable) transfer is
      possible for boring and call jump kinds. */
   if (next->tag == Iex_Const) {
      IRConst* dstCon = next->Iex.Const.con;
      vassert(dstCon->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            /* The common case.  A forwards edge may skip the event
               check at the destination. */
            Bool toFastEP = ((Addr64)dstCon->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARM64Instr_XDirect(dstCon->Ico.U64,
                                             amPC, ARM64cc_AL,
                                             toFastEP));
         } else {
            /* Chaining is disallowed, so an assisted transfer is the
               only permissible alternative. */
            HReg rDst = iselIntExpr_R(env, next);
            addInstr(env, ARM64Instr_XAssisted(rDst, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Call/return (== boring) transfer to a computed address. */
   if (jk == Ijk_Boring || jk == Ijk_Ret || jk == Ijk_Call) {
      HReg        rDst = iselIntExpr_R(env, next);
      ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
      if (env->chainingAllowed) {
         addInstr(env, ARM64Instr_XIndir(rDst, amPC, ARM64cc_AL));
      } else {
         addInstr(env, ARM64Instr_XAssisted(rDst, amPC, ARM64cc_AL,
                                            Ijk_Boring));
      }
      return;
   }

   /* Everything else requires an assisted transfer that reports the
      jump kind to the dispatcher. */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_FlushDCache: {
         HReg        rDst = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         addInstr(env, ARM64Instr_XAssisted(rDst, amPC, ARM64cc_AL, jk));
         return;
      }
      default:
         break;
   }

   /* Unhandled jump kind: dump state and give up. */
   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}
6968
6969
6970 /*---------------------------------------------------------*/
6971 /*--- Insn selector top-level ---*/
6972 /*---------------------------------------------------------*/
6973
6974 /* Translate an entire SB to arm64 code. */
6975
iselSB_ARM64(IRSB * bb,VexArch arch_host,VexArchInfo * archinfo_host,VexAbiInfo * vbi,Int offs_Host_EvC_Counter,Int offs_Host_EvC_FailAddr,Bool chainingAllowed,Bool addProfInc,Addr64 max_ga)6976 HInstrArray* iselSB_ARM64 ( IRSB* bb,
6977 VexArch arch_host,
6978 VexArchInfo* archinfo_host,
6979 VexAbiInfo* vbi/*UNUSED*/,
6980 Int offs_Host_EvC_Counter,
6981 Int offs_Host_EvC_FailAddr,
6982 Bool chainingAllowed,
6983 Bool addProfInc,
6984 Addr64 max_ga )
6985 {
6986 Int i, j;
6987 HReg hreg, hregHI;
6988 ISelEnv* env;
6989 UInt hwcaps_host = archinfo_host->hwcaps;
6990 ARM64AMode *amCounter, *amFailAddr;
6991
6992 /* sanity ... */
6993 vassert(arch_host == VexArchARM64);
6994
6995 /* guard against unexpected space regressions */
6996 vassert(sizeof(ARM64Instr) <= 32);
6997
6998 /* Make up an initial environment to use. */
6999 env = LibVEX_Alloc(sizeof(ISelEnv));
7000 env->vreg_ctr = 0;
7001
7002 /* Set up output code array. */
7003 env->code = newHInstrArray();
7004
7005 /* Copy BB's type env. */
7006 env->type_env = bb->tyenv;
7007
7008 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
7009 change as we go along. */
7010 env->n_vregmap = bb->tyenv->types_used;
7011 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7012 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7013
7014 /* and finally ... */
7015 env->chainingAllowed = chainingAllowed;
7016 env->hwcaps = hwcaps_host;
7017 env->previous_rm = NULL;
7018 env->max_ga = max_ga;
7019
7020 /* For each IR temporary, allocate a suitably-kinded virtual
7021 register. */
7022 j = 0;
7023 for (i = 0; i < env->n_vregmap; i++) {
7024 hregHI = hreg = INVALID_HREG;
7025 switch (bb->tyenv->types[i]) {
7026 case Ity_I1:
7027 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
7028 hreg = mkHReg(j++, HRcInt64, True);
7029 break;
7030 case Ity_I128:
7031 hreg = mkHReg(j++, HRcInt64, True);
7032 hregHI = mkHReg(j++, HRcInt64, True);
7033 break;
7034 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
7035 case Ity_F64:
7036 hreg = mkHReg(j++, HRcFlt64, True);
7037 break;
7038 case Ity_V128:
7039 hreg = mkHReg(j++, HRcVec128, True);
7040 break;
7041 default:
7042 ppIRType(bb->tyenv->types[i]);
7043 vpanic("iselBB(arm64): IRTemp type");
7044 }
7045 env->vregmap[i] = hreg;
7046 env->vregmapHI[i] = hregHI;
7047 }
7048 env->vreg_ctr = j;
7049
7050 /* The very first instruction must be an event check. */
7051 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
7052 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
7053 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
7054
7055 /* Possibly a block counter increment (for profiling). At this
7056 point we don't know the address of the counter, so just pretend
7057 it is zero. It will have to be patched later, but before this
7058 translation is used, by a call to LibVEX_patchProfCtr. */
7059 if (addProfInc) {
7060 vassert(0);
7061 //addInstr(env, ARM64Instr_ProfInc());
7062 }
7063
7064 /* Ok, finally we can iterate over the statements. */
7065 for (i = 0; i < bb->stmts_used; i++)
7066 iselStmt(env, bb->stmts[i]);
7067
7068 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
7069
7070 /* record the number of vregs we used. */
7071 env->code->n_vregs = env->vreg_ctr;
7072 return env->code;
7073 }
7074
7075
7076 /*---------------------------------------------------------------*/
7077 /*--- end host_arm64_isel.c ---*/
7078 /*---------------------------------------------------------------*/
7079